1 files changed, 433 insertions, 0 deletions
diff --git a/src/libs/graphics/sdl/scaleint.h b/src/libs/graphics/sdl/scaleint.h
new file mode 100644
index 0000000..e54de80
--- /dev/null
+++ b/src/libs/graphics/sdl/scaleint.h
@@ -0,0 +1,433 @@
+/*
+ * Copyright (C) 2005  Alex Volkov (codepro@usa.net)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+// Scalers Internals
+
+#ifndef SCALEINT_H_
+#define SCALEINT_H_
+
+#include "libs/graphics/sdl/sdl_common.h"
+#include "types.h"
+
+
+// Plain C names: SCALE_(name) pastes together the identifier Scale_name
+#define SCALE_(name) Scale ## _ ## name
+
+// Default pixel accessors: read/write one Uint32 through the pointer p
+#define SCALE_GETPIX(p)        ( *(Uint32 *)(p) )
+#define SCALE_SETPIX(p, c)     ( *(Uint32 *)(p) = (c) )
+
+// Plain C defaults; those passing 'fmt' need a variable so named in scope
+#define SCALE_CMPRGB(p1, p2) \
+			SCALE_(GetRGBDelta) (fmt, p1, p2)
+
+#define SCALE_TOYUV(p) \
+			SCALE_(RGBtoYUV) (fmt, p)
+
+#define SCALE_CMPYUV(p1, p2, toler) \
+			SCALE_(CmpYUV) (fmt, p1, p2, toler)
+// DiffYUV checks Y, U and V separately against the T* tolerances below
+#define SCALE_DIFFYUV(p1, p2) \
+			SCALE_(DiffYUV) (p1, p2)
+#define SCALE_DIFFYUV_TY 0x40
+#define SCALE_DIFFYUV_TU 0x12
+#define SCALE_DIFFYUV_TV 0x0c
+
+#define SCALE_GETY(p) \
+			SCALE_(GetPixY) (fmt, p)
+
+#define SCALE_BILINEAR_BLEND4(r0, r1, dst, dlen) \
+			SCALE_(Blend_bilinear) (r0, r1, dst, dlen)
+
+#define NO_PREFETCH     0
+#define INTEL_PREFETCH  1
+#define AMD_PREFETCH    2
+// Table indices: R/G/B and Y/U/V share the values 0..2, one set per dimension
+typedef enum
+{
+	YUV_XFORM_R = 0,
+	YUV_XFORM_G = 1,
+	YUV_XFORM_B = 2,
+	YUV_XFORM_Y = 0,
+	YUV_XFORM_U = 1,
+	YUV_XFORM_V = 2
+} RGB_YUV_INDEX;
+// Color conversion tables; all of them are defined outside of this header
+extern const int YUV_matrix[3][3];
+
+// pre-computed transformations for 8 bits per channel
+extern int RGB_to_YUV[/*RGB*/ 3][/*YUV*/ 3][ /*mult-res*/ 256];
+extern sint16 dRGB_to_dYUV[/*RGB*/ 3][/*YUV*/ 3][ /*mult-res*/ 512];
+// packed YUV value; this header masks Y at bits 16-23, U at 8-15, V at 0-7
+typedef Uint32 YUV_VECTOR;
+// pre-computed transformations for RGB555 (index: r5 << 10 | g5 << 5 | b5)
+extern YUV_VECTOR RGB15_to_YUV[0x8000];
+
+
+// Platform+Scaler function lookups:
+// one entry pairing an int flag with a scaler function (TFB_ScaleFunc)
+typedef struct
+{
+	int flag;
+	TFB_ScaleFunc func;
+} Scale_FuncDef_t;
+
+
+// Expands 'rect' in all directions by 'expansion', guarded by 'limits'.
+// 'rect' is non-const (presumably updated in place); 'limits' is read-only
+extern void Scale_ExpandRect (SDL_Rect* rect, int expansion,
+				const SDL_Rect* limits);
+
+
+// Standard plain C versions of support functions
+
+// Initialize various platform-specific features (nothing to do in plain C)
+static inline void
+SCALE_(PlatInit) (void)
+{
+}
+
+// Finish with various platform-specific features (nothing to do in plain C)
+static inline void
+SCALE_(PlatDone) (void)
+{
+}
+
+#if 0 // function form, compiled out in favor of the empty macro below
+static inline void
+SCALE_(Prefetch) (const void* p)
+{
+	/* no-op in pure C */
+	(void)p;
+}
+#else
+#	define Scale_Prefetch(p) /* expands to nothing: no prefetch in plain C */
+#endif
+
+// Compute the RGB distance squared between 2 pixels: dR^2 + dG^2 + dB^2
+// Plain C version; channels are the 8-bit fields at fmt->{R,G,B}shift
+static inline int
+SCALE_(GetRGBDelta) (const SDL_PixelFormat* fmt, Uint32 pix1, Uint32 pix2)
+{
+	int dr, dg, db;
+
+	// Each channel is converted to int before subtracting, so a negative
+	// difference is computed directly in signed arithmetic rather than
+	// wrapping as unsigned and being converted back afterwards
+	dr = (int)((pix1 >> fmt->Rshift) & 0xff)
+			- (int)((pix2 >> fmt->Rshift) & 0xff);
+	dg = (int)((pix1 >> fmt->Gshift) & 0xff)
+			- (int)((pix2 >> fmt->Gshift) & 0xff);
+	db = (int)((pix1 >> fmt->Bshift) & 0xff)
+			- (int)((pix2 >> fmt->Bshift) & 0xff);
+
+	return dr * dr + dg * dg + db * db;
+}
+
+// Returns the Y (intensity) component of a pixel's YUV value,
+// summed from the per-channel RGB_to_YUV tables. Plain C version
+static inline int
+SCALE_(GetPixY) (const SDL_PixelFormat* fmt, Uint32 pix)
+{
+	// each 8-bit channel selects its precomputed contribution to Y
+	const Uint32 red = (pix >> fmt->Rshift) & 0xff;
+	const Uint32 green = (pix >> fmt->Gshift) & 0xff;
+	const Uint32 blue = (pix >> fmt->Bshift) & 0xff;
+	int y = RGB_to_YUV [YUV_XFORM_R][YUV_XFORM_Y][red];
+	y += RGB_to_YUV [YUV_XFORM_G][YUV_XFORM_Y][green];
+	y += RGB_to_YUV [YUV_XFORM_B][YUV_XFORM_Y][blue];
+	return y;
+}
+
+static inline YUV_VECTOR
+SCALE_(RGBtoYUV) (const SDL_PixelFormat* fmt, Uint32 pix)
+{
+	// keep the top 5 bits of each 8-bit channel and pack them as RGB555
+	const Uint32 r5 = (pix >> (fmt->Rshift + 3)) & 0x1f;
+	const Uint32 g5 = (pix >> (fmt->Gshift + 3)) & 0x1f;
+	const Uint32 b5 = (pix >> (fmt->Bshift + 3)) & 0x1f;
+	return RGB15_to_YUV[(r5 << 10) | (g5 << 5) | b5];
+}
+
+// Compare 2 pixels with respect to their YUV representations.
+// The distance is |dY| + |dU| + |dV|; 'toler' is the largest distance
+// still considered close. Returns true: close; false: distant (> toler)
+// Plain C version
+static inline bool
+SCALE_(CmpYUV) (const SDL_PixelFormat* fmt, Uint32 pix1, Uint32 pix2, int toler)
+#if 1 // active variant: per-channel delta tables (dRGB_to_dYUV)
+{
+	int dr, dg, db;
+	int delta;
+
+	dr = ((pix1 >> fmt->Rshift) & 0xff) - ((pix2 >> fmt->Rshift) & 0xff) + 255;
+	dg = ((pix1 >> fmt->Gshift) & 0xff) - ((pix2 >> fmt->Gshift) & 0xff) + 255;
+	db = ((pix1 >> fmt->Bshift) & 0xff) - ((pix2 >> fmt->Bshift) & 0xff) + 255;
+	// the +255 bias maps each delta from -255..255 onto table index 0..510
+	// compute Y delta; bail out as soon as the running sum exceeds toler
+	delta = abs (dRGB_to_dYUV [YUV_XFORM_R][YUV_XFORM_Y][dr]
+			+ dRGB_to_dYUV [YUV_XFORM_G][YUV_XFORM_Y][dg]
+			+ dRGB_to_dYUV [YUV_XFORM_B][YUV_XFORM_Y][db]);
+	if (delta > toler)
+		return false;
+
+	// add U delta
+	delta += abs (dRGB_to_dYUV [YUV_XFORM_R][YUV_XFORM_U][dr]
+			+ dRGB_to_dYUV [YUV_XFORM_G][YUV_XFORM_U][dg]
+			+ dRGB_to_dYUV [YUV_XFORM_B][YUV_XFORM_U][db]);
+	if (delta > toler)
+		return false;
+	
+	// add V delta
+	delta += abs (dRGB_to_dYUV [YUV_XFORM_R][YUV_XFORM_V][dr]
+			+ dRGB_to_dYUV [YUV_XFORM_G][YUV_XFORM_V][dg]
+			+ dRGB_to_dYUV [YUV_XFORM_B][YUV_XFORM_V][db]);
+
+	return delta <= toler;
+}
+#else // disabled variant: packed YUV values from the RGB555 table
+{
+	int delta;
+	Uint32 yuv1, yuv2;
+
+	yuv1 = RGB15_to_YUV[
+			(((pix1 >> (fmt->Rshift + 3)) & 0x1f) << 10) |
+			(((pix1 >> (fmt->Gshift + 3)) & 0x1f) <<  5) |
+			(((pix1 >> (fmt->Bshift + 3)) & 0x1f)      )
+			];
+
+	yuv2 = RGB15_to_YUV[
+			(((pix2 >> (fmt->Rshift + 3)) & 0x1f) << 10) |
+			(((pix2 >> (fmt->Gshift + 3)) & 0x1f) <<  5) |
+			(((pix2 >> (fmt->Bshift + 3)) & 0x1f)      )
+			];
+
+	// compute Y delta (bits 16-23 of the packed value)
+	delta = abs ((yuv1 & 0xff0000) - (yuv2 & 0xff0000)) >> 16;
+	if (delta > toler)
+		return false;
+
+	// add U delta (bits 8-15)
+	delta += abs ((yuv1 & 0x00ff00) - (yuv2 & 0x00ff00)) >> 8;
+	if (delta > toler)
+		return false;
+	
+	// add V delta (bits 0-7)
+	delta += abs ((yuv1 & 0x0000ff) - (yuv2 & 0x0000ff));
+
+	return delta <= toler;
+}
+#endif
+
+// Check if 2 packed YUV values differ by more than the per-component
+// tolerances SCALE_DIFFYUV_TY / _TU / _TV (Y: bits 16-23, U: 8-15, V: 0-7)
+// returns 0: close; ~0: distant
+static inline int
+SCALE_(DiffYUV) (Uint32 yuv1, Uint32 yuv2)
+{
+	// branch-free compare (apart from the equality shortcut) -- assumes
+	// 2's complement integers and a sign-propagating >> on negative values;
+	// delta math only needs 25 bits; only the sign bits matter afterwards
+	sint32 delta, ret;
+
+	if (yuv1 == yuv2)
+		return 0;
+
+	// compute Y delta
+	delta = abs ((yuv1 & 0xff0000) - (yuv2 & 0xff0000));
+	ret = (SCALE_DIFFYUV_TY << 16) - delta; // negative when over tolerance
+	
+	// compute U delta
+	delta = abs ((yuv1 & 0x00ff00) - (yuv2 & 0x00ff00));
+	ret |= (SCALE_DIFFYUV_TU << 8) - delta; // OR keeps any sign bit set so far
+	
+	// compute V delta
+	delta = abs ((yuv1 & 0x0000ff) - (yuv2 & 0x0000ff));
+	ret |= SCALE_DIFFYUV_TV - delta; // save sign bit
+
+	return (ret >> 31);
+}
+
+// Averages two packed pixels, one byte lane at a time (1:1 ratio)
+static inline Uint32
+SCALE_(Blend_11) (Uint32 pix1, Uint32 pix2)
+{
+	// Approximates (pix1 + pix2) >> 1 per 8-bit lane. Each input is
+	// halved on its own, with bit 0 of every lane masked off first so
+	// nothing shifts into the lane below. The dropped low bits
+	// (pix1 & pix2 & low_mask) are deliberately not added back.
+	const Uint32 half1 = (pix1 & 0xfefefefe) >> 1;
+	const Uint32 half2 = (pix2 & 0xfefefefe) >> 1;
+	return half1 + half2;
+}
+
+// Averages four packed pixels per byte lane (1:1:1:1 ratio)
+static inline Uint32
+SCALE_(Blend_1111) (Uint32 pix1, Uint32 pix2,
+						Uint32 pix3, Uint32 pix4)
+{
+	// Approximates (pix1 + pix2 + pix3 + pix4) >> 2 per 8-bit lane.
+	// Every input is quartered separately after clearing the two low
+	// bits of each lane; the carry those low bits could have produced
+	// is deliberately left out.
+	Uint32 blend;
+	blend  = (pix1 & 0xfcfcfcfc) >> 2;
+	blend += (pix2 & 0xfcfcfcfc) >> 2;
+	blend += (pix3 & 0xfcfcfcfc) >> 2;
+	blend += (pix4 & 0xfcfcfcfc) >> 2;
+	return blend;
+}
+
+// Blends two packed pixels with 3:1 ratio, per byte lane
+static inline Uint32
+Scale_Blend_31 (Uint32 pix1, Uint32 pix2)
+{
+	// ~(3 * pix1 + pix2) / 4, built as pix1/2 + pix1/4 + pix2/4;
+	// bits shifted out of each lane are dropped
+	const Uint32 major = ((pix1 & 0xfefefefe) >> 1) + ((pix1 & 0xfcfcfcfc) >> 2);
+	return major + ((pix2 & 0xfcfcfcfc) >> 2);
+}
+
+// Blends three packed pixels with 2:1:1 ratio, per byte lane
+static inline Uint32
+Scale_Blend_211 (Uint32 pix1, Uint32 pix2, Uint32 pix3)
+{
+	// ~(2 * pix1 + pix2 + pix3) / 4, built as pix1/2 + pix2/4 + pix3/4;
+	// bits shifted out of each lane are dropped
+	const Uint32 half1 = (pix1 & 0xfefefefe) >> 1;
+	const Uint32 quarter23 = ((pix2 & 0xfcfcfcfc) >> 2) + ((pix3 & 0xfcfcfcfc) >> 2);
+	return half1 + quarter23;
+}
+
+// Blends three packed pixels with 5:2:1 ratio, per byte lane
+static inline Uint32
+Scale_Blend_521 (Uint32 pix1, Uint32 pix2, Uint32 pix3)
+{
+	// ~(5 * pix1 + 2 * pix2 + pix3) / 8 with truncated low bits;
+	// 0x02 per lane is added to offset the truncation (half-error)
+	Uint32 acc = 0x02020202;
+	acc += ((pix1 & 0xfefefefe) >> 1) + ((pix1 & 0xf8f8f8f8) >> 3);
+	acc += ((pix2 & 0xfcfcfcfc) >> 2) + ((pix3 & 0xf8f8f8f8) >> 3);
+	return acc;
+}
+
+// Blends three packed pixels with 6:1:1 ratio, per byte lane
+static inline Uint32
+Scale_Blend_611 (Uint32 pix1, Uint32 pix2, Uint32 pix3)
+{
+	// ~(6 * pix1 + pix2 + pix3) / 8 with truncated low bits;
+	// 0x02 per lane is added to offset the truncation (half-error)
+	Uint32 acc = 0x02020202;
+	acc += ((pix1 & 0xfefefefe) >> 1) + ((pix1 & 0xfcfcfcfc) >> 2);
+	acc += ((pix2 & 0xf8f8f8f8) >> 3) + ((pix3 & 0xf8f8f8f8) >> 3);
+	return acc;
+}
+
+// Blends three packed pixels with 2:3:3 ratio, per byte lane
+static inline Uint32
+Scale_Blend_233 (Uint32 pix1, Uint32 pix2, Uint32 pix3)
+{
+	// ~(2 * pix1 + 3 * pix2 + 3 * pix3) / 8 with truncated low bits;
+	// 0x02 per lane is added to offset the truncation (half-error)
+	Uint32 acc = ((pix1 & 0xfcfcfcfc) >> 2) + 0x02020202;
+	acc += ((pix2 & 0xfcfcfcfc) >> 2) + ((pix2 & 0xf8f8f8f8) >> 3);
+	acc += ((pix3 & 0xfcfcfcfc) >> 2) + ((pix3 & 0xf8f8f8f8) >> 3);
+	return acc;
+}
+
+// Blends three packed pixels with 14:1:1 ratio, per byte lane
+static inline Uint32
+Scale_Blend_e11 (Uint32 pix1, Uint32 pix2, Uint32 pix3)
+{
+	// ~(14 * pix1 + pix2 + pix3) >> 4; pix1's share is 1/2 + 1/4 + 1/8
+	// 0x03 per lane is added to offset the truncated low bits (half-error)
+	Uint32 acc = 0x03030303;
+	acc += ((pix1 & 0xfefefefe) >> 1) + ((pix1 & 0xfcfcfcfc) >> 2);
+	acc += ((pix1 & 0xf8f8f8f8) >> 3);
+	acc += ((pix2 & 0xf0f0f0f0) >> 4) + ((pix3 & 0xf0f0f0f0) >> 4);
+	return acc;
+}
+
+// Halves the intensity of every 8-bit lane of a packed pixel
+static inline Uint32
+SCALE_(HalfPixel) (Uint32 pix)
+{
+	return (pix >> 1) & 0x7f7f7f7f;
+}
+
+
+// Bilinear weighted blend of four pixels: sp[0..1] = row0[0..1],
+// sp[2..3] = row1[0..1]. Produces 4 blended pixels and writes them as a
+// 2x2 matrix: dp[0..1] = dst_p[0..1], dp[2..3] = dst_p[dlen..dlen + 1]
+// Pixels are computed using expanded weight matrix like so:
+//	('sp' - source pixel, 'dp' - destination pixel)
+//	dp[0] = (9*sp[0] + 3*sp[1] + 3*sp[2] + 1*sp[3]) / 16
+//	dp[1] = (3*sp[0] + 9*sp[1] + 1*sp[2] + 3*sp[3]) / 16
+//	dp[2] = (3*sp[0] + 1*sp[1] + 9*sp[2] + 3*sp[3]) / 16
+//	dp[3] = (1*sp[0] + 3*sp[1] + 3*sp[2] + 9*sp[3]) / 16
+static inline void
+SCALE_(Blend_bilinear) (const Uint32* row0, const Uint32* row1,
+					Uint32* dst_p, Uint32 dlen)
+{
+	// We lose some lower bits here and try to compensate for
+	// that by adding half-error values.
+	// In general, the error is minimal (+-7)
+	// The >>4 reduction is achieved gradually
+#	define BL_PACKED_HALF(p) \
+			(((p) & 0xfefefefe) >> 1)
+#	define BL_SUM(p1, p2) \
+			(BL_PACKED_HALF(p1) + BL_PACKED_HALF(p2))
+#	define BL_HALF_ERR  0x01010101
+#	define BL_SUM_WERR(p1, p2) \
+			(BL_PACKED_HALF(p1) + BL_PACKED_HALF(p2) + BL_HALF_ERR)
+
+	Uint32 sum1331, sum3113, sum1111;
+	
+	// start sum1331 as (p[1] + p[2]) / 2; it ends up (p0 + 3*(p1+p2) + p3) / 16
+	// start sum3113 as (p[0] + p[3]) / 2; it ends up (p1 + 3*(p0+p3) + p2) / 16
+	sum1331 = BL_SUM (row0[1], row1[0]);
+	sum3113 = BL_SUM (row0[0], row1[1]);
+	
+	// cache (p[0] + p[1] + p[2] + p[3]) / 4 in sum1111
+	sum1111 = BL_SUM_WERR (sum1331, sum3113);
+
+	sum1331 = BL_SUM_WERR (sum1331, sum1111);
+	sum1331 = BL_PACKED_HALF (sum1331);
+	sum3113 = BL_SUM_WERR (sum3113, sum1111);
+	sum3113 = BL_PACKED_HALF (sum3113);
+
+	// pixel 0 math -- (9*p[0] + 3*(p[1] + p[2]) + p[3]) / 16
+	dst_p[0] = BL_PACKED_HALF (row0[0]) + sum1331;
+
+	// pixel 1 math -- (9*p[1] + 3*(p[0] + p[3]) + p[2]) / 16
+	dst_p[1] = BL_PACKED_HALF (row0[1]) + sum3113;
+
+	// pixel 2 math -- (9*p[2] + 3*(p[0] + p[3]) + p[1]) / 16
+	dst_p[dlen] = BL_PACKED_HALF (row1[0]) + sum3113;
+
+	// pixel 3 math -- (9*p[3] + 3*(p[1] + p[2]) + p[0]) / 16
+	dst_p[dlen + 1] = BL_PACKED_HALF (row1[1]) + sum1331;
+
+#	undef BL_PACKED_HALF
+#	undef BL_SUM
+#	undef BL_HALF_ERR
+#	undef BL_SUM_WERR
+}
+
+#endif /* SCALEINT_H_ */