From 7f6002caba3f0a6749820c2772161caf55b8d267 Mon Sep 17 00:00:00 2001 From: neonloop Date: Fri, 7 May 2021 20:00:12 +0000 Subject: Initial commit (uqm-0.8.0) --- src/libs/graphics/sdl/scaleint.h | 433 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 src/libs/graphics/sdl/scaleint.h (limited to 'src/libs/graphics/sdl/scaleint.h') diff --git a/src/libs/graphics/sdl/scaleint.h b/src/libs/graphics/sdl/scaleint.h new file mode 100644 index 0000000..e54de80 --- /dev/null +++ b/src/libs/graphics/sdl/scaleint.h @@ -0,0 +1,433 @@ +/* + * Copyright (C) 2005 Alex Volkov (codepro@usa.net) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +// Scalers Internals + +#ifndef SCALEINT_H_ +#define SCALEINT_H_ + +#include "libs/graphics/sdl/sdl_common.h" +#include "types.h" + + +// Plain C names +#define SCALE_(name) Scale ## _ ## name + +// These are defaults +#define SCALE_GETPIX(p) ( *(Uint32 *)(p) ) +#define SCALE_SETPIX(p, c) ( *(Uint32 *)(p) = (c) ) + +// Plain C defaults +#define SCALE_CMPRGB(p1, p2) \ + SCALE_(GetRGBDelta) (fmt, p1, p2) + +#define SCALE_TOYUV(p) \ + SCALE_(RGBtoYUV) (fmt, p) + +#define SCALE_CMPYUV(p1, p2, toler) \ + SCALE_(CmpYUV) (fmt, p1, p2, toler) + +#define SCALE_DIFFYUV(p1, p2) \ + SCALE_(DiffYUV) (p1, p2) +#define SCALE_DIFFYUV_TY 0x40 +#define SCALE_DIFFYUV_TU 0x12 +#define SCALE_DIFFYUV_TV 0x0c + +#define SCALE_GETY(p) \ + SCALE_(GetPixY) (fmt, p) + +#define SCALE_BILINEAR_BLEND4(r0, r1, dst, dlen) \ + SCALE_(Blend_bilinear) (r0, r1, dst, dlen) + +#define NO_PREFETCH 0 +#define INTEL_PREFETCH 1 +#define AMD_PREFETCH 2 + +typedef enum +{ + YUV_XFORM_R = 0, + YUV_XFORM_G = 1, + YUV_XFORM_B = 2, + YUV_XFORM_Y = 0, + YUV_XFORM_U = 1, + YUV_XFORM_V = 2 +} RGB_YUV_INDEX; + +extern const int YUV_matrix[3][3]; + +// pre-computed transformations for 8 bits per channel +extern int RGB_to_YUV[/*RGB*/ 3][/*YUV*/ 3][ /*mult-res*/ 256]; +extern sint16 dRGB_to_dYUV[/*RGB*/ 3][/*YUV*/ 3][ /*mult-res*/ 512]; + +typedef Uint32 YUV_VECTOR; +// pre-computed transformations for RGB555 +extern YUV_VECTOR RGB15_to_YUV[0x8000]; + + +// Platform+Scaler function lookups +// +typedef struct +{ + int flag; + TFB_ScaleFunc func; +} Scale_FuncDef_t; + + +// expands the given rectangle in all directions by 'expansion' +// guarded by 'limits' +extern void Scale_ExpandRect (SDL_Rect* rect, int expansion, + const SDL_Rect* limits); + + +// Standard plain C versions of support functions + +// Initialize various platform-specific features +static inline void +SCALE_(PlatInit) (void) +{ +} + +// Finish with various platform-specific features +static inline void +SCALE_(PlatDone) (void) +{ +} + +#if 0 +static inline void +SCALE_(Prefetch) (const void* p) +{ + /* no-op in pure C */ + (void)p; +} +#else +# define Scale_Prefetch(p) +#endif + +// compute the RGB distance squared between 2 pixels +// Plain C version +static inline int +SCALE_(GetRGBDelta) (const SDL_PixelFormat* fmt, Uint32 pix1, Uint32 pix2) +{ + int c; + int delta; + + c = ((pix1 >> fmt->Rshift) & 0xff) - ((pix2 >> fmt->Rshift) & 0xff); + delta = c * c; + + c = ((pix1 >> fmt->Gshift) & 0xff) - ((pix2 >> fmt->Gshift) & 0xff); + delta += c * c; + + c = ((pix1 >> fmt->Bshift) & 0xff) - ((pix2 >> fmt->Bshift) & 0xff); + delta += c * c; + + return delta; +} + +// retrieve the Y (intensity) component of pixel's YUV +// Plain C version +static inline int +SCALE_(GetPixY) (const SDL_PixelFormat* fmt, Uint32 pix) +{ + Uint32 r, g, b; + + r = (pix >> fmt->Rshift) & 0xff; + g = (pix >> fmt->Gshift) & 0xff; + b = (pix >> fmt->Bshift) & 0xff; + + return RGB_to_YUV [YUV_XFORM_R][YUV_XFORM_Y][r] + + RGB_to_YUV [YUV_XFORM_G][YUV_XFORM_Y][g] + + RGB_to_YUV [YUV_XFORM_B][YUV_XFORM_Y][b]; +} + +static inline YUV_VECTOR +SCALE_(RGBtoYUV) (const SDL_PixelFormat* fmt, Uint32 pix) +{ + return RGB15_to_YUV[ + (((pix >> (fmt->Rshift + 3)) & 0x1f) << 10) | + (((pix >> (fmt->Gshift + 3)) & 0x1f) << 5) | + (((pix >> (fmt->Bshift + 3)) & 0x1f) ) + ]; +} + +// compare 2 pixels with respect to their YUV representations +// tolerance set by toler arg +// returns true: close; false: distant (-gt toler) +// Plain C version +static inline bool +SCALE_(CmpYUV) (const SDL_PixelFormat* fmt, Uint32 pix1, Uint32 pix2, int toler) +#if 1 +{ + int dr, dg, db; + int delta; + + dr = ((pix1 >> fmt->Rshift) & 0xff) - ((pix2 >> fmt->Rshift) & 0xff) + 255; + dg = ((pix1 >> fmt->Gshift) & 0xff) - ((pix2 >> fmt->Gshift) & 0xff) + 255; + db = ((pix1 >> fmt->Bshift) & 0xff) - ((pix2 >> fmt->Bshift) & 0xff) + 255; + + // compute Y delta + delta = abs (dRGB_to_dYUV [YUV_XFORM_R][YUV_XFORM_Y][dr] + + dRGB_to_dYUV [YUV_XFORM_G][YUV_XFORM_Y][dg] + + dRGB_to_dYUV [YUV_XFORM_B][YUV_XFORM_Y][db]); + if (delta > toler) + return false; + + // compute U delta + delta += abs (dRGB_to_dYUV [YUV_XFORM_R][YUV_XFORM_U][dr] + + dRGB_to_dYUV [YUV_XFORM_G][YUV_XFORM_U][dg] + + dRGB_to_dYUV [YUV_XFORM_B][YUV_XFORM_U][db]); + if (delta > toler) + return false; + + // compute V delta + delta += abs (dRGB_to_dYUV [YUV_XFORM_R][YUV_XFORM_V][dr] + + dRGB_to_dYUV [YUV_XFORM_G][YUV_XFORM_V][dg] + + dRGB_to_dYUV [YUV_XFORM_B][YUV_XFORM_V][db]); + + return delta <= toler; +} +#else +{ + int delta; + Uint32 yuv1, yuv2; + + yuv1 = RGB15_to_YUV[ + (((pix1 >> (fmt->Rshift + 3)) & 0x1f) << 10) | + (((pix1 >> (fmt->Gshift + 3)) & 0x1f) << 5) | + (((pix1 >> (fmt->Bshift + 3)) & 0x1f) ) + ]; + + yuv2 = RGB15_to_YUV[ + (((pix2 >> (fmt->Rshift + 3)) & 0x1f) << 10) | + (((pix2 >> (fmt->Gshift + 3)) & 0x1f) << 5) | + (((pix2 >> (fmt->Bshift + 3)) & 0x1f) ) + ]; + + // compute Y delta + delta = abs ((yuv1 & 0xff0000) - (yuv2 & 0xff0000)) >> 16; + if (delta > toler) + return false; + + // compute U delta + delta += abs ((yuv1 & 0x00ff00) - (yuv2 & 0x00ff00)) >> 8; + if (delta > toler) + return false; + + // compute V delta + delta += abs ((yuv1 & 0x0000ff) - (yuv2 & 0x0000ff)); + + return delta <= toler; +} +#endif + +// Check if 2 pixels are different with respect to their +// YUV representations +// returns 0: close; ~0: distant +static inline int +SCALE_(DiffYUV) (Uint32 yuv1, Uint32 yuv2) +{ + // non-branching version -- assumes 2's complement integers + // delta math only needs 25 bits and we have 32 available; + // only interested in the sign bits after subtraction + sint32 delta, ret; + + if (yuv1 == yuv2) + return 0; + + // compute Y delta + delta = abs ((yuv1 & 0xff0000) - (yuv2 & 0xff0000)); + ret = (SCALE_DIFFYUV_TY << 16) - delta; // save sign bit + + // compute U delta + delta = abs ((yuv1 & 0x00ff00) - (yuv2 & 0x00ff00)); + ret |= (SCALE_DIFFYUV_TU << 8) - delta; // save sign bit + + // compute V delta + delta = abs ((yuv1 & 0x0000ff) - (yuv2 & 0x0000ff)); + ret |= SCALE_DIFFYUV_TV - delta; // save sign bit + + return (ret >> 31); +} + +// blends two pixels with 1:1 ratio +static inline Uint32 +SCALE_(Blend_11) (Uint32 pix1, Uint32 pix2) +{ + /* (pix1 + pix2) >> 1 */ + return + /* lower bits can be safely ignored - the error is minimal + expression that calcs them is left for posterity + (pix1 & pix2 & low_mask) + + */ + ((pix1 & 0xfefefefe) >> 1) + ((pix2 & 0xfefefefe) >> 1); +} + +// blends four pixels with 1:1:1:1 ratio +static inline Uint32 +SCALE_(Blend_1111) (Uint32 pix1, Uint32 pix2, + Uint32 pix3, Uint32 pix4) +{ + /* (pix1 + pix2 + pix3 + pix4) >> 2 */ + return + /* lower bits can be safely ignored - the error is minimal + expression that calcs them is left for posterity + ((((pix1 & low_mask) + (pix2 & low_mask) + + (pix3 & low_mask) + (pix4 & low_mask) + ) >> 2) & low_mask) + + */ + ((pix1 & 0xfcfcfcfc) >> 2) + ((pix2 & 0xfcfcfcfc) >> 2) + + ((pix3 & 0xfcfcfcfc) >> 2) + ((pix4 & 0xfcfcfcfc) >> 2); +} + +// blends pixels with 3:1 ratio +static inline Uint32 +Scale_Blend_31 (Uint32 pix1, Uint32 pix2) +{ + /* (pix1 * 3 + pix2) / 4 */ + /* lower bits can be safely ignored - the error is minimal */ + return ((pix1 & 0xfefefefe) >> 1) + ((pix1 & 0xfcfcfcfc) >> 2) + + ((pix2 & 0xfcfcfcfc) >> 2); +} + +// blends pixels with 2:1:1 ratio +static inline Uint32 +Scale_Blend_211 (Uint32 pix1, Uint32 pix2, Uint32 pix3) +{ + /* (pix1 * 2 + pix2 + pix3) / 4 */ + /* lower bits can be safely ignored - the error is minimal */ + return ((pix1 & 0xfefefefe) >> 1) + + ((pix2 & 0xfcfcfcfc) >> 2) + + ((pix3 & 0xfcfcfcfc) >> 2); +} + +// blends pixels with 5:2:1 ratio +static inline Uint32 +Scale_Blend_521 (Uint32 pix1, Uint32 pix2, Uint32 pix3) +{ + /* (pix1 * 5 + pix2 * 2 + pix3) / 8 */ + /* lower bits can be safely ignored - the error is minimal */ + return ((pix1 & 0xfefefefe) >> 1) + ((pix1 & 0xf8f8f8f8) >> 3) + + ((pix2 & 0xfcfcfcfc) >> 2) + + ((pix3 & 0xf8f8f8f8) >> 3) + + 0x02020202 /* half-error */; +} + +// blends pixels with 6:1:1 ratio +static inline Uint32 +Scale_Blend_611 (Uint32 pix1, Uint32 pix2, Uint32 pix3) +{ + /* (pix1 * 6 + pix2 + pix3) / 8 */ + /* lower bits can be safely ignored - the error is minimal */ + return ((pix1 & 0xfefefefe) >> 1) + ((pix1 & 0xfcfcfcfc) >> 2) + + ((pix2 & 0xf8f8f8f8) >> 3) + + ((pix3 & 0xf8f8f8f8) >> 3) + + 0x02020202 /* half-error */; +} + +// blends pixels with 2:3:3 ratio +static inline Uint32 +Scale_Blend_233 (Uint32 pix1, Uint32 pix2, Uint32 pix3) +{ + /* (pix1 * 2 + pix2 * 3 + pix3 * 3) / 8 */ + /* lower bits can be safely ignored - the error is minimal */ + return ((pix1 & 0xfcfcfcfc) >> 2) + + ((pix2 & 0xfcfcfcfc) >> 2) + ((pix2 & 0xf8f8f8f8) >> 3) + + ((pix3 & 0xfcfcfcfc) >> 2) + ((pix3 & 0xf8f8f8f8) >> 3) + + 0x02020202 /* half-error */; +} + +// blends pixels with 14:1:1 ratio +static inline Uint32 +Scale_Blend_e11 (Uint32 pix1, Uint32 pix2, Uint32 pix3) +{ + /* (pix1 * 14 + pix2 + pix3) >> 4 */ + /* lower bits can be safely ignored - the error is minimal */ + return ((pix1 & 0xfefefefe) >> 1) + ((pix1 & 0xfcfcfcfc) >> 2) + + ((pix1 & 0xf8f8f8f8) >> 3) + + ((pix2 & 0xf0f0f0f0) >> 4) + + ((pix3 & 0xf0f0f0f0) >> 4) + + 0x03030303 /* half-error */; +} + +// Halfs the pixel's intensity +static inline Uint32 +SCALE_(HalfPixel) (Uint32 pix) +{ + return ((pix & 0xfefefefe) >> 1); +} + + +// Bilinear weighted blend of four pixels +// Function produces 4 blended pixels and writes them +// out to the surface (in 2x2 matrix) +// Pixels are computed using expanded weight matrix like so: +// ('sp' - source pixel, 'dp' - destination pixel) +// dp[0] = (9*sp[0] + 3*sp[1] + 3*sp[2] + 1*sp[3]) / 16 +// dp[1] = (3*sp[0] + 9*sp[1] + 1*sp[2] + 3*sp[3]) / 16 +// dp[2] = (3*sp[0] + 1*sp[1] + 9*sp[2] + 3*sp[3]) / 16 +// dp[3] = (1*sp[0] + 3*sp[1] + 3*sp[2] + 9*sp[3]) / 16 +static inline void +SCALE_(Blend_bilinear) (const Uint32* row0, const Uint32* row1, + Uint32* dst_p, Uint32 dlen) +{ + // We loose some lower bits here and try to compensate for + // that by adding half-error values. + // In general, the error is minimal (+-7) + // The >>4 reduction is achieved gradually +# define BL_PACKED_HALF(p) \ + (((p) & 0xfefefefe) >> 1) +# define BL_SUM(p1, p2) \ + (BL_PACKED_HALF(p1) + BL_PACKED_HALF(p2)) +# define BL_HALF_ERR 0x01010101 +# define BL_SUM_WERR(p1, p2) \ + (BL_PACKED_HALF(p1) + BL_PACKED_HALF(p2) + BL_HALF_ERR) + + Uint32 sum1111, sum1331, sum3113; + + // cache p[0] + 3*(p[1] + p[2]) + p[3] in sum1331 + // cache p[1] + 3*(p[0] + p[3]) + p[2] in sum3113 + sum1331 = BL_SUM (row0[1], row1[0]); + sum3113 = BL_SUM (row0[0], row1[1]); + + // cache p[0] + p[1] + p[2] + p[3] in sum1111 + sum1111 = BL_SUM_WERR (sum1331, sum3113); + + sum1331 = BL_SUM_WERR (sum1331, sum1111); + sum1331 = BL_PACKED_HALF (sum1331); + sum3113 = BL_SUM_WERR (sum3113, sum1111); + sum3113 = BL_PACKED_HALF (sum3113); + + // pixel 0 math -- (9*p[0] + 3*(p[1] + p[2]) + p[3]) / 16 + dst_p[0] = BL_PACKED_HALF (row0[0]) + sum1331; + + // pixel 1 math -- (9*p[1] + 3*(p[0] + p[3]) + p[2]) / 16 + dst_p[1] = BL_PACKED_HALF (row0[1]) + sum3113; + + // pixel 2 math -- (9*p[2] + 3*(p[0] + p[3]) + p[1]) / 16 + dst_p[dlen] = BL_PACKED_HALF (row1[0]) + sum3113; + + // pixel 3 math -- (9*p[3] + 3*(p[1] + p[2]) + p[0]) / 16 + dst_p[dlen + 1] = BL_PACKED_HALF (row1[1]) + sum1331; + +# undef BL_PACKED_HALF +# undef BL_SUM +# undef BL_HALF_ERR +# undef BL_SUM_WERR +} + +#endif /* SCALEINT_H_ */ -- cgit v1.2.3