From 74e3448a4cdb49154e7fe794bff90226ef4b0828 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Sun, 21 Sep 2003 12:10:32 +0000 Subject: added HQ3x filter svn-id: r10346 --- backends/gp32/gp32.cpp | 6 +- backends/sdl/sdl.cpp | 8 +- backends/sdl/sdl_gl.cpp | 8 +- base/gameDetector.cpp | 1 + common/scaler.cpp | 3831 ++++++++++++++++++++++++++++++++++++++++++++++- common/scaler.h | 4 +- 6 files changed, 3839 insertions(+), 19 deletions(-) diff --git a/backends/gp32/gp32.cpp b/backends/gp32/gp32.cpp index d81fbc2b8e..e78f185aa5 100644 --- a/backends/gp32/gp32.cpp +++ b/backends/gp32/gp32.cpp @@ -434,11 +434,11 @@ normal_mode:; /* // Distinguish 555 and 565 mode if (sdl_hwscreen->format->Rmask == 0x7C00) - Init_2xSaI(555); + InitScalers(555); else - Init_2xSaI(565); + InitScalers(565); */ - //Init_2xSaI(555); // ph0x fixme? + //InitScalers(555); // ph0x fixme? //ph0x fixme - tmpscreen needed? // Need some extra bytes around when using 2xSaI diff --git a/backends/sdl/sdl.cpp b/backends/sdl/sdl.cpp index daf4a58b0d..25e5aec488 100644 --- a/backends/sdl/sdl.cpp +++ b/backends/sdl/sdl.cpp @@ -90,6 +90,10 @@ void OSystem_SDL::load_gfx_mode() { _scaleFactor = 2; _scaler_proc = DotMatrix; break; + case GFX_HQ3X: + _scaleFactor = 3; + _scaler_proc = HQ3x; + break; case GFX_BILINEAR: case GFX_DOUBLESIZE: @@ -135,9 +139,9 @@ void OSystem_SDL::load_gfx_mode() { // Distinguish 555 and 565 mode if (_hwscreen->format->Rmask == 0x7C00) - Init_2xSaI(555); + InitScalers(555); else - Init_2xSaI(565); + InitScalers(565); // Need some extra bytes around when using 2xSaI uint16 *tmp_screen = (uint16 *)calloc(_tmpScreenWidth * (_screenHeight + 3), sizeof(uint16)); diff --git a/backends/sdl/sdl_gl.cpp b/backends/sdl/sdl_gl.cpp index 33dbd76dd2..6c1eaed351 100644 --- a/backends/sdl/sdl_gl.cpp +++ b/backends/sdl/sdl_gl.cpp @@ -147,6 +147,10 @@ void OSystem_SDL_OpenGL::load_gfx_mode() { _scaleFactor = 2; _scaler_proc = DotMatrix; break; + case GFX_HQ3X: + _scaleFactor = 3; + _scaler_proc = HQ3x; + break; default: error("unknown gfx mode %d", _mode); @@ -200,9 +204,9 @@ void OSystem_SDL_OpenGL::load_gfx_mode() { // Distinguish 555 and 565 mode if (_hwscreen->format->Rmask == 0x7C00) - Init_2xSaI(555); + InitScalers(555); else - Init_2xSaI(565); + InitScalers(565); } // diff --git a/base/gameDetector.cpp b/base/gameDetector.cpp index 1b88b0a37d..0a7e392a60 100644 --- a/base/gameDetector.cpp +++ b/base/gameDetector.cpp @@ -115,6 +115,7 @@ static const struct GraphicsMode gfx_modes[] = { {"advmame3x", "AdvMAME3x", GFX_ADVMAME3X}, {"tv2x", "TV2x", GFX_TV2X}, {"dotmatrix", "DotMatrix", GFX_DOTMATRIX}, + {"hq3x", "HQ3x", GFX_HQ3X}, {"opengl", "OpenGL", GFX_BILINEAR}, #else {"flipping", "Page Flipping", GFX_FLIPPING}, diff --git a/common/scaler.cpp b/common/scaler.cpp index 3198e5bfac..34b35ca614 100644 --- a/common/scaler.cpp +++ b/common/scaler.cpp @@ -23,6 +23,8 @@ #include "stdafx.h" #include "common/scummsys.h" #include "common/scaler.h" +#include "common/util.h" + // TODO: get rid of the colorMask etc. variables and instead use templates. // This should give a respectable boost, since variable access (i.e. memory reads) @@ -55,7 +57,9 @@ static const uint16 dotmatrix_555[16] = { }; static const uint16 *dotmatrix; -int Init_2xSaI(uint32 BitFormat) { +static void InitLUTs(uint32 BitFormat); + +void InitScalers(uint32 BitFormat) { if (BitFormat == 565) { colorMask = 0xF7DEF7DE; lowPixelMask = 0x08210821; @@ -77,10 +81,10 @@ int Init_2xSaI(uint32 BitFormat) { blueMask = 0x001F; dotmatrix = dotmatrix_555; } else { - return 0; + error("Unknwon bit format %d\n", BitFormat); } - return 1; + InitLUTs(BitFormat); } static inline int GetResult(uint32 A, uint32 B, uint32 C, uint32 D) { @@ -475,10 +479,10 @@ void _2xSaI(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - unsigned int nextlineSrc = srcPitch / sizeof(uint16); + const unsigned int nextlineSrc = srcPitch / sizeof(uint16); const uint16 *p = (const uint16 *)srcPtr; - unsigned int nextlineDst = dstPitch / sizeof(uint16); + const unsigned int nextlineDst = dstPitch / sizeof(uint16); uint16 *q = (uint16 *)dstPtr; uint16 A, B, C; @@ -511,10 +515,10 @@ void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPi void AdvMame3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - unsigned int nextlineSrc = srcPitch / sizeof(uint16); + const unsigned int nextlineSrc = srcPitch / sizeof(uint16); const uint16 *p = (const uint16 *)srcPtr; - unsigned int nextlineDst = dstPitch / sizeof(uint16); + const unsigned int nextlineDst = dstPitch / sizeof(uint16); uint16 *q = (uint16 *)dstPtr; uint16 A, B, C; @@ -606,10 +610,10 @@ void Normal3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit void TV2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - unsigned int nextlineSrc = srcPitch / sizeof(uint16); + const unsigned int nextlineSrc = srcPitch / sizeof(uint16); const uint16 *p = (const uint16 *)srcPtr; - unsigned int nextlineDst = dstPitch / sizeof(uint16); + const unsigned int nextlineDst = dstPitch / sizeof(uint16); uint16 *q = (uint16 *)dstPtr; while(height--) { @@ -637,10 +641,10 @@ static inline uint16 DOT_16(uint16 c, int j, int i) { void DotMatrix(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - unsigned int nextlineSrc = srcPitch / sizeof(uint16); + const unsigned int nextlineSrc = srcPitch / sizeof(uint16); const uint16 *p = (const uint16 *)srcPtr; - unsigned int nextlineDst = dstPitch / sizeof(uint16); + const unsigned int nextlineDst = dstPitch / sizeof(uint16); uint16 *q = (uint16 *)dstPtr; for (int j = 0, jj = 0; j < height; ++j, jj += 2) { @@ -656,6 +660,3811 @@ void DotMatrix(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPi } } +// +// The HQ3x filter (C++ version) by Maxim Stepin (see http://www.hiend3d.com/hq3x.html). +// Adapted to 16 bit output and optimized by Max Horn +// +static int RGBtoYUV[65536]; + +#define INTERPOLATE_3_1(x, y) Q_INTERPOLATE(x, x, x, y) + +#define INTERPOLATE_2_1_1(x, y, z) Q_INTERPOLATE(x, x, y, z) + +static inline uint16 INTERPOLATE_7_1(uint16 w1, uint16 w2) { + return ((((w1 & redblueMask) * 7 + (w2 & redblueMask)) >> 3) & redblueMask) | + ((((w1 & greenMask) * 7 + (w2 & greenMask)) >> 3) & greenMask); +} + +static inline uint16 INTERPOLATE_2_7_7(uint16 w1, uint16 w2, uint16 w3) { + return ((((w1 & redblueMask) * 2 + ((w2 & redblueMask) + (w3 & redblueMask)) * 7) >> 4) & redblueMask) | + ((((w1 & greenMask) * 2 + ((w2 & greenMask) + (w3 & greenMask)) * 7) >> 4) & greenMask); +} + +#define INTERPOLATE_1_1(x, y) INTERPOLATE(x, y) + +#define PIXEL00_1M *(q) = INTERPOLATE_3_1(w[5], w[1]); +#define PIXEL00_1U *(q) = INTERPOLATE_3_1(w[5], w[2]); +#define PIXEL00_1L *(q) = INTERPOLATE_3_1(w[5], w[4]); +#define PIXEL00_2 *(q) = INTERPOLATE_2_1_1(w[5], w[4], w[2]); +#define PIXEL00_4 *(q) = INTERPOLATE_2_7_7(w[5], w[4], w[2]); +#define PIXEL00_5 *(q) = INTERPOLATE_1_1(w[4], w[2]); +#define PIXEL00_C *(q) = w[5]; + +#define PIXEL01_1 *(q+1) = INTERPOLATE_3_1(w[5], w[2]); +#define PIXEL01_3 *(q+1) = INTERPOLATE_7_1(w[5], w[2]); +#define PIXEL01_6 *(q+1) = INTERPOLATE_3_1(w[2], w[5]); +#define PIXEL01_C *(q+1) = w[5]; + +#define PIXEL02_1M *(q+2) = INTERPOLATE_3_1(w[5], w[3]); +#define PIXEL02_1U *(q+2) = INTERPOLATE_3_1(w[5], w[2]); +#define PIXEL02_1R *(q+2) = INTERPOLATE_3_1(w[5], w[6]); +#define PIXEL02_2 *(q+2) = INTERPOLATE_2_1_1(w[5], w[2], w[6]); +#define PIXEL02_4 *(q+2) = INTERPOLATE_2_7_7(w[5], w[2], w[6]); +#define PIXEL02_5 *(q+2) = INTERPOLATE_1_1(w[2], w[6]); +#define PIXEL02_C *(q+2) = w[5]; + +#define PIXEL10_1 *(q+nextlineDst) = INTERPOLATE_3_1(w[5], w[4]); +#define PIXEL10_3 *(q+nextlineDst) = INTERPOLATE_7_1(w[5], w[4]); +#define PIXEL10_6 *(q+nextlineDst) = INTERPOLATE_3_1(w[4], w[5]); +#define PIXEL10_C *(q+nextlineDst) = w[5]; + +#define PIXEL11 *(q+1+nextlineDst) = w[5]; + +#define PIXEL12_1 *(q+2+nextlineDst) = INTERPOLATE_3_1(w[5], w[6]); +#define PIXEL12_3 *(q+2+nextlineDst) = INTERPOLATE_7_1(w[5], w[6]); +#define PIXEL12_6 *(q+2+nextlineDst) = INTERPOLATE_3_1(w[6], w[5]); +#define PIXEL12_C *(q+2+nextlineDst) = w[5]; + +#define PIXEL20_1M *(q+nextlineDst2) = INTERPOLATE_3_1(w[5], w[7]); +#define PIXEL20_1D *(q+nextlineDst2) = INTERPOLATE_3_1(w[5], w[8]); +#define PIXEL20_1L *(q+nextlineDst2) = INTERPOLATE_3_1(w[5], w[4]); +#define PIXEL20_2 *(q+nextlineDst2) = INTERPOLATE_2_1_1(w[5], w[8], w[4]); +#define PIXEL20_4 *(q+nextlineDst2) = INTERPOLATE_2_7_7(w[5], w[8], w[4]); +#define PIXEL20_5 *(q+nextlineDst2) = INTERPOLATE_1_1(w[8], w[4]); +#define PIXEL20_C *(q+nextlineDst2) = w[5]; + +#define PIXEL21_1 *(q+1+nextlineDst2) = INTERPOLATE_3_1(w[5], w[8]); +#define PIXEL21_3 *(q+1+nextlineDst2) = INTERPOLATE_7_1(w[5], w[8]); +#define PIXEL21_6 *(q+1+nextlineDst2) = INTERPOLATE_3_1(w[8], w[5]); +#define PIXEL21_C *(q+1+nextlineDst2) = w[5]; + +#define PIXEL22_1M *(q+2+nextlineDst2) = INTERPOLATE_3_1(w[5], w[9]); +#define PIXEL22_1D *(q+2+nextlineDst2) = INTERPOLATE_3_1(w[5], w[8]); +#define PIXEL22_1R *(q+2+nextlineDst2) = INTERPOLATE_3_1(w[5], w[6]); +#define PIXEL22_2 *(q+2+nextlineDst2) = INTERPOLATE_2_1_1(w[5], w[6], w[8]); +#define PIXEL22_4 *(q+2+nextlineDst2) = INTERPOLATE_2_7_7(w[5], w[6], w[8]); +#define PIXEL22_5 *(q+2+nextlineDst2) = INTERPOLATE_1_1(w[6], w[8]); +#define PIXEL22_C *(q+2+nextlineDst2) = w[5]; + +static inline bool diffYUV(unsigned int yuv1, unsigned int yuv2) +{ + static const int Ymask = 0x00FF0000; + static const int Umask = 0x0000FF00; + static const int Vmask = 0x000000FF; + static const int trY = 0x00300000; + static const int trU = 0x00000700; + static const int trV = 0x00000006; + + return + ( ( abs((yuv1 & Ymask) - (yuv2 & Ymask)) > trY ) || + ( abs((yuv1 & Umask) - (yuv2 & Umask)) > trU ) || + ( abs((yuv1 & Vmask) - (yuv2 & Vmask)) > trV ) ); +} + +void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, + int width, int height) { + int i, k; + int w[10]; + int yuv[10]; + + const unsigned int nextlineSrc = srcPitch / sizeof(uint16); + const uint16 *p = (const uint16 *)srcPtr; + + const unsigned int nextlineDst = dstPitch / sizeof(uint16); + const unsigned int nextlineDst2 = 2 * nextlineDst; + uint16 *q = (uint16 *)dstPtr; + + + // +----+----+----+ + // | | | | + // | w1 | w2 | w3 | + // +----+----+----+ + // | | | | + // | w4 | w5 | w6 | + // +----+----+----+ + // | | | | + // | w7 | w8 | w9 | + // +----+----+----+ + + while (height--) { + w[2] = *(p - 1 - nextlineSrc); + w[5] = *(p - 1); + w[8] = *(p - 1 + nextlineSrc); + + w[3] = *(p - nextlineSrc); + w[6] = *(p); + w[9] = *(p + nextlineSrc); + + for (i=0; i> 2; + u = 128 + ((r - b) >> 2); + v = 128 + ((-r + 2*g -b)>>3); + RGBtoYUV[ (i << 11) + (j << 5) + k ] = (Y<<16) + (u<<8) + v; + } + } + } +} + + +#pragma mark - + + #define kVeryFastAndUglyAspectMode 0 // No interpolation at all, but super-fast #define kFastAndNiceAspectMode 1 // Quite good quality with good speed #define kSlowAndPerfectAspectMode 2 // Accurate but slow code diff --git a/common/scaler.h b/common/scaler.h index 50cf196582..73ab6662ed 100644 --- a/common/scaler.h +++ b/common/scaler.h @@ -21,7 +21,7 @@ #ifndef COMMON_SCALER_H #define COMMON_SCALER_H -extern int Init_2xSaI (uint32 BitFormat); +extern void InitScalers(uint32 BitFormat); typedef void ScalerProc(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height); @@ -40,6 +40,7 @@ DECLARE_SCALER(Normal2x); DECLARE_SCALER(Normal3x); DECLARE_SCALER(TV2x); DECLARE_SCALER(DotMatrix); +DECLARE_SCALER(HQ3x); FORCEINLINE int real2Aspect(int y) { return y + (y + 1) / 5; @@ -64,6 +65,7 @@ enum { GFX_ADVMAME3X = 7, GFX_TV2X = 8, GFX_DOTMATRIX = 9, + GFX_HQ3X = 10, GFX_BILINEAR = 12, // OpenGL backend -- cgit v1.2.3