diff options
-rw-r--r-- | graphics/scaler.cpp | 23 | ||||
-rw-r--r-- | graphics/scaler/hq2x_i386.asm | 69 | ||||
-rw-r--r-- | graphics/scaler/hq3x_i386.asm | 60 | ||||
-rw-r--r-- | graphics/scaler/intern.h | 50 |
4 files changed, 128 insertions, 74 deletions
diff --git a/graphics/scaler.cpp b/graphics/scaler.cpp index 08fbcf6d04..f540c60205 100644 --- a/graphics/scaler.cpp +++ b/graphics/scaler.cpp @@ -54,10 +54,15 @@ extern "C" { #if !defined(_WIN32) && !defined(MACOSX) && !defined(__OS2__) #define RGBtoYUV _RGBtoYUV #define LUT16to32 _LUT16to32 +#define hqx_highbits _hqx_highbits +#define hqx_lowbits _hqx_lowbits #endif #endif +uint32 hqx_highbits = 0xF7DEF7DE; +uint32 hqx_lowbits = 0x0821; + // FIXME/TODO: The following two tables suck up 512 KB. This is bad. // In addition we never free them... // @@ -114,11 +119,25 @@ void InitLUT(Graphics::PixelFormat format) { void InitScalers(uint32 BitFormat) { gBitFormat = BitFormat; + #ifndef DISABLE_HQ_SCALERS - if (gBitFormat == 555) + #undef highBits; + #undef lowBits; + + if (gBitFormat == 555) { InitLUT(Graphics::createPixelFormat<555>()); - if (gBitFormat == 565) +#ifdef USE_NASM + hqx_highbits = Graphics::ColorMasks<555>::highBits; + hqx_lowbits = Graphics::ColorMasks<555>::lowBits & 0xFFFF; +#endif + } + if (gBitFormat == 565) { InitLUT(Graphics::createPixelFormat<565>()); +#ifdef USE_NASM + hqx_highbits = Graphics::ColorMasks<565>::highBits; + hqx_lowbits = Graphics::ColorMasks<565>::lowBits & 0xFFFF; +#endif + } #endif } diff --git a/graphics/scaler/hq2x_i386.asm b/graphics/scaler/hq2x_i386.asm index a4bc50c979..085df82ce5 100644 --- a/graphics/scaler/hq2x_i386.asm +++ b/graphics/scaler/hq2x_i386.asm @@ -22,6 +22,8 @@ GLOBAL _hq2x_16 EXTERN _LUT16to32 EXTERN _RGBtoYUV +EXTERN _hqx_highbits +EXTERN _hqx_lowbits SECTION .bss linesleft resd 1 @@ -47,10 +49,6 @@ const5 dd 0x00050005,0x00000005 const6 dd 0x00060006,0x00000006 const14 dd 0x000E000E,0x0000000E threshold dd 0x00300706,0x00000000 -; FIXME: zerlowbits assumes 565 mode. -; Also, in the code, the constant 0x0821 is used which also assumes 565 mode -highbits dd 0xF7DEF7DE -lowbits dd 0x0821 moduloSrc dd 0 moduloDst dd 0 @@ -135,17 +133,14 @@ SECTION .text %macro Interp1 3 mov edx,%2 mov ecx,%3 - cmp edx,ecx - je %%fin - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 -%%fin: mov %1,dx %endmacro @@ -154,19 +149,15 @@ SECTION .text %macro Interp2 4 mov edx,%3 mov ecx,%4 - cmp edx,ecx - je %%fin1 - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] + add ecx,[_hqx_lowbits] %%fin1: mov edx,%2 - cmp edx,ecx - je %%fin2 - and ecx,[highbits] - and edx,[highbits] + and ecx,[_hqx_highbits] + and edx,[_hqx_highbits] add edx,ecx shr edx,1 %%fin2: @@ -178,10 +169,8 @@ SECTION .text %macro Interp5 3 mov edx,%2 mov ecx,%3 - cmp edx,ecx - je %%fin - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 %%fin: @@ -1729,12 +1718,12 @@ _hq2x_16: shl eax,16 or eax,edx mov ecx,[w2] - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 mov ecx,edx @@ -1746,12 +1735,12 @@ _hq2x_16: ..@cross2: shl eax,16 mov ecx,[w4] - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 or eax,edx @@ -1760,12 +1749,12 @@ _hq2x_16: jmp .loopx_end ..@cross4: mov ecx,[w6] - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 shl edx,16 @@ -1778,12 +1767,12 @@ _hq2x_16: shl eax,16 or eax,edx mov ecx,[w8] - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 mov ecx,edx diff --git a/graphics/scaler/hq3x_i386.asm b/graphics/scaler/hq3x_i386.asm index 43bfd85540..3212ea93ac 100644 --- a/graphics/scaler/hq3x_i386.asm +++ b/graphics/scaler/hq3x_i386.asm @@ -22,6 +22,8 @@ GLOBAL _hq3x_16 EXTERN _LUT16to32 EXTERN _RGBtoYUV +EXTERN _hqx_highbits +EXTERN _hqx_lowbits SECTION .bss linesleft resd 1 @@ -44,10 +46,6 @@ SECTION .data reg_blank dd 0,0 const7 dd 0x00070007,0x00000007 threshold dd 0x00300706,0x00000000 -; FIXME: zerlowbits assumes 565 mode. -; Also, in the code, the constant 0x0821 is used which also assumes 565 mode -highbits dd 0xF7DEF7DE -lowbits dd 0x0821 moduloSrc dd 0 moduloDst dd 0 @@ -134,12 +132,12 @@ SECTION .text mov ecx,%3 cmp edx,ecx je %%fin - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 %%fin: @@ -153,17 +151,17 @@ SECTION .text mov ecx,%4 cmp edx,ecx je %%fin1 - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] + add ecx,[_hqx_lowbits] %%fin1: mov edx,%2 cmp edx,ecx je %%fin2 - and ecx,[highbits] - and edx,[highbits] + and ecx,[_hqx_highbits] + and edx,[_hqx_highbits] add edx,ecx shr edx,1 %%fin2: @@ -224,8 +222,8 @@ SECTION .text mov ecx,%3 cmp edx,ecx je %%fin - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 %%fin: @@ -2303,12 +2301,12 @@ _hq3x_16: shl eax,16 or eax,edx mov ecx,[w2] - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 mov [edi],dx @@ -2324,12 +2322,12 @@ _hq3x_16: shl eax,16 or eax,edx mov ecx,[w4] - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 mov [edi],dx @@ -2344,12 +2342,12 @@ _hq3x_16: shl eax,16 or eax,edx mov ecx,[w6] - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 mov [edi],eax @@ -2364,12 +2362,12 @@ _hq3x_16: shl eax,16 or eax,edx mov ecx,[w8] - and edx,[highbits] - and ecx,[highbits] + and edx,[_hqx_highbits] + and ecx,[_hqx_highbits] add ecx,edx shr ecx,1 - add ecx,[lowbits] - and ecx,[highbits] + add ecx,[_hqx_lowbits] + and ecx,[_hqx_highbits] add edx,ecx shr edx,1 mov [edi],eax diff --git a/graphics/scaler/intern.h b/graphics/scaler/intern.h index 5955aa1bac..cb78bb5bb5 100644 --- a/graphics/scaler/intern.h +++ b/graphics/scaler/intern.h @@ -76,7 +76,6 @@ static inline uint32 interpolate32_1_1_1_1(uint32 A, uint32 B, uint32 C, uint32 return x + y; } - /** * Interpolate two 16 bit pixels with the weights specified in the template * parameters. Used by the hq scaler family. @@ -100,6 +99,55 @@ static inline uint16 interpolate16_3(uint16 p1, uint16 p2, uint16 p3) { } +template<int bitFormat> +static inline unsigned interpolate16_3_1(unsigned c1, unsigned c2) { + const unsigned lowbits=(((c1<<1)&(lowBits<<1))+(c1&qlowBits)+(c2&qlowBits))&qlowBits; + return ((c1*3+c2) - lowbits) >> 2; +} + +template<int bitFormat> +static inline unsigned interpolate16_2_1_1(unsigned c1, unsigned c2, unsigned c3) { + c1<<=1; + const unsigned lowbits=((c1&(lowBits<<1))+(c2&qlowBits)+(c3&qlowBits))&qlowBits; + return ((c1+c2+c3) - lowbits) >> 2; +} + +template<int bitFormat> +static inline unsigned interpolate16_1_1(unsigned c1, unsigned c2) { + return ( c1+c2 - ((c1^c2)&lowBits) ) >> 1; +} + +template<int bitFormat> +static inline unsigned interpolate16_5_2_1(unsigned c1, unsigned c2, unsigned c3) { + c2<<=1; + const unsigned lowbits=( ((c1<<2)&(lowBits<<2))+(c1&0x1CE7)+(c2&0x18C6)+(c3&0x1CE7) ) & 0x1CE7; + return ((c1*5+c2+c3) - lowbits) >> 3; +} + +template<int bitFormat> +static inline unsigned interpolate16_6_1_1(unsigned c1, unsigned c2, unsigned c3) { + const unsigned lowbits=(((((c1<<1)&(lowBits<<1))+(c1&qlowBits))<<1)+(c2&0x1CE7)+(c3&0x1CE7))&0x1CE7; + return ((c1*6+c2+c3) - lowbits) >> 3; +} + +template<int bitFormat> +static inline unsigned interpolate16_2_3_3(unsigned c1, unsigned c2, unsigned c3) { + c1<<=1; + const unsigned rb=(c1&(redblueMask<<1))+((c2&redblueMask)+(c3&redblueMask))*3; + const unsigned g=(c1&(greenMask<<1))+((c2&greenMask)+(c3&greenMask))*3; + return ((rb&(redblueMask<<3))|(g&(greenMask<<8)))>>3; +} + +template<int bitFormat> +static inline unsigned interpolate16_14_1_1(unsigned c1, unsigned c2, unsigned c3) { + const unsigned rb=(c1&redblueMask)*14+(c2&redblueMask)+(c3&redblueMask); + const unsigned g=(c1&greenMask)*14+(c2&greenMask)+(c3&greenMask); + return ((rb&(redblueMask<<4))|(g&(greenMask<<4)))>>4; +} + + + + /** * Compare two YUV values (encoded 8-8-8) and check if they differ by more than * a certain hard coded threshold. Used by the hq scaler family. |