diff options
| -rw-r--r-- | graphics/scaler.cpp | 23 | ||||
| -rw-r--r-- | graphics/scaler/hq2x_i386.asm | 69 | ||||
| -rw-r--r-- | graphics/scaler/hq3x_i386.asm | 60 | ||||
| -rw-r--r-- | graphics/scaler/intern.h | 50 | 
4 files changed, 128 insertions, 74 deletions
diff --git a/graphics/scaler.cpp b/graphics/scaler.cpp index 08fbcf6d04..f540c60205 100644 --- a/graphics/scaler.cpp +++ b/graphics/scaler.cpp @@ -54,10 +54,15 @@ extern "C" {  #if !defined(_WIN32) && !defined(MACOSX) && !defined(__OS2__)  #define RGBtoYUV _RGBtoYUV  #define LUT16to32 _LUT16to32 +#define hqx_highbits _hqx_highbits +#define hqx_lowbits _hqx_lowbits  #endif  #endif +uint32 hqx_highbits = 0xF7DEF7DE; +uint32 hqx_lowbits = 0x0821; +  // FIXME/TODO: The following two tables suck up 512 KB. This is bad.  // In addition we never free them...  // @@ -114,11 +119,25 @@ void InitLUT(Graphics::PixelFormat format) {  void InitScalers(uint32 BitFormat) {  	gBitFormat = BitFormat; +  #ifndef DISABLE_HQ_SCALERS -	if (gBitFormat == 555) +	#undef highBits; +	#undef lowBits; + +	if (gBitFormat == 555) {  		InitLUT(Graphics::createPixelFormat<555>()); -	if (gBitFormat == 565) +#ifdef USE_NASM +		hqx_highbits = Graphics::ColorMasks<555>::highBits; +		hqx_lowbits = Graphics::ColorMasks<555>::lowBits & 0xFFFF; +#endif +	} +	if (gBitFormat == 565) {  		InitLUT(Graphics::createPixelFormat<565>()); +#ifdef USE_NASM +		hqx_highbits = Graphics::ColorMasks<565>::highBits; +		hqx_lowbits = Graphics::ColorMasks<565>::lowBits & 0xFFFF; +#endif +	}  #endif  } diff --git a/graphics/scaler/hq2x_i386.asm b/graphics/scaler/hq2x_i386.asm index a4bc50c979..085df82ce5 100644 --- a/graphics/scaler/hq2x_i386.asm +++ b/graphics/scaler/hq2x_i386.asm @@ -22,6 +22,8 @@ GLOBAL _hq2x_16  EXTERN _LUT16to32  EXTERN _RGBtoYUV +EXTERN _hqx_highbits +EXTERN _hqx_lowbits  SECTION .bss  linesleft resd 1 @@ -47,10 +49,6 @@ const5       dd  0x00050005,0x00000005  const6       dd  0x00060006,0x00000006  const14      dd  0x000E000E,0x0000000E  threshold    dd  0x00300706,0x00000000 -; FIXME: zerlowbits assumes 565 mode. -; Also, in the code, the constant 0x0821 is used which also assumes 565 mode -highbits     dd  0xF7DEF7DE -lowbits      dd  0x0821  moduloSrc    dd  0  moduloDst    dd  0 @@ -135,17 +133,14 @@ SECTION .text  %macro Interp1 3      mov edx,%2      mov ecx,%3 -    cmp edx,ecx -    je  %%fin -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add ecx,edx      shr ecx,1 -    add ecx,[lowbits] -    and ecx,[highbits] +    add ecx,[_hqx_lowbits] +    and ecx,[_hqx_highbits]      add edx,ecx      shr edx,1 -%%fin:      mov %1,dx  %endmacro @@ -154,19 +149,15 @@ SECTION .text  %macro Interp2 4      mov edx,%3      mov ecx,%4 -    cmp edx,ecx -    je  %%fin1 -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add ecx,edx      shr ecx,1 -    add ecx,[lowbits] +    add ecx,[_hqx_lowbits]  %%fin1:      mov edx,%2 -    cmp edx,ecx -    je  %%fin2 -    and ecx,[highbits] -    and edx,[highbits] +    and ecx,[_hqx_highbits] +    and edx,[_hqx_highbits]      add edx,ecx      shr edx,1  %%fin2: @@ -178,10 +169,8 @@ SECTION .text  %macro Interp5 3      mov edx,%2      mov ecx,%3 -    cmp edx,ecx -    je  %%fin -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add edx,ecx      shr edx,1  %%fin: @@ -1729,12 +1718,12 @@ _hq2x_16:      shl     eax,16      or      eax,edx      mov     ecx,[w2] -    and     edx,[highbits] -    and     ecx,[highbits] +    and     edx,[_hqx_highbits] +    and     ecx,[_hqx_highbits]      add     ecx,edx      shr     ecx,1 -    add     ecx,[lowbits] -    and     ecx,[highbits] +    add     ecx,[_hqx_lowbits] +    and     ecx,[_hqx_highbits]      add     edx,ecx      shr     edx,1      mov     ecx,edx @@ -1746,12 +1735,12 @@ _hq2x_16:  ..@cross2:      shl     eax,16      mov     ecx,[w4] -    and     edx,[highbits] -    and     ecx,[highbits] +    and     edx,[_hqx_highbits] +    and     ecx,[_hqx_highbits]      add     ecx,edx      shr     ecx,1 -    add     ecx,[lowbits] -    and     ecx,[highbits] +    add     ecx,[_hqx_lowbits] +    and     ecx,[_hqx_highbits]      add     edx,ecx      shr     edx,1      or      eax,edx @@ -1760,12 +1749,12 @@ _hq2x_16:      jmp     .loopx_end  ..@cross4:      mov     ecx,[w6] -    and     edx,[highbits] -    and     ecx,[highbits] +    and     edx,[_hqx_highbits] +    and     ecx,[_hqx_highbits]      add     ecx,edx      shr     ecx,1 -    add     ecx,[lowbits] -    and     ecx,[highbits] +    add     ecx,[_hqx_lowbits] +    and     ecx,[_hqx_highbits]      add     edx,ecx      shr     edx,1      shl     edx,16 @@ -1778,12 +1767,12 @@ _hq2x_16:      shl     eax,16      or      eax,edx      mov     ecx,[w8] -    and     edx,[highbits] -    and     ecx,[highbits] +    and     edx,[_hqx_highbits] +    and     ecx,[_hqx_highbits]      add     ecx,edx      shr     ecx,1 -    add     ecx,[lowbits] -    and     ecx,[highbits] +    add     ecx,[_hqx_lowbits] +    and     ecx,[_hqx_highbits]      add     edx,ecx      shr     edx,1      mov     ecx,edx diff --git a/graphics/scaler/hq3x_i386.asm b/graphics/scaler/hq3x_i386.asm index 43bfd85540..3212ea93ac 100644 --- a/graphics/scaler/hq3x_i386.asm +++ b/graphics/scaler/hq3x_i386.asm @@ -22,6 +22,8 @@ GLOBAL _hq3x_16  EXTERN _LUT16to32  EXTERN _RGBtoYUV +EXTERN _hqx_highbits +EXTERN _hqx_lowbits  SECTION .bss  linesleft resd 1 @@ -44,10 +46,6 @@ SECTION .data  reg_blank    dd  0,0  const7       dd  0x00070007,0x00000007  threshold    dd  0x00300706,0x00000000 -; FIXME: zerlowbits assumes 565 mode. -; Also, in the code, the constant 0x0821 is used which also assumes 565 mode -highbits     dd  0xF7DEF7DE -lowbits      dd  0x0821  moduloSrc    dd  0  moduloDst    dd  0 @@ -134,12 +132,12 @@ SECTION .text      mov ecx,%3      cmp edx,ecx      je  %%fin -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add ecx,edx      shr ecx,1 -    add ecx,[lowbits] -    and ecx,[highbits] +    add ecx,[_hqx_lowbits] +    and ecx,[_hqx_highbits]      add edx,ecx      shr edx,1  %%fin: @@ -153,17 +151,17 @@ SECTION .text      mov ecx,%4      cmp edx,ecx      je  %%fin1 -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add ecx,edx      shr ecx,1 -    add ecx,[lowbits] +    add ecx,[_hqx_lowbits]  %%fin1:      mov edx,%2      cmp edx,ecx      je  %%fin2 -    and ecx,[highbits] -    and edx,[highbits] +    and ecx,[_hqx_highbits] +    and edx,[_hqx_highbits]      add edx,ecx      shr edx,1  %%fin2: @@ -224,8 +222,8 @@ SECTION .text      mov ecx,%3      cmp edx,ecx      je  %%fin -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add edx,ecx      shr edx,1  %%fin: @@ -2303,12 +2301,12 @@ _hq3x_16:      shl eax,16      or  eax,edx      mov ecx,[w2] -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add ecx,edx      shr ecx,1 -    add ecx,[lowbits] -    and ecx,[highbits] +    add ecx,[_hqx_lowbits] +    and ecx,[_hqx_highbits]      add edx,ecx      shr edx,1      mov [edi],dx @@ -2324,12 +2322,12 @@ _hq3x_16:      shl eax,16      or  eax,edx      mov ecx,[w4] -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add ecx,edx      shr ecx,1 -    add ecx,[lowbits] -    and ecx,[highbits] +    add ecx,[_hqx_lowbits] +    and ecx,[_hqx_highbits]      add edx,ecx      shr edx,1      mov [edi],dx @@ -2344,12 +2342,12 @@ _hq3x_16:      shl eax,16      or  eax,edx      mov ecx,[w6] -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add ecx,edx      shr ecx,1 -    add ecx,[lowbits] -    and ecx,[highbits] +    add ecx,[_hqx_lowbits] +    and ecx,[_hqx_highbits]      add edx,ecx      shr edx,1      mov [edi],eax @@ -2364,12 +2362,12 @@ _hq3x_16:      shl eax,16      or  eax,edx      mov ecx,[w8] -    and edx,[highbits] -    and ecx,[highbits] +    and edx,[_hqx_highbits] +    and ecx,[_hqx_highbits]      add ecx,edx      shr ecx,1 -    add ecx,[lowbits] -    and ecx,[highbits] +    add ecx,[_hqx_lowbits] +    and ecx,[_hqx_highbits]      add edx,ecx      shr edx,1      mov [edi],eax diff --git a/graphics/scaler/intern.h b/graphics/scaler/intern.h index 5955aa1bac..cb78bb5bb5 100644 --- a/graphics/scaler/intern.h +++ b/graphics/scaler/intern.h @@ -76,7 +76,6 @@ static inline uint32 interpolate32_1_1_1_1(uint32 A, uint32 B, uint32 C, uint32  	return x + y;  } -  /**   * Interpolate two 16 bit pixels with the weights specified in the template   * parameters. Used by the hq scaler family. @@ -100,6 +99,55 @@ static inline uint16 interpolate16_3(uint16 p1, uint16 p2, uint16 p3) {  } +template<int bitFormat> +static inline unsigned interpolate16_3_1(unsigned c1, unsigned c2) {  +   const unsigned lowbits=(((c1<<1)&(lowBits<<1))+(c1&qlowBits)+(c2&qlowBits))&qlowBits;  +   return ((c1*3+c2) - lowbits) >> 2;  +}  + +template<int bitFormat> +static inline unsigned interpolate16_2_1_1(unsigned c1, unsigned c2, unsigned c3) {  +   c1<<=1;  +   const unsigned lowbits=((c1&(lowBits<<1))+(c2&qlowBits)+(c3&qlowBits))&qlowBits;  +   return ((c1+c2+c3) - lowbits) >> 2;  +}  + +template<int bitFormat> +static inline unsigned interpolate16_1_1(unsigned c1, unsigned c2) {  +   return ( c1+c2 - ((c1^c2)&lowBits) ) >> 1;  +}  + +template<int bitFormat> +static inline unsigned interpolate16_5_2_1(unsigned c1, unsigned c2, unsigned c3) {  +   c2<<=1;  +   const unsigned lowbits=( ((c1<<2)&(lowBits<<2))+(c1&0x1CE7)+(c2&0x18C6)+(c3&0x1CE7) ) & 0x1CE7;  +   return ((c1*5+c2+c3) - lowbits) >> 3;  +}  + +template<int bitFormat> +static inline unsigned interpolate16_6_1_1(unsigned c1, unsigned c2, unsigned c3) {  +   const unsigned lowbits=(((((c1<<1)&(lowBits<<1))+(c1&qlowBits))<<1)+(c2&0x1CE7)+(c3&0x1CE7))&0x1CE7;  +   return ((c1*6+c2+c3) - lowbits) >> 3;  +}  + +template<int bitFormat> +static inline unsigned interpolate16_2_3_3(unsigned c1, unsigned c2, unsigned c3) {  +   c1<<=1;  +   const unsigned rb=(c1&(redblueMask<<1))+((c2&redblueMask)+(c3&redblueMask))*3;  +   const unsigned g=(c1&(greenMask<<1))+((c2&greenMask)+(c3&greenMask))*3;  +   return ((rb&(redblueMask<<3))|(g&(greenMask<<8)))>>3;  +}  + +template<int bitFormat> +static inline unsigned interpolate16_14_1_1(unsigned c1, unsigned c2, unsigned c3) {  +   const unsigned rb=(c1&redblueMask)*14+(c2&redblueMask)+(c3&redblueMask);  +   const unsigned g=(c1&greenMask)*14+(c2&greenMask)+(c3&greenMask);  +   return ((rb&(redblueMask<<4))|(g&(greenMask<<4)))>>4;  +} + + + +  /**   * Compare two YUV values (encoded 8-8-8) and check if they differ by more than   * a certain hard coded threshold. Used by the hq scaler family.  | 
