diff options
author | Max Horn | 2003-10-01 16:39:31 +0000 |
---|---|---|
committer | Max Horn | 2003-10-01 16:39:31 +0000 |
commit | b6f85de558e57bcae7932329523ed8478dd83760 (patch) | |
tree | dc10e775e24700ce6f3f48ca510f94147f115503 /common | |
parent | b75a9256409f6067798ff1657df3dc42682770ca (diff) | |
download | scummvm-rg350-b6f85de558e57bcae7932329523ed8478dd83760.tar.gz scummvm-rg350-b6f85de558e57bcae7932329523ed8478dd83760.tar.bz2 scummvm-rg350-b6f85de558e57bcae7932329523ed8478dd83760.zip |
Instead of using a stack array, use regular variables (marked with the register keyword as a hint to the optimizer) for the color data. At least over here this helps the optimizer *A LOT*: instead of keeping w[] on the stack, on the PowerPC all values stay in registers, for a nice speed boost. Shouldn't hurt x86 either.
svn-id: r10522
Diffstat (limited to 'common')
-rw-r--r-- | common/scaler/hq2x.cpp | 147 | ||||
-rw-r--r-- | common/scaler/hq3x.cpp | 143 |
2 files changed, 146 insertions, 144 deletions
diff --git a/common/scaler/hq2x.cpp b/common/scaler/hq2x.cpp index e3c106fab4..92a80f5e6c 100644 --- a/common/scaler/hq2x.cpp +++ b/common/scaler/hq2x.cpp @@ -22,59 +22,59 @@ #include "common/scaler/intern.h" -#define PIXEL00_0 *(q) = w[5]; -#define PIXEL00_10 *(q) = interpolate16_2<bitFormat,3,1>(w[5], w[1]); -#define PIXEL00_11 *(q) = interpolate16_2<bitFormat,3,1>(w[5], w[4]); -#define PIXEL00_12 *(q) = interpolate16_2<bitFormat,3,1>(w[5], w[2]); -#define PIXEL00_20 *(q) = interpolate16_3<bitFormat,2,1,1>(w[5], w[4], w[2]); -#define PIXEL00_21 *(q) = interpolate16_3<bitFormat,2,1,1>(w[5], w[1], w[2]); -#define PIXEL00_22 *(q) = interpolate16_3<bitFormat,2,1,1>(w[5], w[1], w[4]); -#define PIXEL00_60 *(q) = interpolate16_3<bitFormat,5,2,1>(w[5], w[2], w[4]); -#define PIXEL00_61 *(q) = interpolate16_3<bitFormat,5,2,1>(w[5], w[4], w[2]); -#define PIXEL00_70 *(q) = interpolate16_3<bitFormat,6,1,1>(w[5], w[4], w[2]); -#define PIXEL00_90 *(q) = interpolate16_3<bitFormat,2,3,3>(w[5], w[4], w[2]); -#define PIXEL00_100 *(q) = interpolate16_3<bitFormat,14,1,1>(w[5], w[4], w[2]); +#define PIXEL00_0 *(q) = w5; +#define PIXEL00_10 *(q) = interpolate16_2<bitFormat,3,1>(w5, w1); +#define PIXEL00_11 *(q) = interpolate16_2<bitFormat,3,1>(w5, w4); +#define PIXEL00_12 *(q) = interpolate16_2<bitFormat,3,1>(w5, w2); +#define PIXEL00_20 *(q) = interpolate16_3<bitFormat,2,1,1>(w5, w4, w2); +#define PIXEL00_21 *(q) = interpolate16_3<bitFormat,2,1,1>(w5, w1, w2); +#define PIXEL00_22 *(q) = interpolate16_3<bitFormat,2,1,1>(w5, w1, w4); +#define PIXEL00_60 *(q) = interpolate16_3<bitFormat,5,2,1>(w5, w2, w4); +#define PIXEL00_61 *(q) = interpolate16_3<bitFormat,5,2,1>(w5, w4, w2); +#define PIXEL00_70 *(q) = interpolate16_3<bitFormat,6,1,1>(w5, w4, w2); +#define PIXEL00_90 *(q) = interpolate16_3<bitFormat,2,3,3>(w5, w4, w2); +#define PIXEL00_100 *(q) = interpolate16_3<bitFormat,14,1,1>(w5, w4, w2); -#define PIXEL01_0 *(q+1) = w[5]; -#define PIXEL01_10 *(q+1) = 
interpolate16_2<bitFormat,3,1>(w[5], w[3]); -#define PIXEL01_11 *(q+1) = interpolate16_2<bitFormat,3,1>(w[5], w[2]); -#define PIXEL01_12 *(q+1) = interpolate16_2<bitFormat,3,1>(w[5], w[6]); -#define PIXEL01_20 *(q+1) = interpolate16_3<bitFormat,2,1,1>(w[5], w[2], w[6]); -#define PIXEL01_21 *(q+1) = interpolate16_3<bitFormat,2,1,1>(w[5], w[3], w[6]); -#define PIXEL01_22 *(q+1) = interpolate16_3<bitFormat,2,1,1>(w[5], w[3], w[2]); -#define PIXEL01_60 *(q+1) = interpolate16_3<bitFormat,5,2,1>(w[5], w[6], w[2]); -#define PIXEL01_61 *(q+1) = interpolate16_3<bitFormat,5,2,1>(w[5], w[2], w[6]); -#define PIXEL01_70 *(q+1) = interpolate16_3<bitFormat,6,1,1>(w[5], w[2], w[6]); -#define PIXEL01_90 *(q+1) = interpolate16_3<bitFormat,2,3,3>(w[5], w[2], w[6]); -#define PIXEL01_100 *(q+1) = interpolate16_3<bitFormat,14,1,1>(w[5], w[2], w[6]); +#define PIXEL01_0 *(q+1) = w5; +#define PIXEL01_10 *(q+1) = interpolate16_2<bitFormat,3,1>(w5, w3); +#define PIXEL01_11 *(q+1) = interpolate16_2<bitFormat,3,1>(w5, w2); +#define PIXEL01_12 *(q+1) = interpolate16_2<bitFormat,3,1>(w5, w6); +#define PIXEL01_20 *(q+1) = interpolate16_3<bitFormat,2,1,1>(w5, w2, w6); +#define PIXEL01_21 *(q+1) = interpolate16_3<bitFormat,2,1,1>(w5, w3, w6); +#define PIXEL01_22 *(q+1) = interpolate16_3<bitFormat,2,1,1>(w5, w3, w2); +#define PIXEL01_60 *(q+1) = interpolate16_3<bitFormat,5,2,1>(w5, w6, w2); +#define PIXEL01_61 *(q+1) = interpolate16_3<bitFormat,5,2,1>(w5, w2, w6); +#define PIXEL01_70 *(q+1) = interpolate16_3<bitFormat,6,1,1>(w5, w2, w6); +#define PIXEL01_90 *(q+1) = interpolate16_3<bitFormat,2,3,3>(w5, w2, w6); +#define PIXEL01_100 *(q+1) = interpolate16_3<bitFormat,14,1,1>(w5, w2, w6); -#define PIXEL10_0 *(q+nextlineDst) = w[5]; -#define PIXEL10_10 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[7]); -#define PIXEL10_11 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[8]); -#define PIXEL10_12 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[4]); -#define PIXEL10_20 
*(q+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w[5], w[8], w[4]); -#define PIXEL10_21 *(q+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w[5], w[7], w[4]); -#define PIXEL10_22 *(q+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w[5], w[7], w[8]); -#define PIXEL10_60 *(q+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w[5], w[4], w[8]); -#define PIXEL10_61 *(q+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w[5], w[8], w[4]); -#define PIXEL10_70 *(q+nextlineDst) = interpolate16_3<bitFormat,6,1,1>(w[5], w[8], w[4]); -#define PIXEL10_90 *(q+nextlineDst) = interpolate16_3<bitFormat,2,3,3>(w[5], w[8], w[4]); -#define PIXEL10_100 *(q+nextlineDst) = interpolate16_3<bitFormat,14,1,1>(w[5], w[8], w[4]); +#define PIXEL10_0 *(q+nextlineDst) = w5; +#define PIXEL10_10 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w7); +#define PIXEL10_11 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w8); +#define PIXEL10_12 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w4); +#define PIXEL10_20 *(q+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w8, w4); +#define PIXEL10_21 *(q+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w7, w4); +#define PIXEL10_22 *(q+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w7, w8); +#define PIXEL10_60 *(q+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w5, w4, w8); +#define PIXEL10_61 *(q+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w5, w8, w4); +#define PIXEL10_70 *(q+nextlineDst) = interpolate16_3<bitFormat,6,1,1>(w5, w8, w4); +#define PIXEL10_90 *(q+nextlineDst) = interpolate16_3<bitFormat,2,3,3>(w5, w8, w4); +#define PIXEL10_100 *(q+nextlineDst) = interpolate16_3<bitFormat,14,1,1>(w5, w8, w4); -#define PIXEL11_0 *(q+1+nextlineDst) = w[5]; -#define PIXEL11_10 *(q+1+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[9]); -#define PIXEL11_11 *(q+1+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[6]); -#define PIXEL11_12 *(q+1+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[8]); -#define PIXEL11_20 
*(q+1+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w[5], w[6], w[8]); -#define PIXEL11_21 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w[5], w[9], w[8]); -#define PIXEL11_22 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w[5], w[9], w[6]); -#define PIXEL11_60 *(q+1+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w[5], w[8], w[6]); -#define PIXEL11_61 *(q+1+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w[5], w[6], w[8]); -#define PIXEL11_70 *(q+1+nextlineDst) = interpolate16_3<bitFormat,6,1,1>(w[5], w[6], w[8]); -#define PIXEL11_90 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,3,3>(w[5], w[6], w[8]); -#define PIXEL11_100 *(q+1+nextlineDst) = interpolate16_3<bitFormat,14,1,1>(w[5], w[6], w[8]); +#define PIXEL11_0 *(q+1+nextlineDst) = w5; +#define PIXEL11_10 *(q+1+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w9); +#define PIXEL11_11 *(q+1+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w6); +#define PIXEL11_12 *(q+1+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w8); +#define PIXEL11_20 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w6, w8); +#define PIXEL11_21 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w9, w8); +#define PIXEL11_22 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w9, w6); +#define PIXEL11_60 *(q+1+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w5, w8, w6); +#define PIXEL11_61 *(q+1+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w5, w6, w8); +#define PIXEL11_70 *(q+1+nextlineDst) = interpolate16_3<bitFormat,6,1,1>(w5, w6, w8); +#define PIXEL11_90 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,3,3>(w5, w6, w8); +#define PIXEL11_100 *(q+1+nextlineDst) = interpolate16_3<bitFormat,14,1,1>(w5, w6, w8); -#define YUV(x) RGBtoYUV[w[x]] +#define YUV(x) RGBtoYUV[w ## x] /** * The HQ2x high quality 2x graphics filter. 
@@ -83,7 +83,8 @@ */ template<int bitFormat> void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - int w[10]; +// int w[10]; + register int w1, w2, w3, w4, w5, w6, w7, w8, w9; const uint32 nextlineSrc = srcPitch / sizeof(uint16); const uint16 *p = (const uint16 *)srcPtr; @@ -147,13 +148,13 @@ void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, while (height--) { - w[1] = *(p - 1 - nextlineSrc); - w[4] = *(p - 1); - w[7] = *(p - 1 + nextlineSrc); + w1 = *(p - 1 - nextlineSrc); + w4 = *(p - 1); + w7 = *(p - 1 + nextlineSrc); - w[2] = *(p - nextlineSrc); - w[5] = *(p); - w[8] = *(p + nextlineSrc); + w2 = *(p - nextlineSrc); + w5 = *(p); + w8 = *(p + nextlineSrc); #if USE_ALTIVEC // Load inital values of vecYUV1234 / vecYUV6789 @@ -168,9 +169,9 @@ void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, while (tmpWidth--) { p++; - w[3] = *(p - nextlineSrc); - w[6] = *(p); - w[9] = *(p + nextlineSrc); + w3 = *(p - nextlineSrc); + w6 = *(p); + w9 = *(p + nextlineSrc); int pattern = 0; @@ -242,14 +243,14 @@ void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, pattern = ((int *)&vSum)[3]; #else const int yuv5 = YUV(5); - if (w[5] != w[1] && diffYUV(yuv5, YUV(1))) pattern |= 0x0001; - if (w[5] != w[2] && diffYUV(yuv5, YUV(2))) pattern |= 0x0002; - if (w[5] != w[3] && diffYUV(yuv5, YUV(3))) pattern |= 0x0004; - if (w[5] != w[4] && diffYUV(yuv5, YUV(4))) pattern |= 0x0008; - if (w[5] != w[6] && diffYUV(yuv5, YUV(6))) pattern |= 0x0010; - if (w[5] != w[7] && diffYUV(yuv5, YUV(7))) pattern |= 0x0020; - if (w[5] != w[8] && diffYUV(yuv5, YUV(8))) pattern |= 0x0040; - if (w[5] != w[9] && diffYUV(yuv5, YUV(9))) pattern |= 0x0080; + if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001; + if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002; + if (w5 != w3 && diffYUV(yuv5, YUV(3))) pattern |= 0x0004; + if (w5 != w4 && diffYUV(yuv5, YUV(4))) 
pattern |= 0x0008; + if (w5 != w6 && diffYUV(yuv5, YUV(6))) pattern |= 0x0010; + if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020; + if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040; + if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080; #endif switch (pattern) { @@ -2044,13 +2045,13 @@ void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, break; } - w[1] = w[2]; - w[4] = w[5]; - w[7] = w[8]; + w1 = w2; + w4 = w5; + w7 = w8; - w[2] = w[3]; - w[5] = w[6]; - w[8] = w[9]; + w2 = w3; + w5 = w6; + w8 = w9; q += 2; } diff --git a/common/scaler/hq3x.cpp b/common/scaler/hq3x.cpp index 864b8c31cf..39be5e4f98 100644 --- a/common/scaler/hq3x.cpp +++ b/common/scaler/hq3x.cpp @@ -23,61 +23,61 @@ #include "common/scaler/intern.h" -#define PIXEL00_1M *(q) = interpolate16_2<bitFormat,3,1>(w[5], w[1]); -#define PIXEL00_1U *(q) = interpolate16_2<bitFormat,3,1>(w[5], w[2]); -#define PIXEL00_1L *(q) = interpolate16_2<bitFormat,3,1>(w[5], w[4]); -#define PIXEL00_2 *(q) = interpolate16_3<bitFormat,2,1,1>(w[5], w[4], w[2]); -#define PIXEL00_4 *(q) = interpolate16_3<bitFormat,2,7,7>(w[5], w[4], w[2]); -#define PIXEL00_5 *(q) = interpolate16_2<bitFormat,1,1>(w[4], w[2]); -#define PIXEL00_C *(q) = w[5]; +#define PIXEL00_1M *(q) = interpolate16_2<bitFormat,3,1>(w5, w1); +#define PIXEL00_1U *(q) = interpolate16_2<bitFormat,3,1>(w5, w2); +#define PIXEL00_1L *(q) = interpolate16_2<bitFormat,3,1>(w5, w4); +#define PIXEL00_2 *(q) = interpolate16_3<bitFormat,2,1,1>(w5, w4, w2); +#define PIXEL00_4 *(q) = interpolate16_3<bitFormat,2,7,7>(w5, w4, w2); +#define PIXEL00_5 *(q) = interpolate16_2<bitFormat,1,1>(w4, w2); +#define PIXEL00_C *(q) = w5; -#define PIXEL01_1 *(q+1) = interpolate16_2<bitFormat,3,1>(w[5], w[2]); -#define PIXEL01_3 *(q+1) = interpolate16_2<bitFormat,7,1>(w[5], w[2]); -#define PIXEL01_6 *(q+1) = interpolate16_2<bitFormat,3,1>(w[2], w[5]); -#define PIXEL01_C *(q+1) = w[5]; +#define PIXEL01_1 *(q+1) = interpolate16_2<bitFormat,3,1>(w5, 
w2); +#define PIXEL01_3 *(q+1) = interpolate16_2<bitFormat,7,1>(w5, w2); +#define PIXEL01_6 *(q+1) = interpolate16_2<bitFormat,3,1>(w2, w5); +#define PIXEL01_C *(q+1) = w5; -#define PIXEL02_1M *(q+2) = interpolate16_2<bitFormat,3,1>(w[5], w[3]); -#define PIXEL02_1U *(q+2) = interpolate16_2<bitFormat,3,1>(w[5], w[2]); -#define PIXEL02_1R *(q+2) = interpolate16_2<bitFormat,3,1>(w[5], w[6]); -#define PIXEL02_2 *(q+2) = interpolate16_3<bitFormat,2,1,1>(w[5], w[2], w[6]); -#define PIXEL02_4 *(q+2) = interpolate16_3<bitFormat,2,7,7>(w[5], w[2], w[6]); -#define PIXEL02_5 *(q+2) = interpolate16_2<bitFormat,1,1>(w[2], w[6]); -#define PIXEL02_C *(q+2) = w[5]; +#define PIXEL02_1M *(q+2) = interpolate16_2<bitFormat,3,1>(w5, w3); +#define PIXEL02_1U *(q+2) = interpolate16_2<bitFormat,3,1>(w5, w2); +#define PIXEL02_1R *(q+2) = interpolate16_2<bitFormat,3,1>(w5, w6); +#define PIXEL02_2 *(q+2) = interpolate16_3<bitFormat,2,1,1>(w5, w2, w6); +#define PIXEL02_4 *(q+2) = interpolate16_3<bitFormat,2,7,7>(w5, w2, w6); +#define PIXEL02_5 *(q+2) = interpolate16_2<bitFormat,1,1>(w2, w6); +#define PIXEL02_C *(q+2) = w5; -#define PIXEL10_1 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[4]); -#define PIXEL10_3 *(q+nextlineDst) = interpolate16_2<bitFormat,7,1>(w[5], w[4]); -#define PIXEL10_6 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[4], w[5]); -#define PIXEL10_C *(q+nextlineDst) = w[5]; +#define PIXEL10_1 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w4); +#define PIXEL10_3 *(q+nextlineDst) = interpolate16_2<bitFormat,7,1>(w5, w4); +#define PIXEL10_6 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w4, w5); +#define PIXEL10_C *(q+nextlineDst) = w5; -#define PIXEL11 *(q+1+nextlineDst) = w[5]; +#define PIXEL11 *(q+1+nextlineDst) = w5; -#define PIXEL12_1 *(q+2+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[6]); -#define PIXEL12_3 *(q+2+nextlineDst) = interpolate16_2<bitFormat,7,1>(w[5], w[6]); -#define PIXEL12_6 *(q+2+nextlineDst) = 
interpolate16_2<bitFormat,3,1>(w[6], w[5]); -#define PIXEL12_C *(q+2+nextlineDst) = w[5]; +#define PIXEL12_1 *(q+2+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w6); +#define PIXEL12_3 *(q+2+nextlineDst) = interpolate16_2<bitFormat,7,1>(w5, w6); +#define PIXEL12_6 *(q+2+nextlineDst) = interpolate16_2<bitFormat,3,1>(w6, w5); +#define PIXEL12_C *(q+2+nextlineDst) = w5; -#define PIXEL20_1M *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[7]); -#define PIXEL20_1D *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[8]); -#define PIXEL20_1L *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[4]); -#define PIXEL20_2 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w[5], w[8], w[4]); -#define PIXEL20_4 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w[5], w[8], w[4]); -#define PIXEL20_5 *(q+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w[8], w[4]); -#define PIXEL20_C *(q+nextlineDst2) = w[5]; +#define PIXEL20_1M *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w7); +#define PIXEL20_1D *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w8); +#define PIXEL20_1L *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w4); +#define PIXEL20_2 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w5, w8, w4); +#define PIXEL20_4 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w5, w8, w4); +#define PIXEL20_5 *(q+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w8, w4); +#define PIXEL20_C *(q+nextlineDst2) = w5; -#define PIXEL21_1 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[8]); -#define PIXEL21_3 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,7,1>(w[5], w[8]); -#define PIXEL21_6 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[8], w[5]); -#define PIXEL21_C *(q+1+nextlineDst2) = w[5]; +#define PIXEL21_1 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w8); +#define PIXEL21_3 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,7,1>(w5, w8); +#define PIXEL21_6 *(q+1+nextlineDst2) = 
interpolate16_2<bitFormat,3,1>(w8, w5); +#define PIXEL21_C *(q+1+nextlineDst2) = w5; -#define PIXEL22_1M *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[9]); -#define PIXEL22_1D *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[8]); -#define PIXEL22_1R *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[6]); -#define PIXEL22_2 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w[5], w[6], w[8]); -#define PIXEL22_4 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w[5], w[6], w[8]); -#define PIXEL22_5 *(q+2+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w[6], w[8]); -#define PIXEL22_C *(q+2+nextlineDst2) = w[5]; +#define PIXEL22_1M *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w9); +#define PIXEL22_1D *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w8); +#define PIXEL22_1R *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w6); +#define PIXEL22_2 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w5, w6, w8); +#define PIXEL22_4 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w5, w6, w8); +#define PIXEL22_5 *(q+2+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w6, w8); +#define PIXEL22_C *(q+2+nextlineDst2) = w5; -#define YUV(x) RGBtoYUV[w[x]] +#define YUV(x) RGBtoYUV[w ## x] /** * The HQ3x high quality 3x graphics filter. 
@@ -86,8 +86,9 @@ */ template<int bitFormat> void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - int w[10]; - +// int w[10]; + register int w1, w2, w3, w4, w5, w6, w7, w8, w9; + const uint32 nextlineSrc = srcPitch / sizeof(uint16); const uint16 *p = (const uint16 *)srcPtr; @@ -120,41 +121,41 @@ void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, // +----+----+----+ while (height--) { - w[2] = *(p - 1 - nextlineSrc); - w[5] = *(p - 1); - w[8] = *(p - 1 + nextlineSrc); + w2 = *(p - 1 - nextlineSrc); + w5 = *(p - 1); + w8 = *(p - 1 + nextlineSrc); - w[3] = *(p - nextlineSrc); - w[6] = *(p); - w[9] = *(p + nextlineSrc); + w3 = *(p - nextlineSrc); + w6 = *(p); + w9 = *(p + nextlineSrc); int tmpWidth = width; while (tmpWidth--) { p++; - w[1] = w[2]; - w[4] = w[5]; - w[7] = w[8]; + w1 = w2; + w4 = w5; + w7 = w8; - w[2] = w[3]; - w[5] = w[6]; - w[8] = w[9]; + w2 = w3; + w5 = w6; + w8 = w9; - w[3] = *(p - nextlineSrc); - w[6] = *(p); - w[9] = *(p + nextlineSrc); + w3 = *(p - nextlineSrc); + w6 = *(p); + w9 = *(p + nextlineSrc); int pattern = 0; const int yuv5 = YUV(5); - if (w[5] != w[1] && diffYUV(yuv5, YUV(1))) pattern |= 0x0001; - if (w[5] != w[2] && diffYUV(yuv5, YUV(2))) pattern |= 0x0002; - if (w[5] != w[3] && diffYUV(yuv5, YUV(3))) pattern |= 0x0004; - if (w[5] != w[4] && diffYUV(yuv5, YUV(4))) pattern |= 0x0008; - if (w[5] != w[6] && diffYUV(yuv5, YUV(6))) pattern |= 0x0010; - if (w[5] != w[7] && diffYUV(yuv5, YUV(7))) pattern |= 0x0020; - if (w[5] != w[8] && diffYUV(yuv5, YUV(8))) pattern |= 0x0040; - if (w[5] != w[9] && diffYUV(yuv5, YUV(9))) pattern |= 0x0080; + if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001; + if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002; + if (w5 != w3 && diffYUV(yuv5, YUV(3))) pattern |= 0x0004; + if (w5 != w4 && diffYUV(yuv5, YUV(4))) pattern |= 0x0008; + if (w5 != w6 && diffYUV(yuv5, YUV(6))) pattern |= 0x0010; + if (w5 != w7 && 
diffYUV(yuv5, YUV(7))) pattern |= 0x0020; + if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040; + if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080; switch (pattern) { case 0: |