aboutsummaryrefslogtreecommitdiff
path: root/common/scaler/hq3x.cpp
diff options
context:
space:
mode:
author: Max Horn, 2003-10-01 16:39:31 +0000
committer: Max Horn, 2003-10-01 16:39:31 +0000
commit: b6f85de558e57bcae7932329523ed8478dd83760 (patch)
tree: dc10e775e24700ce6f3f48ca510f94147f115503 /common/scaler/hq3x.cpp
parent: b75a9256409f6067798ff1657df3dc42682770ca (diff)
download: scummvm-rg350-b6f85de558e57bcae7932329523ed8478dd83760.tar.gz
scummvm-rg350-b6f85de558e57bcae7932329523ed8478dd83760.tar.bz2
scummvm-rg350-b6f85de558e57bcae7932329523ed8478dd83760.zip
Instead of using a stack array, use regular variables (marked with the register keyword as a hint to the optimizer) for the color data. At least over here this helps the optimizer *A LOT*: instead of keeping w[] on the stack, on the PowerPC all values stay in registers, for a nice speed boost. It shouldn't hurt x86 either.
svn-id: r10522
Diffstat (limited to 'common/scaler/hq3x.cpp')
-rw-r--r-- common/scaler/hq3x.cpp 143
1 file changed, 72 insertions, 71 deletions
diff --git a/common/scaler/hq3x.cpp b/common/scaler/hq3x.cpp
index 864b8c31cf..39be5e4f98 100644
--- a/common/scaler/hq3x.cpp
+++ b/common/scaler/hq3x.cpp
@@ -23,61 +23,61 @@
#include "common/scaler/intern.h"
-#define PIXEL00_1M *(q) = interpolate16_2<bitFormat,3,1>(w[5], w[1]);
-#define PIXEL00_1U *(q) = interpolate16_2<bitFormat,3,1>(w[5], w[2]);
-#define PIXEL00_1L *(q) = interpolate16_2<bitFormat,3,1>(w[5], w[4]);
-#define PIXEL00_2 *(q) = interpolate16_3<bitFormat,2,1,1>(w[5], w[4], w[2]);
-#define PIXEL00_4 *(q) = interpolate16_3<bitFormat,2,7,7>(w[5], w[4], w[2]);
-#define PIXEL00_5 *(q) = interpolate16_2<bitFormat,1,1>(w[4], w[2]);
-#define PIXEL00_C *(q) = w[5];
+#define PIXEL00_1M *(q) = interpolate16_2<bitFormat,3,1>(w5, w1);
+#define PIXEL00_1U *(q) = interpolate16_2<bitFormat,3,1>(w5, w2);
+#define PIXEL00_1L *(q) = interpolate16_2<bitFormat,3,1>(w5, w4);
+#define PIXEL00_2 *(q) = interpolate16_3<bitFormat,2,1,1>(w5, w4, w2);
+#define PIXEL00_4 *(q) = interpolate16_3<bitFormat,2,7,7>(w5, w4, w2);
+#define PIXEL00_5 *(q) = interpolate16_2<bitFormat,1,1>(w4, w2);
+#define PIXEL00_C *(q) = w5;
-#define PIXEL01_1 *(q+1) = interpolate16_2<bitFormat,3,1>(w[5], w[2]);
-#define PIXEL01_3 *(q+1) = interpolate16_2<bitFormat,7,1>(w[5], w[2]);
-#define PIXEL01_6 *(q+1) = interpolate16_2<bitFormat,3,1>(w[2], w[5]);
-#define PIXEL01_C *(q+1) = w[5];
+#define PIXEL01_1 *(q+1) = interpolate16_2<bitFormat,3,1>(w5, w2);
+#define PIXEL01_3 *(q+1) = interpolate16_2<bitFormat,7,1>(w5, w2);
+#define PIXEL01_6 *(q+1) = interpolate16_2<bitFormat,3,1>(w2, w5);
+#define PIXEL01_C *(q+1) = w5;
-#define PIXEL02_1M *(q+2) = interpolate16_2<bitFormat,3,1>(w[5], w[3]);
-#define PIXEL02_1U *(q+2) = interpolate16_2<bitFormat,3,1>(w[5], w[2]);
-#define PIXEL02_1R *(q+2) = interpolate16_2<bitFormat,3,1>(w[5], w[6]);
-#define PIXEL02_2 *(q+2) = interpolate16_3<bitFormat,2,1,1>(w[5], w[2], w[6]);
-#define PIXEL02_4 *(q+2) = interpolate16_3<bitFormat,2,7,7>(w[5], w[2], w[6]);
-#define PIXEL02_5 *(q+2) = interpolate16_2<bitFormat,1,1>(w[2], w[6]);
-#define PIXEL02_C *(q+2) = w[5];
+#define PIXEL02_1M *(q+2) = interpolate16_2<bitFormat,3,1>(w5, w3);
+#define PIXEL02_1U *(q+2) = interpolate16_2<bitFormat,3,1>(w5, w2);
+#define PIXEL02_1R *(q+2) = interpolate16_2<bitFormat,3,1>(w5, w6);
+#define PIXEL02_2 *(q+2) = interpolate16_3<bitFormat,2,1,1>(w5, w2, w6);
+#define PIXEL02_4 *(q+2) = interpolate16_3<bitFormat,2,7,7>(w5, w2, w6);
+#define PIXEL02_5 *(q+2) = interpolate16_2<bitFormat,1,1>(w2, w6);
+#define PIXEL02_C *(q+2) = w5;
-#define PIXEL10_1 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[4]);
-#define PIXEL10_3 *(q+nextlineDst) = interpolate16_2<bitFormat,7,1>(w[5], w[4]);
-#define PIXEL10_6 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[4], w[5]);
-#define PIXEL10_C *(q+nextlineDst) = w[5];
+#define PIXEL10_1 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w4);
+#define PIXEL10_3 *(q+nextlineDst) = interpolate16_2<bitFormat,7,1>(w5, w4);
+#define PIXEL10_6 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w4, w5);
+#define PIXEL10_C *(q+nextlineDst) = w5;
-#define PIXEL11 *(q+1+nextlineDst) = w[5];
+#define PIXEL11 *(q+1+nextlineDst) = w5;
-#define PIXEL12_1 *(q+2+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[5], w[6]);
-#define PIXEL12_3 *(q+2+nextlineDst) = interpolate16_2<bitFormat,7,1>(w[5], w[6]);
-#define PIXEL12_6 *(q+2+nextlineDst) = interpolate16_2<bitFormat,3,1>(w[6], w[5]);
-#define PIXEL12_C *(q+2+nextlineDst) = w[5];
+#define PIXEL12_1 *(q+2+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w6);
+#define PIXEL12_3 *(q+2+nextlineDst) = interpolate16_2<bitFormat,7,1>(w5, w6);
+#define PIXEL12_6 *(q+2+nextlineDst) = interpolate16_2<bitFormat,3,1>(w6, w5);
+#define PIXEL12_C *(q+2+nextlineDst) = w5;
-#define PIXEL20_1M *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[7]);
-#define PIXEL20_1D *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[8]);
-#define PIXEL20_1L *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[4]);
-#define PIXEL20_2 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w[5], w[8], w[4]);
-#define PIXEL20_4 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w[5], w[8], w[4]);
-#define PIXEL20_5 *(q+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w[8], w[4]);
-#define PIXEL20_C *(q+nextlineDst2) = w[5];
+#define PIXEL20_1M *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w7);
+#define PIXEL20_1D *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w8);
+#define PIXEL20_1L *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w4);
+#define PIXEL20_2 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w5, w8, w4);
+#define PIXEL20_4 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w5, w8, w4);
+#define PIXEL20_5 *(q+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w8, w4);
+#define PIXEL20_C *(q+nextlineDst2) = w5;
-#define PIXEL21_1 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[8]);
-#define PIXEL21_3 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,7,1>(w[5], w[8]);
-#define PIXEL21_6 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[8], w[5]);
-#define PIXEL21_C *(q+1+nextlineDst2) = w[5];
+#define PIXEL21_1 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w8);
+#define PIXEL21_3 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,7,1>(w5, w8);
+#define PIXEL21_6 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w8, w5);
+#define PIXEL21_C *(q+1+nextlineDst2) = w5;
-#define PIXEL22_1M *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[9]);
-#define PIXEL22_1D *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[8]);
-#define PIXEL22_1R *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w[5], w[6]);
-#define PIXEL22_2 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w[5], w[6], w[8]);
-#define PIXEL22_4 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w[5], w[6], w[8]);
-#define PIXEL22_5 *(q+2+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w[6], w[8]);
-#define PIXEL22_C *(q+2+nextlineDst2) = w[5];
+#define PIXEL22_1M *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w9);
+#define PIXEL22_1D *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w8);
+#define PIXEL22_1R *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w6);
+#define PIXEL22_2 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w5, w6, w8);
+#define PIXEL22_4 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w5, w6, w8);
+#define PIXEL22_5 *(q+2+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w6, w8);
+#define PIXEL22_C *(q+2+nextlineDst2) = w5;
-#define YUV(x) RGBtoYUV[w[x]]
+#define YUV(x) RGBtoYUV[w ## x]
/**
* The HQ3x high quality 3x graphics filter.
@@ -86,8 +86,9 @@
*/
template<int bitFormat>
void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
- int w[10];
-
+// int w[10];
+ register int w1, w2, w3, w4, w5, w6, w7, w8, w9;
+
const uint32 nextlineSrc = srcPitch / sizeof(uint16);
const uint16 *p = (const uint16 *)srcPtr;
@@ -120,41 +121,41 @@ void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
// +----+----+----+
while (height--) {
- w[2] = *(p - 1 - nextlineSrc);
- w[5] = *(p - 1);
- w[8] = *(p - 1 + nextlineSrc);
+ w2 = *(p - 1 - nextlineSrc);
+ w5 = *(p - 1);
+ w8 = *(p - 1 + nextlineSrc);
- w[3] = *(p - nextlineSrc);
- w[6] = *(p);
- w[9] = *(p + nextlineSrc);
+ w3 = *(p - nextlineSrc);
+ w6 = *(p);
+ w9 = *(p + nextlineSrc);
int tmpWidth = width;
while (tmpWidth--) {
p++;
- w[1] = w[2];
- w[4] = w[5];
- w[7] = w[8];
+ w1 = w2;
+ w4 = w5;
+ w7 = w8;
- w[2] = w[3];
- w[5] = w[6];
- w[8] = w[9];
+ w2 = w3;
+ w5 = w6;
+ w8 = w9;
- w[3] = *(p - nextlineSrc);
- w[6] = *(p);
- w[9] = *(p + nextlineSrc);
+ w3 = *(p - nextlineSrc);
+ w6 = *(p);
+ w9 = *(p + nextlineSrc);
int pattern = 0;
const int yuv5 = YUV(5);
- if (w[5] != w[1] && diffYUV(yuv5, YUV(1))) pattern |= 0x0001;
- if (w[5] != w[2] && diffYUV(yuv5, YUV(2))) pattern |= 0x0002;
- if (w[5] != w[3] && diffYUV(yuv5, YUV(3))) pattern |= 0x0004;
- if (w[5] != w[4] && diffYUV(yuv5, YUV(4))) pattern |= 0x0008;
- if (w[5] != w[6] && diffYUV(yuv5, YUV(6))) pattern |= 0x0010;
- if (w[5] != w[7] && diffYUV(yuv5, YUV(7))) pattern |= 0x0020;
- if (w[5] != w[8] && diffYUV(yuv5, YUV(8))) pattern |= 0x0040;
- if (w[5] != w[9] && diffYUV(yuv5, YUV(9))) pattern |= 0x0080;
+ if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001;
+ if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002;
+ if (w5 != w3 && diffYUV(yuv5, YUV(3))) pattern |= 0x0004;
+ if (w5 != w4 && diffYUV(yuv5, YUV(4))) pattern |= 0x0008;
+ if (w5 != w6 && diffYUV(yuv5, YUV(6))) pattern |= 0x0010;
+ if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020;
+ if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040;
+ if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080;
switch (pattern) {
case 0: