aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKostas Nakos2009-02-14 19:42:18 +0000
committerKostas Nakos2009-02-14 19:42:18 +0000
commit493eb0c5a43bf2ab7db4ae578cd8e768441cbdc3 (patch)
tree3500ee49a0490b3a276b40fee2caa6cef25d4d35
parentc228259fff347edafd19d8222593847ac23ef4c0 (diff)
downloadscummvm-rg350-493eb0c5a43bf2ab7db4ae578cd8e768441cbdc3.tar.gz
scummvm-rg350-493eb0c5a43bf2ab7db4ae578cd8e768441cbdc3.tar.bz2
scummvm-rg350-493eb0c5a43bf2ab7db4ae578cd8e768441cbdc3.zip
apply patch by Fingolfin to optimize scalers + more
svn-id: r36338
-rw-r--r--backends/platform/wince/CEScaler.cpp151
-rw-r--r--backends/platform/wince/CEScaler.h2
-rw-r--r--backends/platform/wince/wince-sdl.cpp1
3 files changed, 68 insertions, 86 deletions
diff --git a/backends/platform/wince/CEScaler.cpp b/backends/platform/wince/CEScaler.cpp
index 839ac26a17..4cc675e006 100644
--- a/backends/platform/wince/CEScaler.cpp
+++ b/backends/platform/wince/CEScaler.cpp
@@ -25,40 +25,8 @@
#include "graphics/scaler/intern.h"
#include "CEScaler.h"
-int redblueMasks[] = { 0x7C1F, 0xF81F };
-int greenMasks[] = { 0x03E0, 0x07E0 };
-
-static int maskUsed;
-
-void initCEScaler(void) {
- if (gBitFormat == 555)
- maskUsed = 0;
- else
- maskUsed = 1;
-}
-
-// FIXME: Fingolfin says: The following interpolation code is a lot slower than it needs
-// to be. The reason: Using the value of a global variable to index two global arrays is
-// extremly difficult if not impossible for the compiler to optimize. At the very least,
-// the two arrays should be 'static const', but even then, memory access is required.
-// To avoid this, one could use the techniques used by our other scalers. See also the
-// interpolate functions in graphics/scaler/intern.h.
-// Even if those can't be used directly for some reasons (e.g. the compiler has problems
-// with templates), then still the *techniques* could and should be used. I would exepct
-// that this way, even the C version of PocketPCPortrait() should get a big speed boost.
-
-static inline uint16 CEinterpolate16_4(uint16 p1, uint16 p2, uint16 p3, uint16 p4)
-{
- return ((((p1 & redblueMasks[maskUsed]) + (p2 & redblueMasks[maskUsed]) + (p3 & redblueMasks[maskUsed]) + (p4 & redblueMasks[maskUsed])) / 4) & redblueMasks[maskUsed]) |
- ((((p1 & greenMasks[maskUsed]) + (p2 & greenMasks[maskUsed]) + (p3 & greenMasks[maskUsed]) + (p4 & greenMasks[maskUsed])) / 4) & greenMasks[maskUsed]);
-}
-
-static inline uint16 CEinterpolate16_2(uint16 p1, int w1, uint16 p2, int w2) {
- return ((((p1 & redblueMasks[maskUsed]) * w1 + (p2 & redblueMasks[maskUsed]) * w2) / (w1 + w2)) & redblueMasks[maskUsed]) |
- ((((p1 & greenMasks[maskUsed]) * w1 + (p2 & greenMasks[maskUsed]) * w2) / (w1 + w2)) & greenMasks[maskUsed]);
-}
-
-void PocketPCPortrait(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+template<int bitFormat>
+void PocketPCPortraitTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
uint8 *work;
int i;
@@ -73,9 +41,9 @@ void PocketPCPortrait(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint3
uint16 color3 = *(((const uint16 *)srcPtr) + (i + 2));
uint16 color4 = *(((const uint16 *)srcPtr) + (i + 3));
- *(((uint16 *)work) + 0) = CEinterpolate16_2(color1, 3, color2, 1);
- *(((uint16 *)work) + 1) = CEinterpolate16_2(color2, 1, color3, 1);
- *(((uint16 *)work) + 2) = CEinterpolate16_2(color3, 1, color4, 3);
+ *(((uint16 *)work) + 0) = interpolate32_3_1<bitFormat>(color1, color2);
+ *(((uint16 *)work) + 1) = interpolate32_1_1<bitFormat>(color2, color3);
+ *(((uint16 *)work) + 2) = interpolate32_3_1<bitFormat>(color4, color3);
work += 3 * sizeof(uint16);
}
@@ -83,61 +51,66 @@ void PocketPCPortrait(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint3
dstPtr += dstPitch;
}
}
+MAKE_WRAPPER(PocketPCPortrait)
-// FIXME: Fingolfin says: Please document this function. What does it compute? How
-// does it differ from the code in aspect.cpp ? It would be nice to speed up this function
-// here using the ideas and tracks from aspect.cpp and the comment above, as right now, it
-// is rather hard for the compiler to optimize this code properly.
+// Our version of an aspect scaler: every 5 source rows are stretched to 6
+// destination rows. The main difference is the out-of-place operation, which omits the straight
+// blit step the SDL backend does. Also, tests show unaligned access errors with the stock aspect scaler.
void PocketPCLandscapeAspect(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-#define RB(x) ((x & redblueMasks[maskUsed])<<8)
-#define G(x) ((x & greenMasks[maskUsed])<<3)
+ const int redblueMasks[] = { 0x7C1F, 0xF81F };
+ const int greenMasks[] = { 0x03E0, 0x07E0 };
+ const int RBM = redblueMasks[gBitFormat == 565];
+ const int GM = greenMasks[gBitFormat == 565];
+
+ int i,j;
+ unsigned int p1, p2;
+ uint8 *inbuf, *outbuf, *instart, *outstart;
+
+#define RB(x) ((x & RBM)<<8)
+#define G(x) ((x & GM)<<3)
#define P20(x) (((x)>>2)-((x)>>4))
#define P40(x) (((x)>>1)-((x)>>3))
#define P60(x) (((x)>>1)+((x)>>3))
#define P80(x) (((x)>>1)+((x)>>2)+((x)>>4))
-#define MAKEPIXEL(rb,g) ((((rb)>>8) & redblueMasks[maskUsed] | ((g)>>3) & greenMasks[maskUsed]))
+#define MAKEPIXEL(rb,g) ((((rb)>>8) & RBM | ((g)>>3) & GM))
- int i,j;
- unsigned int p1;
- unsigned int p2;
- uint16 * inbuf;
- uint16 * outbuf;
- inbuf = (uint16 *)srcPtr;
- outbuf = (uint16 *)dstPtr;
-
- uint16 srcPitch16 = (uint16)(srcPitch / sizeof(uint16));
- uint16 dstPitch16 = (uint16)(dstPitch / sizeof(uint16));
+ inbuf = (uint8 *)srcPtr;
+ outbuf = (uint8 *)dstPtr;
+ height /= 5;
- for (i = 0; i < height/5; i++) {
+ for (i = 0; i < height; i++) {
+ instart = inbuf;
+ outstart = outbuf;
for (j=0; j < width; j++) {
- p1 = *((uint16*)inbuf+j); inbuf += srcPitch16;
- *((uint16*)outbuf+j) = p1; outbuf += dstPitch16;
- p2 = *((uint16*)inbuf+j); inbuf += srcPitch16;
- *((uint16*)outbuf+j) = MAKEPIXEL(P20(RB(p1))+P80(RB(p2)),P20(G(p1))+P80(G(p2))); outbuf += dstPitch16;
+ p1 = *(uint16*)inbuf; inbuf += srcPitch;
+ *(uint16*)outbuf = p1; outbuf += dstPitch;
+
+ p2 = *(uint16*)inbuf; inbuf += srcPitch;
+ *(uint16*)outbuf = MAKEPIXEL(P20(RB(p1))+P80(RB(p2)),P20(G(p1))+P80(G(p2))); outbuf += dstPitch;
p1 = p2;
- p2 = *((uint16*)inbuf+j); inbuf += srcPitch16;
- *((uint16*)outbuf+j) = MAKEPIXEL(P40(RB(p1))+P60(RB(p2)),P40(G(p1))+P60(G(p2))); outbuf += dstPitch16;
+ p2 = *(uint16*)inbuf; inbuf += srcPitch;
+ *(uint16*)outbuf = MAKEPIXEL(P40(RB(p1))+P60(RB(p2)),P40(G(p1))+P60(G(p2))); outbuf += dstPitch;
p1 = p2;
- p2 = *((uint16*)inbuf+j); inbuf += srcPitch16;
- *((uint16*)outbuf+j) = MAKEPIXEL(P60(RB(p1))+P40(RB(p2)),P60(G(p1))+P40(G(p2))); outbuf += dstPitch16;
+ p2 = *(uint16*)inbuf; inbuf += srcPitch;
+ *(uint16*)outbuf = MAKEPIXEL(P60(RB(p1))+P40(RB(p2)),P60(G(p1))+P40(G(p2))); outbuf += dstPitch;
p1 = p2;
- p2 = *((uint16*)inbuf+j);
- *((uint16*)outbuf+j) = MAKEPIXEL(P80(RB(p1))+P20(RB(p2)),P80(G(p1))+P20(G(p2))); outbuf += dstPitch16;
+ p2 = *(uint16*)inbuf;
+ *(uint16*)outbuf = MAKEPIXEL(P80(RB(p1))+P20(RB(p2)),P80(G(p1))+P20(G(p2))); outbuf += dstPitch;
- *((uint16*)outbuf+j) = p2;
+ *(uint16*)outbuf = p2;
- inbuf = inbuf - srcPitch16*4;
- outbuf = outbuf - dstPitch16*5;
+ inbuf = inbuf - srcPitch*4 + sizeof(uint16);
+ outbuf = outbuf - dstPitch*5 + sizeof(uint16);
}
- inbuf = inbuf + srcPitch16*5;
- outbuf = outbuf + dstPitch16*6;
+ inbuf = instart + srcPitch*5;
+ outbuf = outstart + dstPitch*6;
}
}
@@ -150,10 +123,8 @@ extern "C" {
}
#endif
-void PocketPCHalf(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-#ifdef ARM
- PocketPCHalfARM(srcPtr, srcPitch, dstPtr, dstPitch, width, height, redbluegreenMasks[maskUsed],roundingconstants[maskUsed]);
-#else
+template<int bitFormat>
+void PocketPCHalfTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
uint8 *work;
int i;
uint16 srcPitch16 = (uint16)(srcPitch / sizeof(uint16));
@@ -168,18 +139,29 @@ void PocketPCHalf(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 ds
uint16 color2 = *(((const uint16 *)srcPtr) + (i + 1));
uint16 color3 = *(((const uint16 *)srcPtr) + (i + srcPitch16));
uint16 color4 = *(((const uint16 *)srcPtr) + (i + srcPitch16 + 1));
- *(((uint16 *)work) + 0) = CEinterpolate16_4(color1, color2, color3, color4);
+ *(((uint16 *)work) + 0) = interpolate16_1_1_1_1<bitFormat>(color1, color2, color3, color4);
work += sizeof(uint16);
}
srcPtr += 2 * srcPitch;
dstPtr += dstPitch;
}
-#endif
}
+// Entry point for the half-size scaler. Dispatches on the global gBitFormat:
+// the ARM build calls PocketPCHalfARM with the masks/rounding constants for the
+// active format, other builds call the matching PocketPCHalfTemplate instance.
+void PocketPCHalf(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+#ifdef ARM
+ int maskUsed = (gBitFormat == 565);
+ PocketPCHalfARM(srcPtr, srcPitch, dstPtr, dstPitch, width, height, redbluegreenMasks[maskUsed],roundingconstants[maskUsed]);
+#else
+ if (gBitFormat == 565)
+ PocketPCHalfTemplate<565>(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+ else
+ // Any non-565 format is treated as 555 (same convention as maskUsed above).
+ PocketPCHalfTemplate<555>(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+#endif
+}
-void PocketPCHalfZoom(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+template<int bitFormat>
+void PocketPCHalfZoomTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
uint8 *work;
int i;
uint16 srcPitch16 = (uint16)(srcPitch / sizeof(uint16));
@@ -191,10 +173,10 @@ void PocketPCHalfZoom(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint3
i = 0;
work = dstPtr;
- for (int i=0; i<width; i+=2) {
+ for (int i = 0; i < width; i += 2) {
uint16 color1 = *(((const uint16 *)srcPtr) + i);
uint16 color2 = *(((const uint16 *)srcPtr) + (i + 1));
- *(((uint16 *)work) + 0) = CEinterpolate16_2(color1, 1, color2, 1);
+ *(((uint16 *)work) + 0) = interpolate32_1_1<bitFormat>(color1, color2);
work += sizeof(uint16);
}
@@ -202,8 +184,10 @@ void PocketPCHalfZoom(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint3
dstPtr += dstPitch;
}
}
+MAKE_WRAPPER(PocketPCHalfZoom)
-void SmartphoneLandscape(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+template<int bitFormat>
+void SmartphoneLandscapeTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
uint8 *work;
int i;
int line = 0;
@@ -212,14 +196,14 @@ void SmartphoneLandscape(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, ui
i = 0;
work = dstPtr;
- for (int i=0; i<width; i+=3) {
+ for (int i = 0; i < width; i += 3) {
// Filter 2/3
uint16 color1 = *(((const uint16 *)srcPtr) + i);
uint16 color2 = *(((const uint16 *)srcPtr) + (i + 1));
uint16 color3 = *(((const uint16 *)srcPtr) + (i + 2));
- *(((uint16 *)work) + 0) = CEinterpolate16_2(color1, 3, color2, 1);
- *(((uint16 *)work) + 1) = CEinterpolate16_2(color2, 1, color3, 1);
+ *(((uint16 *)work) + 0) = interpolate32_3_1<bitFormat>(color1, color2);
+ *(((uint16 *)work) + 1) = interpolate32_3_1<bitFormat>(color3, color2);
work += 2 * sizeof(uint16);
}
@@ -233,3 +217,4 @@ void SmartphoneLandscape(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, ui
}
}
}
+MAKE_WRAPPER(SmartphoneLandscape)
diff --git a/backends/platform/wince/CEScaler.h b/backends/platform/wince/CEScaler.h
index 7a8705a221..157ec98c63 100644
--- a/backends/platform/wince/CEScaler.h
+++ b/backends/platform/wince/CEScaler.h
@@ -39,6 +39,4 @@ DECLARE_SCALER(PocketPCHalfZoom);
DECLARE_SCALER(SmartphoneLandscape);
//#endif
-void initCEScaler(void);
-
#endif
diff --git a/backends/platform/wince/wince-sdl.cpp b/backends/platform/wince/wince-sdl.cpp
index fb7285439e..f8390290db 100644
--- a/backends/platform/wince/wince-sdl.cpp
+++ b/backends/platform/wince/wince-sdl.cpp
@@ -1399,7 +1399,6 @@ bool OSystem_WINCE3::loadGFXMode() {
InitScalers(555);
else
InitScalers(565);
- initCEScaler();
_overlayFormat.bytesPerPixel = _hwscreen->format->BytesPerPixel;
_overlayFormat.rLoss = _hwscreen->format->Rloss;
_overlayFormat.gLoss = _hwscreen->format->Gloss;