author     Max Horn    2003-07-19 11:35:01 +0000
committer  Max Horn    2003-07-19 11:35:01 +0000
commit     dcbd40267089add128a8394f74bb0ec179e10c8a (patch)
tree       96d811cbf315a3ca5346b6fe785b678a9687dc7a
parent     be4446f945218204f0bf653f05d8befe11d53299 (diff)
added TODOs; made switching between aspect modes a bit nicer; made scale a template parameter to help compiler optimizations
svn-id: r9074
-rw-r--r--  common/scaler.cpp  61
1 file changed, 44 insertions, 17 deletions
diff --git a/common/scaler.cpp b/common/scaler.cpp
index d9bbc0b61e..85b1f575c4 100644
--- a/common/scaler.cpp
+++ b/common/scaler.cpp
@@ -24,7 +24,14 @@
#include "scummsys.h"
#include "scaler.h"
-/********** 2XSAI Filter *****************/
+// TODO: get rid of the colorMask etc. variables and instead use templates.
+// This should give a respectable boost, since variable access (i.e. memory reads)
+// in the innermost loops of our operations would work with constant data instead.
+// That should help the inliner, reduce memory accesses, and thus improve cache
+// efficiency, etc. The drawback is that each scaler will exist twice, once for 555
+// and once for 565, resulting in the object file being twice as big (but thanks to
+// templates, no source code would be duplicated).
+
static uint32 colorMask = 0xF7DEF7DE;
static uint32 lowPixelMask = 0x08210821;
static uint32 qcolorMask = 0xE79CE79C;
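
The TODO added above suggests replacing these runtime mask variables with compile-time constants selected by a template parameter. A minimal sketch of that direction, assuming a hypothetical ColorMasks trait keyed on the pixel format; the trait name, the scaler name, and the 555 constants below are illustrative and not part of this commit:

// Sketch only: hypothetical ColorMasks trait, not from this commit.
template<int bitFormat>
struct ColorMasks;

template<>
struct ColorMasks<565> {
	enum {
		kColorMask    = 0xF7DEF7DE,	// drops the low bit of each RGB565 channel
		kLowPixelMask = 0x08210821
	};
};

template<>
struct ColorMasks<555> {
	enum {
		kColorMask    = 0x7BDE7BDE,	// drops the low bit of each RGB555 channel
		kLowPixelMask = 0x04210421
	};
};

// A scaler templated on the format reads the masks as compile-time constants,
// so the innermost loops need no memory loads for them:
template<int bitFormat>
static void SomeScalerTemplate(const uint8 *srcPtr, uint32 srcPitch,
                               uint8 *dstPtr, uint32 dstPitch, int width, int height) {
	const uint32 colorMask = ColorMasks<bitFormat>::kColorMask;
	// ... inner loops then use colorMask as an immediate operand ...
}

As the comment already notes, the trade-off is one instantiation per pixel format, i.e. a larger object file without any source duplication.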
@@ -649,8 +656,16 @@ void DotMatrix(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPi
}
}
+#define kVeryFastAndUglyAspectMode 0 // No interpolation at all, but super-fast
+#define kFastAndNiceAspectMode 1 // Quite good quality with good speed
+#define kSlowAndPerfectAspectMode 2 // Accurate but slow code
+
+#define ASPECT_MODE kFastAndNiceAspectMode
+
-static inline uint16 interpolate5(uint16 A, uint16 B, int scale) {
+#if ASPECT_MODE == kSlowAndPerfectAspectMode
+template<int scale>
+static inline uint16 interpolate5(uint16 A, uint16 B) {
uint16 r = (uint16)(((A & redMask) * scale + (B & redMask) * (5 - scale)) / 5);
uint16 g = (uint16)(((A & greenMask) * scale + (B & greenMask) * (5 - scale)) / 5);
uint16 b = (uint16)(((A & blueMask) * scale + (B & blueMask) * (5 - scale)) / 5);
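
For reference, interpolate5<scale> computes a per-channel weighted average, giving A a weight of scale/5 and B a weight of (5 - scale)/5. A worked example with illustrative values, assuming the global redMask/greenMask/blueMask are set up for RGB565 (0xF800, 0x07E0, 0x001F):

// Illustrative only: blending pure red and pure blue with scale = 2.
uint16 A = 0xF800;                 // red = 31, green = 0, blue = 0
uint16 B = 0x001F;                 // red = 0,  green = 0, blue = 31
uint16 C = interpolate5<2>(A, B);  // 2/5 of A plus 3/5 of B, per channel
// red:  (31 * 2 + 0 * 3) / 5 = 12
// blue: (0 * 2 + 31 * 3) / 5 = 18
// so C == (12 << 11) | 18 == 0x6012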
@@ -658,14 +673,30 @@ static inline uint16 interpolate5(uint16 A, uint16 B, int scale) {
return (uint16)((r & redMask) | (g & greenMask) | (b & blueMask));
}
-static inline void interpolate5Line(uint16 *dst, const uint16 *srcA, const uint16 *srcB, int scale, int width) {
-#if 0
+template<int scale>
+static inline void interpolate5Line(uint16 *dst, const uint16 *srcA, const uint16 *srcB, int width) {
// Accurate but slightly slower code
while (width--) {
- *dst++ = interpolate5(*srcA++, *srcB++, scale);
+ *dst++ = interpolate5<scale>(*srcA++, *srcB++);
}
-#else
- // Not fully accurate, but a bit faster
+}
+#endif
+
+#if ASPECT_MODE == kFastAndNiceAspectMode
+template<int scale>
+static inline void interpolate5Line(uint16 *dst, const uint16 *srcA, const uint16 *srcB, int width) {
+
+ // TODO: This code may not work correctly on architectures that require alignment.
+ // And even on those which accept it, reading/writing 32-bit words at odd memory
+ // locations usually carries a speed penalty. Hence, it might be wise to first check
+ // whether the dst address is odd, in which case we blit one pixel first; then we
+ // blit pixel pairs until we get to the end, at which point we may have to blit
+ // a single separate pixel again. This would of course cause additional overhead
+ // for each blitted line. Some of that overhead can be avoided by moving
+ // this logic into stretch200To240, since whether dst is at an odd position, and
+ // whether the last pixel has to be blitted separately or not, is identical for
+ // each blitted line.
+ //
if (width & 1) {
// For efficiency reasons we normally blit two pixels at a time; but if the
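
The TODO above describes an alignment-aware variant: emit one leading pixel if dst is misaligned, blend pixel pairs through the middle, then handle a possible trailing pixel. A rough sketch of that idea, not part of this commit; it assumes INTERPOLATE is the 32-bit pair blend already used by the fast path (i.e. the same approximation of the scale weights), and that srcA/srcB share dst's alignment because they point into the same buffer:

template<int scale>
static inline void interpolate5LineAligned(uint16 *dst, const uint16 *srcA,
                                           const uint16 *srcB, int width) {
	// Leading pixel: bring dst onto a 32-bit boundary.
	if (((unsigned long)dst & 3) && width > 0) {
		*dst++ = interpolate5<scale>(*srcA++, *srcB++);
		width--;
	}

	// Main loop: blend two 16-bit pixels per 32-bit word.
	uint32 *d = (uint32 *)dst;
	const uint32 *sA = (const uint32 *)srcA;
	const uint32 *sB = (const uint32 *)srcB;
	while (width >= 2) {
		*d++ = INTERPOLATE(*sA++, *sB++);
		width -= 2;
	}

	// Trailing pixel, if the remaining width is odd.
	if (width)
		*(uint16 *)d = interpolate5<scale>(*(const uint16 *)sA, *(const uint16 *)sB);
}

As the TODO notes, the odd/even decision could instead be made once in stretch200To240, since it is the same for every blitted line.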
@@ -693,8 +724,8 @@ static inline void interpolate5Line(uint16 *dst, const uint16 *srcA, const uint1
*d++ = INTERPOLATE(*sA++, *sB++);
}
}
-#endif
}
+#endif
void makeRectStretchable(int &x, int &y, int &w, int &h) {
int m = real2Aspect(y) % 6;
@@ -726,20 +757,16 @@ void makeRectStretchable(int &x, int &y, int &w, int &h) {
*/
int stretch200To240(uint8 *buf, uint32 pitch, int width, int height, int srcX, int srcY, int origSrcY) {
int maxDstY = real2Aspect(origSrcY + height - 1);
- int off = srcY - origSrcY;
int y;
-
+ const uint8 *startSrcPtr = buf + srcX * 2 + (srcY - origSrcY) * pitch;
uint8 *dstPtr = buf + srcX * 2 + maxDstY * pitch;
for (y = maxDstY; y >= srcY; y--) {
- uint8 *srcPtr = buf + srcX * 2 + (aspect2Real(y) + off) * pitch;
+ const uint8 *srcPtr = startSrcPtr + aspect2Real(y) * pitch;
-#if 0
- // Don't use bilinear filtering, rather just duplicate pixel lines:
- // a little bit faster, but looks ugly
+#if ASPECT_MODE == kVeryFastAndUglyAspectMode
if (srcPtr == dstPtr)
break;
-
memcpy(dstPtr, srcPtr, width * 2);
#else
// Bilinear filter
@@ -751,11 +778,11 @@ int stretch200To240(uint8 *buf, uint32 pitch, int width, int height, int srcX, i
break;
case 1:
case 4:
- interpolate5Line((uint16 *)dstPtr, (uint16 *)(srcPtr - pitch), (uint16 *)srcPtr, 1, width);
+ interpolate5Line<1>((uint16 *)dstPtr, (const uint16 *)(srcPtr - pitch), (const uint16 *)srcPtr, width);
break;
case 2:
case 3:
- interpolate5Line((uint16 *)dstPtr, (uint16 *)(srcPtr - pitch), (uint16 *)srcPtr, 2, width);
+ interpolate5Line<2>((uint16 *)dstPtr, (const uint16 *)(srcPtr - pitch), (const uint16 *)srcPtr, width);
break;
}
#endif
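
To summarize the bilinear path: the stretch is done in place, and each destination line is built from source lines at or above it in the buffer, which is why the loop walks y from maxDstY down to srcY, so no source line is overwritten before it has been read. The blend weights follow directly from the interpolate5Line calls above; the switch expression and the 0/5 cases sit outside the visible hunks, so those parts of the sketch below are assumptions (presumably the destination line index modulo 6, as in makeRectStretchable, with 0 and 5 copied unchanged):

// Illustrative summary only; "s" is the source line at srcPtr, i.e.
// aspect2Real(y) lines into the buffer, "prev" is the line above it
// (srcPtr - pitch).
//
//   selector 0 or 5 : line presumably copied verbatim (code not in this hunk)
//   selector 1 or 4 : 1/5 * prev + 4/5 * s   (interpolate5Line<1>)
//   selector 2 or 3 : 2/5 * prev + 3/5 * s   (interpolate5Line<2>)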