diff options
Diffstat (limited to 'engines/scumm/gfx.cpp')
-rw-r--r-- | engines/scumm/gfx.cpp | 70 |
1 files changed, 35 insertions, 35 deletions
diff --git a/engines/scumm/gfx.cpp b/engines/scumm/gfx.cpp index 6c8d24d25a..4e8c22479d 100644 --- a/engines/scumm/gfx.cpp +++ b/engines/scumm/gfx.cpp @@ -22,7 +22,6 @@ * */ - #include "common/system.h" #include "scumm/scumm.h" #include "scumm/actor.h" @@ -46,7 +45,7 @@ namespace Scumm { static void blit(byte *dst, int dstPitch, const byte *src, int srcPitch, int w, int h); static void fill(byte *dst, int dstPitch, byte color, int w, int h); -#ifndef ARM_USE_GFX_ASM +#ifndef USE_ARM_GFX_ASM static void copy8Col(byte *dst, int dstPitch, const byte *src, int height); #endif static void clear8Col(byte *dst, int dstPitch, int height); @@ -577,15 +576,12 @@ void ScummEngine::drawStripToScreen(VirtScreen *vs, int x, int width, int top, i const byte *src = vs->getPixels(x, top); int m = _textSurfaceMultiplier; - byte *dst; int vsPitch; int pitch = vs->pitch; if (_useCJKMode && _textSurfaceMultiplier == 2) { - dst = _fmtownsBuf; - - scale2x(dst, _screenWidth * m, src, vs->pitch, width, height); - src = dst; + scale2x(_fmtownsBuf, _screenWidth * m, src, vs->pitch, width, height); + src = _fmtownsBuf; vsPitch = _screenWidth * m - width * m; @@ -593,7 +589,6 @@ void ScummEngine::drawStripToScreen(VirtScreen *vs, int x, int width, int top, i vsPitch = vs->pitch - width; } - dst = _compositeBuf; if (_game.version < 7) { // For The Dig, FT and COMI, we just blit everything to the screen at once. @@ -612,32 +607,37 @@ void ScummEngine::drawStripToScreen(VirtScreen *vs, int x, int width, int top, i assert(0 == (width & 3)); // Compose the text over the game graphics - - // TODO: Optimize this code. There are several things that come immediately to mind: - // (1) Loop unrolling: We could read 4 or even 8 pixels at once, since everything is - // a multiple of 8 here. - // (2) More ASM versions (in particular, the ARM code for the NDS could be used on - // all ARM systems, couldn't it?) - // (3) Better encoding of the text surface data. This is the one with the biggest - // potential. - // (a) Keep an "isEmpty" marker for each pixel row in the _textSurface. The idea - // is that most rows won't contain any text data, so we can just use memcpy. - // (b) RLE encode the _textSurface row-wise. This is an improved variant of (a), - // but also more complicated to implement, and incurs a bigger overhead when - // writing to the text surface. -#ifdef ARM_USE_GFX_ASM - asmDrawStripToScreen(height, width, text, src, dst, vs->pitch, width, _textSurface.pitch); +#ifdef USE_ARM_GFX_ASM + asmDrawStripToScreen(height, width, text, src, _compositeBuf, vs->pitch, width, _textSurface.pitch); #else - for (int h = 0; h < height * m; ++h) { - for (int w = 0; w < width * m; ++w) { - byte tmp = *text++; - if (tmp == CHARSET_MASK_TRANSPARENCY) - tmp = *src; - *dst++ = tmp; - src++; + // We blit four pixels at a time, for improved performance. + const uint32 *src32 = (const uint32 *)src; + const uint32 *text32 = (const uint32 *)text; + uint32 *dst32 = (uint32 *)_compositeBuf; + + vsPitch >>= 2; + const int textPitch = (_textSurface.pitch - width * m) >> 2; + for (int h = height * m; h > 0; --h) { + for (int w = width*m; w > 0; w-=4) { + uint32 temp = *text32++; + + // Generate a byte mask for those text pixels (bytes) with + // value CHARSET_MASK_TRANSPARENCY. In the end, each byte + // in mask will be either equal to 0x00 or 0xFF. + // Doing it this way avoids branches and bytewise operations, + // at the cost of readability ;). + uint32 mask = temp ^ CHARSET_MASK_TRANSPARENCY_32; + mask = (((mask & 0x7f7f7f7f) + 0x7f7f7f7f) | mask) & 0x80808080; + mask = ((mask >> 7) + 0x7f7f7f7f) ^ 0x80808080; + + // The following line is equivalent to this code: + // *dst32++ = (*src32++ & mask) | (temp & ~mask); + // However, some compilers can generate somewhat better + // machine code for this equivalent statement: + *dst32++ = ((temp ^ *src32++) & mask) ^ temp; } - src += vsPitch; - text += _textSurface.pitch - width * m; + src32 += vsPitch; + text32 += textPitch; } #endif src = _compositeBuf; @@ -669,7 +669,7 @@ void ScummEngine::drawStripToScreen(VirtScreen *vs, int x, int width, int top, i x += 16; while (x + width >= _screenWidth) width -= 16; - if (width < 0) + if (width <= 0) return; } @@ -1078,7 +1078,7 @@ static void fill(byte *dst, int dstPitch, byte color, int w, int h) { } } -#ifdef ARM_USE_GFX_ASM +#ifdef USE_ARM_GFX_ASM #define copy8Col(A,B,C,D) asmCopy8Col(A,B,C,D) @@ -1098,7 +1098,7 @@ static void copy8Col(byte *dst, int dstPitch, const byte *src, int height) { } while (--height); } -#endif /* ARM_USE_GFX_ASM */ +#endif /* USE_ARM_GFX_ASM */ static void clear8Col(byte *dst, int dstPitch, int height) { do { |