From 816bd9a7ea4f99aaae67c6e82f686b88dd8c728f Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Mon, 27 Jul 2009 16:29:36 +0000 Subject: Add ARM code version of Normal2x scaler. Add ARM only aspect ratio correcting version of Normal2x scaler. Make WinCE port use Normal2x by default if the screen is large enough. Make WinCE port use aspect ratio correcting version if panel is hidden. svn-id: r42843 --- backends/platform/wince/wince-sdl.cpp | 70 ++++++++++++-- graphics/module.mk | 3 +- graphics/scaler.cpp | 56 +++++++++++ graphics/scaler.h | 3 + graphics/scaler/Normal2xARM.s | 171 ++++++++++++++++++++++++++++++++++ 5 files changed, 294 insertions(+), 9 deletions(-) create mode 100755 graphics/scaler/Normal2xARM.s diff --git a/backends/platform/wince/wince-sdl.cpp b/backends/platform/wince/wince-sdl.cpp index 8b2e0848f8..47d9597f4f 100644 --- a/backends/platform/wince/wince-sdl.cpp +++ b/backends/platform/wince/wince-sdl.cpp @@ -1147,10 +1147,69 @@ void OSystem_WINCE3::setGraphicsModeIntern() { } bool OSystem_WINCE3::update_scalers() { + _videoMode.aspectRatioCorrection = false; + if (_videoMode.mode != GFX_NORMAL) return false; - _videoMode.aspectRatioCorrection = false; + /* If we're on a device with a large enough screen to accommodate a + * doubled screen, double the screen. 
*/ + if ((!_orientationLandscape) && + (_videoMode.screenWidth == 320 || !_videoMode.screenWidth) && + (getScreenWidth() >= 640) && + (getScreenHeight() >= 480)) + { +#ifdef USE_ARM_SCALER_ASM + if (!_panelVisible && !_overlayVisible && _canBeAspectScaled) + { + _scaleFactorXm = 2; + _scaleFactorXd = 1; + _scaleFactorYm = 12; + _scaleFactorYd = 5; + _scalerProc = Normal2xAspect; + _modeFlags = 0; + _videoMode.aspectRatioCorrection = true; + } + else +#endif + { + _scaleFactorXm = 2; + _scaleFactorXd = 1; + _scaleFactorYm = 2; + _scaleFactorYd = 1; + _scalerProc = Normal2x; + _modeFlags = 0; + } + return true; + } + if ((_orientationLandscape) && + (_videoMode.screenWidth == 320 || !_videoMode.screenWidth) && + (getScreenWidth() >= 480) && + (getScreenHeight() >= 640)) + { +#ifdef USE_ARM_SCALER_ASM + if (!_panelVisible && !_overlayVisible && _canBeAspectScaled) + { + _scaleFactorXm = 2; + _scaleFactorXd = 1; + _scaleFactorYm = 12; + _scaleFactorYd = 5; + _scalerProc = Normal2xAspect; + _modeFlags = 0; + _videoMode.aspectRatioCorrection = true; + } + else +#endif + { + _scaleFactorXm = 2; + _scaleFactorXd = 1; + _scaleFactorYm = 2; + _scaleFactorYd = 1; + _scalerProc = Normal2x; + _modeFlags = 0; + } + return true; + } if (CEDevice::hasPocketPCResolution()) { if ( (!_orientationLandscape && (_videoMode.screenWidth == 320 || !_videoMode.screenWidth)) @@ -1369,13 +1428,8 @@ bool OSystem_WINCE3::loadGFXMode() { // Create the surface that contains the scaled graphics in 16 bit mode // Always use full screen mode to have a "clean screen" - if (!_videoMode.aspectRatioCorrection) { - displayWidth = _videoMode.screenWidth * _scaleFactorXm / _scaleFactorXd; - displayHeight = _videoMode.screenHeight * _scaleFactorYm / _scaleFactorYd; - } else { - displayWidth = _videoMode.screenWidth; - displayHeight = _videoMode.screenHeight; - } + displayWidth = _videoMode.screenWidth * _scaleFactorXm / _scaleFactorXd; + displayHeight = _videoMode.screenHeight * _scaleFactorYm / 
_scaleFactorYd;
 
 	switch (_orientationLandscape) {
 	case 1:
diff --git a/graphics/module.mk b/graphics/module.mk
index 90659ce977..46ed564e1e 100644
--- a/graphics/module.mk
+++ b/graphics/module.mk
@@ -37,7 +37,8 @@ MODULE_OBJS += \
 
 ifdef USE_ARM_SCALER_ASM
 MODULE_OBJS += \
-	scaler/scale2xARM.o
+	scaler/scale2xARM.o \
+	scaler/Normal2xARM.o
 endif
 
 ifndef DISABLE_HQ_SCALERS
diff --git a/graphics/scaler.cpp b/graphics/scaler.cpp
index 11767848ed..7620e5b107 100644
--- a/graphics/scaler.cpp
+++ b/graphics/scaler.cpp
@@ -187,6 +187,44 @@ void Normal1x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
 }
 
 #ifndef DISABLE_SCALERS
+#ifdef USE_ARM_SCALER_ASM
+/**
+ * Aspect ratio correcting 2x scaler, implemented in ARM assembly
+ * (graphics/scaler/Normal2xARM.s). The assembly ANDs every doubled pixel
+ * with 'mask' before blending neighbouring rows.
+ */
+extern "C" void Normal2xAspectMask(const uint8 *srcPtr, uint32 srcPitch,
+		uint8 *dstPtr, uint32 dstPitch,
+		int width, int height, uint32 mask);
+
+/**
+ * 2x scaler with aspect ratio correction. Picks the channel mask that matches
+ * the current pixel format (565, otherwise the 555 mask) and hands the work
+ * to the assembly routine.
+ */
+void Normal2xAspect(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+		int width, int height) {
+	const uint32 channelMask = (gBitFormat == 565) ? 0x07e0F81F : 0x03e07C1F;
+
+	Normal2xAspectMask(srcPtr, srcPitch, dstPtr, dstPitch, width, height, channelMask);
+}
+
+/**
+ * Nearest-neighbour 2x scaler, implemented in ARM assembly
+ * (graphics/scaler/Normal2xARM.s).
+ */
+extern "C" void Normal2xARM(const uint8 *srcPtr, uint32 srcPitch,
+		uint8 *dstPtr, uint32 dstPitch,
+		int width, int height);
+
+/**
+ * Trivial nearest-neighbour 2x scaler; forwards to the assembly version.
+ */
+void Normal2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+		int width, int height) {
+	Normal2xARM(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+}
+#else
 /**
  * Trivial nearest-neighbour 2x scaler.
  */
@@ -210,6 +248,7 @@ void Normal2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
 		dstPtr += dstPitch << 1;
 	}
 }
+#endif
 
 /**
  * Trivial nearest-neighbour 3x scaler.
diff --git a/graphics/scaler.h b/graphics/scaler.h
index 4cea9ee2fb..bdae161bd1 100644
--- a/graphics/scaler.h
+++ b/graphics/scaler.h
@@ -45,6 +45,9 @@ DECLARE_SCALER(AdvMame2x);
 DECLARE_SCALER(AdvMame3x);
 DECLARE_SCALER(Normal1x);
 DECLARE_SCALER(Normal2x);
+#ifdef USE_ARM_SCALER_ASM
+DECLARE_SCALER(Normal2xAspect);
+#endif
 DECLARE_SCALER(Normal3x);
 DECLARE_SCALER(Normal1o5x);
 DECLARE_SCALER(TV2x);
diff --git a/graphics/scaler/Normal2xARM.s b/graphics/scaler/Normal2xARM.s
new file mode 100755
index 0000000000..5de50d9c17
--- /dev/null
+++ b/graphics/scaler/Normal2xARM.s
@@ -0,0 +1,189 @@
+@ ScummVM Scumm Interpreter
+@ Copyright (C) 2009 The ScummVM project
+@
+@ This program is free software@ you can redistribute it and/or
+@ modify it under the terms of the GNU General Public License
+@ as published by the Free Software Foundation@ either version 2
+@ of the License, or (at your option) any later version.
+@
+@ This program is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY@ without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with this program@ if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+@
+@ $URL$
+@ $Id$
+@
+@ @author Robin Watts (robin@wss.co.uk)
+
+	.text
+
+	.global	Normal2xARM
+	.global	Normal2xAspectMask
+
+
+	@ void Normal2xARM(const uint8 *src, uint32 srcPitch, uint8 *dst,
+	@                  uint32 dstPitch, int w, int h)
+	@
+	@ Nearest-neighbour 2x scaler: each 16 bit source pixel is written as
+	@ a 2x2 block of destination pixels.
+	@
+	@ Assumes dst is aligned (so did the C)
+	@ Assumes 16bit (so did the C)
+	@ A row is always handled before h is tested, so h must be at least 1.
+Normal2xARM:
+	@ r0 = src
+	@ r1 = srcPitch
+	@ r2 = dst
+	@ r3 = dstPitch
+	@ r4 = w (loaded from the stack below)
+	@ r5 = h (loaded from the stack below)
+	STMFD	r13!,{r4-r11,r14}
+	LDR	r4, [r13,#4*9]		@ r4 = w
+	LDR	r5, [r13,#4*10]		@ r5 = h
+	ADD	r12,r2, r3		@ r12= dst + dstPitch (second output row)
+	SUB	r1, r1, r4, LSL #1	@ r1 = srcPitch - w*2 (src skip per row)
+	SUB	r6, r3, r4, LSL #2	@ r6 = dstPitch - w*4
+	ADD	r3, r3, r6		@ r3 = dstPitch*2 - w*4 (dst skip per row pair)
+yloop:
+	SUBS	r14,r4, #4		@ r14= w-4
+	BLT	xtail			@ Fewer than 4 pixels: skip the unrolled loop
+xloop:
+	LDRH	r6, [r0], #2		@ Fetch 4 source pixels
+	LDRH	r7, [r0], #2
+	LDRH	r8, [r0], #2
+	LDRH	r9, [r0], #2
+	ORR	r6, r6, r6, LSL #16	@ Double each pixel horizontally
+	ORR	r7, r7, r7, LSL #16
+	ORR	r8, r8, r8, LSL #16
+	ORR	r9, r9, r9, LSL #16
+	STMIA	r2!, {r6-r9}		@ First output row
+	STMIA	r12!,{r6-r9}		@ Second output row
+	SUBS	r14,r14,#4
+	BGE	xloop
+xtail:
+	ADDS	r14,r14,#4		@ r14= pixels left in this row (0..3 for w >= 0)
+	BNE	thin
+	ADD	r0, r0, r1
+	ADD	r2, r2, r3
+	ADD	r12,r12,r3
+	SUBS	r5, r5, #1
+	BGT	yloop
+
+	LDMFD	r13!,{r4-r11,PC}
+thin:					@ One pixel at a time, r14 = pixels to do
+	LDRH	r6, [r0], #2
+	ORR	r6, r6, r6, LSL #16
+	STR	r6, [r2], #4
+	STR	r6, [r12],#4
+	SUBS	r14,r14,#1
+	BGT	thin
+	ADD	r0, r0, r1
+	ADD	r2, r2, r3
+	ADD	r12,r12,r3
+	SUBS	r5, r5, #1
+	BGT	yloop
+
+	LDMFD	r13!,{r4-r11,PC}
+
+
+	@ void Normal2xAspectMask(const uint8 *src, uint32 srcPitch, uint8 *dst,
+	@                         uint32 dstPitch, int w, int h, uint32 mask)
+	@
+	@ 2x scaler with aspect ratio correction: pixels are doubled
+	@ horizontally and every 5 source rows become 12 destination rows.
+	@ Works down one column of 5 source pixels (A-E) at a time. Each pass
+	@ reads 5 source rows whatever is left of h, so h is expected to be a
+	@ multiple of 5. Doubled pixels are ANDed with mask before being
+	@ weighted, and each weighted sum is shifted right by 3 and masked again.
+	@
+	@ Horrid filter calculations
+	@ AAAAAAAAAAAABBBBBBBBBBBBCCCCCCCCCCCCDDDDDDDDDDDDEEEEEEEEEEEE
+	@ <-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+->
+	@ Ideal: A,A,(A*2+B*3)/5,B,(B*4+C)/5,C,C,(C+D*4)/5,D,(D*3+E*2)/5,E,E
+	@ Actual: A,A,(A*3+B*5)/8,B,(B*7+C)/8,C,C,(C+D*7)/8,D,(D*5+E*3)/8,E,E
+
+	@ Assumes dst is aligned (so did the C)
+	@ Assumes 16bit (so did the C)
+Normal2xAspectMask:
+	@ r0 = src
+	@ r1 = srcPitch
+	@ r2 = dst
+	@ r3 = dstPitch
+	@ r4 = w
+	@ r5 = h
+	@ r12= mask
+	STMFD	r13!,{r4-r11,r14}
+	LDR	r4, [r13,#4*9]		@ r4 = w
+	LDR	r5, [r13,#4*10]		@ r5 = h
+	LDR	r12,[r13,#4*11]		@ r12= mask
+	MOV	r11,#2
+	SUB	r11,r11,r1, LSL #2	@ r11= 2-srcPitch*4
+	MOV	r14,#4
+	SUB	r14,r14,r3, LSL #3
+	SUB	r14,r14,r3, LSL #1
+	SUB	r14,r14,r3		@ r14 = 4-dstPitch*11
+yloop_aspect:
+xloop_aspect:
+	LDRH	r6, [r0], r1		@ r6 = A
+	LDRH	r7, [r0], r1		@ r7 = B
+	LDRH	r8, [r0], r1		@ r8 = C
+	LDRH	r9, [r0], r1		@ r9 = D
+	LDRH	r10,[r0], r11		@ r10= E
+	ORR	r6, r6, r6, LSL #16	@ r6 = output 0, 1
+	ORR	r7, r7, r7, LSL #16	@ r7 = output 3
+	ORR	r8, r8, r8, LSL #16	@ r8 = output 5,6
+	ORR	r9, r9, r9, LSL #16	@ r9 = output 8
+	ORR	r10,r10,r10,LSL #16	@ r10= output 10, 11
+	STR	r6, [r2], r3		@ output 0 (A)
+	STR	r6, [r2], r3		@ output 1 (A)
+	AND	r6, r6, r12		@ r6 = A split
+	ADD	r6, r6, r6, LSL #1	@ r6 = A*3
+	STR	r7, [r2, r3]		@ output 3 (B)
+	AND	r7, r7, r12		@ r7 = B split
+	ADD	r6, r6, r7		@ r6 = A*3 + B
+	ADD	r6, r6, r7, LSL #2	@ r6 = A*3 + B*5
+	AND	r6, r12,r6, LSR #3	@ r6 = (A*3 + B*5)>>3
+	ORR	r6, r6, r6, ROR #16	@ r6 = output 2
+	STR	r6, [r2], r3, LSL #1	@ output 2 (A*3+B*5)>>3
+	RSB	r7, r7, r7, LSL #3	@ r7 = B*7
+	AND	r6, r8, r12		@ r6 = C split
+	ADD	r7, r7, r6		@ r7 = B*7+C
+	AND	r7, r12,r7, LSR #3	@ r7 = (B*7 + C)>>3
+	ORR	r7, r7, r7, ROR #16	@ r7 = output 4
+	STR	r7, [r2], r3		@ output 4 (B*7+C)>>3
+	STR	r8, [r2], r3		@ output 5 (C)
+	STR	r8, [r2], r3		@ output 6 (C)
+	STR	r9, [r2, r3]		@ output 8 (D)
+	AND	r9, r9, r12		@ r9 = D split
+	RSB	r7, r9, r9, LSL #3	@ r7 = D*7
+	ADD	r6, r6, r7		@ r6 = C+D*7
+	AND	r6, r12,r6, LSR #3	@ r6 = (C + D*7)>>3
+	ORR	r6, r6, r6, ROR #16	@ r6 = output 7
+	STR	r6, [r2], r3, LSL #1	@ output 7 (C+D*7)>>3
+	ADD	r9, r9, r9, LSL #2	@ r9 = D*5
+	AND	r6, r10,r12		@ r6 = E split
+	ADD	r9, r9, r6		@ r9 = D*5+E
+	ADD	r9, r9, r6, LSL #1	@ r9 = D*5+E*3
+	AND	r9, r12,r9, LSR #3	@ r9 = (D*5 + E*3)>>3
+	ORR	r9, r9, r9, ROR #16	@ r9 = output 9
+	STR	r9, [r2], r3		@ output 9 (D*5+E*3)>>3
+	STR	r10,[r2], r3		@ output 10 (E)
+	STR	r10,[r2], r14		@ output 11 (E)
+	SUBS	r4, r4, #1
+	BGT	xloop_aspect
+	LDR	r4, [r13,#4*9]		@ r4 = w
+	ADD	r0, r0, r1, LSL #2
+	ADD	r0, r0, r1		@ r0 += srcPitch*5
+	SUB	r0, r0, r4, LSL #1	@ r0 -= w*2 (start of the next 5 source rows)
+	ADD	r2, r2, r3, LSL #3
+	ADD	r2, r2, r3, LSL #2	@ r2 += dstPitch*12
+	SUB	r2, r2, r4, LSL #2	@ r2 -= w*4 (start of the next 12 output rows)
+	SUBS	r5, r5, #5
+	BGT	yloop_aspect
+
+	LDMFD	r13!,{r4-r11,PC}
+-- 
+cgit v1.2.3