From 5b64d512abb6f6a836d49559907e43a7719b62ef Mon Sep 17 00:00:00 2001
From: Robin Watts
Date: Sun, 3 Feb 2008 01:26:12 +0000
Subject: Introduce ARM code for commonest case of "proc3" costume blitting (as used for animation in SamNMax). Enabled by the USE_ARM_COSTUME_ASM define. Currently enabled for WinCE only.

svn-id: r30750
---
 engines/scumm/base-costume.h |  15 ++-
 engines/scumm/costume.cpp    |  40 +++++
 engines/scumm/proc3ARM.s     | 306 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 355 insertions(+), 6 deletions(-)
 create mode 100755 engines/scumm/proc3ARM.s

(limited to 'engines/scumm')

diff --git a/engines/scumm/base-costume.h b/engines/scumm/base-costume.h
index 7acaff2679..59ca3ded1f 100644
--- a/engines/scumm/base-costume.h
+++ b/engines/scumm/base-costume.h
@@ -106,22 +106,25 @@ protected:
 	// width and height of cel to decode
 	int _width, _height;
 
+public:
 	struct Codec1 {
 		// Parameters for the original ("V1") costume codec.
+		// These ones are accessed from ARM code (proc3ARM.s) by fixed offset. Don't reorder.
+		int x;
+		int y;
 		const byte *scaletable;
+		int skip_width;
+		byte *destptr;
+		const byte *mask_ptr;
+		int scaleXstep;
 		byte mask, shr;
 		byte repcolor;
 		byte replen;
-		int scaleXstep;
-		int x, y;
+		// These ones aren't accessed from ARM code.
 		Common::Rect boundsRect;
 		int scaleXindex, scaleYindex;
-		int skip_width;
-		byte *destptr;
-		const byte *mask_ptr;
 	};
 
-public:
 	BaseCostumeRenderer(ScummEngine *scumm) {
 		_actorID = 0;
 		_shadow_mode = 0;
diff --git a/engines/scumm/costume.cpp b/engines/scumm/costume.cpp
index 83fa71e7b6..b0dbec9ff9 100644
--- a/engines/scumm/costume.cpp
+++ b/engines/scumm/costume.cpp
@@ -413,6 +413,23 @@ void ClassicCostumeRenderer::procC64(Codec1 &v1, int actor) {
 #undef LINE
 #undef MASK_AT
 
+#ifdef USE_ARM_COSTUME_ASM
+// ARM implementation of the masked + shadow-table variant of proc3()
+// (see proc3ARM.s). It reads the leading Codec1 fields by fixed offset
+// and returns the updated _scaleIndexX.
+extern "C" int ClassicProc3RendererShadowARM(int _scaleY,
+                                        ClassicCostumeRenderer::Codec1 *v1,
+                                        Graphics::Surface *_out,
+                                        const byte *src,
+                                        int height,
+                                        int _scaleX,
+                                        int _scaleIndexX,
+                                        byte *_shadow_table,
+                                        byte _palette[32],
+                                        int32 _numStrips,
+                                        int _scaleIndexY);
+#endif
+
 void ClassicCostumeRenderer::proc3(Codec1 &v1) {
 	const byte *mask, *src;
 	byte *dst;
@@ -422,6 +439,29 @@ void ClassicCostumeRenderer::proc3(Codec1 &v1) {
 	byte scaleIndexY;
 	bool masked;
 
+#ifdef USE_ARM_COSTUME_ASM
+	// Fast path: hand the whole cel to the ARM routine when there is a mask,
+	// a shadow table and shadow mode 0x20 is off. The local 'src' is not set
+	// yet at this point, so pass _srcptr; return so the C loop doesn't redraw.
+	if (((_shadow_mode & 0x20) == 0) &&
+	    (v1.mask_ptr != NULL) &&
+	    (_shadow_table != NULL))
+	{
+		_scaleIndexX = ClassicProc3RendererShadowARM(_scaleY,
+		                                             &v1,
+		                                             &_out,
+		                                             _srcptr,
+		                                             _height,
+		                                             _scaleX,
+		                                             _scaleIndexX,
+		                                             _shadow_table,
+		                                             _palette,
+		                                             _numStrips,
+		                                             _scaleIndexY);
+		return;
+	}
+#endif /* USE_ARM_COSTUME_ASM */
+
 	y = v1.y;
 	src = _srcptr;
 	dst = v1.destptr;
diff --git a/engines/scumm/proc3ARM.s b/engines/scumm/proc3ARM.s
new file mode 100755
index 0000000000..cefe37153e
--- /dev/null
+++ b/engines/scumm/proc3ARM.s
@@ -0,0 +1,306 @@
+@ ScummVM Scumm Interpreter
+@ Copyright (C) 2007 The ScummVM project
+@
+@ This program is free software; you can redistribute it and/or
+@ modify it under the terms of the GNU General Public License
+@ as published by the Free Software Foundation; either version 2
+@ of the License, or (at your option) any later version.
+@
+@ This program is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with this program; if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+@
+@ $URL: https://scummvm.svn.sourceforge.net/svnroot/scummvm/scummvm/trunk/engines/scumm/gfxARM.s $
+@ $Id: gfxARM.s 29646 2007-11-26 16:54:56Z fingolfin $
+@
+@ @author Robin Watts (robin@wss.co.uk)
+
+	.text
+
+	.global	ClassicProc3RendererShadowARM
+
+
+.equ	_scaleIndexY	,	112
+.equ	_numStrips	,	108
+.equ	_palette	,	104
+.equ	_shadow_table	,	100
+.equ	_scaleIndexX	,	96
+.equ	_scaleX		,	92
+.equ	_height		,	88
+.equ	store_r14	,	84
+.equ	store_r11	,	80
+.equ	store_r10	,	76
+.equ	store_r9	,	72
+.equ	store_r8	,	68
+.equ	store_r7	,	64
+.equ	store_r6	,	60
+.equ	store_r5	,	56
+.equ	store_r4	,	52
+.equ	src		,	48
+.equ	height		,	44
+.equ	len		,	40
+.equ	v1_shr		,	36
+.equ	v1_skip_width	,	32
+.equ	v1_destptr	,	28
+.equ	v1_scaleXstep	,	24
+.equ	v1_mask_ptr	,	20
+.equ	v1_y		,	16
+.equ	v1_scaletable	,	12
+.equ	pitch		,	8
+.equ	scaleIdxXPtr	,	4
+.equ	scaleIdxYPtr	,	0
+
+.equ	space		,	48
+
+	@ r0 = _scaleY
+	@ r1 = v1
+	@ r2 = _out
+	@ r3 = src
+	@ <> = _height        (<> = passed on the stack; see the .equ offsets above)
+	@ <> = _scaleX
+	@ <> = _scaleIndexX
+	@ <> = _shadow_table
+	@ <> = _palette
+	@ <> = _numstrips
+	@ <> = _scaleIndexY
+ClassicProc3RendererShadowARM:
+	@ shadow20 = false
+	@ shadowed = true
+	@ unscaled = false
+	STMFD	r13!,{r3-r11,r14}	@ 10 words; r3 lands in the 'src' slot
+	LDRH	r6,[r2,#2]		@ r6 = _out.h
+	LDRH	r7,[r2]			@ r7 = _out.w
+	LDRH	r8,[r2,#4]		@ r8 = _out.pitch
+	ADD	r6,r6,#1		@ r6 = _out.h+1
+	SUB	r13,r13,#space		@ reserve the local slots (offsets 0..44)
+	STR	r3,[r13,#src]
+	STR	r8,[r13,#pitch]
+	LDMIA	r1,{r3,r4,r5,r8,r9,r10,r11}
+	@ r3 = v1.x
+	@ r4 = v1.y
+	@ r5 = scaletable
+	@ r8 = skip_width
+	@ r9 = destptr
+	@ r10= mask_ptr
+	@ r11= scaleXstep
+	LDR	r2, [r13,#_scaleIndexY]
+	LDR	r12,[r13,#_scaleIndexX]
+	STR	r4, [r13,#v1_y]
+	STR	r5, [r13,#v1_scaletable]
+	ADD	r2, r5,r2		@ r2 = &scaletable[_scaleIndexY]
+	ADD	r5, r5,r12		@ r5 = &scaletable[_scaleIndexX]
+	STR	r5, [r13,#scaleIdxXPtr]
+	STR	r2, [r13,#scaleIdxYPtr]
+	STR	r8, [r13,#v1_skip_width]
+
+	LDRB	r8, [r1,#29]		@ r8 = shr
+	LDRB	r14,[r1,#31]		@ r14= replen
+	LDRB	r1, [r1,#30]		@ r1 = repcolor
+	STR	r8, [r13,#v1_shr]
+	STR	r9, [r13,#v1_destptr]
+	STR	r10,[r13,#v1_mask_ptr]
+	STR	r11,[r13,#v1_scaleXstep]
+
+	LDR	r12,[r13,#_height]
+
+	@ r0 = _scaleY
+	@ r1 = v1.repcolor
+	@ r2 = &v1.scaletable[scaleIndexY]
+	@ r3 = v1.x
+	@ r4 = v1.y
+	@ r5 =
+	@ r6 = _out.h+1
+	@ r7 = _out.w
+	@ r8 =
+	@ r9 = v1.destptr
+	@ r10= v1.mask_ptr
+	@ r11=
+	@ r12= _height
+	@ r14= v1.replen
+
+	MOV	r8,#0x80
+	AND	r11,r3,#7		@ r11= v1.x & 7
+	MOV	r8,r8,LSR r11		@ r8 = maskbit = revBitMask(v1.x & 7)
+	ADD	r10,r10,r3,ASR #3	@ r10= mask = v1.mask_ptr + (v1.x>>3)
+	LDR	r11,[r13,#_numStrips]	@ r11= _numStrips: innerloop's mask stride; the startpos entry below bypasses middleloop, which otherwise loads it
+	@ r0 = _scaleY
+	@ r1 = color = v1.repcolor
+	@ r2 = &v1.scaletable[scaleIndexY]
+	@ r3 = v1.x
+	@ r4 = y = v1.y
+	@ r5 =
+	@ r6 = _out.h+1
+	@ r7 = _out.w
+	@ r8 = maskbit
+	@ r9 = v1.destptr
+	@ r10= mask
+	@ r11= _numStrips
+	@ r12= height = _height
+	@ r14= len    = v1.replen
+	CMP	r14,#0
+	BEQ	outerloop		@ no pending run: fetch the next one
+	ADD	r12,r12,#1		@ r12= height++
+	MOV	r5,r12			@ r5 = loopCount = height
+	CMP	r5,r14			@ if (loopCount > len)
+	MOVGT	r5,r14			@     loopCount = len
+	SUB	r12,r12,r5
+	SUB	r14,r14,r5
+	STR	r12,[r13,#height]
+	STR	r14,[r13,#len]
+	LDR	r12,[r13,#pitch]
+	B	startpos		@ enter at the loop's decrement, skipping one body
+
+outerloop:
+	@ r0 = _scaleY
+	@ r1 = color
+	@ r2 = &v1.scaletable[scaleIndexY]
+	@ r3 = x
+	@ r4 = y
+	@ r5 =
+	@ r6 = _out.h+1
+	@ r7 = _out.w
+	@ r8 = maskbit
+	@ r9 = dst
+	@ r10= mask
+	@ r11=
+	@ r12= height
+	@ r14=
+
+	LDR	r11,[r13,#src]
+	LDR	r5,[r13,#v1_shr]
+	@ stall
+	LDRB	r14,[r11],#1		@ r14= len = *src++
+	@ stall
+	@ stall
+	MOV	r1, r14,LSR r5		@ r1 = color = len>>v1.shr
+	BICS	r14,r14,r1,LSL r5	@ r14= len with the color bits cleared
+	LDREQB	r14,[r11],#1		@ if (!len)  r14 = len = *src++
+	STR	r11,[r13,#src]
+middleloop:
+	@ r0 = _scaleY
+	@ r1 = color
+	@ r2 = &v1.scaletable[scaleIndexY]
+	@ r3 = x
+	@ r4 = y
+	@ r5 = loopCount = min(height,len)
+	@ r6 = _out.h+1
+	@ r7 = _out.w
+	@ r8 = maskbit
+	@ r9 = dst
+	@ r10= mask
+	@ r11=
+	@ r12= height
+	@ r14= len
+
+	MOV	r5,r12			@ loopCount = height
+	CMP	r5,r14			@ if (loopCount > len)
+	MOVGT	r5,r14			@     loopCount = len
+	SUB	r12,r12,r5		@ height -= loopCount
+	SUB	r14,r14,r5		@ len    -= loopCount
+	STR	r12,[r13,#height]
+	STR	r14,[r13,#len]
+
+	LDR	r11,[r13,#_numStrips]
+	LDR	r12,[r13,#pitch]
+	@ r0 = _scaleY
+	@ r1 = color
+	@ r2 = &v1.scaletable[scaleIndexY]
+	@ r3 = x
+	@ r4 = y
+	@ r5 = loopCount = min(height,len)
+	@ r6 = _out.h+1
+	@ r7 = _out.w
+	@ r8 = maskbit
+	@ r9 = dst
+	@ r10= mask
+	@ r11= _numStrips
+	@ r12= _out.pitch
+	@ r14= scratch
+innerloop:
+	@ inner loop
+	CMP	r0,#254			@ if _scaleY <= 254
+	LDRLEB	r14,[r2],#1		@     r14 = v1.scaletable[scaleIndexY++]
+	@ stallLE
+	@ stallLE
+	CMPLE	r0,r14			@      && _scaleY <= r14
+	BLE	startpos		@ then this row is scaled out: skip it
+
+	LDRB	r14,[r10],r11		@ r14 = mask[0]   mask += _numStrips
+	ADDS	r4,r4,#1		@ y >= 0 (equiv to y>-1,y+1>0)
+	CMPGT	r1,#0			@ && color > 0
+	CMPGT	r6,r4			@ && _out.h+1 > y+1
+	CMNGT	r3,#1			@ && x >= 0 (equiv to x>-1,x+1>0)
+	CMPGT	r7,r3			@ && _out.w > x
+	BLE	masked
+	TST	r14,r8			@ && !(mask[0] & maskbit)
+	LDREQ	r14,[r13,#_palette]
+	BNE	masked
+
+	@ stall
+	LDRB	r14,[r14,r1]		@ r14 = pcolor = _palette[color]
+	@ stall
+	@ stall
+	CMP	r14,#13			@ if (pcolor == 13)
+	LDREQ	r12,[r13,#_shadow_table]
+	LDREQB	r14,[r9]		@     r14 = *dst
+	@ stallEQ
+	@ stallEQ
+	LDREQB	r14,[r12,r14]		@     r14 = pcolor=_shadow_tab[r14]
+	LDREQ	r12,[r13,#pitch]	@     restore r12 = _out.pitch
+	@ stallEQ
+	STRB	r14,[r9]		@ *dst = pcolor
+masked:
+	ADD	r9,r9,r12		@ dst += _out.pitch
+startpos:
+	SUBS	r5,r5,#1		@ loopCount -=1
+	BNE	innerloop
+
+	@ So we either ran out of height or len (or both)
+	LDR	r12,[r13,#height]
+	LDR	r14,[r13,#len]
+	CMP	r12,#0
+	BNE	notheight
+	LDR	r10,[r13,#v1_skip_width]
+	LDR	r11,[r13,#_scaleX]
+	LDR	r2,[r13,#scaleIdxXPtr]
+	LDR	r12,[r13,#v1_scaleXstep]
+	SUBS	r10,r10,#1		@ if --v1.skip_width == 0
+	BEQ	end			@   return _scaleIndexX
+	LDRB	r9,[r2],r12		@ r9 = *scaleIdxXPtr, scaleIdxXPtr+=sclXstp
+	STR	r10,[r13,#v1_skip_width]
+	CMP	r11,#254		@ if _scaleX <= 254
+	CMPLE	r11,r9			@ && _scaleX <= scltb[scIdxX]
+	LDR	r9,[r13,#v1_destptr]	@ r9 = dst = v1.destptr
+	BLE	noXstep			@ then x is not stepped for this column
+
+	SUB	r11,r7,#1
+	ADDS	r3,r3,r12		@ v1.x += v1.scaleXstep
+					@ if v1.x < 0 ||
+	CMPGE	r11,r3			@    _out.w-1 < v1.x
+	BLT	end
+	AND	r11,r3,#7
+	MOV	r8,#0x80
+	MOV	r8,r8,LSR r11		@ maskbit = revBitMask(v1.x & 7)
+	ADD	r9,r9,r12		@ r9 = v1.destptr += v1.scaleXstep
+	STR	r9,[r13,#v1_destptr]
+noXstep:
+	STR	r2,[r13,#scaleIdxXPtr]
+	LDR	r10,[r13,#v1_mask_ptr]
+	LDR	r12,[r13,#_height]	@ r12= height = _height
+	LDR	r4,[r13,#v1_y]		@ r4 = y = v1.y
+	LDR	r2,[r13,#scaleIdxYPtr]	@ r2 = &v1.scaletable[_scaleIndexY]
+	ADD	r10,r10,r3,ASR #3	@ mask=v1.mask_ptr+(v1.x>>3)
+notheight:
+	CMP	r14,#0			@ while (len > 0)
+	BGT	middleloop
+	B	outerloop		@ while (1)
+end:
+	LDR	r0,[r13,#v1_scaletable]
+	SUB	r0,r2,r0		@ return scaleIdxXPtr - scaletable (= _scaleIndexX)
+	ADD	r13,r13,#space		@ drop the local slots
+	LDMFD	r13!,{r3-r11,PC}	@ restore registers and return
-- 
cgit v1.2.3