| author | Neil Millstone | 2007-06-30 23:42:58 +0000 | 
|---|---|---|
| committer | Neil Millstone | 2007-06-30 23:42:58 +0000 | 
| commit | b49b2b7db386b5be961bf9388c6cdc47329a1e68 (patch) | |
| tree | e1f21f5af4880dfdf47f433cdbec468382792ee9 /backends/platform/ds/arm9/source | |
| parent | 951ffd0684b8d8c2b594a990c9e60cffc63f5fb0 (diff) | |
Adding mistakenly deleted file
svn-id: r27805
Diffstat (limited to 'backends/platform/ds/arm9/source')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | backends/platform/ds/arm9/source/blitters.cpp | 400 |
1 file changed, 400 insertions, 0 deletions
diff --git a/backends/platform/ds/arm9/source/blitters.cpp b/backends/platform/ds/arm9/source/blitters.cpp
new file mode 100644
index 0000000000..ec33a5aab4
--- /dev/null
+++ b/backends/platform/ds/arm9/source/blitters.cpp
@@ -0,0 +1,400 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2005-2006 Neil Millstone
+ * Copyright (C) 2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ */
+
+#include "stdafx.h"
+#include "blitters.h"
+#define CHARSET_MASK_TRANSPARENCY 253
+
+//#define PERFECT_5_TO_4_RESCALING
+
+namespace DS {
+
+void asmDrawStripToScreen(int height, int width, byte const* text, byte const* src, byte* dst,
+	int vsPitch, int vmScreenWidth, int textSurfacePitch) {
+
+	if (height <= 0) height = 1;
+	if (width < 4) return;
+
+	width &= ~4;
+//	src = (const byte *) (((int) (src)) & (~4));
+//	dst = (byte *) (((int) (dst)) & (~4));
+//	text = (const byte *) (((int) (text)) & (~4));
+
+	asm (	"mov r5, %0\n"				// Height
+			"yLoop:\n"
+			"mov r3, #0\n"				// X pos
+
+			"xLoop:\n"
+
+			"ldr r4, [%2, r3]\n"		// Load text layer word
+			"cmp r4, %5\n"
+			"bne singleByteCompare\n"
+			"ldr r4, [%3, r3]\n"
+			"str r4, [%4, r3]\n"
+			"add r3, r3, #4\n"
+			"cmp r3, %1\n"				// x == width?
+			"blt xLoop\n"
+
+			"add %2, %2, %8\n"			// text += _textSurface.pitch
+			"add %3, %3, %6\n"			// src += vs->pitch
+			"add %4, %4, %7\n"			// dst += _vm->_screenWidth
+			"sub r5, r5, #1\n"			// y -= 1
+			"cmp r5, #0\n"				// y == 0?
+			"bne yLoop\n"
+			"b end\n"
+
+			"singleByteCompare:\n"
+			"ldrb r4, [%2, r3]\n"		// Load text byte
+			"cmps r4, %5, lsr #24\n"	// Compare with mask
+			"strneb r4, [%4, r3]\n"		// Store if not equal
+			"ldreqb r4, [%3, r3]\n"		// Otherwise load src byte
+			"streqb r4, [%4, r3]\n"		// Store it
+			"add r3, r3, #1\n"
+
+			"ldrb r4, [%2, r3]\n"		// Load text byte
+			"cmps r4, %5, lsr #24\n"	// Compare with mask
+			"strneb r4, [%4, r3]\n"		// Store if not equal
+			"ldreqb r4, [%3, r3]\n"		// Otherwise load src byte
+			"streqb r4, [%4, r3]\n"		// Store it
+			"add r3, r3, #1\n"
+
+			"ldrb r4, [%2, r3]\n"		// Load text byte
+			"cmps r4, %5, lsr #24\n"	// Compare with mask
+			"strneb r4, [%4, r3]\n"		// Store if not equal
+			"ldreqb r4, [%3, r3]\n"		// Otherwise load src byte
+			"streqb r4, [%4, r3]\n"		// Store it
+			"add r3, r3, #1\n"
+
+			"ldrb r4, [%2, r3]\n"		// Load text byte
+			"cmps r4, %5, lsr #24\n"	// Compare with mask
+			"strneb r4, [%4, r3]\n"		// Store if not equal
+			"ldreqb r4, [%3, r3]\n"		// Otherwise load src byte
+			"streqb r4, [%4, r3]\n"		// Store it
+			"add r3, r3, #1\n"
+
+			"cmps r3, %1\n"				// x == width?
+			"blt xLoop\n"				// Repeat
+			"add %2, %2, %8\n"			// text += _textSurface.pitch
+			"add %3, %3, %6\n"			// src += vs->pitch
+			"add %4, %4, %7\n"			// dst += _vm->_screenWidth
+			"sub r5, r5, #1\n"			// y -= 1
+			"cmp r5, #0\n"				// y == 0?
+			"bne yLoop\n"
+
+			"end:\n"
+		: /* no output registers */
+		: "r" (height), "r" (width), "r" (text), "r" (src), "r" (dst), "r" (CHARSET_MASK_TRANSPARENCY | (CHARSET_MASK_TRANSPARENCY << 8) | (CHARSET_MASK_TRANSPARENCY << 16) | (CHARSET_MASK_TRANSPARENCY << 24)),
+			"r" (vsPitch), "r" (vmScreenWidth), "r" (textSurfacePitch)
+		: "r5", "r3", "r4", "%2", "%3", "%4", "memory");
+}
+
+void asmCopy8Col(byte* dst, int dstPitch, const byte* src, int height) {
+	asm("ands r0, %3, #1\n"
+		 "addne %3, %3, #1\n"
+		 "bne roll2\n"
+
+		 "yLoop2:\n"
+		 "ldr r0, [%2, #0]\n"
+		 "str r0, [%0, #0]\n"
+		 "ldr r0, [%2, #4]\n"
+		 "str r0, [%0, #4]\n"
+		 "add %0, %0, %1\n"
+		 "add %2, %2, %1\n"
+		 "roll2:\n"
+		 "ldr r0, [%2, #0]\n"
+		 "str r0, [%0, #0]\n"
+		 "ldr r0, [%2, #4]\n"
+		 "str r0, [%0, #4]\n"
+		 "add %0, %0, %1\n"
+		 "add %2, %2, %1\n"
+		 "subs %3, %3, #2\n"
+		 "bne yLoop2\n"
+
+		: /* no output registers */
+		: "r" (dst), "r" (dstPitch), "r" (src), "r" (height)
+		: "r0", "%0", "%2", "%3");
+}
+
+static bool isDivBy5Ready = false;
+static u32  DIV_BY_5[160];
+
+void ComputeDivBy5TableIFN()
+{
+    if (isDivBy5Ready)
+        return;
+    isDivBy5Ready = true;
+
+    for (int i = 0; i < 160; ++i)
+    {
+        DIV_BY_5[i] = (2*i + 5) / 10;
+    }
+}
+
+#ifdef PERFECT_5_TO_4_RESCALING
+static inline void RescaleBlock_5x1555_To_4x1555(u16 s0, u16 s1, u16 s2, u16 s3, u16 s4,
+                                                 u16* dest)
+{
+    u32 bs0 = s0 & 0x1F;
+    u32 bs1 = s1 & 0x1F;
+    u32 bs2 = s2 & 0x1F;
+    u32 bs3 = s3 & 0x1F;
+    u32 bs4 = s4 & 0x1F;
+
+#if 0
+    u32 gs0 = (s0 >> 5) & 0x1F;
+    u32 gs1 = (s1 >> 5) & 0x1F;
+    u32 gs2 = (s2 >> 5) & 0x1F;
+    u32 gs3 = (s3 >> 5) & 0x1F;
+    u32 gs4 = (s4 >> 5) & 0x1F;
+
+    u32 rs0 = (s0 >> 10) & 0x1F;
+    u32 rs1 = (s1 >> 10) & 0x1F;
+    u32 rs2 = (s2 >> 10) & 0x1F;
+    u32 rs3 = (s3 >> 10) & 0x1F;
+    u32 rs4 = (s4 >> 10) & 0x1F;
+#else
+    // The compiler insists on using 0x1F as an immediate, which prevents it
+    // from folding the shift into the AND, so do it by hand.
+    u32 mask = 0x1F;
+    u32 gs0, gs1, gs2, gs3, gs4;
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs0) : "r"(s0), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs1) : "r"(s1), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs2) : "r"(s2), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs3) : "r"(s3), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs4) : "r"(s4), "r"(mask) : );
+    u32 rs0, rs1, rs2, rs3, rs4;
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs0) : "r"(s0), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs1) : "r"(s1), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs2) : "r"(s2), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs3) : "r"(s3), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs4) : "r"(s4), "r"(mask) : );
+#endif
+
+    u32 rd0 = 4*rs0 +   rs1;
+    u32 rd1 = 2*rs1 + rs1 + 2*rs2;
+    u32 rd2 = 2*rs2 + 2*rs3 + rs3;
+    u32 rd3 =   rs3 + 4*rs4;
+
+    u32 gd0 = 4*gs0 +   gs1;
+    u32 gd1 = 2*gs1 + gs1 + 2*gs2;
+    u32 gd2 = 2*gs2 + 2*gs3 + gs3;
+    u32 gd3 =   gs3 + 4*gs4;
+
+    u32 bd0 = 4*bs0 +   bs1;
+    u32 bd1 = 2*bs1 + bs1 + 2*bs2;
+    u32 bd2 = 2*bs2 + 2*bs3 + bs3;
+    u32 bd3 =   bs3 + 4*bs4;
+
+#if 0
+    // Offsetting for correct rounding
+    rd0 = rd0*2+5; rd1 = rd1*2+5; rd2 = rd2*2+5; rd3 = rd3*2+5;
+    gd0 = gd0*2+5; gd1 = gd1*2+5; gd2 = gd2*2+5; gd3 = gd3*2+5;
+    bd0 = bd0*2+5; bd1 = bd1*2+5; bd2 = bd2*2+5; bd3 = bd3*2+5;
+
+    rd0 = (rd0 * 51) >> 9; rd1 = (rd1 * 51) >> 9; rd2 = (rd2 * 51) >> 9; rd3 = (rd3 * 51) >> 9;
+    gd0 = (gd0 * 51) >> 9; gd1 = (gd1 * 51) >> 9; gd2 = (gd2 * 51) >> 9; gd3 = (gd3 * 51) >> 9;
+    bd0 = (bd0 * 51) >> 9; bd1 = (bd1 * 51) >> 9; bd2 = (bd2 * 51) >> 9; bd3 = (bd3 * 51) >> 9;
+#else
+    rd0 = DIV_BY_5[rd0]; rd1 = DIV_BY_5[rd1]; rd2 = DIV_BY_5[rd2]; rd3 = DIV_BY_5[rd3];
+    gd0 = DIV_BY_5[gd0]; gd1 = DIV_BY_5[gd1]; gd2 = DIV_BY_5[gd2]; gd3 = DIV_BY_5[gd3];
+    bd0 = DIV_BY_5[bd0]; bd1 = DIV_BY_5[bd1]; bd2 = DIV_BY_5[bd2]; bd3 = DIV_BY_5[bd3];
+#endif
+
+    u32 d10 = 0x80008000 | (rd1 << 26) | (gd1 << 21) | (bd1 << 16) | (rd0 << 10) | (gd0 << 5) | bd0;
+    u32 d32 = 0x80008000 | (rd3 << 26) | (gd3 << 21) | (bd3 << 16) | (rd2 << 10) | (gd2 << 5) | bd2;
+
+    ((u32*)dest)[0] = d10;
+    ((u32*)dest)[1] = d32;
+}
+#else
+static inline void RescaleBlock_5x1555_To_4x1555(u16 s0, u16 s1, u16 s2, u16 s3, u16 s4,
+                                                 u16* dest)
+{
+    u32 ar0bs0 = s0 & 0x7C1F;
+    u32 ar0bs1 = s1 & 0x7C1F;
+    u32 ar0bs2 = s2 & 0x7C1F;
+    u32 ar0bs3 = s3 & 0x7C1F;
+    u32 ar0bs4 = s4 & 0x7C1F;
+
+    u32 gs0 = s0 & 0x03E0;
+    u32 gs1 = s1 & 0x03E0;
+    u32 gs2 = s2 & 0x03E0;
+    u32 gs3 = s3 & 0x03E0;
+    u32 gs4 = s4 & 0x03E0;
+
+    u32 ar0bd0 = (3*ar0bs0 +   ar0bs1) >> 2;
+    u32 ar0bd1 = (  ar0bs1 +   ar0bs2) >> 1;
+    u32 ar0bd2 = (  ar0bs2 +   ar0bs3) >> 1;
+    u32 ar0bd3 = (  ar0bs3 + 3*ar0bs4) >> 2;
+
+    u32 gd0 = (3*gs0 +   gs1) >> 2;
+    u32 gd1 = (  gs1 +   gs2) >> 1;
+    u32 gd2 = (  gs2 +   gs3) >> 1;
+    u32 gd3 = (  gs3 + 3*gs4) >> 2;
+
+    u32 d0 = (ar0bd0 & 0xFC1F) | (gd0 & 0x03E0);
+    u32 d1 = (ar0bd1 & 0xFC1F) | (gd1 & 0x03E0);
+    u32 d2 = (ar0bd2 & 0xFC1F) | (gd2 & 0x03E0);
+    u32 d3 = (ar0bd3 & 0xFC1F) | (gd3 & 0x03E0);
+
+    u32 d10 = 0x80008000 | (d1 << 16) | d0;
+    u32 d32 = 0x80008000 | (d3 << 16) | d2;
+
+    ((u32*)dest)[0] = d10;
+    ((u32*)dest)[1] = d32;
+}
+#endif
+
+static inline void RescaleBlock_5x8888_To_4x1555(u32 s0, u32 s1, u32 s2, u32 s3, u32 s4,
+                                                 u16* dest)
+{
+    u32 d0 = 4*s0 +   s1;
+    u32 d1 = 2*s1 +   s1 + 2*s2;
+
+    u32 bd0 = (d0 << 24) >> 24;
+    u32 bd1 = (d1 << 24) >> 24;
+    u32 gd0 = (d0 << 16) >> 24;
+    u32 gd1 = (d1 << 16) >> 24;
+    u32 rd0 = (d0 >> 16);
+    u32 rd1 = (d1 >> 16);
+
+    rd0 = DIV_BY_5[rd0]; rd1 = DIV_BY_5[rd1];
+    gd0 = DIV_BY_5[gd0]; gd1 = DIV_BY_5[gd1];
+    bd0 = DIV_BY_5[bd0]; bd1 = DIV_BY_5[bd1];
+    u32 d10 = 0x80008000 | (rd1 << 26) | (gd1 << 21) | (bd1 << 16) | (rd0 << 10) | (gd0 << 5) | bd0;
+    ((u32*)dest)[0] = d10;
+
+    u32 d2 = 2*s2 + 2*s3 +   s3;
+    u32 d3 =   s3 + 4*s4;
+
+    u32 bd2 = (d2 << 24) >> 24;
+    u32 bd3 = (d3 << 24) >> 24;
+    u32 gd2 = (d2 << 16) >> 24;
+    u32 gd3 = (d3 << 16) >> 24;
+    u32 rd2 = (d2 >> 16);
+    u32 rd3 = (d3 >> 16);
+
+    rd2 = DIV_BY_5[rd2]; rd3 = DIV_BY_5[rd3];
+    gd2 = DIV_BY_5[gd2]; gd3 = DIV_BY_5[gd3];
+    bd2 = DIV_BY_5[bd2]; bd3 = DIV_BY_5[bd3];
+    u32 d32 = 0x80008000 | (rd3 << 26) | (gd3 << 21) | (bd3 << 16) | (rd2 << 10) | (gd2 << 5) | bd2;
+
+    ((u32*)dest)[1] = d32;
+}
+
+// Can't work in place
+#ifdef PERFECT_5_TO_4_RESCALING
+static inline void Rescale_320xPAL8Scanline_To_256x1555Scanline(u16* dest, const u8* src, const u32* palette)
+{
+    ComputeDivBy5TableIFN();
+
+    for (size_t i = 0; i < 64; ++i)
+    {
+        u32 s0 = palette[src[5*i+0]];
+        u32 s1 = palette[src[5*i+1]];
+        u32 s2 = palette[src[5*i+2]];
+        u32 s3 = palette[src[5*i+3]];
+        u32 s4 = palette[src[5*i+4]];
+
+        RescaleBlock_5x8888_To_4x1555(s0, s1, s2, s3, s4, dest + 4*i);
+    }
+}
+#else
+static inline void Rescale_320xPAL8Scanline_To_256x1555Scanline(u16* dest, const u8* src, const u16* palette)
+{
+    for (size_t i = 0; i < 64; ++i)
+    {
+        u16 s0 = palette[src[5*i+0]];
+        u16 s1 = palette[src[5*i+1]];
+        u16 s2 = palette[src[5*i+2]];
+        u16 s3 = palette[src[5*i+3]];
+        u16 s4 = palette[src[5*i+4]];
+
+        RescaleBlock_5x1555_To_4x1555(s0, s1, s2, s3, s4, dest + 4*i);
+    }
+}
+#endif
+
+// Can work in place, because it's a contraction
+static inline void Rescale_320x1555Scanline_To_256x1555Scanline(u16* dest, const u16* src)
+{
+    ComputeDivBy5TableIFN();
+
+    for (size_t i = 0; i < 64; ++i)
+    {
+        u16 s0 = src[5*i+0];
+        u16 s1 = src[5*i+1];
+        u16 s2 = src[5*i+2];
+        u16 s3 = src[5*i+3];
+        u16 s4 = src[5*i+4];
+
+        RescaleBlock_5x1555_To_4x1555(s0, s1, s2, s3, s4, dest + 4*i);
+    }
+}
+
+#ifdef PERFECT_5_TO_4_RESCALING
+void Rescale_320x256xPAL8_To_256x256x1555(u16* dest, const u8* src, const u16* palette, int destStride, int srcStride)
+{
+    u32 fastRam[768];
+
+    // Palette lookup -> 0_888
+    for (size_t i = 0; i < 256; ++i)
+    {
+        u32 col = palette[i];
+        u32 result = col & 0x0000001F;
+        result |= (col << 3) & 0x00001F00;
+        result |= (col << 6) & 0x001F0000;
+
+        fastRam[i] = result;
+    }
+
+    for (size_t i = 0; i < 200; ++i)
+    {
+        Rescale_320xPAL8Scanline_To_256x1555Scanline(dest + i*destStride, src + i*srcStride, fastRam);
+    }
+}
+#else
+void Rescale_320x256xPAL8_To_256x256x1555(u16* dest, const u8* src, const u16* palette, int destStride, int srcStride)
+{
+    u16 fastRam[256];
+    for (size_t i = 0; i < 128; ++i)
+        ((u32*)fastRam)[i] = ((const u32*)palette)[i];
+
+    for (size_t i = 0; i < 200; ++i)
+    {
+        Rescale_320xPAL8Scanline_To_256x1555Scanline(dest + i*destStride, src + i*srcStride, fastRam);
+    }
+}
+#endif
+
+void Rescale_320x256x1555_To_256x256x1555(u16* dest, const u16* src, int destStride, int srcStride)
+{
+    for (size_t i = 0; i < 200; ++i)
+    {
+        Rescale_320x1555Scanline_To_256x1555Scanline(dest + i*destStride, src + i*srcStride);
+    }
+}
+
+}
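For readers who don't speak ARM assembler, here is a plain-C++ sketch of what `asmDrawStripToScreen` computes. This is an inferred equivalent, not code from the ScummVM tree: per the asm operand list, `%2` is `text`, `%3` is `src` and `%4` is `dst`, so each row advances `text` by `textSurfacePitch`, `src` by `vsPitch` and `dst` by `vmScreenWidth`. The word-at-a-time loop is only a fast path for runs of four transparent text bytes; the per-pixel effect is "text over src". (The `width &= ~4` in the committed code looks like it was meant to be `width &= ~3`, i.e. round width down to a multiple of four; the sketch simply assumes that alignment.)

```cpp
#include <cstdint>

typedef uint8_t byte;

// Hypothetical reference version of asmDrawStripToScreen: draw a text-layer
// pixel unless it is transparent, in which case the game-screen pixel
// underneath shows through.
static void drawStripToScreenRef(int height, int width, const byte *text,
                                 const byte *src, byte *dst,
                                 int vsPitch, int vmScreenWidth,
                                 int textSurfacePitch) {
	const byte kTransparent = 253; // CHARSET_MASK_TRANSPARENCY
	if (height <= 0)
		height = 1;
	if (width < 4)
		return;
	for (int y = 0; y < height; ++y) {
		for (int x = 0; x < width; ++x)
			dst[x] = (text[x] == kTransparent) ? src[x] : text[x];
		text += textSurfacePitch; // add %2, %2, %8
		src += vsPitch;           // add %3, %3, %6
		dst += vmScreenWidth;     // add %4, %4, %7
	}
}
```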
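The `DIV_BY_5` table is a round-to-nearest division by five: `(2*i + 5) / 10` equals `round(i / 5)` in integer arithmetic, and 160 entries suffice because the largest value ever looked up is a 5-bit channel blended with weights summing to five (4*31 + 31 = 155). A small host-side check of both claims (hypothetical test code, not part of the commit):

```cpp
#include <cassert>
#include <cmath>

static unsigned DIV_BY_5[160];

int main() {
	// Same table the commit builds: (2*i + 5) / 10 == round(i / 5).
	for (int i = 0; i < 160; ++i)
		DIV_BY_5[i] = (2 * i + 5) / 10;

	// One 5-bit channel of the first "perfect" output tap,
	// d0 = round((4*s0 + s1) / 5); max index is 4*31 + 31 = 155.
	for (unsigned s0 = 0; s0 < 32; ++s0) {
		for (unsigned s1 = 0; s1 < 32; ++s1) {
			unsigned d0 = DIV_BY_5[4 * s0 + s1];
			assert(d0 == (unsigned)std::lround((4.0 * s0 + s1) / 5.0));
			assert(d0 < 32); // result still fits a 5-bit channel
		}
	}
	return 0;
}
```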
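The default (non-`PERFECT`) `RescaleBlock_5x1555_To_4x1555` leans on a packed-field trick: with green masked out (`0x7C1F`), red and blue are separated by five zero bits, so a weighted sum like `(3*a + b) >> 2` blends both channels in a single operation; the zero gap absorbs the carries before the shift, and the final mask discards them. Its 1/4- and 1/2-weight taps approximate the exact fifths of the `PERFECT` variant, trading truncation for two adds and a shift per channel pair. A brute-force check of the carry-absorption claim for the first tap (again hypothetical test code):

```cpp
#include <cassert>

int main() {
	// Pixels with green masked away: 0rrrrr00000bbbbb (mask 0x7C1F).
	// Claim: (3*p0 + p1) >> 2 blends red and blue at once, because each
	// 5-bit field has five zero bits above it to absorb the carries,
	// and the final mask throws the carry bits away.
	for (unsigned r0 = 0; r0 < 32; ++r0)
	for (unsigned b0 = 0; b0 < 32; ++b0)
	for (unsigned r1 = 0; r1 < 32; ++r1)
	for (unsigned b1 = 0; b1 < 32; ++b1) {
		unsigned p0 = (r0 << 10) | b0;
		unsigned p1 = (r1 << 10) | b1;
		unsigned blended = ((3 * p0 + p1) >> 2) & 0x7C1F;
		assert(((blended >> 10) & 0x1F) == (3 * r0 + r1) / 4);
		assert((blended & 0x1F) == (3 * b0 + b1) / 4);
	}
	return 0;
}
```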
