| author | Bertrand Augereau | 2006-12-03 16:09:26 +0000 | 
|---|---|---|
| committer | Bertrand Augereau | 2006-12-03 16:09:26 +0000 | 
| commit | 0b110b42daabfa8c06d97edab3ad844f24f0a034 (patch) | |
| tree | 4d1d9ca6908de48353e84886d4ab5a20e4894ec3 /backends | |
| parent | 352ed7299fe87d08273609c8527a7580a7851199 (diff) | |
NDS: Micro-optimizations for the software blitter
svn-id: r24804
Diffstat (limited to 'backends')
| -rw-r--r-- | backends/platform/ds/arm9/source/blitters.cpp | 35 | 
1 file changed, 26 insertions, 9 deletions
```diff
diff --git a/backends/platform/ds/arm9/source/blitters.cpp b/backends/platform/ds/arm9/source/blitters.cpp
index 62fe4d6294..86a69e116c 100644
--- a/backends/platform/ds/arm9/source/blitters.cpp
+++ b/backends/platform/ds/arm9/source/blitters.cpp
@@ -143,28 +143,45 @@ static inline void RescaleBlock_5x1555_To_4x1555( u16 s0, u16 s1, u16 s2, u16 s3
     u32 bs2 = s2 & 0x1F;
     u32 bs3 = s3 & 0x1F;
     u32 bs4 = s4 & 0x1F;
-
-    u32 gs0_4 = (s0 >> 3) & 0x7C;
+
+#if 0
+    u32 gs0 = (s0 >> 5) & 0x1F;
     u32 gs1 = (s1 >> 5) & 0x1F;
     u32 gs2 = (s2 >> 5) & 0x1F;
     u32 gs3 = (s3 >> 5) & 0x1F;
-    u32 gs4_4 = (s4 >> 3) & 0x7C;
+    u32 gs4 = (s4 >> 5) & 0x1F;
 
-    u32 rs0_4 = (s0 >> 8) & 0x7C;
+    u32 rs0 = (s0 >> 10) & 0x1F;
     u32 rs1 = (s1 >> 10) & 0x1F;
     u32 rs2 = (s2 >> 10) & 0x1F;
     u32 rs3 = (s3 >> 10) & 0x1F;
-    u32 rs4_4 = (s4 >> 8) & 0x7C;
+    u32 rs4 = (s4 >> 10) & 0x1F;
+#else
+    // The compiler absolutely wants to use 0x1F as an immediate, which makes it unable to fold the shift during the and
+    u32 mask = 0x1F;
+    u32 gs0, gs1, gs2, gs3, gs4;
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs0) : "r"(s0), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs1) : "r"(s1), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs2) : "r"(s2), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs3) : "r"(s3), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs4) : "r"(s4), "r"(mask) : );
+    u32 rs0, rs1, rs2, rs3, rs4;
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs0) : "r"(s0), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs1) : "r"(s1), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs2) : "r"(s2), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs3) : "r"(s3), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs4) : "r"(s4), "r"(mask) : );
+#endif
 
-    u32 rd0 = rs0_4 +   rs1;
+    u32 rd0 = 4*rs0 +   rs1;
     u32 rd1 = 2*rs1 + rs1 + 2*rs2;
     u32 rd2 = 2*rs2 + 2*rs3 + rs3;
-    u32 rd3 =   rs3 + rs4_4;
+    u32 rd3 =   rs3 + 4*rs4;
 
-    u32 gd0 = gs0_4 +   gs1;
+    u32 gd0 = 4*gs0 +   gs1;
     u32 gd1 = 2*gs1 + gs1 + 2*gs2;
     u32 gd2 = 2*gs2 + 2*gs3 + gs3;
-    u32 gd3 =   gs3 + gs4_4;
+    u32 gd3 =   gs3 + 4*gs4;
 
     u32 bd0 = 4*bs0 +   bs1;
     u32 bd1 = 2*bs1 + bs1 + 2*bs2;
```
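The point of the inline asm: ARM data-processing instructions can shift their second register operand through the barrel shifter at no extra cost, so `mask & (s0 >> 5)` fits in a single `and gs0, mask, s0, lsr #5` — but only when the mask lives in a register. With `0x1F` as an immediate, GCC has to emit a separate shift first. Below is a minimal standalone sketch of the same trick, assuming an ARM9 (ARMv5) target and a GCC-style toolchain; the function names are illustrative, not from the patch:

```cpp
#include <stdint.h>

typedef uint16_t u16;
typedef uint32_t u32;

// Plain C: with 0x1F as an immediate operand, GCC typically emits two
// instructions, e.g.  mov r3, r0, lsr #5 ; and r0, r3, #31
static inline u32 green5_c(u16 s) {
    return (s >> 5) & 0x1F;
}

// Forcing the mask into a register lets the barrel shifter fold the
// shift into the and:  and r0, r1, r0, lsr #5  (one instruction)
static inline u32 green5_asm(u16 s) {
    u32 mask = 0x1F;
    u32 g;
    asm("and %0, %2, %1, lsr #5" : "=r"(g) : "r"(s), "r"(mask));
    return g;
}
```

The same pattern repeats for the red channel with `lsr #10`; blue sits in the low bits and needs no shift, so the plain `& 0x1F` is already a single instruction there.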

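As for the arithmetic below the `#endif`: each destination pixel of the 5-to-4 rescale blends the two source pixels it straddles, with integer weights 4:1, 3:2, 2:3 and 1:4 (each pair summing to 5). Spelling `3*x` as `2*x + x` nudges older compilers toward ARM's add-with-shifted-operand (`add rd, rx, rx, lsl #1`) rather than a multiply. A hypothetical per-channel reference, assuming the part of the function not shown in this hunk normalizes each weighted sum by 5:

```cpp
#include <stdint.h>

typedef uint32_t u32;

// Illustrative only: the function name and the /5 normalization are
// assumptions, not shown in the hunk above.
static inline void rescale_5_to_4(const u32 s[5], u32 d[4]) {
    d[0] = (4*s[0] + 1*s[1]) / 5;  // rd0 = 4*rs0 + rs1
    d[1] = (3*s[1] + 2*s[2]) / 5;  // rd1 = 2*rs1 + rs1 + 2*rs2
    d[2] = (2*s[2] + 3*s[3]) / 5;  // rd2 = 2*rs2 + 2*rs3 + rs3
    d[3] = (1*s[3] + 4*s[4]) / 5;  // rd3 = rs3 + 4*rs4
}
```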