diff options
author | Autechre | 2020-10-15 18:54:26 +0200 |
---|---|---|
committer | GitHub | 2020-10-15 18:54:26 +0200 |
commit | fed55b71325a5cd2ead019b2fe355644f7a6e794 (patch) | |
tree | 976ef8cf2b58958ad4888f97fa736916d6064386 /source/tile.c | |
parent | 5f03bb288e72cc19c04060549b486a6311afe9c9 (diff) | |
parent | 23294848d036e19ca45b8b511d343c95b9f914e6 (diff) | |
download | snes9x2005-fed55b71325a5cd2ead019b2fe355644f7a6e794.tar.gz snes9x2005-fed55b71325a5cd2ead019b2fe355644f7a6e794.tar.bz2 snes9x2005-fed55b71325a5cd2ead019b2fe355644f7a6e794.zip |
Merge pull request #76 from jdgleaver/optimisations
Backports: Colour operations from Snes9x 1.60 + MIPS optimisations from PocketSNES
Diffstat (limited to 'source/tile.c')
-rw-r--r-- | source/tile.c | 146 |
1 file changed, 144 insertions, 2 deletions
diff --git a/source/tile.c b/source/tile.c index 17a2934..b384561 100644 --- a/source/tile.c +++ b/source/tile.c @@ -126,8 +126,78 @@ static uint8_t ConvertTile(uint8_t* pCache, uint32_t TileAddr) #define PLOT_PIXEL(screen, pixel) (pixel) -static void WRITE_4PIXELS16(int32_t Offset, uint8_t* Pixels, uint16_t* ScreenColors) +static INLINE void WRITE_4PIXELS16(int32_t Offset, uint8_t* Pixels, uint16_t* ScreenColors) { +#if defined(__MIPSEL) && defined(__GNUC__) && !defined(NO_ASM) + uint16_t *Screen = (uint16_t *) GFX.S + Offset; + uint8_t *Depth = GFX.DB + Offset; + uint8_t Pixel_A, Pixel_B, Pixel_C, Pixel_D; + uint8_t Depth_A, Depth_B, Depth_C, Depth_D; + uint8_t Cond; + uint32_t Temp; + __asm__ __volatile__ ( + ".set noreorder \n" + " lbu %[In8A], 0(%[In8]) \n" + " lbu %[In8B], 1(%[In8]) \n" + " lbu %[In8C], 2(%[In8]) \n" + " lbu %[In8D], 3(%[In8]) \n" + " lbu %[ZA], 0(%[Z]) \n" + " lbu %[ZB], 1(%[Z]) \n" + " lbu %[ZC], 2(%[Z]) \n" + " lbu %[ZD], 3(%[Z]) \n" + /* If In8A is non-zero (opaque) and ZCompare > ZA, write the pixel to + * the screen from the palette. */ + " sltiu %[Temp], %[In8A], 1 \n" + " sltu %[Cond], %[ZCompare], %[ZA] \n" + " or %[Cond], %[Cond], %[Temp] \n" + /* Otherwise skip to the next pixel, B. */ + " bne %[Cond], $0, 2f \n" + /* Load the address of the palette entry (16-bit) corresponding to + * this pixel (partially in the delay slot). */ + " sll %[In8A], %[In8A], 1 \n" + " addu %[Temp], %[Palette], %[In8A] \n" + /* Load the palette entry. While that's being done, store the new + * depth for this pixel. Then store to the screen. */ + " lhu %[Temp], 0(%[Temp]) \n" + " sb %[ZSet], 0(%[Z]) \n" + " sh %[Temp], 0(%[Out16]) \n" + /* Now do the same for pixel B. 
*/ + "2: sltiu %[Temp], %[In8B], 1 \n" + " sltu %[Cond], %[ZCompare], %[ZB] \n" + " or %[Cond], %[Cond], %[Temp] \n" + " bne %[Cond], $0, 3f \n" + " sll %[In8B], %[In8B], 1 \n" + " addu %[Temp], %[Palette], %[In8B] \n" + " lhu %[Temp], 0(%[Temp]) \n" + " sb %[ZSet], 1(%[Z]) \n" + " sh %[Temp], 2(%[Out16]) \n" + /* Now do the same for pixel C. */ + "3: sltiu %[Temp], %[In8C], 1 \n" + " sltu %[Cond], %[ZCompare], %[ZC] \n" + " or %[Cond], %[Cond], %[Temp] \n" + " bne %[Cond], $0, 4f \n" + " sll %[In8C], %[In8C], 1 \n" + " addu %[Temp], %[Palette], %[In8C] \n" + " lhu %[Temp], 0(%[Temp]) \n" + " sb %[ZSet], 2(%[Z]) \n" + " sh %[Temp], 4(%[Out16]) \n" + /* Now do the same for pixel D. */ + "4: sltiu %[Temp], %[In8D], 1 \n" + " sltu %[Cond], %[ZCompare], %[ZD] \n" + " or %[Cond], %[Cond], %[Temp] \n" + " bne %[Cond], $0, 5f \n" + " sll %[In8D], %[In8D], 1 \n" + " addu %[Temp], %[Palette], %[In8D] \n" + " lhu %[Temp], 0(%[Temp]) \n" + " sb %[ZSet], 3(%[Z]) \n" + " sh %[Temp], 6(%[Out16]) \n" + "5: \n" + ".set reorder \n" + : /* output */ [In8A] "=&r" (Pixel_A), [In8B] "=&r" (Pixel_B), [In8C] "=&r" (Pixel_C), [In8D] "=&r" (Pixel_D), [ZA] "=&r" (Depth_A), [ZB] "=&r" (Depth_B), [ZC] "=&r" (Depth_C), [ZD] "=&r" (Depth_D), [Cond] "=&r" (Cond), [Temp] "=&r" (Temp) + : /* input */ [Out16] "r" (Screen), [Z] "r" (Depth), [In8] "r" (Pixels), [Palette] "r" (ScreenColors), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2) + : /* clobber */ "memory" + ); +#else uint8_t Pixel, N; uint16_t* Screen = (uint16_t*) GFX.S + Offset; uint8_t* Depth = GFX.DB + Offset; @@ -140,10 +210,81 @@ static void WRITE_4PIXELS16(int32_t Offset, uint8_t* Pixels, uint16_t* ScreenCol Depth [N] = GFX.Z2; } } +#endif } -static void WRITE_4PIXELS16_FLIPPED(int32_t Offset, uint8_t* Pixels, uint16_t* ScreenColors) +static INLINE void WRITE_4PIXELS16_FLIPPED(int32_t Offset, uint8_t* Pixels, uint16_t* ScreenColors) { +#if defined(__MIPSEL) && defined(__GNUC__) && !defined(NO_ASM) + uint16_t *Screen = (uint16_t *) GFX.S 
+ Offset; + uint8_t *Depth = GFX.DB + Offset; + uint8_t Pixel_A, Pixel_B, Pixel_C, Pixel_D; + uint8_t Depth_A, Depth_B, Depth_C, Depth_D; + uint8_t Cond; + uint32_t Temp; + __asm__ __volatile__ ( + ".set noreorder \n" + " lbu %[In8A], 3(%[In8]) \n" + " lbu %[In8B], 2(%[In8]) \n" + " lbu %[In8C], 1(%[In8]) \n" + " lbu %[In8D], 0(%[In8]) \n" + " lbu %[ZA], 0(%[Z]) \n" + " lbu %[ZB], 1(%[Z]) \n" + " lbu %[ZC], 2(%[Z]) \n" + " lbu %[ZD], 3(%[Z]) \n" + /* If In8A is non-zero (opaque) and ZCompare > ZA, write the pixel to + * the screen from the palette. */ + " sltiu %[Temp], %[In8A], 1 \n" + " sltu %[Cond], %[ZCompare], %[ZA] \n" + " or %[Cond], %[Cond], %[Temp] \n" + /* Otherwise skip to the next pixel, B. */ + " bne %[Cond], $0, 2f \n" + /* Load the address of the palette entry (16-bit) corresponding to + * this pixel (partially in the delay slot). */ + " sll %[In8A], %[In8A], 1 \n" + " addu %[Temp], %[Palette], %[In8A] \n" + /* Load the palette entry. While that's being done, store the new + * depth for this pixel. Then store to the screen. */ + " lhu %[Temp], 0(%[Temp]) \n" + " sb %[ZSet], 0(%[Z]) \n" + " sh %[Temp], 0(%[Out16]) \n" + /* Now do the same for pixel B. */ + "2: sltiu %[Temp], %[In8B], 1 \n" + " sltu %[Cond], %[ZCompare], %[ZB] \n" + " or %[Cond], %[Cond], %[Temp] \n" + " bne %[Cond], $0, 3f \n" + " sll %[In8B], %[In8B], 1 \n" + " addu %[Temp], %[Palette], %[In8B] \n" + " lhu %[Temp], 0(%[Temp]) \n" + " sb %[ZSet], 1(%[Z]) \n" + " sh %[Temp], 2(%[Out16]) \n" + /* Now do the same for pixel C. */ + "3: sltiu %[Temp], %[In8C], 1 \n" + " sltu %[Cond], %[ZCompare], %[ZC] \n" + " or %[Cond], %[Cond], %[Temp] \n" + " bne %[Cond], $0, 4f \n" + " sll %[In8C], %[In8C], 1 \n" + " addu %[Temp], %[Palette], %[In8C] \n" + " lhu %[Temp], 0(%[Temp]) \n" + " sb %[ZSet], 2(%[Z]) \n" + " sh %[Temp], 4(%[Out16]) \n" + /* Now do the same for pixel D. 
*/ + "4: sltiu %[Temp], %[In8D], 1 \n" + " sltu %[Cond], %[ZCompare], %[ZD] \n" + " or %[Cond], %[Cond], %[Temp] \n" + " bne %[Cond], $0, 5f \n" + " sll %[In8D], %[In8D], 1 \n" + " addu %[Temp], %[Palette], %[In8D] \n" + " lhu %[Temp], 0(%[Temp]) \n" + " sb %[ZSet], 3(%[Z]) \n" + " sh %[Temp], 6(%[Out16]) \n" + "5: \n" + ".set reorder \n" + : /* output */ [In8A] "=&r" (Pixel_A), [In8B] "=&r" (Pixel_B), [In8C] "=&r" (Pixel_C), [In8D] "=&r" (Pixel_D), [ZA] "=&r" (Depth_A), [ZB] "=&r" (Depth_B), [ZC] "=&r" (Depth_C), [ZD] "=&r" (Depth_D), [Cond] "=&r" (Cond), [Temp] "=&r" (Temp) + : /* input */ [Out16] "r" (Screen), [Z] "r" (Depth), [In8] "r" (Pixels), [Palette] "r" (ScreenColors), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2) + : /* clobber */ "memory" + ); +#else uint8_t Pixel, N; uint16_t* Screen = (uint16_t*) GFX.S + Offset; uint8_t* Depth = GFX.DB + Offset; @@ -156,6 +297,7 @@ static void WRITE_4PIXELS16_FLIPPED(int32_t Offset, uint8_t* Pixels, uint16_t* S Depth [N] = GFX.Z2; } } +#endif } static void WRITE_4PIXELS16_HALFWIDTH(int32_t Offset, uint8_t* Pixels, uint16_t* ScreenColors) |