diff options
author | neonloop | 2022-02-09 07:20:17 +0000 |
---|---|---|
committer | neonloop | 2022-02-09 07:20:17 +0000 |
commit | 479872a8d30b092671ed49868748e48830bc36da (patch) | |
tree | e80b622d8fd4b0d30f0a265d8c57ffb9ae738cc4 /source/tile.c | |
parent | 874c431fdaff24413886416ed3ffa3455681ac01 (diff) | |
download | snes9x2005-asmgfx.tar.gz snes9x2005-asmgfx.tar.bz2 snes9x2005-asmgfx.zip |
Adds few assembly gfx functions from snes9x2002asmgfx
Diffstat (limited to 'source/tile.c')
-rw-r--r-- | source/tile.c | 327 |
1 files changed, 327 insertions, 0 deletions
diff --git a/source/tile.c b/source/tile.c
index b384561..7bca4c3 100644
--- a/source/tile.c
+++ b/source/tile.c
@@ -11,6 +11,220 @@
 extern uint32_t HeadMask [4];
 extern uint32_t TailMask [5];
 
+#ifdef ARM_ASM
+
+/*
+ * Emits the ARM assembly that spreads one bitplane pair over eight pixels.
+ *
+ * `from` is a register holding a halfword loaded with ldrh; it is shifted
+ * left two bits at a time so that the carry and negative flags expose bits
+ * 15..0 in descending order (the register is destroyed). Bits 15..8 add
+ * 1 << (pix + 1) and bits 7..0 add 1 << pix to one byte lane each: the
+ * first four bits of each group go to byte lanes 0..3 of `to_hi`, the last
+ * four to byte lanes 0..3 of `to_lo`. Condition flags are clobbered.
+ */
+#define f(from, to_lo, to_hi, pix) \
+   " movs " #from ", " #from ", lsl #(17) \n" \
+   " addcs " #to_hi ", " #to_hi ", #(1 << ( 0 + 1 + " #pix ")) \n" \
+   " addmi " #to_hi ", " #to_hi ", #(1 << ( 8 + 1 + " #pix ")) \n" \
+   " movs " #from ", " #from ", lsl #2 \n" \
+   " addcs " #to_hi ", " #to_hi ", #(1 << (16 + 1 + " #pix ")) \n" \
+   " addmi " #to_hi ", " #to_hi ", #(1 << (24 + 1 + " #pix ")) \n" \
+   " movs " #from ", " #from ", lsl #2 \n" \
+   " addcs " #to_lo ", " #to_lo ", #(1 << ( 0 + 1 + " #pix ")) \n" \
+   " addmi " #to_lo ", " #to_lo ", #(1 << ( 8 + 1 + " #pix ")) \n" \
+   " movs " #from ", " #from ", lsl #2 \n" \
+   " addcs " #to_lo ", " #to_lo ", #(1 << (16 + 1 + " #pix ")) \n" \
+   " addmi " #to_lo ", " #to_lo ", #(1 << (24 + 1 + " #pix ")) \n" \
+   \
+   " movs " #from ", " #from ", lsl #2 \n" \
+   " addcs " #to_hi ", " #to_hi ", #(1 << ( 0 + " #pix ")) \n" \
+   " addmi " #to_hi ", " #to_hi ", #(1 << ( 8 + " #pix ")) \n" \
+   " movs " #from ", " #from ", lsl #2 \n" \
+   " addcs " #to_hi ", " #to_hi ", #(1 << (16 + " #pix ")) \n" \
+   " addmi " #to_hi ", " #to_hi ", #(1 << (24 + " #pix ")) \n" \
+   " movs " #from ", " #from ", lsl #2 \n" \
+   " addcs " #to_lo ", " #to_lo ", #(1 << ( 0 + " #pix ")) \n" \
+   " addmi " #to_lo ", " #to_lo ", #(1 << ( 8 + " #pix ")) \n" \
+   " movs " #from ", " #from ", lsl #2 \n" \
+   " addcs " #to_lo ", " #to_lo ", #(1 << (16 + " #pix ")) \n" \
+   " addmi " #to_lo ", " #to_lo ", #(1 << (24 + " #pix ")) \n"
+
+/*
+ * Converts one 8-bit-per-pixel tile from VRAM at TileAddr into pCache.
+ *
+ * Each of the eight loop iterations reads one halfword from each of the
+ * four bitplane pairs (offsets 0, 16, 32 and 48 from the row pointer) and
+ * stores two 32-bit words, i.e. one byte per pixel, to pCache.
+ * pCache is accessed through a uint32_t pointer, so it is assumed to be
+ * word-aligned and at least 64 bytes long (TODO confirm with callers).
+ *
+ * Returns 1 if any converted pixel is non-zero, BLANK_TILE otherwise.
+ */
+uint8_t ConvertTile8bpp(uint8_t* pCache, uint32_t TileAddr)
+{
+   uint8_t* tp = &Memory.VRAM[TileAddr];
+   uint32_t* p = (uint32_t*) pCache;
+   uint32_t non_zero;
+
+   __asm__ volatile(
+      " mov r0, #8 \n"                          /* r0: rows left */
+      " mov %[non_zero], #0 \n"
+
+      "1: \n"
+
+      " mov r1, #0 \n"                          /* r1: pixels 0..3 of the row */
+      " mov r2, #0 \n"                          /* r2: pixels 4..7 of the row */
+
+      " ldrh r3, [%[tp], #16] \n"
+      " ldrh r4, [%[tp], #32] \n"
+
+      f(r3, r2, r1, 2)
+      f(r4, r2, r1, 4)
+
+      " ldrh r3, [%[tp], #48] \n"
+      " ldrh r4, [%[tp]], #2 \n"                /* post-increment to next row */
+
+      f(r3, r2, r1, 6)
+      f(r4, r2, r1, 0)
+
+      " stmia %[p]!, {r1, r2} \n"
+
+      " orr %[non_zero], %[non_zero], r1 \n"
+      " orr %[non_zero], %[non_zero], r2 \n"
+
+      " subs r0, r0, #1 \n"
+      " bne 1b \n"
+
+      : [non_zero] "=&r"(non_zero),             /* written before inputs are used */
+        [tp] "+r"(tp),
+        [p] "+r"(p)
+      :
+      : "r0", "r1", "r2", "r3", "r4", "cc", "memory"
+   );
+
+   return (non_zero ? 1 : BLANK_TILE);
+}
+
+/*
+ * Converts one 4-bit-per-pixel tile from VRAM at TileAddr into pCache.
+ *
+ * Same scheme as ConvertTile8bpp, but only the two bitplane pairs at
+ * offsets 0 and 16 from the row pointer are read.
+ *
+ * Returns 1 if any converted pixel is non-zero, BLANK_TILE otherwise.
+ */
+uint8_t ConvertTile4bpp(uint8_t* pCache, uint32_t TileAddr)
+{
+   uint8_t* tp = &Memory.VRAM[TileAddr];
+   uint32_t* p = (uint32_t*) pCache;
+   uint32_t non_zero;
+
+   __asm__ volatile(
+      " mov r0, #8 \n"                          /* r0: rows left */
+      " mov %[non_zero], #0 \n"
+      "1: \n"
+
+      " mov r1, #0 \n"
+      " mov r2, #0 \n"
+
+      " ldrh r3, [%[tp], #16]\n"
+      " ldrh r4, [%[tp]], #2 \n"                /* post-increment to next row */
+
+      f(r3, r2, r1, 2)
+      f(r4, r2, r1, 0)
+
+      " stmia %[p]!, {r1, r2} \n"
+
+      " orr %[non_zero], %[non_zero], r1 \n"
+      " orr %[non_zero], %[non_zero], r2 \n"
+
+      " subs r0, r0, #1 \n"
+      " bne 1b \n"
+
+      : [non_zero] "=&r"(non_zero),             /* written before inputs are used */
+        [tp] "+r"(tp),
+        [p] "+r"(p)
+      :
+      : "r0", "r1", "r2", "r3", "r4", "cc", "memory"
+   );
+
+   return (non_zero ? 1 : BLANK_TILE);
+}
+
+/*
+ * Converts one 2-bit-per-pixel tile from VRAM at TileAddr into pCache.
+ *
+ * Same scheme as ConvertTile8bpp, but only the single bitplane pair at
+ * the row pointer is read.
+ *
+ * Returns 1 if any converted pixel is non-zero, BLANK_TILE otherwise.
+ */
+uint8_t ConvertTile2bpp(uint8_t* pCache, uint32_t TileAddr)
+{
+   uint8_t* tp = &Memory.VRAM[TileAddr];
+   uint32_t* p = (uint32_t*) pCache;
+   uint32_t non_zero;
+
+   __asm__ volatile(
+      " mov r0, #8 \n"                          /* r0: rows left */
+      " mov %[non_zero], #0 \n"
+      "1: \n"
+
+      " ldrh r3, [%[tp]], #2 \n"                /* post-increment to next row */
+
+      " mov r1, #0 \n"
+      " mov r2, #0 \n"
+
+      f(r3, r2, r1, 0)
+
+      " stmia %[p]!, {r1, r2} \n"
+
+      " orr %[non_zero], %[non_zero], r1 \n"
+      " orr %[non_zero], %[non_zero], r2 \n"
+
+      " subs r0, r0, #1 \n"
+      " bne 1b \n"
+
+      : [non_zero] "=&r"(non_zero),             /* written before inputs are used */
+        [tp] "+r"(tp),
+        [p] "+r"(p)
+      :
+      : "r0", "r1", "r2", "r3", "cc", "memory"
+   );
+
+   return (non_zero ? 1 : BLANK_TILE);
+}
+
+
+/* Tile converter selected by SelectConvertTile(). */
+uint8_t(*ConvertTile)(uint8_t* pCache, uint32_t TileAddr);
+
+/*
+ * Points ConvertTile at the converter matching BG.BitShift (8, 4 or 2).
+ * Any other value leaves ConvertTile unchanged.
+ */
+void SelectConvertTile(void)
+{
+   switch (BG.BitShift)
+   {
+
+   case 8:
+      ConvertTile = &ConvertTile8bpp;
+      break;
+   case 4:
+      ConvertTile = &ConvertTile4bpp;
+      break;
+   case 2:
+      ConvertTile = &ConvertTile2bpp;
+      break;
+   default: /* unsupported depth: keep the previous converter */
+      break;
+   }
+}
+
+#undef f
+#else
 static uint8_t ConvertTile(uint8_t* pCache, uint32_t TileAddr)
 {
    uint8_t* tp = &Memory.VRAM[TileAddr];
@@ -123,6 +337,7 @@ static uint8_t ConvertTile(uint8_t* pCache, uint32_t TileAddr)
    }
    return non_zero ? 1 : BLANK_TILE;
 }
+#endif
 
 #define PLOT_PIXEL(screen, pixel) (pixel)
 
@@ -197,6 +412,62 @@ static INLINE void WRITE_4PIXELS16(int32_t Offset, uint8_t* Pixels, uint16_t* Sc
       : /* input */ [Out16] "r" (Screen), [Z] "r" (Depth), [In8] "r" (Pixels), [Palette] "r" (ScreenColors), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2)
       : /* clobber */ "memory"
    );
+#elif defined(ARM_ASM)
+   uint16_t *Screen = (uint16_t *) GFX.S + Offset;
+   uint8_t *Depth = GFX.DB + Offset;
+   uint32_t t1, t2;
+   /*
+    * For each of the four pixels: when GFX.Z1 is higher than the stored
+    * depth byte and the source pixel is non-zero, store the halfword at
+    * ScreenColors + 2 * pixel to Screen and GFX.Z2 to Depth. t1/t2 alternate
+    * so the next depth byte is loaded before the previous pixel is resolved.
+    */
+   __asm__ __volatile__ (
+      "ldrb %[t1], [%[Depth]] \n"
+      "ldrb %[t2], [%[Depth], #1] \n"
+
+      "cmp %[ZCompare], %[t1] \n"
+      "ldrhib %[t1], [%[Pixels]] \n"
+      "bls 2f \n"
+      "lsls %[t1], %[t1], #1 \n"
+      "ldrneh %[t1], [%[ScreenColors], %[t1]] \n"
+      "strneb %[ZSet], [%[Depth]] \n"
+      "strneh %[t1], [%[Screen]] \n"
+
+      "2: \n"
+      "ldrb %[t1], [%[Depth], #2] \n"
+      "cmp %[ZCompare], %[t2] \n"
+      "ldrhib %[t2], [%[Pixels], #1] \n"
+      "bls 3f \n"
+      "lsls %[t2], %[t2], #1 \n"
+      "ldrneh %[t2], [%[ScreenColors], %[t2]] \n"
+      "strneb %[ZSet], [%[Depth], #1] \n"
+      "strneh %[t2], [%[Screen], #2] \n"
+
+      "3: \n"
+      "ldrb %[t2], [%[Depth], #3] \n"
+      "cmp %[ZCompare], %[t1] \n"
+      "ldrhib %[t1], [%[Pixels], #2] \n"
+      "bls 4f \n"
+      "lsls %[t1], %[t1], #1 \n"
+      "ldrneh %[t1], [%[ScreenColors], %[t1]] \n"
+      "strneb %[ZSet], [%[Depth], #2] \n"
+      "strneh %[t1], [%[Screen], #4] \n"
+
+      "4: \n"
+      "cmp %[ZCompare], %[t2] \n"
+      "ldrhib %[t2], [%[Pixels], #3] \n"
+      "bls 5f \n"
+      "lsls %[t2], %[t2], #1 \n"
+      "ldrneh %[t2], [%[ScreenColors], %[t2]] \n"
+      "strneb %[ZSet], [%[Depth], #3] \n"
+      "strneh %[t2], [%[Screen], #6] \n"
+
+      "5: \n"
+      : [t1] "=&r" (t1), [t2] "=&r" (t2)
+      : [Screen] "r" (Screen), [Depth] "r" (Depth), [Pixels] "r" (Pixels), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2), [ScreenColors] "r" (ScreenColors)
+      : "cc", "memory"
+   );
 #else
    uint8_t Pixel, N;
    uint16_t* Screen = (uint16_t*) GFX.S + Offset;
@@ -284,6 +555,62 @@ static INLINE void WRITE_4PIXELS16_FLIPPED(int32_t Offset, uint8_t* Pixels, uint
       : /* input */ [Out16] "r" (Screen), [Z] "r" (Depth), [In8] "r" (Pixels), [Palette] "r" (ScreenColors), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2)
       : /* clobber */ "memory"
    );
+#elif defined(ARM_ASM)
+   uint16_t *Screen = (uint16_t *) GFX.S + Offset;
+   uint8_t *Depth = GFX.DB + Offset;
+   uint32_t t1, t2;
+   /*
+    * Pixels is read back to front (Pixels[3] first). For each of the four
+    * pixels: when GFX.Z1 is higher than the stored depth byte and the source
+    * pixel is non-zero, store the halfword at ScreenColors + 2 * pixel to
+    * Screen and GFX.Z2 to Depth.
+    */
+   __asm__ __volatile__ (
+      "ldrb %[t1], [%[Depth]] \n"
+      "ldrb %[t2], [%[Depth], #1] \n"
+
+      "cmp %[ZCompare], %[t1] \n"
+      "ldrhib %[t1], [%[Pixels], #3] \n"
+      "bls 2f \n"
+      "lsls %[t1], %[t1], #1 \n"
+      "ldrneh %[t1], [%[ScreenColors], %[t1]] \n"
+      "strneb %[ZSet], [%[Depth]] \n"
+      "strneh %[t1], [%[Screen]] \n"
+
+      "2: \n"
+      "ldrb %[t1], [%[Depth], #2] \n"
+      "cmp %[ZCompare], %[t2] \n"
+      "ldrhib %[t2], [%[Pixels], #2] \n"
+      "bls 3f \n"
+      "lsls %[t2], %[t2], #1 \n"
+      "ldrneh %[t2], [%[ScreenColors], %[t2]] \n"
+      "strneb %[ZSet], [%[Depth], #1] \n"
+      "strneh %[t2], [%[Screen], #2] \n"
+
+      "3: \n"
+      "ldrb %[t2], [%[Depth], #3] \n"
+      "cmp %[ZCompare], %[t1] \n"
+      "ldrhib %[t1], [%[Pixels], #1] \n"
+      "bls 4f \n"
+      "lsls %[t1], %[t1], #1 \n"
+      "ldrneh %[t1], [%[ScreenColors], %[t1]] \n"
+      "strneb %[ZSet], [%[Depth], #2] \n"
+      "strneh %[t1], [%[Screen], #4] \n"
+
+      "4: \n"
+      "cmp %[ZCompare], %[t2] \n"
+      "ldrhib %[t2], [%[Pixels]] \n"
+      "bls 5f \n"
+      "lsls %[t2], %[t2], #1 \n"
+      "ldrneh %[t2], [%[ScreenColors], %[t2]] \n"
+      "strneb %[ZSet], [%[Depth], #3] \n"
+      "strneh %[t2], [%[Screen], #6] \n"
+
+      "5: \n"
+      : [t1] "=&r" (t1), [t2] "=&r" (t2)
+      : [Screen] "r" (Screen), [Depth] "r" (Depth), [Pixels] "r" (Pixels), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2), [ScreenColors] "r" (ScreenColors)
+      : "cc", "memory"
+   );
 #else
    uint8_t Pixel, N;
    uint16_t* Screen = (uint16_t*) GFX.S + Offset;