about | summary | refs | log | tree | commit | diff
path: root/source/tile.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/tile.c')
-rw-r--r-- source/tile.c 268
1 files changed, 268 insertions, 0 deletions
diff --git a/source/tile.c b/source/tile.c
index b384561..7bca4c3 100644
--- a/source/tile.c
+++ b/source/tile.c
@@ -11,6 +11,173 @@
extern uint32_t HeadMask [4];
extern uint32_t TailMask [5];
+#ifdef ARM_ASM
+/*
+ * f(from, to_lo, to_hi, pix): assembly text that scatters the low 16 bits of
+ * register `from` into the accumulator registers `to_hi` and `to_lo`.
+ *
+ * Every MOVS shifts two more bits out of the top of `from`: the bit that
+ * lands in the carry flag drives the following ADDCS, the bit that lands in
+ * bit 31 (N flag) drives the ADDMI.  The first shift is by 17 so that it
+ * starts at bit 15 of the original value.
+ *
+ * Bits 15..8 of the original value add bit (pix + 1), bits 7..0 add bit
+ * (pix).  Within each group of eight source bits, the upper four go to
+ * bytes 0..3 of `to_hi` and the lower four to bytes 0..3 of `to_lo`
+ * (byte n = bits 8n..8n+7 of the register).
+ *
+ * `from` is destroyed and the condition flags are clobbered.  Each
+ * (byte, bit) target is added at most once per expansion, so as long as the
+ * target bits are still clear in the accumulators the ADDs act like ORs.
+ */
+#define f(from, to_lo, to_hi, pix) \
+ " movs " #from ", " #from ", lsl #(17) \n" \
+ " addcs " #to_hi ", " #to_hi ", #(1 << ( 0 + 1 + " #pix ")) \n" \
+ " addmi " #to_hi ", " #to_hi ", #(1 << ( 8 + 1 + " #pix ")) \n" \
+ " movs " #from ", " #from ", lsl #2 \n" \
+ " addcs " #to_hi ", " #to_hi ", #(1 << (16 + 1 + " #pix ")) \n" \
+ " addmi " #to_hi ", " #to_hi ", #(1 << (24 + 1 + " #pix ")) \n" \
+ " movs " #from ", " #from ", lsl #2 \n"\
+ " addcs " #to_lo ", " #to_lo ", #(1 << ( 0 + 1 + " #pix ")) \n" \
+ " addmi " #to_lo ", " #to_lo ", #(1 << ( 8 + 1 + " #pix ")) \n" \
+ " movs " #from ", " #from ", lsl #2 \n" \
+ " addcs " #to_lo ", " #to_lo ", #(1 << (16 + 1 + " #pix ")) \n" \
+ " addmi " #to_lo ", " #to_lo ", #(1 << (24 + 1 + " #pix ")) \n" \
+ \
+ " movs " #from ", " #from ", lsl #2 \n"\
+ " addcs " #to_hi ", " #to_hi ", #(1 << ( 0 + " #pix ")) \n"\
+ " addmi " #to_hi ", " #to_hi ", #(1 << ( 8 + " #pix ")) \n" \
+ " movs " #from ", " #from ", lsl #2 \n"\
+ " addcs " #to_hi ", " #to_hi ", #(1 << (16 + " #pix ")) \n" \
+ " addmi " #to_hi ", " #to_hi ", #(1 << (24 + " #pix ")) \n"\
+ " movs " #from ", " #from ", lsl #2 \n"\
+ " addcs " #to_lo ", " #to_lo ", #(1 << ( 0 + " #pix ")) \n"\
+ " addmi " #to_lo ", " #to_lo ", #(1 << ( 8 + " #pix ")) \n" \
+ " movs " #from ", " #from ", lsl #2 \n"\
+ " addcs " #to_lo ", " #to_lo ", #(1 << (16 + " #pix ")) \n" \
+ " addmi " #to_lo ", " #to_lo ", #(1 << (24 + " #pix ")) \n"
+
+/*
+ * ConvertTile8bpp - ARM inline-assembly tile converter that
+ * SelectConvertTile() installs when BG.BitShift is 8.
+ *
+ * pCache:   destination buffer; 8 iterations x 2 words = 64 bytes are stored.
+ * TileAddr: index into Memory.VRAM at which the source tile data starts.
+ *
+ * Each of the eight iterations loads the halfwords at tp+16, tp+32, tp+48 and
+ * tp+0, lets f() spread them into bits 2-3, 4-5, 6-7 and 0-1 of the eight
+ * output bytes held in r1/r2, stores r1 then r2 through p, and advances tp
+ * by two bytes.
+ *
+ * Returns 1 when any stored word is non-zero, BLANK_TILE otherwise.
+ *
+ * NOTE(review): STMIA through the uint32_t pointer presumably requires pCache
+ * to be word-aligned - confirm against the callers.
+ */
+uint8_t ConvertTile8bpp(uint8_t* pCache, uint32_t TileAddr)
+{
+   uint8_t* tp = &Memory.VRAM[TileAddr];
+   uint32_t* p = (uint32_t*) pCache;
+   uint32_t non_zero;
+
+   __asm__ volatile(
+      " mov r0, #8 \n"                /* r0 = iterations left */
+      " mov %[non_zero], #0 \n"
+
+      "1: \n"
+
+      " mov r1, #0 \n"                /* r1/r2 accumulate this iteration's 8 output bytes */
+      " mov r2, #0 \n"
+
+      " ldrh r3, [%[tp], #16] \n"
+      " ldrh r4, [%[tp], #32] \n"
+
+      f(r3, r2, r1, 2)
+      f(r4, r2, r1, 4)
+
+      " ldrh r3, [%[tp], #48] \n"
+      " ldrh r4, [%[tp]], #2 \n"      /* post-increment: next iteration starts 2 bytes on */
+
+      f(r3, r2, r1, 6)
+      f(r4, r2, r1, 0)
+
+      " stmia %[p]!, {r1, r2} \n"
+
+      " orr %[non_zero], %[non_zero], r1 \n"
+      " orr %[non_zero], %[non_zero], r2 \n"
+
+      " subs r0, r0, #1 \n"
+      " bne 1b \n"
+
+      /* non_zero is written by the asm before it is ever read, so it is an
+       * early-clobber output rather than an (uninitialized) in/out operand. */
+      : [non_zero] "=&r"(non_zero),
+        [tp] "+r"(tp),
+        [p] "+r"(p)
+      :
+      /* "memory": the asm reads VRAM and writes the cache through pointers
+       * the compiler cannot see being dereferenced. */
+      : "r0", "r1", "r2", "r3", "r4", "cc", "memory"
+   );
+
+   return (non_zero ? 1 : BLANK_TILE);
+}
+
+/*
+ * ConvertTile4bpp - ARM inline-assembly tile converter that
+ * SelectConvertTile() installs when BG.BitShift is 4.
+ *
+ * pCache:   destination buffer; 8 iterations x 2 words = 64 bytes are stored.
+ * TileAddr: index into Memory.VRAM at which the source tile data starts.
+ *
+ * Each of the eight iterations loads the halfwords at tp+16 and tp+0, lets
+ * f() spread them into bits 2-3 and 0-1 of the eight output bytes held in
+ * r1/r2, stores r1 then r2 through p, and advances tp by two bytes.
+ *
+ * Returns 1 when any stored word is non-zero, BLANK_TILE otherwise.
+ *
+ * NOTE(review): STMIA through the uint32_t pointer presumably requires pCache
+ * to be word-aligned - confirm against the callers.
+ */
+uint8_t ConvertTile4bpp(uint8_t* pCache, uint32_t TileAddr)
+{
+   uint8_t* tp = &Memory.VRAM[TileAddr];
+   uint32_t* p = (uint32_t*) pCache;
+   uint32_t non_zero;
+
+   __asm__ volatile(
+      " mov r0, #8 \n"                /* r0 = iterations left */
+      " mov %[non_zero], #0 \n"
+      "1: \n"
+
+      " mov r1, #0 \n"                /* r1/r2 accumulate this iteration's 8 output bytes */
+      " mov r2, #0 \n"
+
+      " ldrh r3, [%[tp], #16]\n"
+      " ldrh r4, [%[tp]], #2 \n"      /* post-increment: next iteration starts 2 bytes on */
+
+      f(r3, r2, r1, 2)
+      f(r4, r2, r1, 0)
+
+      " stmia %[p]!, {r1, r2} \n"
+
+      " orr %[non_zero], %[non_zero], r1 \n"
+      " orr %[non_zero], %[non_zero], r2 \n"
+
+      " subs r0, r0, #1 \n"
+      " bne 1b \n"
+
+      /* non_zero is written by the asm before it is ever read, so it is an
+       * early-clobber output rather than an (uninitialized) in/out operand. */
+      : [non_zero] "=&r"(non_zero),
+        [tp] "+r"(tp),
+        [p] "+r"(p)
+      :
+      /* "memory": the asm reads VRAM and writes the cache through pointers
+       * the compiler cannot see being dereferenced. */
+      : "r0", "r1", "r2", "r3", "r4", "cc", "memory"
+   );
+
+   return (non_zero ? 1 : BLANK_TILE);
+}
+
+/*
+ * ConvertTile2bpp - ARM inline-assembly tile converter that
+ * SelectConvertTile() installs when BG.BitShift is 2.
+ *
+ * pCache:   destination buffer; 8 iterations x 2 words = 64 bytes are stored.
+ * TileAddr: index into Memory.VRAM at which the source tile data starts.
+ *
+ * Each of the eight iterations loads one halfword from tp (advancing tp by
+ * two bytes), lets f() spread it into bits 0-1 of the eight output bytes
+ * held in r1/r2, and stores r1 then r2 through p.
+ *
+ * Returns 1 when any stored word is non-zero, BLANK_TILE otherwise.
+ *
+ * NOTE(review): STMIA through the uint32_t pointer presumably requires pCache
+ * to be word-aligned - confirm against the callers.
+ */
+uint8_t ConvertTile2bpp(uint8_t* pCache, uint32_t TileAddr)
+{
+   uint8_t* tp = &Memory.VRAM[TileAddr];
+   uint32_t* p = (uint32_t*) pCache;
+   uint32_t non_zero;
+
+   __asm__ volatile(
+      " mov r0, #8 \n"                /* r0 = iterations left */
+      " mov %[non_zero], #0 \n"
+      "1: \n"
+
+      " ldrh r3, [%[tp]], #2 \n"      /* post-increment: next iteration starts 2 bytes on */
+
+      " mov r1, #0 \n"                /* r1/r2 accumulate this iteration's 8 output bytes */
+      " mov r2, #0 \n"
+
+      f(r3, r2, r1, 0)
+
+      " stmia %[p]!, {r1, r2} \n"
+
+      " orr %[non_zero], %[non_zero], r1 \n"
+      " orr %[non_zero], %[non_zero], r2 \n"
+
+      " subs r0, r0, #1 \n"
+      " bne 1b \n"
+
+      /* non_zero is written by the asm before it is ever read, so it is an
+       * early-clobber output rather than an (uninitialized) in/out operand. */
+      : [non_zero] "=&r"(non_zero),
+        [tp] "+r"(tp),
+        [p] "+r"(p)
+      :
+      /* "memory": the asm reads VRAM and writes the cache through pointers
+       * the compiler cannot see being dereferenced. */
+      : "r0", "r1", "r2", "r3", "cc", "memory"
+   );
+
+   return (non_zero ? 1 : BLANK_TILE);
+}
+
+
+/* Tile converter currently selected; assigned by SelectConvertTile(). */
+uint8_t (*ConvertTile)(uint8_t* pCache, uint32_t TileAddr);
+
+/*
+ * SelectConvertTile - point ConvertTile at the converter for BG.BitShift.
+ *
+ * BG.BitShift values 8, 4 and 2 select ConvertTile8bpp, ConvertTile4bpp and
+ * ConvertTile2bpp respectively.  For any other value ConvertTile keeps
+ * whatever it held before the call.
+ */
+void SelectConvertTile(void)
+{
+   switch (BG.BitShift)
+   {
+      case 8:
+         ConvertTile = &ConvertTile8bpp;
+         break;
+      case 4:
+         ConvertTile = &ConvertTile4bpp;
+         break;
+      case 2:
+         ConvertTile = &ConvertTile2bpp;
+         break;
+      default:
+         /* No converter for this depth: leave the previous selection alone. */
+         break;
+   }
+}
+#else
static uint8_t ConvertTile(uint8_t* pCache, uint32_t TileAddr)
{
uint8_t* tp = &Memory.VRAM[TileAddr];
@@ -123,6 +290,7 @@ static uint8_t ConvertTile(uint8_t* pCache, uint32_t TileAddr)
}
return non_zero ? 1 : BLANK_TILE;
}
+#endif
#define PLOT_PIXEL(screen, pixel) (pixel) /* expands to `pixel` alone; the `screen` argument is discarded */
@@ -197,6 +365,56 @@ static INLINE void WRITE_4PIXELS16(int32_t Offset, uint8_t* Pixels, uint16_t* Sc
: /* input */ [Out16] "r" (Screen), [Z] "r" (Depth), [In8] "r" (Pixels), [Palette] "r" (ScreenColors), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2)
: /* clobber */ "memory"
);
+#elif defined(ARM_ASM) /* plain ARM inline-assembly variant */
+ uint16_t *Screen = (uint16_t *) GFX.S + Offset;
+ uint8_t *Depth = GFX.DB + Offset;
+ uint32_t t1, t2;
+ __asm__ __volatile__ (
+ "ldrb %[t1], [%[Depth]] \n"
+ "ldrb %[t2], [%[Depth], #1] \n"
+ /*
+ * For each n = 0..3: when ZCompare is (unsigned) higher than Depth[n] and
+ * Pixels[n] is non-zero, store ScreenColors[Pixels[n]] to Screen[n] and
+ * ZSet to Depth[n]; otherwise leave both untouched.
+ *
+ * t1 and t2 alternate as scratch so that the depth byte for the next
+ * pixel is loaded before the current pixel is handled.  LSLS doubles the
+ * pixel value into a halfword offset and sets Z for a zero pixel, which
+ * turns the NE-conditional load and stores into no-ops.
+ *
+ * NOTE(review): ldrhib / ldrneh / strneb / strneh use the pre-UAL
+ * (condition before size suffix) spelling - confirm the assembler is run
+ * in divided-syntax mode.
+ *
+ * Pixel 0 (depth already in t1):
+ */
+ "cmp %[ZCompare], %[t1] \n"
+ "ldrhib %[t1], [%[Pixels]] \n"
+ "bls 2f \n"
+ "lsls %[t1], %[t1], #1 \n"
+ "ldrneh %[t1], [%[ScreenColors], %[t1]] \n"
+ "strneb %[ZSet], [%[Depth]] \n"
+ "strneh %[t1], [%[Screen]] \n"
+ /* Pixel 1 (depth in t2); Depth[2] is fetched into t1 first. */
+ "2: \n"
+ "ldrb %[t1], [%[Depth], #2] \n"
+ "cmp %[ZCompare], %[t2] \n"
+ "ldrhib %[t2], [%[Pixels], #1] \n"
+ "bls 3f \n"
+ "lsls %[t2], %[t2], #1 \n"
+ "ldrneh %[t2], [%[ScreenColors], %[t2]] \n"
+ "strneb %[ZSet], [%[Depth], #1] \n"
+ "strneh %[t2], [%[Screen], #2] \n"
+ /* Pixel 2 (depth in t1); Depth[3] is fetched into t2 first. */
+ "3: \n"
+ "ldrb %[t2], [%[Depth], #3] \n"
+ "cmp %[ZCompare], %[t1] \n"
+ "ldrhib %[t1], [%[Pixels], #2] \n"
+ "bls 4f \n"
+ "lsls %[t1], %[t1], #1 \n"
+ "ldrneh %[t1], [%[ScreenColors], %[t1]] \n"
+ "strneb %[ZSet], [%[Depth], #2] \n"
+ "strneh %[t1], [%[Screen], #4] \n"
+ /* Pixel 3 (depth in t2). */
+ "4: \n"
+ "cmp %[ZCompare], %[t2] \n"
+ "ldrhib %[t2], [%[Pixels], #3] \n"
+ "bls 5f \n"
+ "lsls %[t2], %[t2], #1 \n"
+ "ldrneh %[t2], [%[ScreenColors], %[t2]] \n"
+ "strneb %[ZSet], [%[Depth], #3] \n"
+ "strneh %[t2], [%[Screen], #6] \n"
+ /* Exit label reached when pixel 3 fails the depth test. */
+ "5: \n"
+ : [t1] "=&r" (t1), [t2] "=&r" (t2)
+ : [Screen] "r" (Screen), [Depth] "r" (Depth), [Pixels] "r" (Pixels), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2), [ScreenColors] "r" (ScreenColors)
+ : "cc", "memory"
+ );
#else
uint8_t Pixel, N;
uint16_t* Screen = (uint16_t*) GFX.S + Offset;
@@ -284,6 +502,56 @@ static INLINE void WRITE_4PIXELS16_FLIPPED(int32_t Offset, uint8_t* Pixels, uint
: /* input */ [Out16] "r" (Screen), [Z] "r" (Depth), [In8] "r" (Pixels), [Palette] "r" (ScreenColors), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2)
: /* clobber */ "memory"
);
+#elif defined(ARM_ASM) /* plain ARM inline-assembly variant */
+ uint16_t *Screen = (uint16_t *) GFX.S + Offset;
+ uint8_t *Depth = GFX.DB + Offset;
+ uint32_t t1, t2;
+ __asm__ __volatile__ (
+ "ldrb %[t1], [%[Depth]] \n"
+ "ldrb %[t2], [%[Depth], #1] \n"
+ /*
+ * For each n = 0..3 the source pixel is Pixels[3 - n]: when ZCompare is
+ * (unsigned) higher than Depth[n] and that pixel is non-zero, store
+ * ScreenColors[pixel] to Screen[n] and ZSet to Depth[n]; otherwise leave
+ * both untouched.
+ *
+ * t1 and t2 alternate as scratch so that the depth byte for the next
+ * position is loaded before the current one is handled.  LSLS doubles the
+ * pixel value into a halfword offset and sets Z for a zero pixel, which
+ * turns the NE-conditional load and stores into no-ops.
+ *
+ * NOTE(review): ldrhib / ldrneh / strneb / strneh use the pre-UAL
+ * (condition before size suffix) spelling - confirm the assembler is run
+ * in divided-syntax mode.
+ *
+ * Position 0 (depth already in t1), source Pixels[3]:
+ */
+ "cmp %[ZCompare], %[t1] \n"
+ "ldrhib %[t1], [%[Pixels], #3] \n"
+ "bls 2f \n"
+ "lsls %[t1], %[t1], #1 \n"
+ "ldrneh %[t1], [%[ScreenColors], %[t1]] \n"
+ "strneb %[ZSet], [%[Depth]] \n"
+ "strneh %[t1], [%[Screen]] \n"
+ /* Position 1 (depth in t2), source Pixels[2]; Depth[2] is fetched into t1 first. */
+ "2: \n"
+ "ldrb %[t1], [%[Depth], #2] \n"
+ "cmp %[ZCompare], %[t2] \n"
+ "ldrhib %[t2], [%[Pixels], #2] \n"
+ "bls 3f \n"
+ "lsls %[t2], %[t2], #1 \n"
+ "ldrneh %[t2], [%[ScreenColors], %[t2]] \n"
+ "strneb %[ZSet], [%[Depth], #1] \n"
+ "strneh %[t2], [%[Screen], #2] \n"
+ /* Position 2 (depth in t1), source Pixels[1]; Depth[3] is fetched into t2 first. */
+ "3: \n"
+ "ldrb %[t2], [%[Depth], #3] \n"
+ "cmp %[ZCompare], %[t1] \n"
+ "ldrhib %[t1], [%[Pixels], #1] \n"
+ "bls 4f \n"
+ "lsls %[t1], %[t1], #1 \n"
+ "ldrneh %[t1], [%[ScreenColors], %[t1]] \n"
+ "strneb %[ZSet], [%[Depth], #2] \n"
+ "strneh %[t1], [%[Screen], #4] \n"
+ /* Position 3 (depth in t2), source Pixels[0]. */
+ "4: \n"
+ "cmp %[ZCompare], %[t2] \n"
+ "ldrhib %[t2], [%[Pixels]] \n"
+ "bls 5f \n"
+ "lsls %[t2], %[t2], #1 \n"
+ "ldrneh %[t2], [%[ScreenColors], %[t2]] \n"
+ "strneb %[ZSet], [%[Depth], #3] \n"
+ "strneh %[t2], [%[Screen], #6] \n"
+ /* Exit label reached when position 3 fails the depth test. */
+ "5: \n"
+ : [t1] "=&r" (t1), [t2] "=&r" (t2)
+ : [Screen] "r" (Screen), [Depth] "r" (Depth), [Pixels] "r" (Pixels), [ZCompare] "r" (GFX.Z1), [ZSet] "r" (GFX.Z2), [ScreenColors] "r" (ScreenColors)
+ : "cc", "memory"
+ );
#else
uint8_t Pixel, N;
uint16_t* Screen = (uint16_t*) GFX.S + Offset;