summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Guillen Fandos 2021-03-15 02:25:02 +0100
committer David Guillen Fandos 2021-03-15 02:25:02 +0100
commit c86b9064df021a8d619fd0e9fd4d3525cd4a1373 (patch)
tree 7b5b7517b8d36ba0894ee710105e4da3b3017636
parent 6e9104e1ee0881a9e51e95b8b1d52eb0b612adf4 (diff)
download picogpsp-c86b9064df021a8d619fd0e9fd4d3525cd4a1373.tar.gz
picogpsp-c86b9064df021a8d619fd0e9fd4d3525cd4a1373.tar.bz2
picogpsp-c86b9064df021a8d619fd0e9fd4d3525cd4a1373.zip
Move palettes around to simplify MIPS dynarec
The OAM structures will also be moved to gain a few cycles per load/store. Loads can also be optimized to save one more instruction per access.
-rw-r--r-- arm/arm_stub.S 8
-rw-r--r-- cpu.c 2
-rw-r--r-- gba_memory.c 2
-rw-r--r-- psp/mips_stub.S 60
-rw-r--r-- x86/x86_stub.S 6
5 files changed, 42 insertions, 36 deletions
diff --git a/arm/arm_stub.S b/arm/arm_stub.S
index 7deffc0..61b2301 100644
--- a/arm/arm_stub.S
+++ b/arm/arm_stub.S
@@ -6,6 +6,8 @@
.globl memory_map_read
.globl memory_map_write
.globl reg
+.globl palette_ram
+.globl palette_ram_converted
#define REG_R0 (0 * 4)
#define REG_R1 (1 * 4)
@@ -763,10 +765,12 @@ execute_load_builder(u32, 32, ldrne, #0xF0000000)
.pool
+.data
+
.comm memory_map_read 0x8000
.comm memory_map_write 0x8000
-
-.data
+.comm palette_ram 0x400
+.comm palette_ram_converted 0x400
.globl reg
.globl _reg
diff --git a/cpu.c b/cpu.c
index a15b432..6b999a5 100644
--- a/cpu.c
+++ b/cpu.c
@@ -1652,6 +1652,8 @@ void raise_interrupt(irq_type irq_raised)
#ifndef HAVE_DYNAREC
u8 *memory_map_read [8 * 1024];
u8 *memory_map_write[8 * 1024];
+u16 palette_ram[512];
+u16 palette_ram_converted[512];
#endif
void execute_arm(u32 cycles)
diff --git a/gba_memory.c b/gba_memory.c
index 47359d0..0727279 100644
--- a/gba_memory.c
+++ b/gba_memory.c
@@ -305,9 +305,7 @@ u32 gamepak_waitstate_sequential[2][3][3] =
}
};
-u16 palette_ram[512];
u16 oam_ram[512];
-u16 palette_ram_converted[512];
u16 io_registers[1024 * 16];
u8 ewram[1024 * 256 * 2];
u8 iwram[1024 * 32 * 2];
diff --git a/psp/mips_stub.S b/psp/mips_stub.S
index 2e83064..ed0e370 100644
--- a/psp/mips_stub.S
+++ b/psp/mips_stub.S
@@ -46,6 +46,8 @@
.global execute_arm_translate
.global icache_region_sync
.global reg_check
+.global palette_ram
+.global palette_ram_converted
.global memory_map_read
.global memory_map_write
@@ -2093,18 +2095,15 @@ execute_store_io_u8:
execute_store_palette_u8:
region_check 5, patch_store_u8
- lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
+ andi $2, $4, 0x3FE # align palette address
ins $5, $5, 8, 8 # double value
- andi $4, $4, 0x3FE # align palette address
- addu $2, $2, $4
- sh $5, %lo(palette_ram)($2) # palette_ram[address] = value
+ addu $2, $2, $16
+ sh $5, 0x100($2) # palette_ram[address] = value
sll $1, $5, 1 # make green 6bits
ins $1, $0, 0, 6 # make bottom bit 0
ins $1, $5, 0, 5 # insert red channel into $1
- lui $2, %hi(palette_ram_converted)
- addu $2, $2, $4
jr $ra # return
- sh $1, %lo(palette_ram_converted)($2)
+ sh $1, 0x500($2)
execute_store_vram_u8:
translate_region_vram_store_align16 patch_store_u8
@@ -2193,17 +2192,14 @@ execute_store_io_u16:
execute_store_palette_u16:
region_check 5, patch_store_u16
- lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
- andi $4, $4, 0x3FE # wrap/align palette address
- addu $2, $2, $4
- sh $5, %lo(palette_ram)($2) # palette_ram[address] = value
+ andi $2, $4, 0x3FE # wrap/align palette address
+ addu $2, $2, $16
+ sh $5, 0x100($2) # palette_ram[address] = value
sll $1, $5, 1 # make green 6bits
ins $1, $0, 0, 6 # make bottom bit 0
ins $1, $5, 0, 5 # insert red channel into $1
- lui $2, %hi(palette_ram_converted)
- addu $2, $2, $4
jr $ra # return
- sh $1, %lo(palette_ram_converted)($2)
+ sh $1, 0x500($2)
execute_store_vram_u16:
translate_region_vram_store_align16 patch_store_u16
@@ -2295,18 +2291,14 @@ execute_store_io_u32:
execute_store_palette_u32:
region_check 5, patch_store_u32
- lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
- andi $4, $4, 0x3FC # wrap/align palette address
- addu $2, $2, $4
- sw $5, %lo(palette_ram)($2) # palette_ram[address] = value
+ andi $2, $4, 0x3FC # wrap/align palette address
+ addu $2, $2, $16
+ sw $5, 0x100($2) # palette_ram[address] = value
sll $1, $5, 1 # make green 6bits
ins $1, $0, 0, 6 # make bottom bit 0
ins $1, $5, 0, 5 # insert red channel into $1
- lui $2, %hi(palette_ram_converted)
- addu $2, $2, $4
- addiu $2, $2, %lo(palette_ram_converted)
- sh $1, ($2)
+ sh $1, 0x500($2)
srl $5, $5, 16 # shift down to next palette value
sll $1, $5, 1 # make green 6bits
@@ -2314,7 +2306,7 @@ execute_store_palette_u32:
ins $1, $5, 0, 5 # insert red channel into $1
jr $ra # return
- sh $1, 2($2)
+ sh $1, 0x502($2)
execute_store_vram_u32:
translate_region_vram_store_align32 patch_store_u32
@@ -2411,18 +2403,14 @@ execute_store_io_u32a:
execute_store_palette_u32a:
region_check 5, patch_store_u32a
- lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
- andi $4, $4, 0x3FC # wrap/align palette address
- addu $2, $2, $4
- sw $5, %lo(palette_ram)($2) # palette_ram[address] = value
+ andi $2, $4, 0x3FC # wrap/align palette address
+ addu $2, $2, $16
+ sw $5, 0x100($2) # palette_ram[address] = value
sll $1, $5, 1 # make green 6bits
ins $1, $0, 0, 6 # make bottom bit 0
ins $1, $5, 0, 5 # insert red channel into $1
- lui $2, %hi(palette_ram_converted)
- addu $2, $2, $4
- addiu $2, $2, %lo(palette_ram_converted)
- sh $1, ($2)
+ sh $1, 0x500($2)
srl $5, $5, 16 # shift down to next palette value
sll $1, $5, 1 # make green 6bits
@@ -2430,7 +2418,7 @@ execute_store_palette_u32a:
ins $1, $5, 0, 5 # insert red channel into $1
jr $ra # return
- sh $1, 2($2)
+ sh $1, 0x502($2)
execute_store_vram_u32a:
translate_region_vram_store_align32 patch_store_u32a
@@ -2810,6 +2798,7 @@ execute_arm_translate:
.data
+.align 6
memory_map_read:
.space 0x8000
@@ -2819,5 +2808,12 @@ memory_map_read:
reg:
.space 0x100
+# Placed here for easy access
+palette_ram:
+ .space 0x400
+palette_ram_converted:
+ .space 0x400
+
+
memory_map_write:
.space 0x8000
diff --git a/x86/x86_stub.S b/x86/x86_stub.S
index ba997ba..9a7e359 100644
--- a/x86/x86_stub.S
+++ b/x86/x86_stub.S
@@ -69,6 +69,8 @@
.global _memory_map_read
.global _memory_map_write
.global _reg
+.global _palette_ram
+.global _palette_ram_converted
.global _oam_update
@@ -561,6 +563,10 @@ return_to_main:
_reg:
.space 0x100, 0
+_palette_ram:
+ .space 0x400
+_palette_ram_converted:
+ .space 0x400
.comm _memory_map_read 0x8000
.comm _memory_map_write 0x8000