summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Guillen Fandos, 2021-03-30 21:06:52 +0200
committer: David Guillen Fandos, 2021-03-30 21:06:52 +0200
commit: 71ebc49b59d3b85ed9b8dc81d40e13a05a4f805f (patch)
tree: b3feb2dcf190c1ba3f954d026a5e795bedf3d8f2
parent: 336b14a876ceb45fe4d0a70e6df3301d1cdf25ba (diff)
download: picogpsp-71ebc49b59d3b85ed9b8dc81d40e13a05a4f805f.tar.gz
download: picogpsp-71ebc49b59d3b85ed9b8dc81d40e13a05a4f805f.tar.bz2
download: picogpsp-71ebc49b59d3b85ed9b8dc81d40e13a05a4f805f.zip
Improve indirect jumps in ARM
Handle already-translated blocks in the ARM asm to speed up indirect branches (this affects some games more than others).
-rw-r--r-- arm/arm_stub.S 161
-rw-r--r-- cpu.h 3
-rw-r--r-- cpu_threaded.c 4
-rw-r--r-- gba_memory.c 2
-rw-r--r-- libretro.c 2
-rw-r--r-- main.c 2
6 files changed, 81 insertions, 93 deletions
diff --git a/arm/arm_stub.S b/arm/arm_stub.S
index 8160bfe..5be4ca4 100644
--- a/arm/arm_stub.S
+++ b/arm/arm_stub.S
@@ -156,6 +156,66 @@ _##symbol:
ldmia sp!, { call_c_saved_regs } ;\
ldr sp, =reg ;\
+@ Jumps to PC (ARM or Thumb modes)
+@ This is really two functions/routines in one
+@ r0 contains the PC
+
+.align 2
+#define execute_pc_builder(mode, align) ;\
+defsymbl(arm_indirect_branch_##mode) ;\
+ save_flags() ;\
+execute_pc_##mode: ;\
+ bic r0, r0, #(align) /* Align PC */;\
+ mov r1, r0, lsr #24 /* Get region */;\
+ cmp r1, #2 ;\
+ beq 1f /* ewram */;\
+ cmp r1, #3 ;\
+ beq 2f /* iwram */;\
+3: ;\
+ call_c_function(block_lookup_address_##mode) ;\
+ restore_flags() ;\
+ bx r0 ;\
+1: ;\
+ ldr r1, =(ewram+0x40000) /* Load base addr */;\
+ mov r2, r0, lsl #14 /* addr &= 0x3ffff */;\
+ mov r2, r2, lsr #14 ;\
+ ldrh r2, [r1, r2] /* Load half word there */;\
+ ldr r1, =(ram_block_ptrs) ;\
+ ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\
+ cmp r1, #0 /* NULL means not translated */;\
+ beq 3b /* Need to translate */;\
+ restore_flags() ;\
+ bx r1 ;\
+2: ;\
+ ldr r1, =(iwram) /* Load base addr */;\
+ mov r2, r0, lsl #17 /* addr &= 0x7fff */;\
+ mov r2, r2, lsr #17 ;\
+ ldrh r2, [r1, r2] /* Load half word there */;\
+ ldr r1, =(ram_block_ptrs) ;\
+ ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\
+ cmp r1, #0 /* NULL means not translated */;\
+ beq 3b /* Need to translate */;\
+ restore_flags() ;\
+ bx r1 ;\
+
+
+execute_pc_builder(arm, 0x3)
+execute_pc_builder(thumb, 0x1)
+
+@ Resumes execution from saved PC, in any mode
+
+execute_pc:
+ ldr r0, [reg_base, #REG_PC] @ load new PC
+ ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
+ tst r1, #0x20 @ see if Thumb bit is set
+ bne 2f
+
+ load_registers_arm()
+ b execute_pc_arm
+
+2:
+ load_registers_thumb()
+ b execute_pc_thumb
@ Update the GBA hardware (video, sound, input, etc)
@@ -201,28 +261,11 @@ wait_halt_##name: ;\
;\
ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\
cmp r0, #0 /* see if PC has changed */;\
- beq 1f /* if not return */;\
- ;\
- ldr r0, [reg_base, #REG_PC] /* load new PC */;\
- ldr r1, [reg_base, #REG_CPSR] /* r1 = flags */;\
- tst r1, #0x20 /* see if Thumb bit is set */;\
- bne 2f /* if so load Thumb PC */;\
- ;\
- load_registers_arm() /* load ARM regs */;\
- call_c_function(block_lookup_address_arm) ;\
- restore_flags() ;\
- bx r0 /* jump to new ARM block */;\
+ bne execute_pc /* go jump/translate */;\
;\
-1: ;\
load_registers_##mode() /* reload registers */;\
restore_flags() ;\
- return_##return_op() ;\
- ;\
-2: ;\
- load_registers_thumb() /* load Thumb regs */;\
- call_c_function(block_lookup_address_thumb) ;\
- restore_flags() ;\
- bx r0 /* jump to new ARM block */;\
+ return_##return_op() /* continue, no PC change */;\
arm_update_gba_builder(arm, arm, straight)
@@ -240,58 +283,31 @@ arm_update_gba_builder(idle_thumb, thumb, add)
@ r0: PC to branch to
.align 2
-defsymbl(arm_indirect_branch_arm)
- save_flags()
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0
-
-.align 2
-defsymbl(arm_indirect_branch_thumb)
- save_flags()
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0
-
-.align 2
defsymbl(arm_indirect_branch_dual_arm)
save_flags()
tst r0, #0x01 @ check lower bit
- bne 1f @ if set going to Thumb mode
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0 @ return
+ beq execute_pc_arm @ Keep executing ARM code
-1:
- bic r0, r0, #0x01
+ bic r0, r0, #0x01 @ Switch to Thumb mode
store_registers_arm() @ save out ARM registers
load_registers_thumb() @ load in Thumb registers
ldr r1, [reg_base, #REG_CPSR] @ load cpsr
orr r1, r1, #0x20 @ set Thumb mode
str r1, [reg_base, #REG_CPSR] @ store flags
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0 @ return
+ b execute_pc_thumb @ Now execute Thumb
.align 2
defsymbl(arm_indirect_branch_dual_thumb)
save_flags()
tst r0, #0x01 @ check lower bit
- beq 1f @ if set going to ARM mode
- bic r0, r0, #0x01
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0 @ return
+ bne execute_pc_thumb @ Keep executing Thumb mode
-1:
store_registers_thumb() @ save out Thumb registers
load_registers_arm() @ load in ARM registers
ldr r1, [reg_base, #REG_CPSR] @ load cpsr
bic r1, r1, #0x20 @ clear Thumb mode
str r1, [reg_base, #REG_CPSR] @ store flags
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0 @ return
+ b execute_pc_arm @ Now execute ARM
@ Update the cpsr.
@@ -319,10 +335,7 @@ defsymbl(execute_store_cpsr)
cmp r0, #0 @ check new PC
beq 1f @ if it's zero, return
- call_c_function(block_lookup_address_arm)
-
- restore_flags()
- bx r0 @ return to new ARM address
+ b execute_pc_arm
1:
restore_flags()
@@ -378,16 +391,11 @@ defsymbl(execute_spsr_restore)
bne 2f @ if so handle it
load_registers_arm() @ restore ARM registers
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0
+ b execute_pc_arm
2:
load_registers_thumb() @ load Thumb registers
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0
-
+ b execute_pc_thumb
@ Setup the mode transition work for calling an SWI.
@@ -718,21 +726,7 @@ alert_loop:
bne alert_loop @ Keep looping until it is
mvn reg_cycles, r0 @ load new cycle count
- ldr r0, [reg_base, #REG_PC] @ load new PC
- ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
- tst r1, #0x20 @ see if Thumb bit is set
- bne 2f
-
- load_registers_arm()
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0 @ jump to new ARM block
-
-2:
- load_registers_thumb()
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0 @ jump to new Thumb block
+ b execute_pc @ restart execution at PC
4:
restore_flags()
@@ -746,17 +740,8 @@ lookup_pc:
ldr r0, [reg_base, #REG_PC] @ r0 = new pc
ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
tst r1, #0x20 @ see if Thumb bit is set
- beq lookup_pc_arm @ if not lookup ARM
-
-lookup_pc_thumb:
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0 @ jump to new Thumb block
-
-lookup_pc_arm:
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0 @ jump to new ARM block
+ beq execute_pc_arm @ if not lookup ARM
+ b execute_pc_thumb
#define sign_extend_u8(reg)
diff --git a/cpu.h b/cpu.h
index 2b250ca..2dacd6a 100644
--- a/cpu.h
+++ b/cpu.h
@@ -157,7 +157,8 @@ extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE];
void flush_translation_cache_rom(void);
void flush_translation_cache_ram(void);
void dump_translation_cache(void);
-void wipe_caches(void);
+void init_caches(void);
+void init_emitter(void);
extern u32 reg_mode[7][7];
extern u32 spsr[6];
diff --git a/cpu_threaded.c b/cpu_threaded.c
index 7f12b4f..e5c027e 100644
--- a/cpu_threaded.c
+++ b/cpu_threaded.c
@@ -3644,7 +3644,7 @@ void flush_translation_cache_rom(void)
memset(rom_branch_hash, 0, sizeof(rom_branch_hash));
}
-void wipe_caches(void)
+void init_caches(void)
{
/* Ensure we wipe everything including the SMC mirrors */
flush_translation_cache_rom();
@@ -3653,6 +3653,8 @@ void wipe_caches(void)
iwram_code_min = 0;
iwram_code_max = 0x7FFF;
flush_translation_cache_ram();
+ /* Ensure 0 and FFFF get zeroed out */
+ memset(ram_block_ptrs, 0, sizeof(ram_block_ptrs));
}
#define cache_dump_prefix ""
diff --git a/gba_memory.c b/gba_memory.c
index b66dce7..8d3d39e 100644
--- a/gba_memory.c
+++ b/gba_memory.c
@@ -3322,7 +3322,7 @@ void gba_load_state(const void* src)
#ifdef HAVE_DYNAREC
if (dynarec_enable)
- wipe_caches();
+ init_caches();
#endif
reg[OAM_UPDATED] = 1;
diff --git a/libretro.c b/libretro.c
index d94ddcb..0373c94 100644
--- a/libretro.c
+++ b/libretro.c
@@ -675,7 +675,7 @@ static void check_variables(int started_from_load)
dynarec_enable = 1;
if (dynarec_enable != prevvalue)
- wipe_caches();
+ init_caches();
}
else
dynarec_enable = 1;
diff --git a/main.c b/main.c
index 2a82338..759aa94 100644
--- a/main.c
+++ b/main.c
@@ -114,7 +114,7 @@ void init_main(void)
video_count = 960;
#ifdef HAVE_DYNAREC
- wipe_caches();
+ init_caches();
init_emitter();
#endif
}