From 336b14a876ceb45fe4d0a70e6df3301d1cdf25ba Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 30 Mar 2021 01:21:48 +0200 Subject: Improve ARM store handlers --- arm/arm_stub.S | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'arm') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 5917e82..8160bfe 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -538,19 +538,18 @@ return_to_main: @ The instruction at LR is not an inst but a u32 data that contains the PC @ Used for SMC. That's why return is essentially `pc = lr + 4` -#define execute_store_body(store_type) ;\ +#define execute_store_body(store_type, tblnum) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ - str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ - tst r0, #0xF0000000 /* make sure address is in range */;\ - bne ext_store_u##store_type /* if not do ext store */;\ ;\ - ldr lr, =ptr_tbl_##store_type /* lr = ptr table */;\ - mov r4, r0, lsr #24 /* r4 = region number */;\ - ldr lr, [lr, r4, lsl #2] /* lr = function pointer */;\ - ldr r4, [reg_base, #REG_SAVE2] /* restore r4 */;\ - bx lr /* jump to handler */;\ + mov lr, r0, lsr #24 /* lr = region number */;\ + cmp lr, #15 ;\ + movcs lr, #15 /* lr = min(lr, 15) */;\ ;\ + add lr, lr, #(16*tblnum + 64) /* lr += table offset */;\ + ldr pc, [reg_base, lr, lsl #2] /* jump to handler */;\ + +#define store_fnptr_table(store_type) ;\ ptr_tbl_##store_type: ;\ .word ext_store_ignore /* 0x00: BIOS, ignore */;\ .word ext_store_ignore /* 0x01: ignore */;\ @@ -576,11 +575,11 @@ ext_store_ignore: add pc, lr, #4 @ return -#define execute_store_builder(store_type, store_op, store_op16, load_op) ;\ +#define execute_store_builder(store_type, store_op, store_op16, load_op, tn) ;\ ;\ .align 2 ;\ defsymbl(execute_store_u##store_type) ;\ - execute_store_body(store_type) ;\ + execute_store_body(store_type, tn) ;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of 
stack */;\ @@ -626,7 +625,7 @@ ext_store_vram_u##store_type: ;\ ;\ ext_store_oam_ram_u##store_type: ;\ mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\ - add r2, reg_base, #256 /* r2 = oam ram base */;\ + sub r2, reg_base, #0x400 /* r2 = oam ram base */;\ store_op16 r1, [r0, r2] /* store data */;\ str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -640,14 +639,14 @@ ext_store_oam_ram_u##store_type: ;\ b smc_write /* perform smc write */;\ -execute_store_builder(8, strb, strh, ldrb) -execute_store_builder(16, strh, strh, ldrh) -execute_store_builder(32, str, str, ldr) +execute_store_builder(8, strb, strh, ldrb, 0) +execute_store_builder(16, strh, strh, ldrh, 1) +execute_store_builder(32, str, str, ldr, 2) @ This is a store that is executed in a strm case (so no SMC checks in-between) defsymbl(execute_store_u32_safe) - execute_store_body(32_safe) + execute_store_body(32_safe, 3) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return @@ -682,7 +681,7 @@ ext_store_vram_u32_safe: ext_store_oam_ram_u32_safe: mask_addr_8(10) @ Mask to mirror memory (no need to align!) - add r2, reg_base, #256 @ r2 = oam ram base + sub r2, reg_base, #0x400 @ r2 = oam ram base str r1, [r0, r2] @ store data str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here restore_flags() @@ -842,10 +841,15 @@ defsymbl(spsr) defsymbl(reg_mode) .space 196 -defsymbl(reg) - .space 0x100, 0 defsymbl(oam_ram) .space 0x400 +defsymbl(reg) + .space 0x100, 0 +@ Store pointer tables down here +store_fnptr_table(8) +store_fnptr_table(16) +store_fnptr_table(32) +store_fnptr_table(32_safe) @ Vita and 3DS (and of course mmap) map their own cache sections through some @ platform-speficic mechanisms. 
-- cgit v1.2.3 From 71ebc49b59d3b85ed9b8dc81d40e13a05a4f805f Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 30 Mar 2021 21:06:52 +0200 Subject: Improve indirect jumps in ARM Handle already translated blocks in the ARM asm to speed up indirect branches (affect some games more than others) --- arm/arm_stub.S | 161 ++++++++++++++++++++++++++------------------------------- 1 file changed, 73 insertions(+), 88 deletions(-) (limited to 'arm') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 8160bfe..5be4ca4 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -156,6 +156,66 @@ _##symbol: ldmia sp!, { call_c_saved_regs } ;\ ldr sp, =reg ;\ +@ Jumps to PC (ARM or Thumb modes) +@ This is really two functions/routines in one +@ r0 contains the PC + +.align 2 +#define execute_pc_builder(mode, align) ;\ +defsymbl(arm_indirect_branch_##mode) ;\ + save_flags() ;\ +execute_pc_##mode: ;\ + bic r0, r0, #(align) /* Align PC */;\ + mov r1, r0, lsr #24 /* Get region */;\ + cmp r1, #2 ;\ + beq 1f /* ewram */;\ + cmp r1, #3 ;\ + beq 2f /* iwram */;\ +3: ;\ + call_c_function(block_lookup_address_##mode) ;\ + restore_flags() ;\ + bx r0 ;\ +1: ;\ + ldr r1, =(ewram+0x40000) /* Load base addr */;\ + mov r2, r0, lsl #14 /* addr &= 0x3ffff */;\ + mov r2, r2, lsr #14 ;\ + ldrh r2, [r1, r2] /* Load half word there */;\ + ldr r1, =(ram_block_ptrs) ;\ + ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\ + cmp r1, #0 /* NULL means not translated */;\ + beq 3b /* Need to translate */;\ + restore_flags() ;\ + bx r1 ;\ +2: ;\ + ldr r1, =(iwram) /* Load base addr */;\ + mov r2, r0, lsl #17 /* addr &= 0x7fff */;\ + mov r2, r2, lsr #17 ;\ + ldrh r2, [r1, r2] /* Load half word there */;\ + ldr r1, =(ram_block_ptrs) ;\ + ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\ + cmp r1, #0 /* NULL means not translated */;\ + beq 3b /* Need to translate */;\ + restore_flags() ;\ + bx r1 ;\ + + +execute_pc_builder(arm, 0x3) +execute_pc_builder(thumb, 0x1) + +@ Resumes execution from saved 
PC, in any mode + +execute_pc: + ldr r0, [reg_base, #REG_PC] @ load new PC + ldr r1, [reg_base, #REG_CPSR] @ r1 = flags + tst r1, #0x20 @ see if Thumb bit is set + bne 2f + + load_registers_arm() + b execute_pc_arm + +2: + load_registers_thumb() + b execute_pc_thumb @ Update the GBA hardware (video, sound, input, etc) @@ -201,28 +261,11 @@ wait_halt_##name: ;\ ;\ ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\ cmp r0, #0 /* see if PC has changed */;\ - beq 1f /* if not return */;\ - ;\ - ldr r0, [reg_base, #REG_PC] /* load new PC */;\ - ldr r1, [reg_base, #REG_CPSR] /* r1 = flags */;\ - tst r1, #0x20 /* see if Thumb bit is set */;\ - bne 2f /* if so load Thumb PC */;\ - ;\ - load_registers_arm() /* load ARM regs */;\ - call_c_function(block_lookup_address_arm) ;\ - restore_flags() ;\ - bx r0 /* jump to new ARM block */;\ + bne execute_pc /* go jump/translate */;\ ;\ -1: ;\ load_registers_##mode() /* reload registers */;\ restore_flags() ;\ - return_##return_op() ;\ - ;\ -2: ;\ - load_registers_thumb() /* load Thumb regs */;\ - call_c_function(block_lookup_address_thumb) ;\ - restore_flags() ;\ - bx r0 /* jump to new ARM block */;\ + return_##return_op() /* continue, no PC change */;\ arm_update_gba_builder(arm, arm, straight) @@ -239,59 +282,32 @@ arm_update_gba_builder(idle_thumb, thumb, add) @ Input: @ r0: PC to branch to -.align 2 -defsymbl(arm_indirect_branch_arm) - save_flags() - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 - -.align 2 -defsymbl(arm_indirect_branch_thumb) - save_flags() - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 - .align 2 defsymbl(arm_indirect_branch_dual_arm) save_flags() tst r0, #0x01 @ check lower bit - bne 1f @ if set going to Thumb mode - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ return + beq execute_pc_arm @ Keep executing ARM code -1: - bic r0, r0, #0x01 + bic r0, r0, #0x01 @ Switch to Thumb mode store_registers_arm() @ save out ARM 
registers load_registers_thumb() @ load in Thumb registers ldr r1, [reg_base, #REG_CPSR] @ load cpsr orr r1, r1, #0x20 @ set Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ return + b execute_pc_thumb @ Now execute Thumb .align 2 defsymbl(arm_indirect_branch_dual_thumb) save_flags() tst r0, #0x01 @ check lower bit - beq 1f @ if set going to ARM mode - bic r0, r0, #0x01 - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ return + bne execute_pc_thumb @ Keep executing Thumb mode -1: store_registers_thumb() @ save out Thumb registers load_registers_arm() @ load in ARM registers ldr r1, [reg_base, #REG_CPSR] @ load cpsr bic r1, r1, #0x20 @ clear Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ return + b execute_pc_arm @ Now execute ARM @ Update the cpsr. @@ -319,10 +335,7 @@ defsymbl(execute_store_cpsr) cmp r0, #0 @ check new PC beq 1f @ if it's zero, return - call_c_function(block_lookup_address_arm) - - restore_flags() - bx r0 @ return to new ARM address + b execute_pc_arm 1: restore_flags() @@ -378,16 +391,11 @@ defsymbl(execute_spsr_restore) bne 2f @ if so handle it load_registers_arm() @ restore ARM registers - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 + b execute_pc_arm 2: load_registers_thumb() @ load Thumb registers - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 - + b execute_pc_thumb @ Setup the mode transition work for calling an SWI. 
@@ -718,21 +726,7 @@ alert_loop: bne alert_loop @ Keep looping until it is mvn reg_cycles, r0 @ load new cycle count - ldr r0, [reg_base, #REG_PC] @ load new PC - ldr r1, [reg_base, #REG_CPSR] @ r1 = flags - tst r1, #0x20 @ see if Thumb bit is set - bne 2f - - load_registers_arm() - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ jump to new ARM block - -2: - load_registers_thumb() - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ jump to new Thumb block + b execute_pc @ restart execution at PC 4: restore_flags() @@ -746,17 +740,8 @@ lookup_pc: ldr r0, [reg_base, #REG_PC] @ r0 = new pc ldr r1, [reg_base, #REG_CPSR] @ r1 = flags tst r1, #0x20 @ see if Thumb bit is set - beq lookup_pc_arm @ if not lookup ARM - -lookup_pc_thumb: - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ jump to new Thumb block - -lookup_pc_arm: - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ jump to new ARM block + beq execute_pc_arm @ if not lookup ARM + b execute_pc_thumb #define sign_extend_u8(reg) -- cgit v1.2.3 From 8c14ac96192f6d966ac0ad252003a8dd3c61667a Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 2 Apr 2021 02:10:00 +0200 Subject: Add function decorators for easier debugging / profiling --- arm/arm_stub.S | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arm') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 5be4ca4..9779aa5 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -2,6 +2,7 @@ #include "../gpsp_config.h" #define defsymbl(symbol) \ +.type symbol, %function ;\ .global symbol ; \ .global _##symbol ; \ symbol: \ @@ -197,7 +198,7 @@ execute_pc_##mode: ;\ beq 3b /* Need to translate */;\ restore_flags() ;\ bx r1 ;\ - +.size arm_indirect_branch_##mode, .-arm_indirect_branch_##mode execute_pc_builder(arm, 0x3) execute_pc_builder(thumb, 0x1) @@ -266,7 +267,7 @@ wait_halt_##name: ;\ load_registers_##mode() /* reload registers */;\ 
restore_flags() ;\ return_##return_op() /* continue, no PC change */;\ - +.size arm_update_gba_##mode, .-arm_update_gba_##mode arm_update_gba_builder(arm, arm, straight) arm_update_gba_builder(thumb, thumb, straight) @@ -295,6 +296,7 @@ defsymbl(arm_indirect_branch_dual_arm) orr r1, r1, #0x20 @ set Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags b execute_pc_thumb @ Now execute Thumb +.size arm_indirect_branch_dual_arm, .-arm_indirect_branch_dual_arm .align 2 defsymbl(arm_indirect_branch_dual_thumb) @@ -308,7 +310,7 @@ defsymbl(arm_indirect_branch_dual_thumb) bic r1, r1, #0x20 @ clear Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags b execute_pc_arm @ Now execute ARM - +.size arm_indirect_branch_dual_thumb, .-arm_indirect_branch_dual_thumb @ Update the cpsr. @@ -340,7 +342,7 @@ defsymbl(execute_store_cpsr) 1: restore_flags() add pc, lr, #4 @ return - +.size execute_store_cpsr, .-execute_store_cpsr @ Update the current spsr. @@ -354,6 +356,7 @@ defsymbl(execute_store_spsr) ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr bx lr +.size execute_store_spsr, .-execute_store_spsr @ Read the current spsr. @@ -366,7 +369,7 @@ defsymbl(execute_read_spsr) ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE] bx lr @ return - +.size execute_read_spsr, .-execute_read_spsr @ Restore the cpsr from the mode spsr and mode shift. 
@@ -645,7 +648,7 @@ ext_store_oam_ram_u##store_type: ;\ ldr r0, [lr] /* load PC */;\ str r0, [reg_base, #REG_PC] /* write out PC */;\ b smc_write /* perform smc write */;\ - +.size execute_store_u##store_type, .-execute_store_u##store_type execute_store_builder(8, strb, strh, ldrb, 0) execute_store_builder(16, strh, strh, ldrh, 1) @@ -694,6 +697,7 @@ ext_store_oam_ram_u32_safe: str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return +.size execute_store_u32_safe, .-execute_store_u32_safe write_epilogue: cmp r0, #0 @ check if the write rose an alert @@ -804,6 +808,7 @@ ext_load_##load_type: ;\ sign_extend_##load_type(r0) /* sign extend result */;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ +.size execute_load_##load_type, .-execute_load_##load_type .pool @@ -852,8 +857,10 @@ store_fnptr_table(32_safe) .align 4 defsymbl(rom_translation_cache) .space ROM_TRANSLATION_CACHE_SIZE +.size rom_translation_cache, .-rom_translation_cache defsymbl(ram_translation_cache) .space RAM_TRANSLATION_CACHE_SIZE +.size ram_translation_cache, .-ram_translation_cache #endif -- cgit v1.2.3 From 5b5a4db6c2963ba72a3adcace6ec055ac65f2f3d Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sat, 3 Apr 2021 00:37:42 +0200 Subject: Add instruction tracing, for testing purposes --- arm/arm_emit.h | 24 ++++++++++++++++++++++++ arm/arm_stub.S | 23 +++++++++++++++++++---- 2 files changed, 43 insertions(+), 4 deletions(-) (limited to 'arm') diff --git a/arm/arm_emit.h b/arm/arm_emit.h index a5dc930..a6951c2 100644 --- a/arm/arm_emit.h +++ b/arm/arm_emit.h @@ -1227,6 +1227,30 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) return 0; } +#ifdef TRACE_INSTRUCTIONS + void trace_instruction(u32 pc) + { + printf("Executed %x\n", pc); + } + + #define emit_trace_instruction(pc) \ + generate_save_flags(); \ + ARM_LDR_IMM(0, ARMREG_SP, reg_base, 34*4); \ + ARM_STMDB_WB(0, ARMREG_SP, 0x500C); \ + 
arm_load_imm_32bit(reg_a0, pc); \ generate_function_call(trace_instruction); \ ARM_LDMIA_WB(0, ARMREG_SP, 0x500C); \ arm_load_imm_32bit(ARMREG_SP, (u32)reg); \ generate_restore_flags(); + #define emit_trace_thumb_instruction(pc) \ + emit_trace_instruction(pc) + #define emit_trace_arm_instruction(pc) \ + emit_trace_instruction(pc) +#else + #define emit_trace_thumb_instruction(pc) + #define emit_trace_arm_instruction(pc) +#endif + #define arm_psr_load_new_reg() \ generate_load_reg(reg_a0, rm) \ diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 9779aa5..b8651cf 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -168,10 +168,25 @@ defsymbl(arm_indirect_branch_##mode) ;\ execute_pc_##mode: ;\ bic r0, r0, #(align) /* Align PC */;\ mov r1, r0, lsr #24 /* Get region */;\ - cmp r1, #2 ;\ - beq 1f /* ewram */;\ - cmp r1, #3 ;\ - beq 2f /* iwram */;\ + ldr pc, [pc, r1, lsl #2] ;\ + nop ;\ + .long 3f /* 0 BIOS (like ROM) */;\ + .long 3f /* 1 Bad region */;\ + .long 1f /* 2 EWRAM */;\ + .long 2f /* 3 IWRAM */;\ + .long 3f /* 4 Not supported */;\ + .long 3f /* 5 Not supported */;\ + .long 3f /* 6 Not supported */;\ + .long 3f /* 7 Not supported */;\ + .long 3f /* 8 ROM */;\ + .long 3f /* 9 ROM */;\ + .long 3f /* A ROM */;\ + .long 3f /* B ROM */;\ + .long 3f /* C ROM */;\ + .long 3f /* D ROM */;\ + .long 3f /* E ROM */;\ + .long 3f /* F Bad region */;\ + ;\ 3: ;\ call_c_function(block_lookup_address_##mode) ;\ restore_flags() ;\ -- cgit v1.2.3 From d83f8fbd25562dcebf26e0e71d346bc41820e239 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 27 Apr 2021 19:05:00 +0200 Subject: Fix Vita port and likely some Linux/Android hidden issues Using an invalid SP makes Vita crash (for an unknown reason) and makes things like C signal handlers crash (luckily Retroarch doesn't use them). It is also a violation of the ABI and not a great idea. Recycled some little used registers to free SP. Perf should be roughly the same. 
--- arm/arm_emit.h | 18 ++++++++++++------ arm/arm_stub.S | 13 +++---------- 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'arm') diff --git a/arm/arm_emit.h b/arm/arm_emit.h index a6951c2..1432617 100644 --- a/arm/arm_emit.h +++ b/arm/arm_emit.h @@ -67,9 +67,10 @@ void execute_store_u32_safe(u32 address, u32 source); #define reg_a1 ARMREG_R1 #define reg_a2 ARMREG_R2 +/* scratch0 is shared with flags, be careful! */ #define reg_s0 ARMREG_R9 -#define reg_base ARMREG_SP -#define reg_flags ARMREG_R11 +#define reg_base ARMREG_R11 +#define reg_flags ARMREG_R9 #define reg_cycles ARMREG_R12 @@ -110,6 +111,7 @@ void execute_store_u32_safe(u32 address, u32 source); #define reg_x5 ARMREG_R8 #define mem_reg (~0U) +#define save1_reg 21 /* @@ -1415,7 +1417,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) #define arm_block_memory_adjust_pc_load() \ if(reg_list & 0x8000) \ { \ - generate_mov(reg_a0, reg_rv); \ generate_indirect_branch_arm(); \ } \ @@ -1463,12 +1464,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) arm_block_memory_offset_##offset_type(); \ arm_block_memory_writeback_##access_type(writeback_type); \ ARM_BIC_REG_IMM(0, reg_s0, reg_s0, 0x03, 0); \ + generate_store_reg(reg_s0, save1_reg); \ \ for(i = 0; i < 16; i++) \ { \ if((reg_list >> i) & 0x01) \ { \ cycle_count++; \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, offset, 0); \ if(reg_list & ~((2 << i) - 1)) \ { \ @@ -1493,12 +1496,12 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) generate_load_reg(reg_a0, rn); \ generate_function_call(execute_load_##type); \ write32((pc + 8)); \ - generate_mov(reg_s0, reg_rv); \ + generate_mov(reg_a2, reg_rv); \ generate_load_reg(reg_a0, rn); \ generate_load_reg(reg_a1, rm); \ + generate_store_reg(reg_a2, rd); \ generate_function_call(execute_store_##type); \ write32((pc + 4)); \ - generate_store_reg(reg_s0, rd); \ } \ @@ -1729,13 +1732,14 @@ u32 
execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) #define thumb_block_memory_extra_down() \ #define thumb_block_memory_extra_pop_pc() \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \ generate_function_call(execute_load_u32); \ write32((pc + 4)); \ - generate_mov(reg_a0, reg_rv); \ generate_indirect_branch_cycle_update(thumb) \ #define thumb_block_memory_extra_push_lr(base_reg) \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \ generate_load_reg(reg_a1, REG_LR); \ generate_function_call(execute_store_u32_safe) \ @@ -1782,12 +1786,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) ARM_BIC_REG_IMM(0, reg_s0, reg_s0, 0x03, 0); \ thumb_block_address_preadjust_##pre_op(); \ thumb_block_address_postadjust_##post_op(base_reg); \ + generate_store_reg(reg_s0, save1_reg); \ \ for(i = 0; i < 8; i++) \ { \ if((reg_list >> i) & 0x01) \ { \ cycle_count++; \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, offset, 0); \ if(reg_list & ~((2 << i) - 1)) \ { \ diff --git a/arm/arm_stub.S b/arm/arm_stub.S index b8651cf..944d36a 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -45,15 +45,14 @@ _##symbol: #define CHANGED_PC_STATUS (31 * 4) #define COMPLETED_FRAME (32 * 4) #define OAM_UPDATED (33 * 4) -#define MAIN_THREAD_SP (34 * 4) #define reg_a0 r0 #define reg_a1 r1 #define reg_a2 r2 #define reg_s0 r9 -#define reg_base sp -#define reg_flags r11 +#define reg_base r11 +#define reg_flags r9 #define reg_cycles r12 @@ -151,11 +150,9 @@ _##symbol: @ registers which are important to the dynarec. 
#define call_c_function(function) ;\ - ldr sp, [reg_base, #MAIN_THREAD_SP] ;\ stmdb sp!, { call_c_saved_regs } ;\ bl function ;\ ldmia sp!, { call_c_saved_regs } ;\ - ldr sp, =reg ;\ @ Jumps to PC (ARM or Thumb modes) @ This is really two functions/routines in one @@ -483,9 +480,7 @@ defsymbl(execute_arm_translate) @ save the registers to be able to return later stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } - ldr r1, =reg @ reg to r1 - str sp, [r1, #MAIN_THREAD_SP] @ store the current sp - ldr sp, =reg @ reg_base = sp (loading addr) + ldr reg_base, =reg @ init base_reg mvn reg_cycles, r0 @ load cycle counter @@ -515,8 +510,6 @@ defsymbl(execute_arm_translate) @ Epilogue to return to the main thread (whatever called execute_arm_translate) return_to_main: - @ restore the stack pointer - ldr sp, [reg_base, #MAIN_THREAD_SP] @ restore the saved regs and return ldmia sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } bx lr -- cgit v1.2.3