From 336b14a876ceb45fe4d0a70e6df3301d1cdf25ba Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 30 Mar 2021 01:21:48 +0200 Subject: Improve ARM store handlers --- arm/arm_stub.S | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 5917e82..8160bfe 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -538,19 +538,18 @@ return_to_main: @ The instruction at LR is not an inst but a u32 data that contains the PC @ Used for SMC. That's why return is essentially `pc = lr + 4` -#define execute_store_body(store_type) ;\ +#define execute_store_body(store_type, tblnum) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ - str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ - tst r0, #0xF0000000 /* make sure address is in range */;\ - bne ext_store_u##store_type /* if not do ext store */;\ ;\ - ldr lr, =ptr_tbl_##store_type /* lr = ptr table */;\ - mov r4, r0, lsr #24 /* r4 = region number */;\ - ldr lr, [lr, r4, lsl #2] /* lr = function pointer */;\ - ldr r4, [reg_base, #REG_SAVE2] /* restore r4 */;\ - bx lr /* jump to handler */;\ + mov lr, r0, lsr #24 /* lr = region number */;\ + cmp lr, #15 ;\ + movcs lr, #15 /* lr = min(lr, 15) */;\ ;\ + add lr, lr, #(16*tblnum + 64) /* lr += table offset */;\ + ldr pc, [reg_base, lr, lsl #2] /* jump to handler */;\ + +#define store_fnptr_table(store_type) ;\ ptr_tbl_##store_type: ;\ .word ext_store_ignore /* 0x00: BIOS, ignore */;\ .word ext_store_ignore /* 0x01: ignore */;\ @@ -576,11 +575,11 @@ ext_store_ignore: add pc, lr, #4 @ return -#define execute_store_builder(store_type, store_op, store_op16, load_op) ;\ +#define execute_store_builder(store_type, store_op, store_op16, load_op, tn) ;\ ;\ .align 2 ;\ defsymbl(execute_store_u##store_type) ;\ - execute_store_body(store_type) ;\ + execute_store_body(store_type, tn) ;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -626,7 
+625,7 @@ ext_store_vram_u##store_type: ;\ ;\ ext_store_oam_ram_u##store_type: ;\ mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\ - add r2, reg_base, #256 /* r2 = oam ram base */;\ + sub r2, reg_base, #0x400 /* r2 = oam ram base */;\ store_op16 r1, [r0, r2] /* store data */;\ str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -640,14 +639,14 @@ ext_store_oam_ram_u##store_type: ;\ b smc_write /* perform smc write */;\ -execute_store_builder(8, strb, strh, ldrb) -execute_store_builder(16, strh, strh, ldrh) -execute_store_builder(32, str, str, ldr) +execute_store_builder(8, strb, strh, ldrb, 0) +execute_store_builder(16, strh, strh, ldrh, 1) +execute_store_builder(32, str, str, ldr, 2) @ This is a store that is executed in a strm case (so no SMC checks in-between) defsymbl(execute_store_u32_safe) - execute_store_body(32_safe) + execute_store_body(32_safe, 3) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return @@ -682,7 +681,7 @@ ext_store_vram_u32_safe: ext_store_oam_ram_u32_safe: mask_addr_8(10) @ Mask to mirror memory (no need to align!) - add r2, reg_base, #256 @ r2 = oam ram base + sub r2, reg_base, #0x400 @ r2 = oam ram base str r1, [r0, r2] @ store data str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here restore_flags() @@ -842,10 +841,15 @@ defsymbl(spsr) defsymbl(reg_mode) .space 196 -defsymbl(reg) - .space 0x100, 0 defsymbl(oam_ram) .space 0x400 +defsymbl(reg) + .space 0x100, 0 +@ Store pointer tables down here +store_fnptr_table(8) +store_fnptr_table(16) +store_fnptr_table(32) +store_fnptr_table(32_safe) @ Vita and 3DS (and of course mmap) map their own cache sections through some @ platform-speficic mechanisms. 
-- cgit v1.2.3 From 71ebc49b59d3b85ed9b8dc81d40e13a05a4f805f Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 30 Mar 2021 21:06:52 +0200 Subject: Improve indirect jumps in ARM Handle already translated blocks in the ARM asm to speed up indirect branches (affect some games more than others) --- arm/arm_stub.S | 161 ++++++++++++++++++++++++++------------------------------- cpu.h | 3 +- cpu_threaded.c | 4 +- gba_memory.c | 2 +- libretro.c | 2 +- main.c | 2 +- 6 files changed, 81 insertions(+), 93 deletions(-) diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 8160bfe..5be4ca4 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -156,6 +156,66 @@ _##symbol: ldmia sp!, { call_c_saved_regs } ;\ ldr sp, =reg ;\ +@ Jumps to PC (ARM or Thumb modes) +@ This is really two functions/routines in one +@ r0 contains the PC + +.align 2 +#define execute_pc_builder(mode, align) ;\ +defsymbl(arm_indirect_branch_##mode) ;\ + save_flags() ;\ +execute_pc_##mode: ;\ + bic r0, r0, #(align) /* Align PC */;\ + mov r1, r0, lsr #24 /* Get region */;\ + cmp r1, #2 ;\ + beq 1f /* ewram */;\ + cmp r1, #3 ;\ + beq 2f /* iwram */;\ +3: ;\ + call_c_function(block_lookup_address_##mode) ;\ + restore_flags() ;\ + bx r0 ;\ +1: ;\ + ldr r1, =(ewram+0x40000) /* Load base addr */;\ + mov r2, r0, lsl #14 /* addr &= 0x3ffff */;\ + mov r2, r2, lsr #14 ;\ + ldrh r2, [r1, r2] /* Load half word there */;\ + ldr r1, =(ram_block_ptrs) ;\ + ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\ + cmp r1, #0 /* NULL means not translated */;\ + beq 3b /* Need to translate */;\ + restore_flags() ;\ + bx r1 ;\ +2: ;\ + ldr r1, =(iwram) /* Load base addr */;\ + mov r2, r0, lsl #17 /* addr &= 0x7fff */;\ + mov r2, r2, lsr #17 ;\ + ldrh r2, [r1, r2] /* Load half word there */;\ + ldr r1, =(ram_block_ptrs) ;\ + ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\ + cmp r1, #0 /* NULL means not translated */;\ + beq 3b /* Need to translate */;\ + restore_flags() ;\ + bx r1 ;\ + + +execute_pc_builder(arm, 0x3) 
+execute_pc_builder(thumb, 0x1) + +@ Resumes execution from saved PC, in any mode + +execute_pc: + ldr r0, [reg_base, #REG_PC] @ load new PC + ldr r1, [reg_base, #REG_CPSR] @ r1 = flags + tst r1, #0x20 @ see if Thumb bit is set + bne 2f + + load_registers_arm() + b execute_pc_arm + +2: + load_registers_thumb() + b execute_pc_thumb @ Update the GBA hardware (video, sound, input, etc) @@ -201,28 +261,11 @@ wait_halt_##name: ;\ ;\ ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\ cmp r0, #0 /* see if PC has changed */;\ - beq 1f /* if not return */;\ - ;\ - ldr r0, [reg_base, #REG_PC] /* load new PC */;\ - ldr r1, [reg_base, #REG_CPSR] /* r1 = flags */;\ - tst r1, #0x20 /* see if Thumb bit is set */;\ - bne 2f /* if so load Thumb PC */;\ - ;\ - load_registers_arm() /* load ARM regs */;\ - call_c_function(block_lookup_address_arm) ;\ - restore_flags() ;\ - bx r0 /* jump to new ARM block */;\ + bne execute_pc /* go jump/translate */;\ ;\ -1: ;\ load_registers_##mode() /* reload registers */;\ restore_flags() ;\ - return_##return_op() ;\ - ;\ -2: ;\ - load_registers_thumb() /* load Thumb regs */;\ - call_c_function(block_lookup_address_thumb) ;\ - restore_flags() ;\ - bx r0 /* jump to new ARM block */;\ + return_##return_op() /* continue, no PC change */;\ arm_update_gba_builder(arm, arm, straight) @@ -239,59 +282,32 @@ arm_update_gba_builder(idle_thumb, thumb, add) @ Input: @ r0: PC to branch to -.align 2 -defsymbl(arm_indirect_branch_arm) - save_flags() - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 - -.align 2 -defsymbl(arm_indirect_branch_thumb) - save_flags() - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 - .align 2 defsymbl(arm_indirect_branch_dual_arm) save_flags() tst r0, #0x01 @ check lower bit - bne 1f @ if set going to Thumb mode - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ return + beq execute_pc_arm @ Keep executing ARM code -1: - bic r0, r0, #0x01 + bic r0, r0, 
#0x01 @ Switch to Thumb mode store_registers_arm() @ save out ARM registers load_registers_thumb() @ load in Thumb registers ldr r1, [reg_base, #REG_CPSR] @ load cpsr orr r1, r1, #0x20 @ set Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ return + b execute_pc_thumb @ Now execute Thumb .align 2 defsymbl(arm_indirect_branch_dual_thumb) save_flags() tst r0, #0x01 @ check lower bit - beq 1f @ if set going to ARM mode - bic r0, r0, #0x01 - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ return + bne execute_pc_thumb @ Keep executing Thumb mode -1: store_registers_thumb() @ save out Thumb registers load_registers_arm() @ load in ARM registers ldr r1, [reg_base, #REG_CPSR] @ load cpsr bic r1, r1, #0x20 @ clear Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ return + b execute_pc_arm @ Now execute ARM @ Update the cpsr. @@ -319,10 +335,7 @@ defsymbl(execute_store_cpsr) cmp r0, #0 @ check new PC beq 1f @ if it's zero, return - call_c_function(block_lookup_address_arm) - - restore_flags() - bx r0 @ return to new ARM address + b execute_pc_arm 1: restore_flags() @@ -378,16 +391,11 @@ defsymbl(execute_spsr_restore) bne 2f @ if so handle it load_registers_arm() @ restore ARM registers - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 + b execute_pc_arm 2: load_registers_thumb() @ load Thumb registers - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 - + b execute_pc_thumb @ Setup the mode transition work for calling an SWI. 
@@ -718,21 +726,7 @@ alert_loop: bne alert_loop @ Keep looping until it is mvn reg_cycles, r0 @ load new cycle count - ldr r0, [reg_base, #REG_PC] @ load new PC - ldr r1, [reg_base, #REG_CPSR] @ r1 = flags - tst r1, #0x20 @ see if Thumb bit is set - bne 2f - - load_registers_arm() - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ jump to new ARM block - -2: - load_registers_thumb() - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ jump to new Thumb block + b execute_pc @ restart execution at PC 4: restore_flags() @@ -746,17 +740,8 @@ lookup_pc: ldr r0, [reg_base, #REG_PC] @ r0 = new pc ldr r1, [reg_base, #REG_CPSR] @ r1 = flags tst r1, #0x20 @ see if Thumb bit is set - beq lookup_pc_arm @ if not lookup ARM - -lookup_pc_thumb: - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ jump to new Thumb block - -lookup_pc_arm: - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ jump to new ARM block + beq execute_pc_arm @ if not lookup ARM + b execute_pc_thumb #define sign_extend_u8(reg) diff --git a/cpu.h b/cpu.h index 2b250ca..2dacd6a 100644 --- a/cpu.h +++ b/cpu.h @@ -157,7 +157,8 @@ extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE]; void flush_translation_cache_rom(void); void flush_translation_cache_ram(void); void dump_translation_cache(void); -void wipe_caches(void); +void init_caches(void); +void init_emitter(void); extern u32 reg_mode[7][7]; extern u32 spsr[6]; diff --git a/cpu_threaded.c b/cpu_threaded.c index 7f12b4f..e5c027e 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -3644,7 +3644,7 @@ void flush_translation_cache_rom(void) memset(rom_branch_hash, 0, sizeof(rom_branch_hash)); } -void wipe_caches(void) +void init_caches(void) { /* Ensure we wipe everything including the SMC mirrors */ flush_translation_cache_rom(); @@ -3653,6 +3653,8 @@ void wipe_caches(void) iwram_code_min = 0; iwram_code_max = 0x7FFF; flush_translation_cache_ram(); + /* Ensure 0 and FFFF get zeroed 
out */ + memset(ram_block_ptrs, 0, sizeof(ram_block_ptrs)); } #define cache_dump_prefix "" diff --git a/gba_memory.c b/gba_memory.c index b66dce7..8d3d39e 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -3322,7 +3322,7 @@ void gba_load_state(const void* src) #ifdef HAVE_DYNAREC if (dynarec_enable) - wipe_caches(); + init_caches(); #endif reg[OAM_UPDATED] = 1; diff --git a/libretro.c b/libretro.c index d94ddcb..0373c94 100644 --- a/libretro.c +++ b/libretro.c @@ -675,7 +675,7 @@ static void check_variables(int started_from_load) dynarec_enable = 1; if (dynarec_enable != prevvalue) - wipe_caches(); + init_caches(); } else dynarec_enable = 1; diff --git a/main.c b/main.c index 2a82338..759aa94 100644 --- a/main.c +++ b/main.c @@ -114,7 +114,7 @@ void init_main(void) video_count = 960; #ifdef HAVE_DYNAREC - wipe_caches(); + init_caches(); init_emitter(); #endif } -- cgit v1.2.3 From 8c14ac96192f6d966ac0ad252003a8dd3c61667a Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 2 Apr 2021 02:10:00 +0200 Subject: Add function decorators for easier debugging / profiling --- arm/arm_stub.S | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 5be4ca4..9779aa5 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -2,6 +2,7 @@ #include "../gpsp_config.h" #define defsymbl(symbol) \ +.type symbol, %function ;\ .global symbol ; \ .global _##symbol ; \ symbol: \ @@ -197,7 +198,7 @@ execute_pc_##mode: ;\ beq 3b /* Need to translate */;\ restore_flags() ;\ bx r1 ;\ - +.size arm_indirect_branch_##mode, .-arm_indirect_branch_##mode execute_pc_builder(arm, 0x3) execute_pc_builder(thumb, 0x1) @@ -266,7 +267,7 @@ wait_halt_##name: ;\ load_registers_##mode() /* reload registers */;\ restore_flags() ;\ return_##return_op() /* continue, no PC change */;\ - +.size arm_update_gba_##mode, .-arm_update_gba_##mode arm_update_gba_builder(arm, arm, straight) arm_update_gba_builder(thumb, thumb, straight) @@ -295,6 
+296,7 @@ defsymbl(arm_indirect_branch_dual_arm) orr r1, r1, #0x20 @ set Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags b execute_pc_thumb @ Now execute Thumb +.size arm_indirect_branch_dual_arm, .-arm_indirect_branch_dual_arm .align 2 defsymbl(arm_indirect_branch_dual_thumb) @@ -308,7 +310,7 @@ defsymbl(arm_indirect_branch_dual_thumb) bic r1, r1, #0x20 @ clear Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags b execute_pc_arm @ Now execute ARM - +.size arm_indirect_branch_dual_thumb, .-arm_indirect_branch_dual_thumb @ Update the cpsr. @@ -340,7 +342,7 @@ defsymbl(execute_store_cpsr) 1: restore_flags() add pc, lr, #4 @ return - +.size execute_store_cpsr, .-execute_store_cpsr @ Update the current spsr. @@ -354,6 +356,7 @@ defsymbl(execute_store_spsr) ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr bx lr +.size execute_store_spsr, .-execute_store_spsr @ Read the current spsr. @@ -366,7 +369,7 @@ defsymbl(execute_read_spsr) ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE] bx lr @ return - +.size execute_read_spsr, .-execute_read_spsr @ Restore the cpsr from the mode spsr and mode shift. 
@@ -645,7 +648,7 @@ ext_store_oam_ram_u##store_type: ;\ ldr r0, [lr] /* load PC */;\ str r0, [reg_base, #REG_PC] /* write out PC */;\ b smc_write /* perform smc write */;\ - +.size execute_store_u##store_type, .-execute_store_u##store_type execute_store_builder(8, strb, strh, ldrb, 0) execute_store_builder(16, strh, strh, ldrh, 1) @@ -694,6 +697,7 @@ ext_store_oam_ram_u32_safe: str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return +.size execute_store_u32_safe, .-execute_store_u32_safe write_epilogue: cmp r0, #0 @ check if the write rose an alert @@ -804,6 +808,7 @@ ext_load_##load_type: ;\ sign_extend_##load_type(r0) /* sign extend result */;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ +.size execute_load_##load_type, .-execute_load_##load_type .pool @@ -852,8 +857,10 @@ store_fnptr_table(32_safe) .align 4 defsymbl(rom_translation_cache) .space ROM_TRANSLATION_CACHE_SIZE +.size rom_translation_cache, .-rom_translation_cache defsymbl(ram_translation_cache) .space RAM_TRANSLATION_CACHE_SIZE +.size ram_translation_cache, .-ram_translation_cache #endif -- cgit v1.2.3 From 5b5a4db6c2963ba72a3adcace6ec055ac65f2f3d Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sat, 3 Apr 2021 00:37:42 +0200 Subject: Add instruction tracing, for testing purposes --- Makefile | 1 + arm/arm_emit.h | 24 ++++++++++++++++++++++++ arm/arm_stub.S | 23 +++++++++++++++++++---- cpu_threaded.c | 2 ++ psp/mips_emit.h | 18 ++++++++++++++++++ x86/x86_emit.h | 29 +++++++++++++++++++++++++++++ 6 files changed, 93 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 4a5806d..5d28045 100644 --- a/Makefile +++ b/Makefile @@ -434,6 +434,7 @@ ifeq ($(FORCE_32BIT_ARCH), 1) fpic := endif +# Add -DTRACE_INSTRUCTIONS to trace instruction execution ifeq ($(DEBUG), 1) OPTIMIZE_SAFE := -O0 -g OPTIMIZE := -O0 -g diff --git a/arm/arm_emit.h b/arm/arm_emit.h index a5dc930..a6951c2 100644 --- a/arm/arm_emit.h +++ 
b/arm/arm_emit.h @@ -1227,6 +1227,30 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) return 0; } +#ifdef TRACE_INSTRUCTIONS + void trace_instruction(u32 pc) + { + printf("Executed %x\n", pc); + } + + #define emit_trace_instruction(pc) \ + generate_save_flags(); \ + ARM_LDR_IMM(0, ARMREG_SP, reg_base, 34*4); \ + ARM_STMDB_WB(0, ARMREG_SP, 0x500C); \ + arm_load_imm_32bit(reg_a0, pc); \ + generate_function_call(trace_instruction); \ + ARM_LDMIA_WB(0, ARMREG_SP, 0x500C); \ + arm_load_imm_32bit(ARMREG_SP, (u32)reg); \ + generate_restore_flags(); + #define emit_trace_thumb_instruction(pc) \ + emit_trace_instruction(pc) + #define emit_trace_arm_instruction(pc) \ + emit_trace_instruction(pc) +#else + #define emit_trace_thumb_instruction(pc) + #define emit_trace_arm_instruction(pc) +#endif + #define arm_psr_load_new_reg() \ generate_load_reg(reg_a0, rm) \ diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 9779aa5..b8651cf 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -168,10 +168,25 @@ defsymbl(arm_indirect_branch_##mode) ;\ execute_pc_##mode: ;\ bic r0, r0, #(align) /* Align PC */;\ mov r1, r0, lsr #24 /* Get region */;\ - cmp r1, #2 ;\ - beq 1f /* ewram */;\ - cmp r1, #3 ;\ - beq 2f /* iwram */;\ + ldr pc, [pc, r1, lsl #2] ;\ + nop ;\ + .long 3f /* 0 BIOS (like ROM) */;\ + .long 3f /* 1 Bad region */;\ + .long 1f /* 2 EWRAM */;\ + .long 2f /* 3 IWRAM */;\ + .long 3f /* 4 Not supported */;\ + .long 3f /* 5 Not supported */;\ + .long 3f /* 6 Not supported */;\ + .long 3f /* 7 Not supported */;\ + .long 3f /* 8 ROM */;\ + .long 3f /* 9 ROM */;\ + .long 3f /* A ROM */;\ + .long 3f /* B ROM */;\ + .long 3f /* C ROM */;\ + .long 3f /* D ROM */;\ + .long 3f /* E ROM */;\ + .long 3f /* F Bad region */;\ + ;\ 3: ;\ call_c_function(block_lookup_address_##mode) ;\ restore_flags() ;\ diff --git a/cpu_threaded.c b/cpu_threaded.c index e5c027e..a32b1b8 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -264,6 +264,7 @@ void translate_icache_sync() { 
check_pc_region(pc); \ opcode = address32(pc_address_block, (pc & 0x7FFF)); \ condition = block_data[block_data_position].condition; \ + emit_trace_arm_instruction(pc); \ \ if((condition != last_condition) || (condition >= 0x20)) \ { \ @@ -1703,6 +1704,7 @@ void translate_icache_sync() { check_pc_region(pc); \ last_opcode = opcode; \ opcode = address16(pc_address_block, (pc & 0x7FFF)); \ + emit_trace_thumb_instruction(pc); \ \ switch((opcode >> 8) & 0xFF) \ { \ diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 818b724..a435e63 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2422,6 +2422,24 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) generate_indirect_branch_cycle_update(dual); \ } \ +#ifdef TRACE_INSTRUCTIONS + void trace_instruction(u32 pc) + { + printf("Executed %x\n", pc); + } + + #define emit_trace_instruction(pc) \ + emit_save_regs(false); \ + generate_load_imm(reg_a0, pc); \ + genccall(&trace_instruction); \ + emit_restore_regs(false) + #define emit_trace_thumb_instruction(pc) emit_trace_instruction(pc) + #define emit_trace_arm_instruction(pc) emit_trace_instruction(pc) +#else + #define emit_trace_thumb_instruction(pc) + #define emit_trace_arm_instruction(pc) +#endif + #define thumb_swi() \ generate_swi_hle_handler(opcode & 0xFF); \ generate_load_pc(reg_a0, (pc + 2)); \ diff --git a/x86/x86_emit.h b/x86/x86_emit.h index 68930e1..ef79110 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -96,6 +96,7 @@ typedef enum x86_opcode_push_reg = 0x50, x86_opcode_push_rm = 0xFF, x86_opcode_push_imm = 0x0668, + x86_opcode_pop_reg = 0x58, x86_opcode_call_offset = 0xE8, x86_opcode_ret = 0xC3, x86_opcode_test_rm_imm = 0x00F7, @@ -266,6 +267,12 @@ typedef enum #define x86_emit_idiv_eax_reg(source) \ x86_emit_opcode_1b_ext_reg(idiv_eax_rm, source) \ +#define x86_emit_pop_reg(regn) \ + x86_emit_opcode_1b(pop_reg, regn) \ + +#define x86_emit_push_reg(regn) \ + x86_emit_opcode_1b(push_reg, regn) \ + #define x86_emit_push_mem(base, offset) \ 
x86_emit_opcode_1b_mem(push_rm, 0x06, base, offset) \ @@ -523,6 +530,28 @@ typedef enum generate_function_call(execute_##name##_##flags_op##_reg); \ generate_mov(ireg, rv) \ +#ifdef TRACE_INSTRUCTIONS + void function_cc trace_instruction(u32 pc) + { + printf("Executed %x\n", pc); + } + + #define emit_trace_thumb_instruction(pc) \ + x86_emit_push_reg(eax); \ + x86_emit_push_reg(ecx); \ + x86_emit_push_reg(edx); \ + x86_emit_mov_reg_imm(eax, pc); \ + generate_function_call(trace_instruction); \ + x86_emit_pop_reg(edx); \ + x86_emit_pop_reg(ecx); \ + x86_emit_pop_reg(eax); + #define emit_trace_arm_instruction(pc) \ + emit_trace_thumb_instruction(pc) +#else + #define emit_trace_thumb_instruction(pc) + #define emit_trace_arm_instruction(pc) +#endif + u32 function_cc execute_lsl_no_flags_reg(u32 value, u32 shift) { if(shift != 0) -- cgit v1.2.3 From 5bee4d66c2e461b3a4e804b2806bdea3938a4577 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sat, 3 Apr 2021 23:43:32 +0200 Subject: Do not use stack in mips stubs No need for it (faster) and avoid mis-aligning it across calls --- psp/mips_stub.S | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 1c4ad4b..08151db 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -391,8 +391,7 @@ execute_read_spsr: # $4: Current pc execute_swi: - add $sp, $sp, -4 # push $ra - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) sw $4, SUPERVISOR_LR($16) # store next PC in the supervisor's LR collapse_flags # get cpsr in $2 sw $2, SUPERVISOR_SPSR($16) # save cpsr in SUPERVISOR_CPSR @@ -402,10 +401,10 @@ execute_swi: save_registers li $4, 3 # 3 is supervisor mode cfncall set_cpu_mode, 5 # set the CPU mode to supervisor + lw $ra, REG_SAVE3($16) restore_registers - lw $ra, ($sp) # pop $ra jr $ra # return - add $sp, $sp, 4 # fix stack (delay slot) + nop # $4: pc to restore to # returns in $4 @@ -420,15 +419,13 @@ execute_spsr_restore: lw $1, SPSR_BASE($2) # $1 = spsr[cpu_mode] sw 
$1, REG_CPSR($16) # cpsr = spsr[cpu_mode] extract_flags_body # extract flags from $1 - addiu $sp, $sp, -4 - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) save_registers cfncall execute_spsr_restore_body, 6 # do the dirty work in this C function restore_registers - addu $4, $2, $0 # move return value to $4 - lw $ra, ($sp) + lw $ra, REG_SAVE3($16) jr $ra - addiu $sp, $sp, 4 + addu $4, $2, $0 # move return value to $4 no_spsr_restore: jr $ra -- cgit v1.2.3 From a5c06f62d64c03b245c14bfb86b176b6455f22aa Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sun, 4 Apr 2021 18:13:15 +0200 Subject: Fix palette writes in MIPS Was not writing to the right address (but decoded memory was working). Most games worked well except those that depend on modifying the existing palette bits (instead of copying from ROM/RAM). Fixes several games. --- psp/mips_emit.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/psp/mips_emit.h b/psp/mips_emit.h index a435e63..73f589a 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2950,13 +2950,14 @@ static void emit_palette_hdl( } mips_emit_addu(reg_rv, reg_rv, reg_base); - // Store the data (delay slot from the SMC branch) + // Store the data in real palette memory if (realsize == 2) { - mips_emit_sw(reg_a1, reg_base, 0x100); + mips_emit_sw(reg_a1, reg_rv, 0x100); } else if (realsize == 1) { - mips_emit_sh(reg_a1, reg_base, 0x100); + mips_emit_sh(reg_a1, reg_rv, 0x100); } + // Convert and store in mirror memory palette_convert(); mips_emit_sh(reg_temp, reg_rv, 0x500); -- cgit v1.2.3 From ff48af07b08c0870ea950335fec338a828f88c27 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sun, 25 Apr 2021 21:16:46 +0200 Subject: Fix RTC support for MIPS --- psp/mips_emit.h | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 73f589a..12685e8 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2984,24 +2984,23 @@ static void 
emit_ignorestore_stub(unsigned size, u8 **tr_ptr) { mips_emit_jr(mips_reg_ra); mips_emit_nop(); - // Region 8-B - tmemst[size][ 8] = tmemst[size][ 9] = - tmemst[size][10] = tmemst[size][11] = (u32)translation_ptr; + // Region 9-C + tmemst[size][ 9] = tmemst[size][10] = + tmemst[size][11] = tmemst[size][12] = (u32)translation_ptr; - mips_emit_srl(reg_temp, reg_a0, 26); // Check 6 MSB to be 0x02 - mips_emit_xori(reg_temp, reg_temp, 0x02); + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_addiu(reg_temp, reg_temp, -9); + mips_emit_srl(reg_temp, reg_temp, 2); mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size)); mips_emit_nop(); mips_emit_jr(mips_reg_ra); mips_emit_nop(); - // Region C or F (or bigger!) - tmemst[size][12] = tmemst[size][15] = (u32)translation_ptr; + // Region F or higher + tmemst[size][15] = (u32)translation_ptr; mips_emit_srl(reg_temp, reg_a0, 24); - mips_emit_sltiu(reg_rv, reg_temp, 0x0F); - mips_emit_b(beq, reg_rv, reg_zero, 3); // If 15 or bigger, ignore store - mips_emit_xori(reg_rv, reg_temp, 0x0C); - mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size)); + mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // Is < 15? 
+ mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(size)); mips_emit_nop(); mips_emit_jr(mips_reg_ra); mips_emit_nop(); @@ -3009,7 +3008,7 @@ static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) { *tr_ptr = translation_ptr; } -// Stubs for regions with EEPROM or flash/SRAM +// Stubs for regions with EEPROM or flash/SRAM (also RTC) static void emit_saveaccess_stub(u8 **tr_ptr) { unsigned opt, i, strop; u8 *translation_ptr = *tr_ptr; @@ -3062,6 +3061,21 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { } } + // RTC writes, only for 16 bit accesses + for (strop = 0; strop <= 3; strop++) { + tmemst[strop][8] = (u32)translation_ptr; + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_rv, reg_temp, 0x08); + mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop)); + if (strop == 1) { + emit_mem_call(&write_rtc, 0xFF); // Addr + } else { + mips_emit_nop(); + mips_emit_jr(mips_reg_ra); // Do nothing + mips_emit_nop(); + } + } + // Region 4 writes // I/O writes are also a bit special, they can trigger things like DMA, IRQs... // Also: aligned (strop==3) accesses do not trigger IRQs -- cgit v1.2.3 From d83f8fbd25562dcebf26e0e71d346bc41820e239 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 27 Apr 2021 19:05:00 +0200 Subject: Fix Vita port and likely some Linux/Android hidden issues Using an invalid SP makes Vita crash (for an unknown reason) and makes things like C signal handlers crash (luckily Retroarch doesn't use them). It is also a violation of the ABI and not a great idea. Recycled some little used registers to free SP. Perf should be roughly the same. 
--- arm/arm_emit.h | 18 ++++++++++++------ arm/arm_stub.S | 13 +++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arm/arm_emit.h b/arm/arm_emit.h index a6951c2..1432617 100644 --- a/arm/arm_emit.h +++ b/arm/arm_emit.h @@ -67,9 +67,10 @@ void execute_store_u32_safe(u32 address, u32 source); #define reg_a1 ARMREG_R1 #define reg_a2 ARMREG_R2 +/* scratch0 is shared with flags, be careful! */ #define reg_s0 ARMREG_R9 -#define reg_base ARMREG_SP -#define reg_flags ARMREG_R11 +#define reg_base ARMREG_R11 +#define reg_flags ARMREG_R9 #define reg_cycles ARMREG_R12 @@ -110,6 +111,7 @@ void execute_store_u32_safe(u32 address, u32 source); #define reg_x5 ARMREG_R8 #define mem_reg (~0U) +#define save1_reg 21 /* @@ -1415,7 +1417,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) #define arm_block_memory_adjust_pc_load() \ if(reg_list & 0x8000) \ { \ - generate_mov(reg_a0, reg_rv); \ generate_indirect_branch_arm(); \ } \ @@ -1463,12 +1464,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) arm_block_memory_offset_##offset_type(); \ arm_block_memory_writeback_##access_type(writeback_type); \ ARM_BIC_REG_IMM(0, reg_s0, reg_s0, 0x03, 0); \ + generate_store_reg(reg_s0, save1_reg); \ \ for(i = 0; i < 16; i++) \ { \ if((reg_list >> i) & 0x01) \ { \ cycle_count++; \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, offset, 0); \ if(reg_list & ~((2 << i) - 1)) \ { \ @@ -1493,12 +1496,12 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) generate_load_reg(reg_a0, rn); \ generate_function_call(execute_load_##type); \ write32((pc + 8)); \ - generate_mov(reg_s0, reg_rv); \ + generate_mov(reg_a2, reg_rv); \ generate_load_reg(reg_a0, rn); \ generate_load_reg(reg_a1, rm); \ + generate_store_reg(reg_a2, rd); \ generate_function_call(execute_store_##type); \ write32((pc + 4)); \ - generate_store_reg(reg_s0, rd); \ } \ @@ -1729,13 +1732,14 @@ u32 execute_store_cpsr_body(u32 
_cpsr, u32 store_mask, u32 address) #define thumb_block_memory_extra_down() \ #define thumb_block_memory_extra_pop_pc() \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \ generate_function_call(execute_load_u32); \ write32((pc + 4)); \ - generate_mov(reg_a0, reg_rv); \ generate_indirect_branch_cycle_update(thumb) \ #define thumb_block_memory_extra_push_lr(base_reg) \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \ generate_load_reg(reg_a1, REG_LR); \ generate_function_call(execute_store_u32_safe) \ @@ -1782,12 +1786,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) ARM_BIC_REG_IMM(0, reg_s0, reg_s0, 0x03, 0); \ thumb_block_address_preadjust_##pre_op(); \ thumb_block_address_postadjust_##post_op(base_reg); \ + generate_store_reg(reg_s0, save1_reg); \ \ for(i = 0; i < 8; i++) \ { \ if((reg_list >> i) & 0x01) \ { \ cycle_count++; \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, offset, 0); \ if(reg_list & ~((2 << i) - 1)) \ { \ diff --git a/arm/arm_stub.S b/arm/arm_stub.S index b8651cf..944d36a 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -45,15 +45,14 @@ _##symbol: #define CHANGED_PC_STATUS (31 * 4) #define COMPLETED_FRAME (32 * 4) #define OAM_UPDATED (33 * 4) -#define MAIN_THREAD_SP (34 * 4) #define reg_a0 r0 #define reg_a1 r1 #define reg_a2 r2 #define reg_s0 r9 -#define reg_base sp -#define reg_flags r11 +#define reg_base r11 +#define reg_flags r9 #define reg_cycles r12 @@ -151,11 +150,9 @@ _##symbol: @ registers which are important to the dynarec. 
#define call_c_function(function) ;\ - ldr sp, [reg_base, #MAIN_THREAD_SP] ;\ stmdb sp!, { call_c_saved_regs } ;\ bl function ;\ ldmia sp!, { call_c_saved_regs } ;\ - ldr sp, =reg ;\ @ Jumps to PC (ARM or Thumb modes) @ This is really two functions/routines in one @@ -483,9 +480,7 @@ defsymbl(execute_arm_translate) @ save the registers to be able to return later stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } - ldr r1, =reg @ reg to r1 - str sp, [r1, #MAIN_THREAD_SP] @ store the current sp - ldr sp, =reg @ reg_base = sp (loading addr) + ldr reg_base, =reg @ init base_reg mvn reg_cycles, r0 @ load cycle counter @@ -515,8 +510,6 @@ defsymbl(execute_arm_translate) @ Epilogue to return to the main thread (whatever called execute_arm_translate) return_to_main: - @ restore the stack pointer - ldr sp, [reg_base, #MAIN_THREAD_SP] @ restore the saved regs and return ldmia sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } bx lr -- cgit v1.2.3 From 52088a4d10af9a8c0e95b0eb168d4dfd0a13639f Mon Sep 17 00:00:00 2001 From: negativeExponent Date: Wed, 28 Apr 2021 02:39:44 +0800 Subject: Fix invalid memory map entries --- libretro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libretro.c b/libretro.c index 0373c94..21ca04f 100644 --- a/libretro.c +++ b/libretro.c @@ -808,7 +808,7 @@ static void set_input_descriptors() static void set_memory_descriptors(void) { const uint64_t mem = RETRO_MEMORY_SYSTEM_RAM; - struct retro_memory_descriptor desc[9] = { + struct retro_memory_descriptor desc[2] = { { mem, iwram, 0x00000 + 0x8000, 0x3000000, 0, 0, 0x8000, NULL }, { mem, ewram, 0x00000, 0x2000000, 0, 0, 0x40000, NULL }, }; -- cgit v1.2.3