diff options
author | neonloop | 2021-05-05 17:12:36 +0000 |
---|---|---|
committer | neonloop | 2021-05-05 17:12:59 +0000 |
commit | 997d3f2183eb9e99a3a1578a0060cb2fcd2165a2 (patch) | |
tree | 5b2afae111495c75ddf5c2b8fd8e6699da50117e | |
parent | 3aa34f7503bbb03e9bcd191f8af4fd5faac66d2e (diff) | |
parent | 52088a4d10af9a8c0e95b0eb168d4dfd0a13639f (diff) | |
download | picogpsp-997d3f2183eb9e99a3a1578a0060cb2fcd2165a2.tar.gz picogpsp-997d3f2183eb9e99a3a1578a0060cb2fcd2165a2.tar.bz2 picogpsp-997d3f2183eb9e99a3a1578a0060cb2fcd2165a2.zip |
Merge remote-tracking branch 'libretro/master' into pico-fe
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | arm/arm_emit.h | 42 | ||||
-rw-r--r-- | arm/arm_stub.S | 249 | ||||
-rw-r--r-- | cpu.h | 3 | ||||
-rw-r--r-- | cpu_threaded.c | 6 | ||||
-rw-r--r-- | frontend/menu.c | 2 | ||||
-rw-r--r-- | gba_memory.c | 2 | ||||
-rw-r--r-- | libretro.c | 4 | ||||
-rw-r--r-- | main.c | 2 | ||||
-rw-r--r-- | psp/mips_emit.h | 63 | ||||
-rw-r--r-- | psp/mips_stub.S | 15 | ||||
-rw-r--r-- | x86/x86_emit.h | 29 |
12 files changed, 259 insertions, 159 deletions
@@ -434,6 +434,7 @@ ifeq ($(FORCE_32BIT_ARCH), 1) fpic := endif +# Add -DTRACE_INSTRUCTIONS to trace instruction execution ifeq ($(DEBUG), 1) OPTIMIZE_SAFE := -O0 -g OPTIMIZE := -O0 -g diff --git a/arm/arm_emit.h b/arm/arm_emit.h index a5dc930..1432617 100644 --- a/arm/arm_emit.h +++ b/arm/arm_emit.h @@ -67,9 +67,10 @@ void execute_store_u32_safe(u32 address, u32 source); #define reg_a1 ARMREG_R1 #define reg_a2 ARMREG_R2 +/* scratch0 is shared with flags, be careful! */ #define reg_s0 ARMREG_R9 -#define reg_base ARMREG_SP -#define reg_flags ARMREG_R11 +#define reg_base ARMREG_R11 +#define reg_flags ARMREG_R9 #define reg_cycles ARMREG_R12 @@ -110,6 +111,7 @@ void execute_store_u32_safe(u32 address, u32 source); #define reg_x5 ARMREG_R8 #define mem_reg (~0U) +#define save1_reg 21 /* @@ -1227,6 +1229,30 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) return 0; } +#ifdef TRACE_INSTRUCTIONS + void trace_instruction(u32 pc) + { + printf("Executed %x\n", pc); + } + + #define emit_trace_instruction(pc) \ + generate_save_flags(); \ + ARM_LDR_IMM(0, ARMREG_SP, reg_base, 34*4); \ + ARM_STMDB_WB(0, ARMREG_SP, 0x500C); \ + arm_load_imm_32bit(reg_a0, pc); \ + generate_function_call(trace_instruction); \ + ARM_LDMIA_WB(0, ARMREG_SP, 0x500C); \ + arm_load_imm_32bit(ARMREG_SP, (u32)reg); \ + generate_restore_flags(); + #define emit_trace_thumb_instruction(pc) \ + emit_trace_instruction(pc) + #define emit_trace_arm_instruction(pc) \ + emit_trace_instruction(pc) +#else + #define emit_trace_thumb_instruction(pc) + #define emit_trace_arm_instruction(pc) +#endif + #define arm_psr_load_new_reg() \ generate_load_reg(reg_a0, rm) \ @@ -1391,7 +1417,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) #define arm_block_memory_adjust_pc_load() \ if(reg_list & 0x8000) \ { \ - generate_mov(reg_a0, reg_rv); \ generate_indirect_branch_arm(); \ } \ @@ -1439,12 +1464,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) arm_block_memory_offset_##offset_type(); \ arm_block_memory_writeback_##access_type(writeback_type); \ ARM_BIC_REG_IMM(0, reg_s0, reg_s0, 0x03, 0); \ + generate_store_reg(reg_s0, save1_reg); \ \ for(i = 0; i < 16; i++) \ { \ if((reg_list >> i) & 0x01) \ { \ cycle_count++; \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, offset, 0); \ if(reg_list & ~((2 << i) - 1)) \ { \ @@ -1469,12 +1496,12 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) generate_load_reg(reg_a0, rn); \ generate_function_call(execute_load_##type); \ write32((pc + 8)); \ - generate_mov(reg_s0, reg_rv); \ + generate_mov(reg_a2, reg_rv); \ generate_load_reg(reg_a0, rn); \ generate_load_reg(reg_a1, rm); \ + generate_store_reg(reg_a2, rd); \ generate_function_call(execute_store_##type); \ write32((pc + 4)); \ - generate_store_reg(reg_s0, rd); \ } \ @@ -1705,13 +1732,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) #define thumb_block_memory_extra_down() \ #define thumb_block_memory_extra_pop_pc() \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \ generate_function_call(execute_load_u32); \ write32((pc + 4)); \ - generate_mov(reg_a0, reg_rv); \ generate_indirect_branch_cycle_update(thumb) \ #define thumb_block_memory_extra_push_lr(base_reg) \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \ generate_load_reg(reg_a1, REG_LR); \ generate_function_call(execute_store_u32_safe) \ @@ -1758,12 +1786,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) ARM_BIC_REG_IMM(0, reg_s0, reg_s0, 0x03, 0); \ thumb_block_address_preadjust_##pre_op(); \ thumb_block_address_postadjust_##post_op(base_reg); \ + generate_store_reg(reg_s0, save1_reg); \ \ for(i = 0; i < 8; i++) \ { \ if((reg_list >> i) & 0x01) \ { \ cycle_count++; \ + generate_load_reg(reg_s0, save1_reg); \ generate_add_reg_reg_imm(reg_a0, reg_s0, offset, 0); \ if(reg_list & ~((2 << i) - 1)) \ { \ diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 723c185..e0f02f4 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -2,6 +2,7 @@ #include "../gpsp_config.h" #define defsymbl(symbol) \ +.type symbol, %function ;\ .global symbol ; \ .global _##symbol ; \ symbol: \ @@ -44,15 +45,14 @@ _##symbol: #define CHANGED_PC_STATUS (31 * 4) #define COMPLETED_FRAME (32 * 4) #define OAM_UPDATED (33 * 4) -#define MAIN_THREAD_SP (34 * 4) #define reg_a0 r0 #define reg_a1 r1 #define reg_a2 r2 #define reg_s0 r9 -#define reg_base sp -#define reg_flags r11 +#define reg_base r11 +#define reg_flags r9 #define reg_cycles r12 @@ -150,12 +150,85 @@ _##symbol: @ registers which are important to the dynarec. #define call_c_function(function) ;\ - ldr sp, [reg_base, #MAIN_THREAD_SP] ;\ stmdb sp!, { call_c_saved_regs } ;\ bl function ;\ ldmia sp!, { call_c_saved_regs } ;\ - ldr sp, =reg ;\ +@ Jumps to PC (ARM or Thumb modes) +@ This is really two functions/routines in one +@ r0 contains the PC + +.align 2 +#define execute_pc_builder(mode, align) ;\ +defsymbl(arm_indirect_branch_##mode) ;\ + save_flags() ;\ +execute_pc_##mode: ;\ + bic r0, r0, #(align) /* Align PC */;\ + mov r1, r0, lsr #24 /* Get region */;\ + ldr pc, [pc, r1, lsl #2] ;\ + nop ;\ + .long 3f /* 0 BIOS (like ROM) */;\ + .long 3f /* 1 Bad region */;\ + .long 1f /* 2 EWRAM */;\ + .long 2f /* 3 IWRAM */;\ + .long 3f /* 4 Not supported */;\ + .long 3f /* 5 Not supported */;\ + .long 3f /* 6 Not supported */;\ + .long 3f /* 7 Not supported */;\ + .long 3f /* 8 ROM */;\ + .long 3f /* 9 ROM */;\ + .long 3f /* A ROM */;\ + .long 3f /* B ROM */;\ + .long 3f /* C ROM */;\ + .long 3f /* D ROM */;\ + .long 3f /* E ROM */;\ + .long 3f /* F Bad region */;\ + ;\ +3: ;\ + call_c_function(block_lookup_address_##mode) ;\ + restore_flags() ;\ + bx r0 ;\ +1: ;\ + ldr r1, =(ewram+0x40000) /* Load base addr */;\ + mov r2, r0, lsl #14 /* addr &= 0x3ffff */;\ + mov r2, r2, lsr #14 ;\ + ldrh r2, [r1, r2] /* Load half word there */;\ + ldr r1, =(ram_block_ptrs) ;\ + ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\ + cmp r1, #0 /* NULL means not translated */;\ + beq 3b /* Need to translate */;\ + restore_flags() ;\ + bx r1 ;\ +2: ;\ + ldr r1, =(iwram) /* Load base addr */;\ + mov r2, r0, lsl #17 /* addr &= 0x7fff */;\ + mov r2, r2, lsr #17 ;\ + ldrh r2, [r1, r2] /* Load half word there */;\ + ldr r1, =(ram_block_ptrs) ;\ + ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\ + cmp r1, #0 /* NULL means not translated */;\ + beq 3b /* Need to translate */;\ + restore_flags() ;\ + bx r1 ;\ +.size arm_indirect_branch_##mode, .-arm_indirect_branch_##mode + +execute_pc_builder(arm, 0x3) +execute_pc_builder(thumb, 0x1) + +@ Resumes execution from saved PC, in any mode + +execute_pc: + ldr r0, [reg_base, #REG_PC] @ load new PC + ldr r1, [reg_base, #REG_CPSR] @ r1 = flags + tst r1, #0x20 @ see if Thumb bit is set + bne 2f + + load_registers_arm() + b execute_pc_arm + +2: + load_registers_thumb() + b execute_pc_thumb @ Update the GBA hardware (video, sound, input, etc) @@ -201,29 +274,12 @@ wait_halt_##name: ;\ ;\ ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\ cmp r0, #0 /* see if PC has changed */;\ - beq 1f /* if not return */;\ + bne execute_pc /* go jump/translate */;\ ;\ - ldr r0, [reg_base, #REG_PC] /* load new PC */;\ - ldr r1, [reg_base, #REG_CPSR] /* r1 = flags */;\ - tst r1, #0x20 /* see if Thumb bit is set */;\ - bne 2f /* if so load Thumb PC */;\ - ;\ - load_registers_arm() /* load ARM regs */;\ - call_c_function(block_lookup_address_arm) ;\ - restore_flags() ;\ - bx r0 /* jump to new ARM block */;\ - ;\ -1: ;\ load_registers_##mode() /* reload registers */;\ restore_flags() ;\ - return_##return_op() ;\ - ;\ -2: ;\ - load_registers_thumb() /* load Thumb regs */;\ - call_c_function(block_lookup_address_thumb) ;\ - restore_flags() ;\ - bx r0 /* jump to new ARM block */;\ - + return_##return_op() /* continue, no PC change */;\ +.size arm_update_gba_##mode, .-arm_update_gba_##mode arm_update_gba_builder(arm, arm, straight) arm_update_gba_builder(thumb, thumb, straight) @@ -240,59 +296,33 @@ arm_update_gba_builder(idle_thumb, thumb, add) @ r0: PC to branch to .align 2 -defsymbl(arm_indirect_branch_arm) - save_flags() - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 - -.align 2 -defsymbl(arm_indirect_branch_thumb) - save_flags() - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 - -.align 2 defsymbl(arm_indirect_branch_dual_arm) save_flags() tst r0, #0x01 @ check lower bit - bne 1f @ if set going to Thumb mode - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ return + beq execute_pc_arm @ Keep executing ARM code -1: - bic r0, r0, #0x01 + bic r0, r0, #0x01 @ Switch to Thumb mode store_registers_arm() @ save out ARM registers load_registers_thumb() @ load in Thumb registers ldr r1, [reg_base, #REG_CPSR] @ load cpsr orr r1, r1, #0x20 @ set Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ return + b execute_pc_thumb @ Now execute Thumb +.size arm_indirect_branch_dual_arm, .-arm_indirect_branch_dual_arm .align 2 defsymbl(arm_indirect_branch_dual_thumb) save_flags() tst r0, #0x01 @ check lower bit - beq 1f @ if set going to ARM mode - bic r0, r0, #0x01 - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ return + bne execute_pc_thumb @ Keep executing Thumb mode -1: store_registers_thumb() @ save out Thumb registers load_registers_arm() @ load in ARM registers ldr r1, [reg_base, #REG_CPSR] @ load cpsr bic r1, r1, #0x20 @ clear Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ return - + b execute_pc_arm @ Now execute ARM +.size arm_indirect_branch_dual_thumb, .-arm_indirect_branch_dual_thumb @ Update the cpsr. @@ -319,15 +349,12 @@ defsymbl(execute_store_cpsr) cmp r0, #0 @ check new PC beq 1f @ if it's zero, return - call_c_function(block_lookup_address_arm) - - restore_flags() - bx r0 @ return to new ARM address + b execute_pc_arm 1: restore_flags() add pc, lr, #4 @ return - +.size execute_store_cpsr, .-execute_store_cpsr @ Update the current spsr. @@ -341,6 +368,7 @@ defsymbl(execute_store_spsr) ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr bx lr +.size execute_store_spsr, .-execute_store_spsr @ Read the current spsr. @@ -353,7 +381,7 @@ defsymbl(execute_read_spsr) ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE] bx lr @ return - +.size execute_read_spsr, .-execute_read_spsr @ Restore the cpsr from the mode spsr and mode shift. @@ -378,16 +406,11 @@ defsymbl(execute_spsr_restore) bne 2f @ if so handle it load_registers_arm() @ restore ARM registers - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 + b execute_pc_arm 2: load_registers_thumb() @ load Thumb registers - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 - + b execute_pc_thumb @ Setup the mode transition work for calling an SWI. @@ -459,9 +482,7 @@ defsymbl(execute_arm_translate) @ save the registers to be able to return later stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } - ldr r1, =reg @ reg to r1 - str sp, [r1, #MAIN_THREAD_SP] @ store the current sp - ldr sp, =reg @ reg_base = sp (loading addr) + ldr reg_base, =reg @ init base_reg mvn reg_cycles, r0 @ load cycle counter @@ -491,8 +512,6 @@ defsymbl(execute_arm_translate) @ Epilogue to return to the main thread (whatever called execute_arm_translate) return_to_main: - @ restore the stack pointer - ldr sp, [reg_base, #MAIN_THREAD_SP] @ restore the saved regs and return ldmia sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } bx lr @@ -540,19 +559,18 @@ return_to_main: @ The instruction at LR is not an inst but a u32 data that contains the PC @ Used for SMC. That's why return is essentially `pc = lr + 4` -#define execute_store_body(store_type) ;\ +#define execute_store_body(store_type, tblnum) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ - str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ - tst r0, #0xF0000000 /* make sure address is in range */;\ - bne ext_store_u##store_type /* if not do ext store */;\ ;\ - ldr lr, =ptr_tbl_##store_type /* lr = ptr table */;\ - mov r4, r0, lsr #24 /* r4 = region number */;\ - ldr lr, [lr, r4, lsl #2] /* lr = function pointer */;\ - ldr r4, [reg_base, #REG_SAVE2] /* restore r4 */;\ - bx lr /* jump to handler */;\ + mov lr, r0, lsr #24 /* lr = region number */;\ + cmp lr, #15 ;\ + movcs lr, #15 /* lr = min(lr, 15) */;\ ;\ + add lr, lr, #(16*tblnum + 64) /* lr += table offset */;\ + ldr pc, [reg_base, lr, lsl #2] /* jump to handler */;\ + +#define store_fnptr_table(store_type) ;\ ptr_tbl_##store_type: ;\ .word ext_store_ignore /* 0x00: BIOS, ignore */;\ .word ext_store_ignore /* 0x01: ignore */;\ @@ -578,11 +596,11 @@ ext_store_ignore: add pc, lr, #4 @ return -#define execute_store_builder(store_type, store_op, store_op16, load_op) ;\ +#define execute_store_builder(store_type, store_op, store_op16, load_op, tn) ;\ ;\ .align 2 ;\ defsymbl(execute_store_u##store_type) ;\ - execute_store_body(store_type) ;\ + execute_store_body(store_type, tn) ;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -628,7 +646,7 @@ ext_store_vram_u##store_type: ;\ ;\ ext_store_oam_ram_u##store_type: ;\ mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\ - add r2, reg_base, #256 /* r2 = oam ram base */;\ + sub r2, reg_base, #0x400 /* r2 = oam ram base */;\ store_op16 r1, [r0, r2] /* store data */;\ str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -640,16 +658,16 @@ ext_store_oam_ram_u##store_type: ;\ ldr r0, [lr] /* load PC */;\ str r0, [reg_base, #REG_PC] /* write out PC */;\ b smc_write /* perform smc write */;\ +.size execute_store_u##store_type, .-execute_store_u##store_type - -execute_store_builder(8, strb, strh, ldrb) -execute_store_builder(16, strh, strh, ldrh) -execute_store_builder(32, str, str, ldr) +execute_store_builder(8, strb, strh, ldrb, 0) +execute_store_builder(16, strh, strh, ldrh, 1) +execute_store_builder(32, str, str, ldr, 2) @ This is a store that is executed in a strm case (so no SMC checks in-between) defsymbl(execute_store_u32_safe) - execute_store_body(32_safe) + execute_store_body(32_safe, 3) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return @@ -684,11 +702,12 @@ ext_store_vram_u32_safe: ext_store_oam_ram_u32_safe: mask_addr_8(10) @ Mask to mirror memory (no need to align!) - add r2, reg_base, #256 @ r2 = oam ram base + sub r2, reg_base, #0x400 @ r2 = oam ram base str r1, [r0, r2] @ store data str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return +.size execute_store_u32_safe, .-execute_store_u32_safe write_epilogue: cmp r0, #0 @ check if the write rose an alert @@ -721,21 +740,7 @@ alert_loop: bne alert_loop @ Keep looping until it is mvn reg_cycles, r0 @ load new cycle count - ldr r0, [reg_base, #REG_PC] @ load new PC - ldr r1, [reg_base, #REG_CPSR] @ r1 = flags - tst r1, #0x20 @ see if Thumb bit is set - bne 2f - - load_registers_arm() - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ jump to new ARM block - -2: - load_registers_thumb() - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ jump to new Thumb block + b execute_pc @ restart execution at PC 4: restore_flags() @@ -749,17 +754,8 @@ lookup_pc: ldr r0, [reg_base, #REG_PC] @ r0 = new pc ldr r1, [reg_base, #REG_CPSR] @ r1 = flags tst r1, #0x20 @ see if Thumb bit is set - beq lookup_pc_arm @ if not lookup ARM - -lookup_pc_thumb: - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ jump to new Thumb block - -lookup_pc_arm: - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ jump to new ARM block + beq execute_pc_arm @ if not lookup ARM + b execute_pc_thumb #define sign_extend_u8(reg) @@ -822,6 +818,7 @@ ext_load_##load_type: ;\ sign_extend_##load_type(r0) /* sign extend result */;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ +.size execute_load_##load_type, .-execute_load_##load_type .pool @@ -844,10 +841,15 @@ defsymbl(spsr) defsymbl(reg_mode) .space 196 -defsymbl(reg) - .space 0x100, 0 defsymbl(oam_ram) .space 0x400 +defsymbl(reg) + .space 0x100, 0 +@ Store pointer tables down here +store_fnptr_table(8) +store_fnptr_table(16) +store_fnptr_table(32) +store_fnptr_table(32_safe) @ Vita and 3DS (and of course mmap) map their own cache sections through some @ platform-speficic mechanisms. @@ -865,10 +867,13 @@ defsymbl(oam_ram) .align 4 defsymbl(rom_translation_cache) .space ROM_TRANSLATION_CACHE_SIZE +.size rom_translation_cache, .-rom_translation_cache defsymbl(ram_translation_cache) .space RAM_TRANSLATION_CACHE_SIZE +.size ram_translation_cache, .-ram_translation_cache defsymbl(bios_translation_cache) .space BIOS_TRANSLATION_CACHE_SIZE +.size bios_translation_cache, .-bios_translation_cache #endif @@ -165,7 +165,8 @@ void flush_translation_cache_rom(void); void flush_translation_cache_ram(void); void flush_translation_cache_bios(void); void dump_translation_cache(void); -void wipe_caches(void); +void init_caches(void); +void init_emitter(void); extern u32 reg_mode[7][7]; extern u32 spsr[6]; diff --git a/cpu_threaded.c b/cpu_threaded.c index 8b7dc0e..832e212 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -276,6 +276,7 @@ void translate_icache_sync() { check_pc_region(pc); \ opcode = address32(pc_address_block, (pc & 0x7FFF)); \ condition = block_data[block_data_position].condition; \ + emit_trace_arm_instruction(pc); \ \ if((condition != last_condition) || (condition >= 0x20)) \ { \ @@ -1715,6 +1716,7 @@ void translate_icache_sync() { check_pc_region(pc); \ last_opcode = opcode; \ opcode = address16(pc_address_block, (pc & 0x7FFF)); \ + emit_trace_thumb_instruction(pc); \ \ switch((opcode >> 8) & 0xFF) \ { \ @@ -3711,7 +3713,7 @@ void flush_translation_cache_bios(void) memset(bios_rom + 0x4000, 0, 0x4000); } -void wipe_caches(void) +void init_caches(void) { /* Ensure we wipe everything including the SMC mirrors */ flush_translation_cache_rom(); @@ -3721,6 +3723,8 @@ void wipe_caches(void) iwram_code_max = 0x7FFF; flush_translation_cache_ram(); flush_translation_cache_bios(); + /* Ensure 0 and FFFF get zeroed out */ + memset(ram_block_ptrs, 0, sizeof(ram_block_ptrs)); } #define cache_dump_prefix "" diff --git a/frontend/menu.c b/frontend/menu.c index 3ee0a93..44bcd07 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -143,7 +143,7 @@ static int menu_loop_options(int id, int keys) me_loop(e_menu_options, &sel); if (prev_dynarec_enable != dynarec_enable) - wipe_caches(); + init_caches(); return 0; } diff --git a/gba_memory.c b/gba_memory.c index 4e74a9a..8d94ca5 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -3334,7 +3334,7 @@ void gba_load_state(const void* src) #ifdef HAVE_DYNAREC if (dynarec_enable) - wipe_caches(); + init_caches(); #endif reg[OAM_UPDATED] = 1; @@ -690,7 +690,7 @@ static void check_variables(int started_from_load) dynarec_enable = 1; if (dynarec_enable != prevvalue) - wipe_caches(); + init_caches(); } else dynarec_enable = 1; @@ -823,7 +823,7 @@ static void set_input_descriptors() static void set_memory_descriptors(void) { const uint64_t mem = RETRO_MEMORY_SYSTEM_RAM; - struct retro_memory_descriptor desc[9] = { + struct retro_memory_descriptor desc[2] = { { mem, iwram, 0x00000 + 0x8000, 0x3000000, 0, 0, 0x8000, NULL }, { mem, ewram, 0x00000, 0x2000000, 0, 0, 0x40000, NULL }, }; @@ -114,7 +114,7 @@ void init_main(void) video_count = 960; #ifdef HAVE_DYNAREC - wipe_caches(); + init_caches(); init_emitter(); #endif } diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 818b724..12685e8 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2422,6 +2422,24 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) generate_indirect_branch_cycle_update(dual); \ } \ +#ifdef TRACE_INSTRUCTIONS + void trace_instruction(u32 pc) + { + printf("Executed %x\n", pc); + } + + #define emit_trace_instruction(pc) \ + emit_save_regs(false); \ + generate_load_imm(reg_a0, pc); \ + genccall(&trace_instruction); \ + emit_restore_regs(false) + #define emit_trace_thumb_instruction(pc) emit_trace_instruction(pc) + #define emit_trace_arm_instruction(pc) emit_trace_instruction(pc) +#else + #define emit_trace_thumb_instruction(pc) + #define emit_trace_arm_instruction(pc) +#endif + #define thumb_swi() \ generate_swi_hle_handler(opcode & 0xFF); \ generate_load_pc(reg_a0, (pc + 2)); \ @@ -2932,13 +2950,14 @@ static void emit_palette_hdl( } mips_emit_addu(reg_rv, reg_rv, reg_base); - // Store the data (delay slot from the SMC branch) + // Store the data in real palette memory if (realsize == 2) { - mips_emit_sw(reg_a1, reg_base, 0x100); + mips_emit_sw(reg_a1, reg_rv, 0x100); } else if (realsize == 1) { - mips_emit_sh(reg_a1, reg_base, 0x100); + mips_emit_sh(reg_a1, reg_rv, 0x100); } + // Convert and store in mirror memory palette_convert(); mips_emit_sh(reg_temp, reg_rv, 0x500); @@ -2965,24 +2984,23 @@ static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) { mips_emit_jr(mips_reg_ra); mips_emit_nop(); - // Region 8-B - tmemst[size][ 8] = tmemst[size][ 9] = - tmemst[size][10] = tmemst[size][11] = (u32)translation_ptr; + // Region 9-C + tmemst[size][ 9] = tmemst[size][10] = + tmemst[size][11] = tmemst[size][12] = (u32)translation_ptr; - mips_emit_srl(reg_temp, reg_a0, 26); // Check 6 MSB to be 0x02 - mips_emit_xori(reg_temp, reg_temp, 0x02); + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_addiu(reg_temp, reg_temp, -9); + mips_emit_srl(reg_temp, reg_temp, 2); mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size)); mips_emit_nop(); mips_emit_jr(mips_reg_ra); mips_emit_nop(); - // Region C or F (or bigger!) - tmemst[size][12] = tmemst[size][15] = (u32)translation_ptr; + // Region F or higher + tmemst[size][15] = (u32)translation_ptr; mips_emit_srl(reg_temp, reg_a0, 24); - mips_emit_sltiu(reg_rv, reg_temp, 0x0F); - mips_emit_b(beq, reg_rv, reg_zero, 3); // If 15 or bigger, ignore store - mips_emit_xori(reg_rv, reg_temp, 0x0C); - mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size)); + mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // Is < 15? + mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(size)); mips_emit_nop(); mips_emit_jr(mips_reg_ra); mips_emit_nop(); @@ -2990,7 +3008,7 @@ static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) { *tr_ptr = translation_ptr; } -// Stubs for regions with EEPROM or flash/SRAM +// Stubs for regions with EEPROM or flash/SRAM (also RTC) static void emit_saveaccess_stub(u8 **tr_ptr) { unsigned opt, i, strop; u8 *translation_ptr = *tr_ptr; @@ -3043,6 +3061,21 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { } } + // RTC writes, only for 16 bit accesses + for (strop = 0; strop <= 3; strop++) { + tmemst[strop][8] = (u32)translation_ptr; + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_rv, reg_temp, 0x08); + mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop)); + if (strop == 1) { + emit_mem_call(&write_rtc, 0xFF); // Addr + } else { + mips_emit_nop(); + mips_emit_jr(mips_reg_ra); // Do nothing + mips_emit_nop(); + } + } + // Region 4 writes // I/O writes are also a bit special, they can trigger things like DMA, IRQs... // Also: aligned (strop==3) accesses do not trigger IRQs diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 62a7731..3c05f52 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -391,8 +391,7 @@ execute_read_spsr: # $4: Current pc execute_swi: - add $sp, $sp, -4 # push $ra - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) sw $4, SUPERVISOR_LR($16) # store next PC in the supervisor's LR collapse_flags # get cpsr in $2 sw $2, SUPERVISOR_SPSR($16) # save cpsr in SUPERVISOR_CPSR @@ -402,10 +401,10 @@ execute_swi: save_registers li $4, 3 # 3 is supervisor mode cfncall set_cpu_mode, 5 # set the CPU mode to supervisor + lw $ra, REG_SAVE3($16) restore_registers - lw $ra, ($sp) # pop $ra jr $ra # return - add $sp, $sp, 4 # fix stack (delay slot) + nop # $4: pc to restore to # returns in $4 @@ -420,15 +419,13 @@ execute_spsr_restore: lw $1, SPSR_BASE($2) # $1 = spsr[cpu_mode] sw $1, REG_CPSR($16) # cpsr = spsr[cpu_mode] extract_flags_body # extract flags from $1 - addiu $sp, $sp, -4 - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) save_registers cfncall execute_spsr_restore_body, 6 # do the dirty work in this C function restore_registers - addu $4, $2, $0 # move return value to $4 - lw $ra, ($sp) + lw $ra, REG_SAVE3($16) jr $ra - addiu $sp, $sp, 4 + addu $4, $2, $0 # move return value to $4 no_spsr_restore: jr $ra diff --git a/x86/x86_emit.h b/x86/x86_emit.h index 68930e1..ef79110 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -96,6 +96,7 @@ typedef enum x86_opcode_push_reg = 0x50, x86_opcode_push_rm = 0xFF, x86_opcode_push_imm = 0x0668, + x86_opcode_pop_reg = 0x58, x86_opcode_call_offset = 0xE8, x86_opcode_ret = 0xC3, x86_opcode_test_rm_imm = 0x00F7, @@ -266,6 +267,12 @@ typedef enum #define x86_emit_idiv_eax_reg(source) \ x86_emit_opcode_1b_ext_reg(idiv_eax_rm, source) \ +#define x86_emit_pop_reg(regn) \ + x86_emit_opcode_1b(pop_reg, regn) \ + +#define x86_emit_push_reg(regn) \ + x86_emit_opcode_1b(push_reg, regn) \ + #define x86_emit_push_mem(base, offset) \ x86_emit_opcode_1b_mem(push_rm, 0x06, base, offset) \ @@ -523,6 +530,28 @@ typedef enum generate_function_call(execute_##name##_##flags_op##_reg); \ generate_mov(ireg, rv) \ +#ifdef TRACE_INSTRUCTIONS + void function_cc trace_instruction(u32 pc) + { + printf("Executed %x\n", pc); + } + + #define emit_trace_thumb_instruction(pc) \ + x86_emit_push_reg(eax); \ + x86_emit_push_reg(ecx); \ + x86_emit_push_reg(edx); \ + x86_emit_mov_reg_imm(eax, pc); \ + generate_function_call(trace_instruction); \ + x86_emit_pop_reg(edx); \ + x86_emit_pop_reg(ecx); \ + x86_emit_pop_reg(eax); + #define emit_trace_arm_instruction(pc) \ + emit_trace_thumb_instruction(pc) +#else + #define emit_trace_thumb_instruction(pc) + #define emit_trace_arm_instruction(pc) +#endif + u32 function_cc execute_lsl_no_flags_reg(u32 value, u32 shift) { if(shift != 0) |