summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorneonloop2021-05-05 17:12:36 +0000
committerneonloop2021-05-05 17:12:59 +0000
commit997d3f2183eb9e99a3a1578a0060cb2fcd2165a2 (patch)
tree5b2afae111495c75ddf5c2b8fd8e6699da50117e
parent3aa34f7503bbb03e9bcd191f8af4fd5faac66d2e (diff)
parent52088a4d10af9a8c0e95b0eb168d4dfd0a13639f (diff)
downloadpicogpsp-997d3f2183eb9e99a3a1578a0060cb2fcd2165a2.tar.gz
picogpsp-997d3f2183eb9e99a3a1578a0060cb2fcd2165a2.tar.bz2
picogpsp-997d3f2183eb9e99a3a1578a0060cb2fcd2165a2.zip
Merge remote-tracking branch 'libretro/master' into pico-fe
-rw-r--r--Makefile1
-rw-r--r--arm/arm_emit.h42
-rw-r--r--arm/arm_stub.S249
-rw-r--r--cpu.h3
-rw-r--r--cpu_threaded.c6
-rw-r--r--frontend/menu.c2
-rw-r--r--gba_memory.c2
-rw-r--r--libretro.c4
-rw-r--r--main.c2
-rw-r--r--psp/mips_emit.h63
-rw-r--r--psp/mips_stub.S15
-rw-r--r--x86/x86_emit.h29
12 files changed, 259 insertions, 159 deletions
diff --git a/Makefile b/Makefile
index 4a5806d..5d28045 100644
--- a/Makefile
+++ b/Makefile
@@ -434,6 +434,7 @@ ifeq ($(FORCE_32BIT_ARCH), 1)
fpic :=
endif
+# Add -DTRACE_INSTRUCTIONS to trace instruction execution
ifeq ($(DEBUG), 1)
OPTIMIZE_SAFE := -O0 -g
OPTIMIZE := -O0 -g
diff --git a/arm/arm_emit.h b/arm/arm_emit.h
index a5dc930..1432617 100644
--- a/arm/arm_emit.h
+++ b/arm/arm_emit.h
@@ -67,9 +67,10 @@ void execute_store_u32_safe(u32 address, u32 source);
#define reg_a1 ARMREG_R1
#define reg_a2 ARMREG_R2
+/* scratch0 is shared with flags, be careful! */
#define reg_s0 ARMREG_R9
-#define reg_base ARMREG_SP
-#define reg_flags ARMREG_R11
+#define reg_base ARMREG_R11
+#define reg_flags ARMREG_R9
#define reg_cycles ARMREG_R12
@@ -110,6 +111,7 @@ void execute_store_u32_safe(u32 address, u32 source);
#define reg_x5 ARMREG_R8
#define mem_reg (~0U)
+#define save1_reg 21
/*
@@ -1227,6 +1229,30 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
return 0;
}
+#ifdef TRACE_INSTRUCTIONS
+ /* Debug hook invoked from generated code before each translated
+  * instruction (build with -DTRACE_INSTRUCTIONS). Prints the emulated PC. */
+ void trace_instruction(u32 pc)
+ {
+ printf("Executed %x\n", pc);
+ }
+
+ /* Emits a call to trace_instruction(pc) into the translation stream.
+  *
+  * reg_base is ARMREG_R11, so SP is the native stack pointer and can be
+  * used for the push/pop as-is. It must not be reloaded from the register
+  * file or pointed at `reg`, which is what the code did while reg_base was
+  * SP. Register mask 0x500C = { r2, r3, r12 (reg_cycles), lr }.
+  * The emulated flags are saved before the call and restored after it. */
+ #define emit_trace_instruction(pc) \
+ generate_save_flags(); \
+ ARM_STMDB_WB(0, ARMREG_SP, 0x500C); \
+ arm_load_imm_32bit(reg_a0, pc); \
+ generate_function_call(trace_instruction); \
+ ARM_LDMIA_WB(0, ARMREG_SP, 0x500C); \
+ generate_restore_flags();
+ /* Thumb and ARM instructions share the same trace hook. */
+ #define emit_trace_thumb_instruction(pc) \
+ emit_trace_instruction(pc)
+ #define emit_trace_arm_instruction(pc) \
+ emit_trace_instruction(pc)
+#else
+ /* Tracing disabled: the hooks expand to nothing. */
+ #define emit_trace_thumb_instruction(pc)
+ #define emit_trace_arm_instruction(pc)
+#endif
+
#define arm_psr_load_new_reg() \
generate_load_reg(reg_a0, rm) \
@@ -1391,7 +1417,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
#define arm_block_memory_adjust_pc_load() \
if(reg_list & 0x8000) \
{ \
- generate_mov(reg_a0, reg_rv); \
generate_indirect_branch_arm(); \
} \
@@ -1439,12 +1464,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
arm_block_memory_offset_##offset_type(); \
arm_block_memory_writeback_##access_type(writeback_type); \
ARM_BIC_REG_IMM(0, reg_s0, reg_s0, 0x03, 0); \
+ generate_store_reg(reg_s0, save1_reg); \
\
for(i = 0; i < 16; i++) \
{ \
if((reg_list >> i) & 0x01) \
{ \
cycle_count++; \
+ generate_load_reg(reg_s0, save1_reg); \
generate_add_reg_reg_imm(reg_a0, reg_s0, offset, 0); \
if(reg_list & ~((2 << i) - 1)) \
{ \
@@ -1469,12 +1496,12 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
generate_load_reg(reg_a0, rn); \
generate_function_call(execute_load_##type); \
write32((pc + 8)); \
- generate_mov(reg_s0, reg_rv); \
+ generate_mov(reg_a2, reg_rv); \
generate_load_reg(reg_a0, rn); \
generate_load_reg(reg_a1, rm); \
+ generate_store_reg(reg_a2, rd); \
generate_function_call(execute_store_##type); \
write32((pc + 4)); \
- generate_store_reg(reg_s0, rd); \
} \
@@ -1705,13 +1732,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
#define thumb_block_memory_extra_down() \
#define thumb_block_memory_extra_pop_pc() \
+ generate_load_reg(reg_s0, save1_reg); \
generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \
generate_function_call(execute_load_u32); \
write32((pc + 4)); \
- generate_mov(reg_a0, reg_rv); \
generate_indirect_branch_cycle_update(thumb) \
#define thumb_block_memory_extra_push_lr(base_reg) \
+ generate_load_reg(reg_s0, save1_reg); \
generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \
generate_load_reg(reg_a1, REG_LR); \
generate_function_call(execute_store_u32_safe) \
@@ -1758,12 +1786,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
ARM_BIC_REG_IMM(0, reg_s0, reg_s0, 0x03, 0); \
thumb_block_address_preadjust_##pre_op(); \
thumb_block_address_postadjust_##post_op(base_reg); \
+ generate_store_reg(reg_s0, save1_reg); \
\
for(i = 0; i < 8; i++) \
{ \
if((reg_list >> i) & 0x01) \
{ \
cycle_count++; \
+ generate_load_reg(reg_s0, save1_reg); \
generate_add_reg_reg_imm(reg_a0, reg_s0, offset, 0); \
if(reg_list & ~((2 << i) - 1)) \
{ \
diff --git a/arm/arm_stub.S b/arm/arm_stub.S
index 723c185..e0f02f4 100644
--- a/arm/arm_stub.S
+++ b/arm/arm_stub.S
@@ -2,6 +2,7 @@
#include "../gpsp_config.h"
#define defsymbl(symbol) \
+.type symbol, %function ;\
.global symbol ; \
.global _##symbol ; \
symbol: \
@@ -44,15 +45,14 @@ _##symbol:
#define CHANGED_PC_STATUS (31 * 4)
#define COMPLETED_FRAME (32 * 4)
#define OAM_UPDATED (33 * 4)
-#define MAIN_THREAD_SP (34 * 4)
#define reg_a0 r0
#define reg_a1 r1
#define reg_a2 r2
#define reg_s0 r9
-#define reg_base sp
-#define reg_flags r11
+#define reg_base r11
+#define reg_flags r9
#define reg_cycles r12
@@ -150,12 +150,85 @@ _##symbol:
@ registers which are important to the dynarec.
#define call_c_function(function) ;\
- ldr sp, [reg_base, #MAIN_THREAD_SP] ;\
stmdb sp!, { call_c_saved_regs } ;\
bl function ;\
ldmia sp!, { call_c_saved_regs } ;\
- ldr sp, =reg ;\
+@ Jumps to PC (ARM or Thumb modes)
+@ This is really two functions/routines in one
+@ r0 contains the PC
+
+.align 2
+@ Builds arm_indirect_branch_<mode> plus the inner entry execute_pc_<mode>
+@ (the inner entry skips save_flags).
+@ In: r0 = target PC; the low bits given by align are cleared here.
+@ Dispatch is done on the region (PC >> 24) through an inline jump table:
+@   region 2 (EWRAM) and 3 (IWRAM): load a halfword from ewram+0x40000 or
+@   iwram at the masked address and use it to index ram_block_ptrs; a NULL
+@   entry falls back to the C lookup.
+@   any other region: call block_lookup_address_<mode>.
+@ The region is clamped to 15 before indexing: the table only has 16
+@ entries while PC >> 24 can be as large as 255. Entry 15 goes to the C
+@ lookup, the same clamp that execute_store_body applies.
+#define execute_pc_builder(mode, align) ;\
+defsymbl(arm_indirect_branch_##mode) ;\
+ save_flags() ;\
+execute_pc_##mode: ;\
+ bic r0, r0, #(align) /* Align PC */;\
+ mov r1, r0, lsr #24 /* Get region */;\
+ cmp r1, #15 /* Table has 16 entries only */;\
+ movcs r1, #15 /* r1 = min(r1, 15) */;\
+ ldr pc, [pc, r1, lsl #2] /* pc reads as this insn + 8 */;\
+ nop /* pad so the table is at +8 */;\
+ .long 3f /* 0 BIOS (like ROM) */;\
+ .long 3f /* 1 Bad region */;\
+ .long 1f /* 2 EWRAM */;\
+ .long 2f /* 3 IWRAM */;\
+ .long 3f /* 4 Not supported */;\
+ .long 3f /* 5 Not supported */;\
+ .long 3f /* 6 Not supported */;\
+ .long 3f /* 7 Not supported */;\
+ .long 3f /* 8 ROM */;\
+ .long 3f /* 9 ROM */;\
+ .long 3f /* A ROM */;\
+ .long 3f /* B ROM */;\
+ .long 3f /* C ROM */;\
+ .long 3f /* D ROM */;\
+ .long 3f /* E ROM */;\
+ .long 3f /* F Bad region (and above) */;\
+ ;\
+3: ;\
+ call_c_function(block_lookup_address_##mode) ;\
+ restore_flags() ;\
+ bx r0 ;\
+1: ;\
+ ldr r1, =(ewram+0x40000) /* Load base addr */;\
+ mov r2, r0, lsl #14 /* addr &= 0x3ffff */;\
+ mov r2, r2, lsr #14 ;\
+ ldrh r2, [r1, r2] /* Load half word there */;\
+ ldr r1, =(ram_block_ptrs) ;\
+ ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\
+ cmp r1, #0 /* NULL means not translated */;\
+ beq 3b /* Need to translate */;\
+ restore_flags() ;\
+ bx r1 ;\
+2: ;\
+ ldr r1, =(iwram) /* Load base addr */;\
+ mov r2, r0, lsl #17 /* addr &= 0x7fff */;\
+ mov r2, r2, lsr #17 ;\
+ ldrh r2, [r1, r2] /* Load half word there */;\
+ ldr r1, =(ram_block_ptrs) ;\
+ ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\
+ cmp r1, #0 /* NULL means not translated */;\
+ beq 3b /* Need to translate */;\
+ restore_flags() ;\
+ bx r1 ;\
+.size arm_indirect_branch_##mode, .-arm_indirect_branch_##mode
+
+execute_pc_builder(arm, 0x3)
+execute_pc_builder(thumb, 0x1)
+
+@ Resumes execution from saved PC, in any mode
+
+execute_pc: @ entry: reg_base must already point at the register file
+ ldr r0, [reg_base, #REG_PC] @ load new PC
+ ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
+ tst r1, #0x20 @ see if Thumb bit is set
+ bne 2f @ Thumb bit set: take the Thumb path at 2:
+ @ ARM state: reload the ARM register mapping and dispatch on r0
+ load_registers_arm()
+ b execute_pc_arm @ inner entry of execute_pc_builder(arm), past save_flags
+ @ Thumb state: reload the Thumb register mapping and dispatch on r0
+2:
+ load_registers_thumb()
+ b execute_pc_thumb @ inner entry of execute_pc_builder(thumb), past save_flags
@ Update the GBA hardware (video, sound, input, etc)
@@ -201,29 +274,12 @@ wait_halt_##name: ;\
;\
ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\
cmp r0, #0 /* see if PC has changed */;\
- beq 1f /* if not return */;\
+ bne execute_pc /* go jump/translate */;\
;\
- ldr r0, [reg_base, #REG_PC] /* load new PC */;\
- ldr r1, [reg_base, #REG_CPSR] /* r1 = flags */;\
- tst r1, #0x20 /* see if Thumb bit is set */;\
- bne 2f /* if so load Thumb PC */;\
- ;\
- load_registers_arm() /* load ARM regs */;\
- call_c_function(block_lookup_address_arm) ;\
- restore_flags() ;\
- bx r0 /* jump to new ARM block */;\
- ;\
-1: ;\
load_registers_##mode() /* reload registers */;\
restore_flags() ;\
- return_##return_op() ;\
- ;\
-2: ;\
- load_registers_thumb() /* load Thumb regs */;\
- call_c_function(block_lookup_address_thumb) ;\
- restore_flags() ;\
- bx r0 /* jump to new ARM block */;\
-
+ return_##return_op() /* continue, no PC change */;\
+.size arm_update_gba_##mode, .-arm_update_gba_##mode
arm_update_gba_builder(arm, arm, straight)
arm_update_gba_builder(thumb, thumb, straight)
@@ -240,59 +296,33 @@ arm_update_gba_builder(idle_thumb, thumb, add)
@ r0: PC to branch to
.align 2
-defsymbl(arm_indirect_branch_arm)
- save_flags()
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0
-
-.align 2
-defsymbl(arm_indirect_branch_thumb)
- save_flags()
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0
-
-.align 2
defsymbl(arm_indirect_branch_dual_arm)
save_flags()
tst r0, #0x01 @ check lower bit
- bne 1f @ if set going to Thumb mode
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0 @ return
+ beq execute_pc_arm @ Keep executing ARM code
-1:
- bic r0, r0, #0x01
+ bic r0, r0, #0x01 @ Switch to Thumb mode
store_registers_arm() @ save out ARM registers
load_registers_thumb() @ load in Thumb registers
ldr r1, [reg_base, #REG_CPSR] @ load cpsr
orr r1, r1, #0x20 @ set Thumb mode
str r1, [reg_base, #REG_CPSR] @ store flags
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0 @ return
+ b execute_pc_thumb @ Now execute Thumb
+.size arm_indirect_branch_dual_arm, .-arm_indirect_branch_dual_arm
.align 2
defsymbl(arm_indirect_branch_dual_thumb)
save_flags()
tst r0, #0x01 @ check lower bit
- beq 1f @ if set going to ARM mode
- bic r0, r0, #0x01
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0 @ return
+ bne execute_pc_thumb @ Keep executing Thumb mode
-1:
store_registers_thumb() @ save out Thumb registers
load_registers_arm() @ load in ARM registers
ldr r1, [reg_base, #REG_CPSR] @ load cpsr
bic r1, r1, #0x20 @ clear Thumb mode
str r1, [reg_base, #REG_CPSR] @ store flags
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0 @ return
-
+ b execute_pc_arm @ Now execute ARM
+.size arm_indirect_branch_dual_thumb, .-arm_indirect_branch_dual_thumb
@ Update the cpsr.
@@ -319,15 +349,12 @@ defsymbl(execute_store_cpsr)
cmp r0, #0 @ check new PC
beq 1f @ if it's zero, return
- call_c_function(block_lookup_address_arm)
-
- restore_flags()
- bx r0 @ return to new ARM address
+ b execute_pc_arm
1:
restore_flags()
add pc, lr, #4 @ return
-
+.size execute_store_cpsr, .-execute_store_cpsr
@ Update the current spsr.
@@ -341,6 +368,7 @@ defsymbl(execute_store_spsr)
ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE
str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr
bx lr
+.size execute_store_spsr, .-execute_store_spsr
@ Read the current spsr.
@@ -353,7 +381,7 @@ defsymbl(execute_read_spsr)
ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE
ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE]
bx lr @ return
-
+.size execute_read_spsr, .-execute_read_spsr
@ Restore the cpsr from the mode spsr and mode shift.
@@ -378,16 +406,11 @@ defsymbl(execute_spsr_restore)
bne 2f @ if so handle it
load_registers_arm() @ restore ARM registers
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0
+ b execute_pc_arm
2:
load_registers_thumb() @ load Thumb registers
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0
-
+ b execute_pc_thumb
@ Setup the mode transition work for calling an SWI.
@@ -459,9 +482,7 @@ defsymbl(execute_arm_translate)
@ save the registers to be able to return later
stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr }
- ldr r1, =reg @ reg to r1
- str sp, [r1, #MAIN_THREAD_SP] @ store the current sp
- ldr sp, =reg @ reg_base = sp (loading addr)
+ ldr reg_base, =reg @ init base_reg
mvn reg_cycles, r0 @ load cycle counter
@@ -491,8 +512,6 @@ defsymbl(execute_arm_translate)
@ Epilogue to return to the main thread (whatever called execute_arm_translate)
return_to_main:
- @ restore the stack pointer
- ldr sp, [reg_base, #MAIN_THREAD_SP]
@ restore the saved regs and return
ldmia sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr }
bx lr
@@ -540,19 +559,18 @@ return_to_main:
@ The instruction at LR is not an inst but a u32 data that contains the PC
@ Used for SMC. That's why return is essentially `pc = lr + 4`
-#define execute_store_body(store_type) ;\
+#define execute_store_body(store_type, tblnum) ;\
save_flags() ;\
str lr, [reg_base, #REG_SAVE3] /* save lr */;\
- str r4, [reg_base, #REG_SAVE2] /* save r4 */;\
- tst r0, #0xF0000000 /* make sure address is in range */;\
- bne ext_store_u##store_type /* if not do ext store */;\
;\
- ldr lr, =ptr_tbl_##store_type /* lr = ptr table */;\
- mov r4, r0, lsr #24 /* r4 = region number */;\
- ldr lr, [lr, r4, lsl #2] /* lr = function pointer */;\
- ldr r4, [reg_base, #REG_SAVE2] /* restore r4 */;\
- bx lr /* jump to handler */;\
+ mov lr, r0, lsr #24 /* lr = region number */;\
+ cmp lr, #15 ;\
+ movcs lr, #15 /* lr = min(lr, 15) */;\
;\
+ add lr, lr, #(16*tblnum + 64) /* lr += table offset */;\
+ ldr pc, [reg_base, lr, lsl #2] /* jump to handler */;\
+
+#define store_fnptr_table(store_type) ;\
ptr_tbl_##store_type: ;\
.word ext_store_ignore /* 0x00: BIOS, ignore */;\
.word ext_store_ignore /* 0x01: ignore */;\
@@ -578,11 +596,11 @@ ext_store_ignore:
add pc, lr, #4 @ return
-#define execute_store_builder(store_type, store_op, store_op16, load_op) ;\
+#define execute_store_builder(store_type, store_op, store_op16, load_op, tn) ;\
;\
.align 2 ;\
defsymbl(execute_store_u##store_type) ;\
- execute_store_body(store_type) ;\
+ execute_store_body(store_type, tn) ;\
;\
ext_store_u##store_type: ;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
@@ -628,7 +646,7 @@ ext_store_vram_u##store_type: ;\
;\
ext_store_oam_ram_u##store_type: ;\
mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\
- add r2, reg_base, #256 /* r2 = oam ram base */;\
+ sub r2, reg_base, #0x400 /* r2 = oam ram base */;\
store_op16 r1, [r0, r2] /* store data */;\
str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
@@ -640,16 +658,16 @@ ext_store_oam_ram_u##store_type: ;\
ldr r0, [lr] /* load PC */;\
str r0, [reg_base, #REG_PC] /* write out PC */;\
b smc_write /* perform smc write */;\
+.size execute_store_u##store_type, .-execute_store_u##store_type
-
-execute_store_builder(8, strb, strh, ldrb)
-execute_store_builder(16, strh, strh, ldrh)
-execute_store_builder(32, str, str, ldr)
+execute_store_builder(8, strb, strh, ldrb, 0)
+execute_store_builder(16, strh, strh, ldrh, 1)
+execute_store_builder(32, str, str, ldr, 2)
@ This is a store that is executed in a strm case (so no SMC checks in-between)
defsymbl(execute_store_u32_safe)
- execute_store_body(32_safe)
+ execute_store_body(32_safe, 3)
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return
@@ -684,11 +702,12 @@ ext_store_vram_u32_safe:
ext_store_oam_ram_u32_safe:
mask_addr_8(10) @ Mask to mirror memory (no need to align!)
- add r2, reg_base, #256 @ r2 = oam ram base
+ sub r2, reg_base, #0x400 @ r2 = oam ram base
str r1, [r0, r2] @ store data
str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return
+.size execute_store_u32_safe, .-execute_store_u32_safe
write_epilogue:
cmp r0, #0 @ check if the write rose an alert
@@ -721,21 +740,7 @@ alert_loop:
bne alert_loop @ Keep looping until it is
mvn reg_cycles, r0 @ load new cycle count
- ldr r0, [reg_base, #REG_PC] @ load new PC
- ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
- tst r1, #0x20 @ see if Thumb bit is set
- bne 2f
-
- load_registers_arm()
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0 @ jump to new ARM block
-
-2:
- load_registers_thumb()
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0 @ jump to new Thumb block
+ b execute_pc @ restart execution at PC
4:
restore_flags()
@@ -749,17 +754,8 @@ lookup_pc:
ldr r0, [reg_base, #REG_PC] @ r0 = new pc
ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
tst r1, #0x20 @ see if Thumb bit is set
- beq lookup_pc_arm @ if not lookup ARM
-
-lookup_pc_thumb:
- call_c_function(block_lookup_address_thumb)
- restore_flags()
- bx r0 @ jump to new Thumb block
-
-lookup_pc_arm:
- call_c_function(block_lookup_address_arm)
- restore_flags()
- bx r0 @ jump to new ARM block
+ beq execute_pc_arm @ if not lookup ARM
+ b execute_pc_thumb
#define sign_extend_u8(reg)
@@ -822,6 +818,7 @@ ext_load_##load_type: ;\
sign_extend_##load_type(r0) /* sign extend result */;\
restore_flags() ;\
add pc, lr, #4 /* return */;\
+.size execute_load_##load_type, .-execute_load_##load_type
.pool
@@ -844,10 +841,15 @@ defsymbl(spsr)
defsymbl(reg_mode)
.space 196
-defsymbl(reg)
- .space 0x100, 0
defsymbl(oam_ram)
.space 0x400
+defsymbl(reg)
+ .space 0x100, 0
+@ Store pointer tables down here
+store_fnptr_table(8)
+store_fnptr_table(16)
+store_fnptr_table(32)
+store_fnptr_table(32_safe)
@ Vita and 3DS (and of course mmap) map their own cache sections through some
@ platform-specific mechanisms.
@@ -865,10 +867,13 @@ defsymbl(oam_ram)
.align 4
defsymbl(rom_translation_cache)
.space ROM_TRANSLATION_CACHE_SIZE
+.size rom_translation_cache, .-rom_translation_cache
defsymbl(ram_translation_cache)
.space RAM_TRANSLATION_CACHE_SIZE
+.size ram_translation_cache, .-ram_translation_cache
defsymbl(bios_translation_cache)
.space BIOS_TRANSLATION_CACHE_SIZE
+.size bios_translation_cache, .-bios_translation_cache
#endif
diff --git a/cpu.h b/cpu.h
index 9868866..2d252d1 100644
--- a/cpu.h
+++ b/cpu.h
@@ -165,7 +165,8 @@ void flush_translation_cache_rom(void);
void flush_translation_cache_ram(void);
void flush_translation_cache_bios(void);
void dump_translation_cache(void);
-void wipe_caches(void);
+void init_caches(void);
+void init_emitter(void);
extern u32 reg_mode[7][7];
extern u32 spsr[6];
diff --git a/cpu_threaded.c b/cpu_threaded.c
index 8b7dc0e..832e212 100644
--- a/cpu_threaded.c
+++ b/cpu_threaded.c
@@ -276,6 +276,7 @@ void translate_icache_sync() {
check_pc_region(pc); \
opcode = address32(pc_address_block, (pc & 0x7FFF)); \
condition = block_data[block_data_position].condition; \
+ emit_trace_arm_instruction(pc); \
\
if((condition != last_condition) || (condition >= 0x20)) \
{ \
@@ -1715,6 +1716,7 @@ void translate_icache_sync() {
check_pc_region(pc); \
last_opcode = opcode; \
opcode = address16(pc_address_block, (pc & 0x7FFF)); \
+ emit_trace_thumb_instruction(pc); \
\
switch((opcode >> 8) & 0xFF) \
{ \
@@ -3711,7 +3713,7 @@ void flush_translation_cache_bios(void)
memset(bios_rom + 0x4000, 0, 0x4000);
}
-void wipe_caches(void)
+void init_caches(void)
{
/* Ensure we wipe everything including the SMC mirrors */
flush_translation_cache_rom();
@@ -3721,6 +3723,8 @@ void wipe_caches(void)
iwram_code_max = 0x7FFF;
flush_translation_cache_ram();
flush_translation_cache_bios();
+ /* Ensure 0 and FFFF get zeroed out */
+ memset(ram_block_ptrs, 0, sizeof(ram_block_ptrs));
}
#define cache_dump_prefix ""
diff --git a/frontend/menu.c b/frontend/menu.c
index 3ee0a93..44bcd07 100644
--- a/frontend/menu.c
+++ b/frontend/menu.c
@@ -143,7 +143,7 @@ static int menu_loop_options(int id, int keys)
me_loop(e_menu_options, &sel);
if (prev_dynarec_enable != dynarec_enable)
- wipe_caches();
+ init_caches();
return 0;
}
diff --git a/gba_memory.c b/gba_memory.c
index 4e74a9a..8d94ca5 100644
--- a/gba_memory.c
+++ b/gba_memory.c
@@ -3334,7 +3334,7 @@ void gba_load_state(const void* src)
#ifdef HAVE_DYNAREC
if (dynarec_enable)
- wipe_caches();
+ init_caches();
#endif
reg[OAM_UPDATED] = 1;
diff --git a/libretro.c b/libretro.c
index 9bd17fa..f74c0cd 100644
--- a/libretro.c
+++ b/libretro.c
@@ -690,7 +690,7 @@ static void check_variables(int started_from_load)
dynarec_enable = 1;
if (dynarec_enable != prevvalue)
- wipe_caches();
+ init_caches();
}
else
dynarec_enable = 1;
@@ -823,7 +823,7 @@ static void set_input_descriptors()
static void set_memory_descriptors(void)
{
const uint64_t mem = RETRO_MEMORY_SYSTEM_RAM;
- struct retro_memory_descriptor desc[9] = {
+ struct retro_memory_descriptor desc[2] = {
{ mem, iwram, 0x00000 + 0x8000, 0x3000000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x00000, 0x2000000, 0, 0, 0x40000, NULL },
};
diff --git a/main.c b/main.c
index 2a82338..759aa94 100644
--- a/main.c
+++ b/main.c
@@ -114,7 +114,7 @@ void init_main(void)
video_count = 960;
#ifdef HAVE_DYNAREC
- wipe_caches();
+ init_caches();
init_emitter();
#endif
}
diff --git a/psp/mips_emit.h b/psp/mips_emit.h
index 818b724..12685e8 100644
--- a/psp/mips_emit.h
+++ b/psp/mips_emit.h
@@ -2422,6 +2422,24 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
generate_indirect_branch_cycle_update(dual); \
} \
+#ifdef TRACE_INSTRUCTIONS /* debug aid: report every emulated PC as it executes */
+ void trace_instruction(u32 pc) /* target of the call emitted below; pc = emulated PC */
+ {
+ printf("Executed %x\n", pc);
+ }
+ /* Emits: save registers, load pc into reg_a0, call trace_instruction, restore registers. */
+ #define emit_trace_instruction(pc) \
+ emit_save_regs(false); \
+ generate_load_imm(reg_a0, pc); /* argument for trace_instruction */ \
+ genccall(&trace_instruction); \
+ emit_restore_regs(false) /* no trailing semicolon: the call site supplies it */
+ #define emit_trace_thumb_instruction(pc) emit_trace_instruction(pc) /* same hook for Thumb */
+ #define emit_trace_arm_instruction(pc) emit_trace_instruction(pc) /* same hook for ARM */
+#else /* tracing disabled: the hooks expand to nothing */
+ #define emit_trace_thumb_instruction(pc)
+ #define emit_trace_arm_instruction(pc)
+#endif
+
#define thumb_swi() \
generate_swi_hle_handler(opcode & 0xFF); \
generate_load_pc(reg_a0, (pc + 2)); \
@@ -2932,13 +2950,14 @@ static void emit_palette_hdl(
}
mips_emit_addu(reg_rv, reg_rv, reg_base);
- // Store the data (delay slot from the SMC branch)
+ // Store the data in real palette memory
if (realsize == 2) {
- mips_emit_sw(reg_a1, reg_base, 0x100);
+ mips_emit_sw(reg_a1, reg_rv, 0x100);
} else if (realsize == 1) {
- mips_emit_sh(reg_a1, reg_base, 0x100);
+ mips_emit_sh(reg_a1, reg_rv, 0x100);
}
+ // Convert and store in mirror memory
palette_convert();
mips_emit_sh(reg_temp, reg_rv, 0x500);
@@ -2965,24 +2984,23 @@ static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) {
mips_emit_jr(mips_reg_ra);
mips_emit_nop();
- // Region 8-B
- tmemst[size][ 8] = tmemst[size][ 9] =
- tmemst[size][10] = tmemst[size][11] = (u32)translation_ptr;
+ // Region 9-C
+ tmemst[size][ 9] = tmemst[size][10] =
+ tmemst[size][11] = tmemst[size][12] = (u32)translation_ptr;
- mips_emit_srl(reg_temp, reg_a0, 26); // Check 6 MSB to be 0x02
- mips_emit_xori(reg_temp, reg_temp, 0x02);
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_addiu(reg_temp, reg_temp, -9);
+ mips_emit_srl(reg_temp, reg_temp, 2);
mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
mips_emit_nop();
mips_emit_jr(mips_reg_ra);
mips_emit_nop();
- // Region C or F (or bigger!)
- tmemst[size][12] = tmemst[size][15] = (u32)translation_ptr;
+ // Region F or higher
+ tmemst[size][15] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24);
- mips_emit_sltiu(reg_rv, reg_temp, 0x0F);
- mips_emit_b(beq, reg_rv, reg_zero, 3); // If 15 or bigger, ignore store
- mips_emit_xori(reg_rv, reg_temp, 0x0C);
- mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
+ mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // Is < 15?
+ mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(size));
mips_emit_nop();
mips_emit_jr(mips_reg_ra);
mips_emit_nop();
@@ -2990,7 +3008,7 @@ static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) {
*tr_ptr = translation_ptr;
}
-// Stubs for regions with EEPROM or flash/SRAM
+// Stubs for regions with EEPROM or flash/SRAM (also RTC)
static void emit_saveaccess_stub(u8 **tr_ptr) {
unsigned opt, i, strop;
u8 *translation_ptr = *tr_ptr;
@@ -3043,6 +3061,21 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
}
}
+ // RTC writes, only for 16 bit accesses
+ for (strop = 0; strop <= 3; strop++) {
+ tmemst[strop][8] = (u32)translation_ptr;
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_xori(reg_rv, reg_temp, 0x08);
+ mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop));
+ if (strop == 1) {
+ emit_mem_call(&write_rtc, 0xFF); // Addr
+ } else {
+ mips_emit_nop();
+ mips_emit_jr(mips_reg_ra); // Do nothing
+ mips_emit_nop();
+ }
+ }
+
// Region 4 writes
// I/O writes are also a bit special, they can trigger things like DMA, IRQs...
// Also: aligned (strop==3) accesses do not trigger IRQs
diff --git a/psp/mips_stub.S b/psp/mips_stub.S
index 62a7731..3c05f52 100644
--- a/psp/mips_stub.S
+++ b/psp/mips_stub.S
@@ -391,8 +391,7 @@ execute_read_spsr:
# $4: Current pc
execute_swi:
- add $sp, $sp, -4 # push $ra
- sw $ra, ($sp)
+ sw $ra, REG_SAVE3($16)
sw $4, SUPERVISOR_LR($16) # store next PC in the supervisor's LR
collapse_flags # get cpsr in $2
sw $2, SUPERVISOR_SPSR($16) # save cpsr in SUPERVISOR_CPSR
@@ -402,10 +401,10 @@ execute_swi:
save_registers
li $4, 3 # 3 is supervisor mode
cfncall set_cpu_mode, 5 # set the CPU mode to supervisor
+ lw $ra, REG_SAVE3($16)
restore_registers
- lw $ra, ($sp) # pop $ra
jr $ra # return
- add $sp, $sp, 4 # fix stack (delay slot)
+ nop
# $4: pc to restore to
# returns in $4
@@ -420,15 +419,13 @@ execute_spsr_restore:
lw $1, SPSR_BASE($2) # $1 = spsr[cpu_mode]
sw $1, REG_CPSR($16) # cpsr = spsr[cpu_mode]
extract_flags_body # extract flags from $1
- addiu $sp, $sp, -4
- sw $ra, ($sp)
+ sw $ra, REG_SAVE3($16)
save_registers
cfncall execute_spsr_restore_body, 6 # do the dirty work in this C function
restore_registers
- addu $4, $2, $0 # move return value to $4
- lw $ra, ($sp)
+ lw $ra, REG_SAVE3($16)
jr $ra
- addiu $sp, $sp, 4
+ addu $4, $2, $0 # move return value to $4
no_spsr_restore:
jr $ra
diff --git a/x86/x86_emit.h b/x86/x86_emit.h
index 68930e1..ef79110 100644
--- a/x86/x86_emit.h
+++ b/x86/x86_emit.h
@@ -96,6 +96,7 @@ typedef enum
x86_opcode_push_reg = 0x50,
x86_opcode_push_rm = 0xFF,
x86_opcode_push_imm = 0x0668,
+ x86_opcode_pop_reg = 0x58,
x86_opcode_call_offset = 0xE8,
x86_opcode_ret = 0xC3,
x86_opcode_test_rm_imm = 0x00F7,
@@ -266,6 +267,12 @@ typedef enum
#define x86_emit_idiv_eax_reg(source) \
x86_emit_opcode_1b_ext_reg(idiv_eax_rm, source) \
+#define x86_emit_pop_reg(regn) /* emit "pop regn" using the pop_reg opcode (0x58) */ \
+ x86_emit_opcode_1b(pop_reg, regn) /* presumably opcode + register number in one byte; confirm in x86_emit_opcode_1b */ \
+
+#define x86_emit_push_reg(regn) /* emit "push regn" using the push_reg opcode (0x50) */ \
+ x86_emit_opcode_1b(push_reg, regn) /* counterpart of x86_emit_pop_reg */ \
+
#define x86_emit_push_mem(base, offset) \
x86_emit_opcode_1b_mem(push_rm, 0x06, base, offset) \
@@ -523,6 +530,28 @@ typedef enum
generate_function_call(execute_##name##_##flags_op##_reg); \
generate_mov(ireg, rv) \
+#ifdef TRACE_INSTRUCTIONS /* debug aid: report every emulated PC as it executes */
+ void function_cc trace_instruction(u32 pc) /* target of the call emitted below; pc = emulated PC */
+ {
+ printf("Executed %x\n", pc);
+ }
+ /* Emits a call to trace_instruction(pc) with eax, ecx and edx preserved around it. */
+ #define emit_trace_thumb_instruction(pc) \
+ x86_emit_push_reg(eax); /* save eax, ecx, edx on the native stack */ \
+ x86_emit_push_reg(ecx); \
+ x86_emit_push_reg(edx); \
+ x86_emit_mov_reg_imm(eax, pc); /* NOTE(review): assumes function_cc takes its first argument in eax; confirm */ \
+ generate_function_call(trace_instruction); \
+ x86_emit_pop_reg(edx); /* restore in reverse push order */ \
+ x86_emit_pop_reg(ecx); \
+ x86_emit_pop_reg(eax);
+ #define emit_trace_arm_instruction(pc) /* ARM instructions share the Thumb hook */ \
+ emit_trace_thumb_instruction(pc)
+#else /* tracing disabled: the hooks expand to nothing */
+ #define emit_trace_thumb_instruction(pc)
+ #define emit_trace_arm_instruction(pc)
+#endif
+
u32 function_cc execute_lsl_no_flags_reg(u32 value, u32 shift)
{
if(shift != 0)