From 11ec213c99d5d22905ff82cf3fb26ba6a8adf290 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:05:35 +0100 Subject: Make ewram memory lineal This saves a few cycles in MIPS and simplifies a bit the core. Removed the write map, only affects interpreter performance very minimally. Rewired ARM and x86 handlers to support direct access to I/EWRAM (and VRAM on ARM) to compensate. Overall performance is slightly better but code is cleaner and allows for further improvements in the dynarecs. --- arm/arm_stub.S | 168 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 129 insertions(+), 39 deletions(-) (limited to 'arm/arm_stub.S') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index e8f7316..f5fceb0 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -4,7 +4,6 @@ .globl invalidate_cache_region .globl memory_map_read -.globl memory_map_write .globl reg .globl palette_ram .globl palette_ram_converted @@ -533,40 +532,84 @@ return_to_main: bx lr +#define store_align_8() ;\ + and r1, r1, #0xff ;\ + +#define store_align_16() ;\ + bic r0, r0, #0x01 ;\ + extract_u16(r1, r1) ;\ + +#define store_align_32() ;\ + bic r0, r0, #0x03 ;\ + +#define mask_addr_8(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits) /* high bits are now clear */;\ + +#define mask_addr_16(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits + 1) /* high bits are now clear */;\ + mov r0, r0, lsl #1 /* LSB is also zero */;\ + +#define mask_addr_32(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits + 2) /* high bits are now clear */;\ + mov r0, r0, lsl #2 /* 2 LSB are also zero */;\ + +@ Vram, OAM and palette memories can only be accessed at a 16 bit boundary +#define mask_addr_bus16_32(nbits) mask_addr_32(nbits) +#define mask_addr_bus16_16(nbits) mask_addr_16(nbits) +#define 
mask_addr_bus16_8(nbits) \ + mask_addr_16(nbits) \ + extract_u16(r1, r1) + + @ Write out to memory. @ Input: @ r0: address @ r1: value @ r2: current pc +@ +@ The instruction at LR is not an inst but a u32 data that contains the PC +@ Used for SMC. That's why return is essentially `pc = lr + 4` #define execute_store_body(store_type, store_op) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ + str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ tst r0, #0xF0000000 /* make sure address is in range */;\ bne ext_store_u##store_type /* if not do ext store */;\ ;\ - ldr r2, =memory_map_write /* r2 = memory_map_write */;\ - mov lr, r0, lsr #15 /* lr = page index of address */;\ - ldr r2, [r2, lr, lsl #2] /* r2 = memory page */;\ - ;\ - cmp r2, #0 /* see if map is ext */;\ - beq ext_store_u##store_type /* if so do ext store */;\ + ldr lr, =ptr_tbl_##store_type /* lr = ptr table */;\ + mov r4, r0, lsr #24 /* r4 = region number */;\ + ldr lr, [lr, r4, lsl #2] /* lr = function pointer */;\ + ldr r4, [reg_base, #REG_SAVE2] /* restore r4 */;\ + bx lr /* jump to handler */;\ ;\ - mov r0, r0, lsl #17 /* isolate bottom 15 bits in top */;\ - mov r0, r0, lsr #17 /* like performing and 0x7FFF */;\ - store_op r1, [r2, r0] /* store result */;\ - - -#define store_align_8() ;\ - and r1, r1, #0xff ;\ - -#define store_align_16() ;\ - bic r0, r0, #0x01 ;\ - extract_u16(r1, r1) ;\ - -#define store_align_32() ;\ - bic r0, r0, #0x03 ;\ +ptr_tbl_##store_type: ;\ + .word ext_store_ignore /* 0x00: BIOS, ignore */;\ + .word ext_store_ignore /* 0x01: ignore */;\ + .word ext_store_ewram_u##store_type /* 0x02: ewram */;\ + .word ext_store_iwram_u##store_type /* 0x03: iwram */;\ + .word ext_store_u##store_type /* 0x04: I/O regs */;\ + .word ext_store_u##store_type /* 0x05: palette RAM */;\ + .word ext_store_vram_u##store_type /* 0x06: vram */;\ + .word ext_store_u##store_type /* 0x07: oam ram */;\ + .word ext_store_u##store_type /* 0x08: gamepak: ignore */;\ + .word ext_store_u##store_type 
/* 0x09: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0B: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0C: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0D: EEPROM */;\ + .word ext_store_u##store_type /* 0x0E: backup */;\ + .word ext_store_ignore /* 0x0F: ignore */;\ + +@ for ignored areas, just return +ext_store_ignore: + ldr lr, [reg_base, #REG_SAVE3] @ pop lr off of stack + restore_flags() + add pc, lr, #4 @ return #define execute_store_builder(store_type, store_op, load_op) ;\ @@ -577,20 +620,6 @@ return_to_main: execute_store_u##store_type: ;\ _execute_store_u##store_type: ;\ execute_store_body(store_type, store_op) ;\ - sub r2, r2, #0x8000 /* Pointer to code status data */;\ - load_op r0, [r2, r0] /* check code flag */;\ - ;\ - cmp r0, #0 /* see if it's not 0 */;\ - bne 2f /* if so perform smc write */;\ - ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ - restore_flags() ;\ - add pc, lr, #4 /* return */;\ - ;\ -2: ;\ - ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ - ldr r0, [lr] /* load PC */;\ - str r0, [reg_base, #REG_PC] /* write out PC */;\ - b smc_write /* perform smc write */;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -599,11 +628,53 @@ ext_store_u##store_type: ;\ store_align_##store_type() ;\ call_c_function(write_memory##store_type) ;\ b write_epilogue /* handle additional write stuff */;\ + ;\ +ext_store_iwram_u##store_type: ;\ + mask_addr_##store_type(15) /* Mask to mirror memory (+align)*/;\ + ldr r2, =(iwram+0x8000) /* r2 = iwram base */;\ + store_op r1, [r0, r2] /* store data */;\ + sub r2, r2, #0x8000 /* r2 = iwram smc base */;\ + load_op r1, [r0, r2] /* r1 = SMC sentinel */;\ + cmp r1, #0 /* see if it's not 0 */;\ + bne 3f /* if so perform smc write */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ 
+ext_store_ewram_u##store_type: ;\ + mask_addr_##store_type(18) /* Mask to mirror memory (+align)*/;\ + ldr r2, =(ewram) /* r2 = ewram base */;\ + store_op r1, [r0, r2] /* store data */;\ + add r2, r2, #0x40000 /* r2 = ewram smc base */;\ + load_op r1, [r0, r2] /* r1 = SMC sentinel */;\ + cmp r1, #0 /* see if it's not 0 */;\ + bne 3f /* if so perform smc write */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +ext_store_vram_u##store_type: ;\ + mask_addr_bus16_##store_type(17) /* Mask to mirror memory (+align)*/;\ + cmp r0, #0x18000 /* Check if exceeds 96KB */;\ + subcs r0, r0, #0x8000 /* Mirror to the last bank */;\ + ldr r2, =(vram) /* r2 = vram base */;\ + store_op r1, [r0, r2] /* store data */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +3: ;\ + ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ + ldr r0, [lr] /* load PC */;\ + str r0, [reg_base, #REG_PC] /* write out PC */;\ + b smc_write /* perform smc write */;\ + execute_store_builder(8, strb, ldrb) execute_store_builder(16, strh, ldrh) execute_store_builder(32, str, ldr) +@ This is a store that is executed in a strm case (so no SMC checks in-between) .globl execute_store_u32_safe .globl _execute_store_u32_safe @@ -619,6 +690,28 @@ ext_store_u32_safe: restore_flags() bx lr @ Return +ext_store_iwram_u32_safe: + mask_addr_8(15) @ Mask to mirror memory (no need to align!) + ldr r2, =(iwram+0x8000) @ r2 = iwram base + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + +ext_store_ewram_u32_safe: + mask_addr_8(18) @ Mask to mirror memory (no need to align!) + ldr r2, =(ewram) @ r2 = ewram base + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + +ext_store_vram_u32_safe: + mask_addr_8(17) @ Mask to mirror memory (no need to align!) 
+ ldr r2, =(vram) @ r2 = vram base + cmp r0, #0x18000 @ Check if exceeds 96KB + subcs r0, r0, #0x8000 @ Mirror to the last bank + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return write_epilogue: cmp r0, #0 @ check if the write rose an alert @@ -756,6 +849,7 @@ ext_load_##load_type: ;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ +.pool execute_load_builder(u8, 8, ldrneb, #0xF0000000) execute_load_builder(s8, 8, ldrnesb, #0xF0000000) @@ -763,14 +857,10 @@ execute_load_builder(u16, 16, ldrneh, #0xF0000001) execute_load_builder(s16, 16_signed, ldrnesh, #0xF0000001) execute_load_builder(u32, 32, ldrne, #0xF0000000) -.pool - .data memory_map_read: .space 0x8000 -memory_map_write: - .space 0x8000 palette_ram: .space 0x400 palette_ram_converted: -- cgit v1.2.3 From ff510e7f7a0c04c7862e598e8bfc75747f3bf7d1 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:47:51 +0100 Subject: Move caches to stub files to get around gcc 10 Seems that using the __attribute__ magic for sections is not the best way of doing this, since it injects some default attributes that collide with the user defined ones. Using assembly is far easier in this case. Reworked definitions a bit to make it easier to import from assembly. Also wrapped stuff around macros for easy and less verbose implementation of the symbol prefix issue. 
--- arm/arm_stub.S | 128 +++++++++++++++++++++++---------------------------------- 1 file changed, 51 insertions(+), 77 deletions(-) (limited to 'arm/arm_stub.S') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index f5fceb0..f0b7f52 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -1,14 +1,14 @@ -.align 2 -.globl invalidate_icache_region -.globl invalidate_cache_region +#include "../gpsp_config.h" + +#define defsymbl(symbol) \ +.global symbol ; \ +.global _##symbol ; \ +symbol: \ +_##symbol: -.globl memory_map_read -.globl reg -.globl palette_ram -.globl palette_ram_converted -.globl reg_mode -.globl spsr +.text +.align 2 #define REG_R0 (0 * 4) #define REG_R1 (1 * 4) @@ -178,10 +178,7 @@ #define arm_update_gba_builder(name, mode, return_op) ;\ ;\ .align 2 ;\ -.globl arm_update_gba_##name ;\ -.globl _arm_update_gba_##name ;\ -arm_update_gba_##name: ;\ -_arm_update_gba_##name: ;\ +defsymbl(arm_update_gba_##name) ;\ load_pc_##return_op() ;\ str r0, [reg_base, #REG_PC] /* write out the PC */;\ ;\ @@ -243,30 +240,21 @@ arm_update_gba_builder(idle_thumb, thumb, add) @ r0: PC to branch to .align 2 -.globl arm_indirect_branch_arm -.globl _arm_indirect_branch_arm -arm_indirect_branch_arm: -_arm_indirect_branch_arm: +defsymbl(arm_indirect_branch_arm) save_flags() call_c_function(block_lookup_address_arm) restore_flags() bx r0 .align 2 -.globl arm_indirect_branch_thumb -.globl _arm_indirect_branch_thumb -arm_indirect_branch_thumb: -_arm_indirect_branch_thumb: +defsymbl(arm_indirect_branch_thumb) save_flags() call_c_function(block_lookup_address_thumb) restore_flags() bx r0 .align 2 -.globl arm_indirect_branch_dual_arm -.globl _arm_indirect_branch_dual_arm -arm_indirect_branch_dual_arm: -_arm_indirect_branch_dual_arm: +defsymbl(arm_indirect_branch_dual_arm) save_flags() tst r0, #0x01 @ check lower bit bne 1f @ if set going to Thumb mode @@ -286,10 +274,7 @@ _arm_indirect_branch_dual_arm: bx r0 @ return .align 2 -.globl arm_indirect_branch_dual_thumb -.globl 
_arm_indirect_branch_dual_thumb -arm_indirect_branch_dual_thumb: -_arm_indirect_branch_dual_thumb: +defsymbl(arm_indirect_branch_dual_thumb) save_flags() tst r0, #0x01 @ check lower bit beq 1f @ if set going to ARM mode @@ -317,10 +302,7 @@ _arm_indirect_branch_dual_thumb: @ r2: current PC .align 2 -.globl execute_store_cpsr -.globl _execute_store_cpsr -execute_store_cpsr: -_execute_store_cpsr: +defsymbl(execute_store_cpsr) save_flags() and reg_flags, r0, r1 @ reg_flags = new_cpsr & store_mask ldr r0, [reg_base, #REG_CPSR] @ r0 = cpsr @@ -354,10 +336,7 @@ _execute_store_cpsr: @ r1: bitmask of which bits in spsr to update .align 2 -.globl execute_store_spsr -.globl _execute_store_spsr -execute_store_spsr: -_execute_store_spsr: +defsymbl(execute_store_spsr) ldr r1, =spsr @ r1 = spsr ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr @@ -369,10 +348,7 @@ _execute_store_spsr: @ r0: spsr .align 2 -.globl execute_read_spsr -.globl _execute_read_spsr -execute_read_spsr: -_execute_read_spsr: +defsymbl(execute_read_spsr) ldr r0, =spsr @ r0 = spsr ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE] @@ -385,10 +361,7 @@ _execute_read_spsr: @ r0: current pc .align 2 -.globl execute_spsr_restore -.globl _execute_spsr_restore -execute_spsr_restore: -_execute_spsr_restore: +defsymbl(execute_spsr_restore) save_flags() ldr r1, =spsr @ r1 = spsr ldr r2, [reg_base, #CPU_MODE] @ r2 = cpu_mode @@ -425,10 +398,7 @@ _execute_spsr_restore: #define execute_swi_builder(mode) ;\ ;\ .align 2 ;\ -.globl execute_swi_##mode ;\ -.globl _execute_swi_##mode ;\ -execute_swi_##mode: ;\ -_execute_swi_##mode: ;\ +defsymbl(execute_swi_##mode) ;\ save_flags() ;\ ldr r1, =reg_mode /* r1 = reg_mode */;\ /* reg_mode[MODE_SUPERVISOR][6] = pc */;\ @@ -460,10 +430,7 @@ execute_swi_builder(thumb) #define execute_swi_function_builder(swi_function, mode) ;\ ;\ .align 2 ;\ -.globl execute_swi_hle_##swi_function##_##mode ;\ 
-.globl _execute_swi_hle_##swi_function##_##mode ;\ -execute_swi_hle_##swi_function##_##mode: ;\ -_execute_swi_hle_##swi_function##_##mode: ;\ +defsymbl(execute_swi_hle_##swi_function##_##mode) ;\ save_flags() ;\ store_registers_##mode() ;\ call_c_function(execute_swi_hle_##swi_function##_c) ;\ @@ -485,10 +452,7 @@ execute_swi_function_builder(div, thumb) @ Uses sp as reg_base; must hold consistently true. .align 2 -.globl execute_arm_translate -.globl _execute_arm_translate -execute_arm_translate: -_execute_arm_translate: +defsymbl(execute_arm_translate) @ save the registers to be able to return later stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } @@ -615,10 +579,7 @@ ext_store_ignore: #define execute_store_builder(store_type, store_op, load_op) ;\ ;\ .align 2 ;\ -.globl execute_store_u##store_type ;\ -.globl _execute_store_u##store_type ;\ -execute_store_u##store_type: ;\ -_execute_store_u##store_type: ;\ +defsymbl(execute_store_u##store_type) ;\ execute_store_body(store_type, store_op) ;\ ;\ ext_store_u##store_type: ;\ @@ -676,10 +637,7 @@ execute_store_builder(32, str, ldr) @ This is a store that is executed in a strm case (so no SMC checks in-between) -.globl execute_store_u32_safe -.globl _execute_store_u32_safe -execute_store_u32_safe: -_execute_store_u32_safe: +defsymbl(execute_store_u32_safe) execute_store_body(32_safe, str) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return @@ -822,10 +780,7 @@ lookup_pc_arm: #define execute_load_builder(load_type, load_function, load_op, mask) ;\ ;\ .align 2 ;\ -.globl execute_load_##load_type ;\ -.globl _execute_load_##load_type ;\ -execute_load_##load_type: ;\ -_execute_load_##load_type: ;\ +defsymbl(execute_load_##load_type) ;\ save_flags() ;\ tst r0, mask /* make sure address is in range */;\ bne ext_load_##load_type /* if not do ext load */;\ @@ -859,19 +814,38 @@ execute_load_builder(u32, 32, ldrne, #0xF0000000) .data -memory_map_read: +defsymbl(memory_map_read) .space 0x8000 -palette_ram: 
+defsymbl(palette_ram) .space 0x400 -palette_ram_converted: +defsymbl(palette_ram_converted) .space 0x400 -spsr: +defsymbl(spsr) .space 24 -reg_mode: +defsymbl(reg_mode) .space 196 -.globl reg -.globl _reg -reg: +defsymbl(reg) .space 0x100, 0 +@ Vita and 3DS (and of course mmap) map their own cache sections through some +@ platform-speficic mechanisms. +#if !defined(HAVE_MMAP) && !defined(VITA) && !defined(_3DS) + +@ Make this section executable! +.text +#ifdef __ANDROID__ +@ Unfortunately Android builds don't like nobits, so we ship a ton of zeros +@ TODO: Revisit this whenever we upgrade to the latest clang NDK +.section .jit,"awx",%progbits +#else +.section .jit,"awx",%nobits +#endif +.align 4 +defsymbl(rom_translation_cache) + .space ROM_TRANSLATION_CACHE_SIZE +defsymbl(ram_translation_cache) + .space RAM_TRANSLATION_CACHE_SIZE + +#endif + -- cgit v1.2.3 From a494a3f00ee3bd35ee9ab76f8cd4f164da080113 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Thu, 25 Mar 2021 21:02:06 +0100 Subject: Move OAM update flag to a register Fix a small bug in MIPS dynarec that affects non -G0 targets --- arm/arm_stub.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arm/arm_stub.S') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index f0b7f52..8e6cc9b 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -43,8 +43,8 @@ _##symbol: #define CPU_HALT_STATE (30 * 4) #define CHANGED_PC_STATUS (31 * 4) #define COMPLETED_FRAME (32 * 4) - -#define MAIN_THREAD_SP (33 * 4) +#define OAM_UPDATED (33 * 4) +#define MAIN_THREAD_SP (34 * 4) #define reg_a0 r0 #define reg_a1 r1 -- cgit v1.2.3 From 7ea6c5e247a742af6f7acfbf215c23264410451f Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Thu, 25 Mar 2021 23:01:20 +0100 Subject: Move OAM RAM to stubs also Makes accesses more efficient for MIPS. Make accesses also fast for palette reads. 
--- arm/arm_stub.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arm/arm_stub.S') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 8e6cc9b..374daba 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -820,6 +820,8 @@ defsymbl(palette_ram) .space 0x400 defsymbl(palette_ram_converted) .space 0x400 +defsymbl(oam_ram) + .space 0x400 defsymbl(spsr) .space 24 defsymbl(reg_mode) -- cgit v1.2.3 From d284c868e9e23fb210b8c448cdace39f394cb895 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 26 Mar 2021 13:00:08 +0100 Subject: Improve ARM store accesses --- arm/arm_stub.S | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'arm/arm_stub.S') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 374daba..1db913e 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -559,7 +559,7 @@ ptr_tbl_##store_type: ;\ .word ext_store_u##store_type /* 0x04: I/O regs */;\ .word ext_store_u##store_type /* 0x05: palette RAM */;\ .word ext_store_vram_u##store_type /* 0x06: vram */;\ - .word ext_store_u##store_type /* 0x07: oam ram */;\ + .word ext_store_oam_ram_u##store_type /* 0x07: oam ram */;\ .word ext_store_u##store_type /* 0x08: gamepak: ignore */;\ .word ext_store_u##store_type /* 0x09: gamepak: ignore */;\ .word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\ @@ -624,6 +624,15 @@ ext_store_vram_u##store_type: ;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ ;\ +ext_store_oam_ram_u##store_type: ;\ + mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\ + add r2, reg_base, #256 /* r2 = oam ram base */;\ + store_op r1, [r0, r2] /* store data */;\ + str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ 3: ;\ ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ ldr r0, [lr] /* load PC */;\ @@ -671,6 +680,14 @@ ext_store_vram_u32_safe: restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ 
return +ext_store_oam_ram_u32_safe: + mask_addr_8(10) @ Mask to mirror memory (no need to align!) + add r2, reg_base, #256 @ r2 = oam ram base + str r1, [r0, r2] @ store data + str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + write_epilogue: cmp r0, #0 @ check if the write rose an alert beq 4f @ if not we can exit @@ -820,8 +837,6 @@ defsymbl(palette_ram) .space 0x400 defsymbl(palette_ram_converted) .space 0x400 -defsymbl(oam_ram) - .space 0x400 defsymbl(spsr) .space 24 defsymbl(reg_mode) @@ -829,6 +844,8 @@ defsymbl(reg_mode) defsymbl(reg) .space 0x100, 0 +defsymbl(oam_ram) + .space 0x400 @ Vita and 3DS (and of course mmap) map their own cache sections through some @ platform-speficic mechanisms. -- cgit v1.2.3 From 452ba76ba898c5fc6d176ae8f8e2d77cf15f64a2 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 26 Mar 2021 13:25:50 +0100 Subject: Fix 16 bit RAM stores (VRAM and OAM) in ARM --- arm/arm_stub.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arm/arm_stub.S') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 1db913e..5917e82 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -538,7 +538,7 @@ return_to_main: @ The instruction at LR is not an inst but a u32 data that contains the PC @ Used for SMC. 
That's why return is essentially `pc = lr + 4` -#define execute_store_body(store_type, store_op) ;\ +#define execute_store_body(store_type) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ @@ -576,11 +576,11 @@ ext_store_ignore: add pc, lr, #4 @ return -#define execute_store_builder(store_type, store_op, load_op) ;\ +#define execute_store_builder(store_type, store_op, store_op16, load_op) ;\ ;\ .align 2 ;\ defsymbl(execute_store_u##store_type) ;\ - execute_store_body(store_type, store_op) ;\ + execute_store_body(store_type) ;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -619,7 +619,7 @@ ext_store_vram_u##store_type: ;\ cmp r0, #0x18000 /* Check if exceeds 96KB */;\ subcs r0, r0, #0x8000 /* Mirror to the last bank */;\ ldr r2, =(vram) /* r2 = vram base */;\ - store_op r1, [r0, r2] /* store data */;\ + store_op16 r1, [r0, r2] /* store data */;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ @@ -627,7 +627,7 @@ ext_store_vram_u##store_type: ;\ ext_store_oam_ram_u##store_type: ;\ mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\ add r2, reg_base, #256 /* r2 = oam ram base */;\ - store_op r1, [r0, r2] /* store data */;\ + store_op16 r1, [r0, r2] /* store data */;\ str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ restore_flags() ;\ @@ -640,14 +640,14 @@ ext_store_oam_ram_u##store_type: ;\ b smc_write /* perform smc write */;\ -execute_store_builder(8, strb, ldrb) -execute_store_builder(16, strh, ldrh) -execute_store_builder(32, str, ldr) +execute_store_builder(8, strb, strh, ldrb) +execute_store_builder(16, strh, strh, ldrh) +execute_store_builder(32, str, str, ldr) @ This is a store that is executed in a strm case (so no SMC checks in-between) defsymbl(execute_store_u32_safe) - 
execute_store_body(32_safe, str) + execute_store_body(32_safe) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return -- cgit v1.2.3