From 11ec213c99d5d22905ff82cf3fb26ba6a8adf290 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:05:35 +0100 Subject: Make ewram memory linear This saves a few cycles in MIPS and simplifies the core a bit. Removed the write map, which only affects interpreter performance very minimally. Rewired ARM and x86 handlers to support direct access to I/EWRAM (and VRAM on ARM) to compensate. Overall performance is slightly better but code is cleaner and allows for further improvements in the dynarecs. --- arm/arm_stub.S | 168 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 129 insertions(+), 39 deletions(-) (limited to 'arm') diff --git a/arm/arm_stub.S b/arm/arm_stub.S index e8f7316..f5fceb0 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -4,7 +4,6 @@ .globl invalidate_cache_region .globl memory_map_read -.globl memory_map_write .globl reg .globl palette_ram .globl palette_ram_converted @@ -533,40 +532,84 @@ return_to_main: bx lr +#define store_align_8() ;\ + and r1, r1, #0xff ;\ + +#define store_align_16() ;\ + bic r0, r0, #0x01 ;\ + extract_u16(r1, r1) ;\ + +#define store_align_32() ;\ + bic r0, r0, #0x03 ;\ + +#define mask_addr_8(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits) /* high bits are now clear */;\ + +#define mask_addr_16(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits + 1) /* high bits are now clear */;\ + mov r0, r0, lsl #1 /* LSB is also zero */;\ + +#define mask_addr_32(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits + 2) /* high bits are now clear */;\ + mov r0, r0, lsl #2 /* 2 LSB are also zero */;\ + +@ Vram, OAM and palette memories can only be accessed at a 16 bit boundary +#define mask_addr_bus16_32(nbits) mask_addr_32(nbits) +#define mask_addr_bus16_16(nbits) mask_addr_16(nbits) +#define mask_addr_bus16_8(nbits) \ + 
mask_addr_16(nbits) \ + extract_u16(r1, r1) + + @ Write out to memory. @ Input: @ r0: address @ r1: value @ r2: current pc +@ +@ The instruction at LR is not an inst but a u32 data that contains the PC +@ Used for SMC. That's why return is essentially `pc = lr + 4` #define execute_store_body(store_type, store_op) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ + str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ tst r0, #0xF0000000 /* make sure address is in range */;\ bne ext_store_u##store_type /* if not do ext store */;\ ;\ - ldr r2, =memory_map_write /* r2 = memory_map_write */;\ - mov lr, r0, lsr #15 /* lr = page index of address */;\ - ldr r2, [r2, lr, lsl #2] /* r2 = memory page */;\ - ;\ - cmp r2, #0 /* see if map is ext */;\ - beq ext_store_u##store_type /* if so do ext store */;\ + ldr lr, =ptr_tbl_##store_type /* lr = ptr table */;\ + mov r4, r0, lsr #24 /* r4 = region number */;\ + ldr lr, [lr, r4, lsl #2] /* lr = function pointer */;\ + ldr r4, [reg_base, #REG_SAVE2] /* restore r4 */;\ + bx lr /* jump to handler */;\ ;\ - mov r0, r0, lsl #17 /* isolate bottom 15 bits in top */;\ - mov r0, r0, lsr #17 /* like performing and 0x7FFF */;\ - store_op r1, [r2, r0] /* store result */;\ - - -#define store_align_8() ;\ - and r1, r1, #0xff ;\ - -#define store_align_16() ;\ - bic r0, r0, #0x01 ;\ - extract_u16(r1, r1) ;\ - -#define store_align_32() ;\ - bic r0, r0, #0x03 ;\ +ptr_tbl_##store_type: ;\ + .word ext_store_ignore /* 0x00: BIOS, ignore */;\ + .word ext_store_ignore /* 0x01: ignore */;\ + .word ext_store_ewram_u##store_type /* 0x02: ewram */;\ + .word ext_store_iwram_u##store_type /* 0x03: iwram */;\ + .word ext_store_u##store_type /* 0x04: I/O regs */;\ + .word ext_store_u##store_type /* 0x05: palette RAM */;\ + .word ext_store_vram_u##store_type /* 0x06: vram */;\ + .word ext_store_u##store_type /* 0x07: oam ram */;\ + .word ext_store_u##store_type /* 0x08: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x09: gamepak: ignore */;\ 
+ .word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0B: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0C: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0D: EEPROM */;\ + .word ext_store_u##store_type /* 0x0E: backup */;\ + .word ext_store_ignore /* 0x0F: ignore */;\ + +@ for ignored areas, just return +ext_store_ignore: + ldr lr, [reg_base, #REG_SAVE3] @ pop lr off of stack + restore_flags() + add pc, lr, #4 @ return #define execute_store_builder(store_type, store_op, load_op) ;\ @@ -577,20 +620,6 @@ return_to_main: execute_store_u##store_type: ;\ _execute_store_u##store_type: ;\ execute_store_body(store_type, store_op) ;\ - sub r2, r2, #0x8000 /* Pointer to code status data */;\ - load_op r0, [r2, r0] /* check code flag */;\ - ;\ - cmp r0, #0 /* see if it's not 0 */;\ - bne 2f /* if so perform smc write */;\ - ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ - restore_flags() ;\ - add pc, lr, #4 /* return */;\ - ;\ -2: ;\ - ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ - ldr r0, [lr] /* load PC */;\ - str r0, [reg_base, #REG_PC] /* write out PC */;\ - b smc_write /* perform smc write */;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -599,11 +628,53 @@ ext_store_u##store_type: ;\ store_align_##store_type() ;\ call_c_function(write_memory##store_type) ;\ b write_epilogue /* handle additional write stuff */;\ + ;\ +ext_store_iwram_u##store_type: ;\ + mask_addr_##store_type(15) /* Mask to mirror memory (+align)*/;\ + ldr r2, =(iwram+0x8000) /* r2 = iwram base */;\ + store_op r1, [r0, r2] /* store data */;\ + sub r2, r2, #0x8000 /* r2 = iwram smc base */;\ + load_op r1, [r0, r2] /* r1 = SMC sentinel */;\ + cmp r1, #0 /* see if it's not 0 */;\ + bne 3f /* if so perform smc write */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +ext_store_ewram_u##store_type: ;\ + 
mask_addr_##store_type(18) /* Mask to mirror memory (+align)*/;\ + ldr r2, =(ewram) /* r2 = ewram base */;\ + store_op r1, [r0, r2] /* store data */;\ + add r2, r2, #0x40000 /* r2 = ewram smc base */;\ + load_op r1, [r0, r2] /* r1 = SMC sentinel */;\ + cmp r1, #0 /* see if it's not 0 */;\ + bne 3f /* if so perform smc write */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +ext_store_vram_u##store_type: ;\ + mask_addr_bus16_##store_type(17) /* Mask to mirror memory (+align)*/;\ + cmp r0, #0x18000 /* Check if exceeds 96KB */;\ + subcs r0, r0, #0x8000 /* Mirror to the last bank */;\ + ldr r2, =(vram) /* r2 = vram base */;\ + store_op r1, [r0, r2] /* store data */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +3: ;\ + ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ + ldr r0, [lr] /* load PC */;\ + str r0, [reg_base, #REG_PC] /* write out PC */;\ + b smc_write /* perform smc write */;\ + execute_store_builder(8, strb, ldrb) execute_store_builder(16, strh, ldrh) execute_store_builder(32, str, ldr) +@ This is a store that is executed in a strm case (so no SMC checks in-between) .globl execute_store_u32_safe .globl _execute_store_u32_safe @@ -619,6 +690,28 @@ ext_store_u32_safe: restore_flags() bx lr @ Return +ext_store_iwram_u32_safe: + mask_addr_8(15) @ Mask to mirror memory (no need to align!) + ldr r2, =(iwram+0x8000) @ r2 = iwram base + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + +ext_store_ewram_u32_safe: + mask_addr_8(18) @ Mask to mirror memory (no need to align!) + ldr r2, =(ewram) @ r2 = ewram base + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + +ext_store_vram_u32_safe: + mask_addr_8(17) @ Mask to mirror memory (no need to align!) 
+ ldr r2, =(vram) @ r2 = vram base + cmp r0, #0x18000 @ Check if exceeds 96KB + subcs r0, r0, #0x8000 @ Mirror to the last bank + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return write_epilogue: cmp r0, #0 @ check if the write rose an alert @@ -756,6 +849,7 @@ ext_load_##load_type: ;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ +.pool execute_load_builder(u8, 8, ldrneb, #0xF0000000) execute_load_builder(s8, 8, ldrnesb, #0xF0000000) @@ -763,14 +857,10 @@ execute_load_builder(u16, 16, ldrneh, #0xF0000001) execute_load_builder(s16, 16_signed, ldrnesh, #0xF0000001) execute_load_builder(u32, 32, ldrne, #0xF0000000) -.pool - .data memory_map_read: .space 0x8000 -memory_map_write: - .space 0x8000 palette_ram: .space 0x400 palette_ram_converted: -- cgit v1.2.3