diff options
author | David Guillen Fandos | 2021-03-23 19:05:35 +0100 |
---|---|---|
committer | David Guillen Fandos | 2021-03-23 19:09:56 +0100 |
commit | 11ec213c99d5d22905ff82cf3fb26ba6a8adf290 (patch) | |
tree | 0af3ed99246d3bdb2d2b22f1420bddf2fafba507 | |
parent | 7e27010a3c08811e4ed04097e1961009c3fef8d7 (diff) | |
download | picogpsp-11ec213c99d5d22905ff82cf3fb26ba6a8adf290.tar.gz picogpsp-11ec213c99d5d22905ff82cf3fb26ba6a8adf290.tar.bz2 picogpsp-11ec213c99d5d22905ff82cf3fb26ba6a8adf290.zip |
Make ewram memory linear
This saves a few cycles on MIPS and simplifies the core a bit.
Removed the write map; this only affects interpreter performance very
minimally. Rewired the ARM and x86 handlers to support direct access to
I/EWRAM (and VRAM on ARM) to compensate. Overall performance is slightly
better, and the code is cleaner and allows for further improvements in the
dynarecs.
-rw-r--r-- | arm/arm_stub.S | 168 | ||||
-rw-r--r-- | cpu.c | 30 | ||||
-rw-r--r-- | cpu.h | 4 | ||||
-rw-r--r-- | cpu_threaded.c | 60 | ||||
-rw-r--r-- | gba_memory.c | 121 | ||||
-rw-r--r-- | gba_memory.h | 1 | ||||
-rw-r--r-- | libretro.c | 11 | ||||
-rw-r--r-- | psp/mips_emit.h | 80 | ||||
-rw-r--r-- | psp/mips_stub.S | 4 | ||||
-rw-r--r-- | x86/x86_emit.h | 16 | ||||
-rw-r--r-- | x86/x86_stub.S | 126 |
11 files changed, 262 insertions, 359 deletions
diff --git a/arm/arm_stub.S b/arm/arm_stub.S index e8f7316..f5fceb0 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -4,7 +4,6 @@ .globl invalidate_cache_region .globl memory_map_read -.globl memory_map_write .globl reg .globl palette_ram .globl palette_ram_converted @@ -533,40 +532,84 @@ return_to_main: bx lr +#define store_align_8() ;\ + and r1, r1, #0xff ;\ + +#define store_align_16() ;\ + bic r0, r0, #0x01 ;\ + extract_u16(r1, r1) ;\ + +#define store_align_32() ;\ + bic r0, r0, #0x03 ;\ + +#define mask_addr_8(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits) /* high bits are now clear */;\ + +#define mask_addr_16(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits + 1) /* high bits are now clear */;\ + mov r0, r0, lsl #1 /* LSB is also zero */;\ + +#define mask_addr_32(nbits) ;\ + mov r0, r0, lsl #(32 - nbits) /* isolate bottom n bits in top */;\ + mov r0, r0, lsr #(32 - nbits + 2) /* high bits are now clear */;\ + mov r0, r0, lsl #2 /* 2 LSB are also zero */;\ + +@ Vram, OAM and palette memories can only be accessed at a 16 bit boundary +#define mask_addr_bus16_32(nbits) mask_addr_32(nbits) +#define mask_addr_bus16_16(nbits) mask_addr_16(nbits) +#define mask_addr_bus16_8(nbits) \ + mask_addr_16(nbits) \ + extract_u16(r1, r1) + + @ Write out to memory. @ Input: @ r0: address @ r1: value @ r2: current pc +@ +@ The instruction at LR is not an inst but a u32 data that contains the PC +@ Used for SMC. 
That's why return is essentially `pc = lr + 4` #define execute_store_body(store_type, store_op) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ + str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ tst r0, #0xF0000000 /* make sure address is in range */;\ bne ext_store_u##store_type /* if not do ext store */;\ ;\ - ldr r2, =memory_map_write /* r2 = memory_map_write */;\ - mov lr, r0, lsr #15 /* lr = page index of address */;\ - ldr r2, [r2, lr, lsl #2] /* r2 = memory page */;\ - ;\ - cmp r2, #0 /* see if map is ext */;\ - beq ext_store_u##store_type /* if so do ext store */;\ + ldr lr, =ptr_tbl_##store_type /* lr = ptr table */;\ + mov r4, r0, lsr #24 /* r4 = region number */;\ + ldr lr, [lr, r4, lsl #2] /* lr = function pointer */;\ + ldr r4, [reg_base, #REG_SAVE2] /* restore r4 */;\ + bx lr /* jump to handler */;\ ;\ - mov r0, r0, lsl #17 /* isolate bottom 15 bits in top */;\ - mov r0, r0, lsr #17 /* like performing and 0x7FFF */;\ - store_op r1, [r2, r0] /* store result */;\ - - -#define store_align_8() ;\ - and r1, r1, #0xff ;\ - -#define store_align_16() ;\ - bic r0, r0, #0x01 ;\ - extract_u16(r1, r1) ;\ - -#define store_align_32() ;\ - bic r0, r0, #0x03 ;\ +ptr_tbl_##store_type: ;\ + .word ext_store_ignore /* 0x00: BIOS, ignore */;\ + .word ext_store_ignore /* 0x01: ignore */;\ + .word ext_store_ewram_u##store_type /* 0x02: ewram */;\ + .word ext_store_iwram_u##store_type /* 0x03: iwram */;\ + .word ext_store_u##store_type /* 0x04: I/O regs */;\ + .word ext_store_u##store_type /* 0x05: palette RAM */;\ + .word ext_store_vram_u##store_type /* 0x06: vram */;\ + .word ext_store_u##store_type /* 0x07: oam ram */;\ + .word ext_store_u##store_type /* 0x08: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x09: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0B: gamepak: ignore */;\ + .word ext_store_u##store_type /* 0x0C: gamepak: ignore */;\ + .word ext_store_u##store_type 
/* 0x0D: EEPROM */;\ + .word ext_store_u##store_type /* 0x0E: backup */;\ + .word ext_store_ignore /* 0x0F: ignore */;\ + +@ for ignored areas, just return +ext_store_ignore: + ldr lr, [reg_base, #REG_SAVE3] @ pop lr off of stack + restore_flags() + add pc, lr, #4 @ return #define execute_store_builder(store_type, store_op, load_op) ;\ @@ -577,20 +620,6 @@ return_to_main: execute_store_u##store_type: ;\ _execute_store_u##store_type: ;\ execute_store_body(store_type, store_op) ;\ - sub r2, r2, #0x8000 /* Pointer to code status data */;\ - load_op r0, [r2, r0] /* check code flag */;\ - ;\ - cmp r0, #0 /* see if it's not 0 */;\ - bne 2f /* if so perform smc write */;\ - ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ - restore_flags() ;\ - add pc, lr, #4 /* return */;\ - ;\ -2: ;\ - ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ - ldr r0, [lr] /* load PC */;\ - str r0, [reg_base, #REG_PC] /* write out PC */;\ - b smc_write /* perform smc write */;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -599,11 +628,53 @@ ext_store_u##store_type: ;\ store_align_##store_type() ;\ call_c_function(write_memory##store_type) ;\ b write_epilogue /* handle additional write stuff */;\ + ;\ +ext_store_iwram_u##store_type: ;\ + mask_addr_##store_type(15) /* Mask to mirror memory (+align)*/;\ + ldr r2, =(iwram+0x8000) /* r2 = iwram base */;\ + store_op r1, [r0, r2] /* store data */;\ + sub r2, r2, #0x8000 /* r2 = iwram smc base */;\ + load_op r1, [r0, r2] /* r1 = SMC sentinel */;\ + cmp r1, #0 /* see if it's not 0 */;\ + bne 3f /* if so perform smc write */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +ext_store_ewram_u##store_type: ;\ + mask_addr_##store_type(18) /* Mask to mirror memory (+align)*/;\ + ldr r2, =(ewram) /* r2 = ewram base */;\ + store_op r1, [r0, r2] /* store data */;\ + add r2, r2, #0x40000 /* r2 = ewram smc base */;\ + load_op r1, [r0, r2] /* 
r1 = SMC sentinel */;\ + cmp r1, #0 /* see if it's not 0 */;\ + bne 3f /* if so perform smc write */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +ext_store_vram_u##store_type: ;\ + mask_addr_bus16_##store_type(17) /* Mask to mirror memory (+align)*/;\ + cmp r0, #0x18000 /* Check if exceeds 96KB */;\ + subcs r0, r0, #0x8000 /* Mirror to the last bank */;\ + ldr r2, =(vram) /* r2 = vram base */;\ + store_op r1, [r0, r2] /* store data */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +3: ;\ + ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\ + ldr r0, [lr] /* load PC */;\ + str r0, [reg_base, #REG_PC] /* write out PC */;\ + b smc_write /* perform smc write */;\ + execute_store_builder(8, strb, ldrb) execute_store_builder(16, strh, ldrh) execute_store_builder(32, str, ldr) +@ This is a store that is executed in a strm case (so no SMC checks in-between) .globl execute_store_u32_safe .globl _execute_store_u32_safe @@ -619,6 +690,28 @@ ext_store_u32_safe: restore_flags() bx lr @ Return +ext_store_iwram_u32_safe: + mask_addr_8(15) @ Mask to mirror memory (no need to align!) + ldr r2, =(iwram+0x8000) @ r2 = iwram base + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + +ext_store_ewram_u32_safe: + mask_addr_8(18) @ Mask to mirror memory (no need to align!) + ldr r2, =(ewram) @ r2 = ewram base + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + +ext_store_vram_u32_safe: + mask_addr_8(17) @ Mask to mirror memory (no need to align!) 
+ ldr r2, =(vram) @ r2 = vram base + cmp r0, #0x18000 @ Check if exceeds 96KB + subcs r0, r0, #0x8000 @ Mirror to the last bank + str r1, [r0, r2] @ store data + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return write_epilogue: cmp r0, #0 @ check if the write rose an alert @@ -756,6 +849,7 @@ ext_load_##load_type: ;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ +.pool execute_load_builder(u8, 8, ldrneb, #0xF0000000) execute_load_builder(s8, 8, ldrnesb, #0xF0000000) @@ -763,14 +857,10 @@ execute_load_builder(u16, 16, ldrneh, #0xF0000001) execute_load_builder(s16, 16_signed, ldrnesh, #0xF0000001) execute_load_builder(u32, 32, ldrne, #0xF0000000) -.pool - .data memory_map_read: .space 0x8000 -memory_map_write: - .space 0x8000 palette_ram: .space 0x400 palette_ram_converted: @@ -1003,7 +1003,6 @@ const u32 psr_masks[16] = #define fast_write_memory(size, type, address, value) \ { \ - u8 *map; \ u32 _address = (address) & ~(aligned_address_mask##size & 0x03); \ if(_address < 0x10000000) \ { \ @@ -1011,17 +1010,9 @@ const u32 psr_masks[16] = memory_writes_##type++; \ } \ \ - if(((_address & aligned_address_mask##size) == 0) && \ - (map = memory_map_write[_address >> 15])) \ - { \ - *((type *)((u8 *)map + (_address & 0x7FFF))) = value; \ - } \ - else \ - { \ - cpu_alert = write_memory##size(_address, value); \ - if(cpu_alert) \ - goto alert; \ - } \ + cpu_alert = write_memory##size(_address, value); \ + if(cpu_alert) \ + goto alert; \ } \ #define load_aligned32(address, dest) \ @@ -1046,22 +1037,14 @@ const u32 psr_masks[16] = #define store_aligned32(address, value) \ { \ u32 _address = address; \ - u8 *map = memory_map_write[_address >> 15]; \ if(_address < 0x10000000) \ { \ memory_region_access_write_u32[_address >> 24]++; \ memory_writes_u32++; \ } \ - if(map) \ - { \ - address32(map, _address & 0x7FFF) = value; \ - } \ - else \ - { \ - cpu_alert = write_memory32(_address, value); \ - if(cpu_alert) \ - goto alert; \ - } \ + cpu_alert = 
write_memory32(_address, value); \ + if(cpu_alert) \ + goto alert; \ } \ #define load_memory_u8(address, dest) \ @@ -1647,7 +1630,6 @@ void raise_interrupt(irq_type irq_raised) #ifndef HAVE_DYNAREC u8 *memory_map_read [8 * 1024]; -u8 *memory_map_write[8 * 1024]; u16 palette_ram[512]; u16 palette_ram_converted[512]; #endif @@ -155,11 +155,7 @@ extern u8 *ram_translation_ptr; #define MAX_TRANSLATION_GATES 8 extern u32 idle_loop_target_pc; -extern u32 force_pc_update_target; extern u32 iwram_stack_optimize; -extern u32 allow_smc_ram_u8; -extern u32 allow_smc_ram_u16; -extern u32 allow_smc_ram_u32; extern u32 direct_map_vram; extern u32 translation_gate_targets; extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES]; diff --git a/cpu_threaded.c b/cpu_threaded.c index 519d7f0..555b9c6 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -76,12 +76,6 @@ u32 ewram_code_max = 0xFFFFFFFF; u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE]; -// Default -u32 force_pc_update_target = 0xFFFFFFFF; -u32 allow_smc_ram_u8 = 1; -u32 allow_smc_ram_u16 = 1; -u32 allow_smc_ram_u32 = 1; - typedef struct { u8 *block_offset; @@ -2813,7 +2807,7 @@ u8 function_cc *block_lookup_address_##type(u32 pc) \ switch(pc >> 24) \ { \ case 0x2: \ - location = (u16 *)(ewram + (pc & 0x7FFF) + ((pc & 0x38000) * 2)); \ + location = (u16 *)(ewram + (pc & 0x3FFFF) + 0x40000); \ block_lookup_translate(type, ram, 1); \ break; \ \ @@ -3119,18 +3113,22 @@ block_lookup_address_builder(dual); block_data_type block_data[MAX_BLOCK_SIZE]; block_exit_type block_exits[MAX_EXITS]; -#define smc_write_arm_yes() \ - if(address32(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) == 0x0000) \ +#define smc_write_arm_yes() { \ + int offset = (pc < 0x03000000) ? 
0x40000 : -0x8000; \ + if(address32(pc_address_block, (block_end_pc & 0x7FFF) + offset) == 0) \ { \ - address32(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) = \ + address32(pc_address_block, (block_end_pc & 0x7FFF) + offset) = \ 0xFFFFFFFF; \ } \ +} -#define smc_write_thumb_yes() \ - if(address16(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) == 0x0000) \ +#define smc_write_thumb_yes() { \ + int offset = (pc < 0x03000000) ? 0x40000 : -0x8000; \ + if(address16(pc_address_block, (block_end_pc & 0x7FFF) + offset) == 0) \ { \ - address16(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) = 0xFFFF; \ + address16(pc_address_block, (block_end_pc & 0x7FFF) + offset) = 0xFFFF; \ } \ +} #define smc_write_arm_no() \ @@ -3428,7 +3426,6 @@ s32 translate_block_thumb(u32 pc, translation_region_type u32 opcode = 0; u32 last_opcode; u32 condition; - u32 last_condition; u32 pc_region = (pc >> 15); u32 new_pc_region; u8 *pc_address_block = memory_map_read[pc_region]; @@ -3514,8 +3511,6 @@ s32 translate_block_thumb(u32 pc, translation_region_type block_exit_position = 0; block_data_position = 0; - last_condition = 0x0E; - while(pc != block_end_pc) { block_data[block_data_position].block_offset = translation_ptr; @@ -3627,13 +3622,16 @@ s32 translate_block_thumb(u32 pc, translation_region_type void flush_translation_cache_ram(void) { flush_ram_count++; -/* printf("ram flush %d (pc %x), %x to %x, %x to %x\n", + /*printf("ram flush %d (pc %x), %x to %x, %x to %x\n", flush_ram_count, reg[REG_PC], iwram_code_min, iwram_code_max, - ewram_code_min, ewram_code_max); */ + ewram_code_min, ewram_code_max);*/ last_ram_translation_ptr = ram_translation_cache; ram_translation_ptr = ram_translation_cache; ram_block_tag_top = 0x0101; + + // Proceed to clean the SMC area if needed + // (also try to memset as little as possible for performance) if(iwram_code_min != 0xFFFFFFFF) { iwram_code_min &= 0x7FFF; @@ -3643,33 +3641,9 @@ void flush_translation_cache_ram(void) if(ewram_code_min != 
0xFFFFFFFF) { - u32 ewram_code_min_page; - u32 ewram_code_max_page; - u32 ewram_code_min_offset; - u32 ewram_code_max_offset; - u32 i; - ewram_code_min &= 0x3FFFF; ewram_code_max &= 0x3FFFF; - - ewram_code_min_page = ewram_code_min >> 15; - ewram_code_max_page = ewram_code_max >> 15; - ewram_code_min_offset = ewram_code_min & 0x7FFF; - ewram_code_max_offset = ewram_code_max & 0x7FFF; - - if(ewram_code_min_page == ewram_code_max_page) - { - memset(ewram + (ewram_code_min_page * 0x10000) + - ewram_code_min_offset, 0, - ewram_code_max_offset - ewram_code_min_offset); - } - else - { - for(i = ewram_code_min_page + 1; i < ewram_code_max_page; i++) - memset(ewram + (i * 0x10000), 0, 0x8000); - - memset(ewram, 0, ewram_code_max_offset); - } + memset(&ewram[0x40000 + ewram_code_min], 0, ewram_code_max - ewram_code_min); } iwram_code_min = 0xFFFFFFFF; diff --git a/gba_memory.c b/gba_memory.c index 948bcc5..a51f183 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -596,8 +596,7 @@ u32 function_cc read_eeprom(void) \ case 0x02: \ /* external work RAM */ \ - address = (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000; \ - value = address##type(ewram, address); \ + value = address##type(ewram, (address & 0x3FFFF)); \ break; \ \ case 0x03: \ @@ -1907,8 +1906,7 @@ void function_cc write_rtc(u32 address, u32 value) { \ case 0x02: \ /* external work RAM */ \ - address = (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000; \ - address##type(ewram, address) = value; \ + address##type(ewram, (address & 0x3FFFF)) = value; \ break; \ \ case 0x03: \ @@ -2454,7 +2452,7 @@ s32 load_bios(char *name) // DMA memory regions can be one of the following: // IWRAM - 32kb offset from the contiguous iwram region. -// EWRAM - like segmented but with self modifying code check. +// EWRAM - also contiguous but with self modifying code check mirror. // VRAM - 96kb offset from the contiguous vram region, should take care // Palette RAM - Converts palette entries when written to. 
// OAM RAM - Sets OAM modified flag to true. @@ -2527,11 +2525,8 @@ dma_region_type dma_region_map[16] = #define dma_vars_iwram(type) \ dma_smc_vars_##type() \ -#define dma_vars_vram(type) \ - -#define dma_vars_palette_ram(type) \ - -#define dma_oam_ram_src() \ +#define dma_vars_ewram(type) \ + dma_smc_vars_##type() #define dma_oam_ram_dest() \ oam_update = 1 \ @@ -2539,14 +2534,17 @@ dma_region_type dma_region_map[16] = #define dma_vars_oam_ram(type) \ dma_oam_ram_##type() \ -#define dma_vars_io(type) \ +#define dma_vars_io(type) +#define dma_vars_vram(type) +#define dma_vars_palette_ram(type) +#define dma_vars_bios(type) +#define dma_vars_ext(type) + +#define dma_oam_ram_src() #define dma_segmented_load_src() \ memory_map_read[src_current_region] \ -#define dma_segmented_load_dest() \ - memory_map_write[dest_current_region] \ - #define dma_vars_gamepak(type) \ u32 type##_new_region; \ u32 type##_current_region = type##_ptr >> 15; \ @@ -2558,24 +2556,6 @@ dma_region_type dma_region_map[16] = type##_address_block = load_gamepak_page(type##_current_region & 0x3FF); \ } \ -#define dma_vars_ewram(type) \ - dma_smc_vars_##type(); \ - u32 type##_new_region; \ - u32 type##_current_region = type##_ptr >> 15; \ - u8 *type##_address_block = dma_segmented_load_##type() \ - -#define dma_vars_bios(type) \ - -#define dma_vars_ext(type) \ - -#define dma_ewram_check_region(type) \ - type##_new_region = (type##_ptr >> 15); \ - if(type##_new_region != type##_current_region) \ - { \ - type##_current_region = type##_new_region; \ - type##_address_block = dma_segmented_load_##type(); \ - } \ - #define dma_gamepak_check_region(type) \ type##_new_region = (type##_ptr >> 15); \ if(type##_new_region != type##_current_region) \ @@ -2605,9 +2585,7 @@ dma_region_type dma_region_map[16] = read_value = address##transfer_size(palette_ram, type##_ptr & 0x3FF) \ #define dma_read_ewram(type, transfer_size) \ - dma_ewram_check_region(type); \ - read_value = 
address##transfer_size(type##_address_block, \ - type##_ptr & 0x7FFF) \ + read_value = address##transfer_size(ewram, type##_ptr & 0x3FFFF) \ #define dma_read_gamepak(type, transfer_size) \ dma_gamepak_check_region(type); \ @@ -2642,12 +2620,9 @@ dma_region_type dma_region_map[16] = write_memory##transfer_size(type##_ptr, read_value) \ #define dma_write_ewram(type, transfer_size) \ - dma_ewram_check_region(type); \ - \ - address##transfer_size(type##_address_block, type##_ptr & 0x7FFF) = \ - read_value; \ - smc_trigger |= address##transfer_size(type##_address_block, \ - (type##_ptr & 0x7FFF) - 0x8000) \ + address##transfer_size(ewram, type##_ptr & 0x3FFFF) = read_value; \ + smc_trigger |= address##transfer_size(ewram, \ + (type##_ptr & 0x3FFFF) + 0x40000) \ #define dma_epilogue_iwram() \ if(smc_trigger) \ @@ -3105,14 +3080,6 @@ cpu_alert_type dma_transfer(dma_transfer_type *dma) map_offset++) \ memory_map_##type[map_offset] = NULL; \ -#define map_ram_region(type, start, end, mirror_blocks, region) \ - for(map_offset = (start) / 0x8000; map_offset < \ - ((end) / 0x8000); map_offset++) \ - { \ - memory_map_##type[map_offset] = \ - ((u8 *)region) + ((map_offset % mirror_blocks) * 0x10000) + 0x8000; \ - } \ - #define map_vram(type) \ for(map_offset = 0x6000000 / 0x8000; map_offset < (0x7000000 / 0x8000); \ map_offset += 4) \ @@ -3274,8 +3241,8 @@ void init_memory(void) // Fill memory map regions, areas marked as NULL must be checked directly map_region(read, 0x0000000, 0x1000000, 1, bios_rom); map_null(read, 0x1000000, 0x2000000); - map_ram_region(read, 0x2000000, 0x3000000, 8, ewram); - map_ram_region(read, 0x3000000, 0x4000000, 1, iwram); + map_region(read, 0x2000000, 0x3000000, 8, ewram); + map_region(read, 0x3000000, 0x4000000, 1, &iwram[0x8000]); map_region(read, 0x4000000, 0x5000000, 1, io_registers); map_null(read, 0x5000000, 0x6000000); map_null(read, 0x6000000, 0x7000000); @@ -3284,45 +3251,12 @@ void init_memory(void) init_memory_gamepak(); map_null(read, 
0xE000000, 0x10000000); - // Fill memory map regions, areas marked as NULL must be checked directly - map_null(write, 0x0000000, 0x2000000); - map_ram_region(write, 0x2000000, 0x3000000, 8, ewram); - map_ram_region(write, 0x3000000, 0x4000000, 1, iwram); - map_null(write, 0x4000000, 0x5000000); - map_null(write, 0x5000000, 0x6000000); - - // The problem here is that the current method of handling self-modifying code - // requires writeable memory to be proceeded by 32KB SMC data areas or be - // indirectly writeable. It's possible to get around this if you turn off the SMC - // check altogether, but this will make a good number of ROMs crash (perhaps most - // of the ones that actually need it? This has yet to be determined). - - // This is because VRAM cannot be efficiently made incontiguous, and still allow - // the renderer to work as efficiently. It would, at the very least, require a - // lot of hacking of the renderer which I'm not prepared to do. - // TODO(davidgfnet): add SMC VRAM detection - - // However, it IS possible to directly map the first page no matter what because - // there's 32kb of blank stuff sitting beneath it. 
- if(direct_map_vram) - { - map_vram(write); - } - else - { - map_null(write, 0x6000000, 0x7000000); - } - - map_null(write, 0x7000000, 0x8000000); - map_null(write, 0x8000000, 0xE000000); - map_null(write, 0xE000000, 0x10000000); - - memset(io_registers, 0, 0x8000); - memset(oam_ram, 0, 0x400); - memset(palette_ram, 0, 0x400); - memset(iwram, 0, 0x10000); - memset(ewram, 0, 0x80000); - memset(vram, 0, 0x18000); + memset(io_registers, 0, sizeof(io_registers)); + memset(oam_ram, 0, sizeof(oam_ram)); + memset(palette_ram, 0, sizeof(palette_ram)); + memset(iwram, 0, sizeof(iwram)); + memset(ewram, 0, sizeof(ewram)); + memset(vram, 0, sizeof(vram)); io_registers[REG_DISPCNT] = 0x80; io_registers[REG_P1] = 0x3FF; @@ -3426,8 +3360,6 @@ void gba_save_state(void* dst) #define memory_savestate_builder(type) \ void memory_##type##_savestate(void) \ { \ - u32 i; \ - \ state_mem_##type##_variable(backup_type); \ state_mem_##type##_variable(sram_size); \ state_mem_##type##_variable(flash_mode); \ @@ -3453,10 +3385,7 @@ void memory_##type##_savestate(void) \ state_mem_##type##_array(dma); \ \ state_mem_##type(iwram + 0x8000, 0x8000); \ - for(i = 0; i < 8; i++) \ - { \ - state_mem_##type(ewram + (i * 0x10000) + 0x8000, 0x8000); \ - } \ + state_mem_##type(ewram, 0x40000); \ state_mem_##type(vram, 0x18000); \ state_mem_##type(oam_ram, 0x400); \ state_mem_##type(palette_ram, 0x400); \ diff --git a/gba_memory.h b/gba_memory.h index eaa5bab..14c8394 100644 --- a/gba_memory.h +++ b/gba_memory.h @@ -214,7 +214,6 @@ extern u8 ewram[1024 * 256 * 2]; extern u8 iwram[1024 * 32 * 2]; extern u8 *memory_map_read[8 * 1024]; -extern u8 *memory_map_write[8 * 1024]; extern u32 reg[64]; @@ -809,15 +809,8 @@ static void set_memory_descriptors(void) { const uint64_t mem = RETRO_MEMORY_SYSTEM_RAM; struct retro_memory_descriptor desc[9] = { - { mem, iwram, 0x00000 + 0x8000, 0x3000000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x00000 + 0x8000, 0x2000000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x10000 + 
0x8000, 0x2008000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x20000 + 0x8000, 0x2010000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x30000 + 0x8000, 0x2018000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x40000 + 0x8000, 0x2020000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x50000 + 0x8000, 0x2028000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x60000 + 0x8000, 0x2030000, 0, 0, 0x8000, NULL }, - { mem, ewram, 0x70000 + 0x8000, 0x2038000, 0, 0, 0x8000, NULL } + { mem, iwram, 0x00000 + 0x8000, 0x3000000, 0, 0, 0x8000, NULL }, + { mem, ewram, 0x00000, 0x2000000, 0, 0, 0x40000, NULL }, }; struct retro_memory_map retromap = { desc, diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 8d1d8d8..b75f7f5 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -1010,47 +1010,10 @@ u32 generate_load_rm_sh_##flags_op(u32 rm) \ { \ u32 _address = (u32)(address); \ u32 _address_hi = (_address + 0x8000) >> 16; \ - generate_load_imm(ireg, address); \ mips_emit_lui(ireg, _address_hi >> 16) \ generate_load_memory_##type(ireg, _address - (_address_hi << 16)); \ } \ -#define generate_known_address_load_builder(type) \ - u32 generate_known_address_load_##type(u32 rd, u32 address) \ - { \ - switch(address >> 24) \ - { \ - /* Read from the BIOS ROM, can be converted to an immediate load. \ - Only really possible to do this from the BIOS but should be okay \ - to allow it everywhere */ \ - case 0x00: \ - u32 imm = read_memory_constant_##type(address); \ - generate_load_imm(arm_to_mips_reg[rd], imm); \ - return 1; \ - \ - /* Read from RAM, can be converted to a load */ \ - case 0x02: \ - generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)ewram + \ - (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000); \ - return 1; \ - \ - case 0x03: \ - generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)iwram + \ - (address & 0x7FFF) + 0x8000); \ - return 1; \ - \ - /* Read from gamepak ROM, this has to be an immediate load because \ - it might not actually be in memory anymore when we get to it. 
*/ \ - case 0x08: \ - u32 imm = read_memory_constant_##type(address); \ - generate_load_imm(arm_to_mips_reg[rd], imm); \ - return 1; \ - \ - default: \ - return 0; \ - } \ - } \ - #define generate_block_extra_vars() \ u32 stored_pc = pc; \ u8 *update_trampoline \ @@ -1060,12 +1023,6 @@ u32 generate_load_rm_sh_##flags_op(u32 rm) \ generate_load_rm_sh_builder(flags); \ generate_load_rm_sh_builder(no_flags); \ \ -/* generate_known_address_load_builder(u8); \ - generate_known_address_load_builder(u16); \ - generate_known_address_load_builder(u32); \ - generate_known_address_load_builder(s8); \ - generate_known_address_load_builder(s16); */ \ - \ u32 generate_load_offset_sh(u32 rm) \ { \ switch((opcode >> 5) & 0x03) \ @@ -2787,12 +2744,13 @@ static void emit_pmemld_stub( mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); if (region == 2) { - // EWRAM is a bit special + // Can't do EWRAM with an `andi` instruction (18 bits mask) + mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff + if (!aligned && alignment != 0) { + mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + } // Need to insert a zero in the addr (due to how it's mapped) - mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) - mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18) - mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 - mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr } else if (region == 6) { // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 @@ -2862,12 +2820,13 @@ static void emit_pmemst_stub( } if (region == 2) { - // EWRAM is a bit special + // Can't do EWRAM with an `andi` instruction (18 bits mask) + mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff + if (!aligned && realsize != 0) { + mips_emit_ins(reg_a0, reg_zero, 0, 
size);// addr & ~1/2 (align to size) + } // Need to insert a zero in the addr (due to how it's mapped) - mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) - mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18) - mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 - mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr } else if (region == 6) { // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 @@ -2888,7 +2847,12 @@ static void emit_pmemst_stub( // Generate SMC write and tracking // TODO: Should we have SMC checks here also for aligned? if (meminfo->check_smc && !aligned) { - mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer + if (region == 2) { + mips_emit_lui(reg_temp, 0x40000 >> 16); + mips_emit_addu(reg_temp, reg_rv, reg_temp); // SMC lives after the ewram + } else { + mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer + } if (realsize == 2) { mips_emit_lw(reg_temp, reg_temp, base_addr); } else if (realsize == 1) { @@ -3272,8 +3236,8 @@ void init_emitter() { const t_stub_meminfo ldinfo [] = { { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom }, // 1 Open load / Ignore store - { emit_pmemld_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] }, - { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above + { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram }, // memsize wrong on purpose + { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers }, { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram }, { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case @@ -3308,8 +3272,8 @@ void 
init_emitter() { } const t_stub_meminfo stinfo [] = { - { emit_pmemst_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] }, - { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above + { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram }, + { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // I/O is special and mapped with a function call { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram }, { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 2d40bf8..5e5a479 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -44,7 +44,6 @@ .global write_io_epilogue .global memory_map_read -.global memory_map_write .global tmemld .global tmemst .global tmemst @@ -609,9 +608,6 @@ execute_arm_translate_internal: .data .align 6 -memory_map_write: - .space 0x8000 - memory_map_read: .space 0x8000 diff --git a/x86/x86_emit.h b/x86/x86_emit.h index 67a3dc2..68930e1 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -1485,23 +1485,13 @@ u32 function_cc execute_aligned_load32(u32 address) return read_memory32(address); } -void function_cc execute_aligned_store32(u32 address, u32 source) -{ - u8 *map; - - if(!(address & 0xF0000000) && (map = memory_map_write[address >> 15])) - address32(map, address & 0x7FFF) = source; - else - write_memory32(address, source); -} - #define arm_block_memory_load() \ generate_function_call(execute_aligned_load32); \ generate_store_reg(rv, i) \ #define arm_block_memory_store() \ generate_load_reg_pc(a1, i, 8); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define arm_block_memory_final_load() \ arm_block_memory_load() \ @@ -1956,7 +1946,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift) #define thumb_block_memory_extra_push_lr(base_reg) \ generate_add_reg_reg_imm(a0, s0, (bit_count[reg_list] * 4)); \ generate_load_reg(a1, REG_LR); \ - 
generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define thumb_block_memory_load() \ generate_function_call(execute_aligned_load32); \ @@ -1964,7 +1954,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift) #define thumb_block_memory_store() \ generate_load_reg(a1, i); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define thumb_block_memory_final_load() \ thumb_block_memory_load() \ diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 1e338a4..595a789 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -29,7 +29,6 @@ #define _execute_store_cpsr execute_store_cpsr #define _execute_arm_translate execute_arm_translate #define _memory_map_read memory_map_read -#define _memory_map_write memory_map_write #define _reg reg #define _reg_mode reg_mode #define _oam_update oam_update @@ -68,7 +67,6 @@ .global _execute_arm_translate .global _memory_map_read -.global _memory_map_write .global _reg .global _reg_mode .global _spsr @@ -231,6 +229,20 @@ ext_store_eeprom: # 8bit ext memory routines +ext_store_iwram8: + and $0x7FFF, %eax # wrap around address + mov %dl, (_iwram+0x8000)(%eax) # perform store + cmpb $0, _iwram(%eax) # Check SMC mirror + jne smc_write + ret + +ext_store_ewram8: + and $0x3FFFF, %eax # wrap around address + mov %dl, _ewram(%eax) # perform store + cmpb $0, (_ewram+0x40000)(%eax) # Check SMC mirror + jne smc_write + ret + ext_store_io8: and $0x3FF, %eax # wrap around address and $0xFF, %edx @@ -267,8 +279,8 @@ ext_store_backup: ext_store_u8_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram8 # 0x02 EWRAM + .long ext_store_iwram8 # 0x03 IWRAM .long ext_store_io8 # 0x04 I/O registers .long ext_store_palette8 # 0x05 Palette RAM .long ext_store_vram8 # 0x06 VRAM 
@@ -281,7 +293,12 @@ ext_store_u8_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_backup # 0x0E Flash ROM/SRAM -ext_store_u8: +# eax: address to write to +# edx: value to write +# ecx: current pc + +_execute_store_u8: + mov %ecx, REG_PC(%ebx) # write out the PC mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -290,46 +307,21 @@ ext_store_u8: mov ext_store_u8_jtable(, %ecx, 4), %ecx jmp *%ecx # jump to table index -# eax: address to write to -# edx: value to write -# ecx: current pc +# 16bit ext memory routines -_execute_store_u8: - mov %ecx, REG_PC(%ebx) # write out the PC - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u8 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u8 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %dl, (%eax, %ecx) # store the value - # check for self-modifying code - testb $0xFF, -32768(%eax, %ecx) +ext_store_iwram16: + and $0x7FFF, %eax # wrap around address + mov %dx, (_iwram+0x8000)(%eax) # perform store + cmpw $0, _iwram(%eax) # Check SMC mirror jne smc_write - ret # return + ret -_execute_store_u16: - mov %ecx, REG_PC(%ebx) # write out the PC - and $~0x01, %eax # fix alignment - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u16 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u16 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %dx, (%eax, %ecx) # store the value - # check for self-modifying code - testw $0xFFFF, -32768(%eax, %ecx) 
+ext_store_ewram16: + and $0x3FFFF, %eax # wrap around address + mov %dx, _ewram(%eax) # perform store + cmpw $0, (_ewram+0x40000)(%eax) # Check SMC mirror jne smc_write - ret # return - -# 16bit ext memory routines + ret ext_store_io16: and $0x3FF, %eax # wrap around address @@ -377,8 +369,8 @@ ext_store_rtc: ext_store_u16_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram16 # 0x02 EWRAM + .long ext_store_iwram16 # 0x03 IWRAM .long ext_store_io16 # 0x04 I/O registers .long ext_store_palette16 # 0x05 Palette RAM .long ext_store_vram16 # 0x06 VRAM @@ -391,7 +383,9 @@ ext_store_u16_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -ext_store_u16: +_execute_store_u16: + mov %ecx, REG_PC(%ebx) # write out the PC + and $~0x01, %eax # fix alignment mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -400,25 +394,21 @@ ext_store_u16: mov ext_store_u16_jtable(, %ecx, 4), %ecx jmp *%ecx # jump to table index -_execute_store_u32: - mov %ecx, REG_PC(%ebx) # write out the PC - and $~0x03, %eax # fix alignment - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u32 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u32 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %edx, (%eax, %ecx) # store the value - # check for self-modifying code - testl $0xFFFFFFFF, -32768(%eax, %ecx) +# 32bit ext memory routines + +ext_store_iwram32: + and $0x7FFF, %eax # wrap around address + mov %edx, (_iwram+0x8000)(%eax) # perform store + 
cmpl $0, _iwram(%eax) # Check SMC mirror jne smc_write - ret # return it + ret -# 32bit ext memory routines +ext_store_ewram32: + and $0x3FFFF, %eax # wrap around address + mov %edx, _ewram(%eax) # perform store + cmpl $0, (_ewram+0x40000)(%eax) # Check SMC mirror + jne smc_write + ret ext_store_io32: and $0x3FF, %eax # wrap around address @@ -451,8 +441,8 @@ ext_store_oam32: ext_store_u32_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram32 # 0x02 EWRAM + .long ext_store_iwram32 # 0x03 IWRAM .long ext_store_io32 # 0x04 I/O registers .long ext_store_palette32 # 0x05 Palette RAM .long ext_store_vram32 # 0x06 VRAM @@ -466,7 +456,9 @@ ext_store_u32_jtable: .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -ext_store_u32: +_execute_store_u32: + mov %ecx, REG_PC(%ebx) # write out the PC + and $~0x03, %eax # fix alignment mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -507,8 +499,8 @@ smc_write: call _flush_translation_cache_ram lookup_pc: - add $4, %esp - movl $0, CHANGED_PC_STATUS(%ebx) + add $4, %esp # Can't return, discard addr + movl $0, CHANGED_PC_STATUS(%ebx) # Lookup new block and jump to it mov REG_PC(%ebx), %eax testl $0x20, REG_CPSR(%ebx) jz lookup_pc_arm @@ -577,7 +569,5 @@ _reg_mode: _memory_map_read: .space 0x8000 -_memory_map_write: - .space 0x8000 |