From 11ec213c99d5d22905ff82cf3fb26ba6a8adf290 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:05:35 +0100 Subject: Make ewram memory lineal This saves a few cycles in MIPS and simplifies a bit the core. Removed the write map, only affects interpreter performance very minimally. Rewired ARM and x86 handlers to support direct access to I/EWRAM (and VRAM on ARM) to compensate. Overall performance is slightly better but code is cleaner and allows for further improvements in the dynarecs. --- x86/x86_emit.h | 16 ++------ x86/x86_stub.S | 126 ++++++++++++++++++++++++++------------------------------- 2 files changed, 61 insertions(+), 81 deletions(-) (limited to 'x86') diff --git a/x86/x86_emit.h b/x86/x86_emit.h index 67a3dc2..68930e1 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -1485,23 +1485,13 @@ u32 function_cc execute_aligned_load32(u32 address) return read_memory32(address); } -void function_cc execute_aligned_store32(u32 address, u32 source) -{ - u8 *map; - - if(!(address & 0xF0000000) && (map = memory_map_write[address >> 15])) - address32(map, address & 0x7FFF) = source; - else - write_memory32(address, source); -} - #define arm_block_memory_load() \ generate_function_call(execute_aligned_load32); \ generate_store_reg(rv, i) \ #define arm_block_memory_store() \ generate_load_reg_pc(a1, i, 8); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define arm_block_memory_final_load() \ arm_block_memory_load() \ @@ -1956,7 +1946,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift) #define thumb_block_memory_extra_push_lr(base_reg) \ generate_add_reg_reg_imm(a0, s0, (bit_count[reg_list] * 4)); \ generate_load_reg(a1, REG_LR); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define thumb_block_memory_load() \ generate_function_call(execute_aligned_load32); \ @@ -1964,7 +1954,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift) #define thumb_block_memory_store() \ generate_load_reg(a1, i); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define thumb_block_memory_final_load() \ thumb_block_memory_load() \ diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 1e338a4..595a789 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -29,7 +29,6 @@ #define _execute_store_cpsr execute_store_cpsr #define _execute_arm_translate execute_arm_translate #define _memory_map_read memory_map_read -#define _memory_map_write memory_map_write #define _reg reg #define _reg_mode reg_mode #define _oam_update oam_update @@ -68,7 +67,6 @@ .global _execute_arm_translate .global _memory_map_read -.global _memory_map_write .global _reg .global _reg_mode .global _spsr @@ -231,6 +229,20 @@ ext_store_eeprom: # 8bit ext memory routines +ext_store_iwram8: + and $0x7FFF, %eax # wrap around address + mov %dl, (_iwram+0x8000)(%eax) # perform store + cmpb $0, _iwram(%eax) # Check SMC mirror + jne smc_write + ret + +ext_store_ewram8: + and $0x3FFFF, %eax # wrap around address + mov %dl, _ewram(%eax) # perform store + cmpb $0, (_ewram+0x40000)(%eax) # Check SMC mirror + jne smc_write + ret + ext_store_io8: and $0x3FF, %eax # wrap around address and $0xFF, %edx @@ -267,8 +279,8 @@ ext_store_backup: ext_store_u8_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram8 # 0x02 EWRAM + .long ext_store_iwram8 # 0x03 IWRAM .long ext_store_io8 # 0x04 I/O registers .long ext_store_palette8 # 0x05 Palette RAM .long ext_store_vram8 # 0x06 VRAM @@ -281,7 +293,12 @@ ext_store_u8_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_backup # 0x0E Flash ROM/SRAM -ext_store_u8: +# eax: address to write to +# edx: value to write +# ecx: current pc + +_execute_store_u8: + mov %ecx, REG_PC(%ebx) # write out the PC mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -290,46 +307,21 @@ ext_store_u8: mov ext_store_u8_jtable(, %ecx, 4), %ecx jmp *%ecx # jump to table index -# eax: address to write to -# edx: value to write -# ecx: current pc +# 16bit ext memory routines -_execute_store_u8: - mov %ecx, REG_PC(%ebx) # write out the PC - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u8 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u8 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %dl, (%eax, %ecx) # store the value - # check for self-modifying code - testb $0xFF, -32768(%eax, %ecx) +ext_store_iwram16: + and $0x7FFF, %eax # wrap around address + mov %dx, (_iwram+0x8000)(%eax) # perform store + cmpw $0, _iwram(%eax) # Check SMC mirror jne smc_write - ret # return + ret -_execute_store_u16: - mov %ecx, REG_PC(%ebx) # write out the PC - and $~0x01, %eax # fix alignment - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u16 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u16 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %dx, (%eax, %ecx) # store the value - # check for self-modifying code - testw $0xFFFF, -32768(%eax, %ecx) +ext_store_ewram16: + and $0x3FFFF, %eax # wrap around address + mov %dx, _ewram(%eax) # perform store + cmpw $0, (_ewram+0x40000)(%eax) # Check SMC mirror jne smc_write - ret # return - -# 16bit ext memory routines + ret ext_store_io16: and $0x3FF, %eax # wrap around address @@ -377,8 +369,8 @@ ext_store_rtc: ext_store_u16_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram16 # 0x02 EWRAM + .long ext_store_iwram16 # 0x03 IWRAM .long ext_store_io16 # 0x04 I/O registers .long ext_store_palette16 # 0x05 Palette RAM .long ext_store_vram16 # 0x06 VRAM @@ -391,7 +383,9 @@ ext_store_u16_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -ext_store_u16: +_execute_store_u16: + mov %ecx, REG_PC(%ebx) # write out the PC + and $~0x01, %eax # fix alignment mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -400,25 +394,21 @@ ext_store_u16: mov ext_store_u16_jtable(, %ecx, 4), %ecx jmp *%ecx # jump to table index -_execute_store_u32: - mov %ecx, REG_PC(%ebx) # write out the PC - and $~0x03, %eax # fix alignment - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u32 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u32 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %edx, (%eax, %ecx) # store the value - # check for self-modifying code - testl $0xFFFFFFFF, -32768(%eax, %ecx) +# 32bit ext memory routines + +ext_store_iwram32: + and $0x7FFF, %eax # wrap around address + mov %edx, (_iwram+0x8000)(%eax) # perform store + cmpl $0, _iwram(%eax) # Check SMC mirror jne smc_write - ret # return it + ret -# 32bit ext memory routines +ext_store_ewram32: + and $0x3FFFF, %eax # wrap around address + mov %edx, _ewram(%eax) # perform store + cmpl $0, (_ewram+0x40000)(%eax) # Check SMC mirror + jne smc_write + ret ext_store_io32: and $0x3FF, %eax # wrap around address @@ -451,8 +441,8 @@ ext_store_oam32: ext_store_u32_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram32 # 0x02 EWRAM + .long ext_store_iwram32 # 0x03 IWRAM .long ext_store_io32 # 0x04 I/O registers .long ext_store_palette32 # 0x05 Palette RAM .long ext_store_vram32 # 0x06 VRAM @@ -466,7 +456,9 @@ ext_store_u32_jtable: .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -ext_store_u32: +_execute_store_u32: + mov %ecx, REG_PC(%ebx) # write out the PC + and $~0x03, %eax # fix alignment mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -507,8 +499,8 @@ smc_write: call _flush_translation_cache_ram lookup_pc: - add $4, %esp - movl $0, CHANGED_PC_STATUS(%ebx) + add $4, %esp # Can't return, discard addr + movl $0, CHANGED_PC_STATUS(%ebx) # Lookup new block and jump to it mov REG_PC(%ebx), %eax testl $0x20, REG_CPSR(%ebx) jz lookup_pc_arm @@ -577,7 +569,5 @@ _reg_mode: _memory_map_read: .space 0x8000 -_memory_map_write: - .space 0x8000 -- cgit v1.2.3