From 11ec213c99d5d22905ff82cf3fb26ba6a8adf290 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:05:35 +0100 Subject: Make ewram memory lineal This saves a few cycles in MIPS and simplifies a bit the core. Removed the write map, only affects interpreter performance very minimally. Rewired ARM and x86 handlers to support direct access to I/EWRAM (and VRAM on ARM) to compensate. Overall performance is slightly better but code is cleaner and allows for further improvements in the dynarecs. --- x86/x86_emit.h | 16 ++------ x86/x86_stub.S | 126 ++++++++++++++++++++++++++------------------------------- 2 files changed, 61 insertions(+), 81 deletions(-) (limited to 'x86') diff --git a/x86/x86_emit.h b/x86/x86_emit.h index 67a3dc2..68930e1 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -1485,23 +1485,13 @@ u32 function_cc execute_aligned_load32(u32 address) return read_memory32(address); } -void function_cc execute_aligned_store32(u32 address, u32 source) -{ - u8 *map; - - if(!(address & 0xF0000000) && (map = memory_map_write[address >> 15])) - address32(map, address & 0x7FFF) = source; - else - write_memory32(address, source); -} - #define arm_block_memory_load() \ generate_function_call(execute_aligned_load32); \ generate_store_reg(rv, i) \ #define arm_block_memory_store() \ generate_load_reg_pc(a1, i, 8); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define arm_block_memory_final_load() \ arm_block_memory_load() \ @@ -1956,7 +1946,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift) #define thumb_block_memory_extra_push_lr(base_reg) \ generate_add_reg_reg_imm(a0, s0, (bit_count[reg_list] * 4)); \ generate_load_reg(a1, REG_LR); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define thumb_block_memory_load() \ generate_function_call(execute_aligned_load32); \ @@ -1964,7 +1954,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift) #define thumb_block_memory_store() \ generate_load_reg(a1, i); \ - generate_function_call(execute_aligned_store32) \ + generate_function_call(write_memory32) \ #define thumb_block_memory_final_load() \ thumb_block_memory_load() \ diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 1e338a4..595a789 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -29,7 +29,6 @@ #define _execute_store_cpsr execute_store_cpsr #define _execute_arm_translate execute_arm_translate #define _memory_map_read memory_map_read -#define _memory_map_write memory_map_write #define _reg reg #define _reg_mode reg_mode #define _oam_update oam_update @@ -68,7 +67,6 @@ .global _execute_arm_translate .global _memory_map_read -.global _memory_map_write .global _reg .global _reg_mode .global _spsr @@ -231,6 +229,20 @@ ext_store_eeprom: # 8bit ext memory routines +ext_store_iwram8: + and $0x7FFF, %eax # wrap around address + mov %dl, (_iwram+0x8000)(%eax) # perform store + cmpb $0, _iwram(%eax) # Check SMC mirror + jne smc_write + ret + +ext_store_ewram8: + and $0x3FFFF, %eax # wrap around address + mov %dl, _ewram(%eax) # perform store + cmpb $0, (_ewram+0x40000)(%eax) # Check SMC mirror + jne smc_write + ret + ext_store_io8: and $0x3FF, %eax # wrap around address and $0xFF, %edx @@ -267,8 +279,8 @@ ext_store_backup: ext_store_u8_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram8 # 0x02 EWRAM + .long ext_store_iwram8 # 0x03 IWRAM .long ext_store_io8 # 0x04 I/O registers .long ext_store_palette8 # 0x05 Palette RAM .long ext_store_vram8 # 0x06 VRAM @@ -281,7 +293,12 @@ ext_store_u8_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_backup # 0x0E Flash ROM/SRAM -ext_store_u8: +# eax: address to write to +# edx: value to write +# ecx: current pc + +_execute_store_u8: + mov %ecx, REG_PC(%ebx) # write out the PC mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -290,46 +307,21 @@ ext_store_u8: mov ext_store_u8_jtable(, %ecx, 4), %ecx jmp *%ecx # jump to table index -# eax: address to write to -# edx: value to write -# ecx: current pc +# 16bit ext memory routines -_execute_store_u8: - mov %ecx, REG_PC(%ebx) # write out the PC - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u8 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u8 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %dl, (%eax, %ecx) # store the value - # check for self-modifying code - testb $0xFF, -32768(%eax, %ecx) +ext_store_iwram16: + and $0x7FFF, %eax # wrap around address + mov %dx, (_iwram+0x8000)(%eax) # perform store + cmpw $0, _iwram(%eax) # Check SMC mirror jne smc_write - ret # return + ret -_execute_store_u16: - mov %ecx, REG_PC(%ebx) # write out the PC - and $~0x01, %eax # fix alignment - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u16 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u16 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %dx, (%eax, %ecx) # store the value - # check for self-modifying code - testw $0xFFFF, -32768(%eax, %ecx) +ext_store_ewram16: + and $0x3FFFF, %eax # wrap around address + mov %dx, _ewram(%eax) # perform store + cmpw $0, (_ewram+0x40000)(%eax) # Check SMC mirror jne smc_write - ret # return - -# 16bit ext memory routines + ret ext_store_io16: and $0x3FF, %eax # wrap around address @@ -377,8 +369,8 @@ ext_store_rtc: ext_store_u16_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram16 # 0x02 EWRAM + .long ext_store_iwram16 # 0x03 IWRAM .long ext_store_io16 # 0x04 I/O registers .long ext_store_palette16 # 0x05 Palette RAM .long ext_store_vram16 # 0x06 VRAM @@ -391,7 +383,9 @@ ext_store_u16_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -ext_store_u16: +_execute_store_u16: + mov %ecx, REG_PC(%ebx) # write out the PC + and $~0x01, %eax # fix alignment mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -400,25 +394,21 @@ ext_store_u16: mov ext_store_u16_jtable(, %ecx, 4), %ecx jmp *%ecx # jump to table index -_execute_store_u32: - mov %ecx, REG_PC(%ebx) # write out the PC - and $~0x03, %eax # fix alignment - mov %eax, %ecx # ecx = address - test $0xF0000000, %ecx # check address range - jnz ext_store_u32 # if above perform an extended write - shr $15, %ecx # ecx = page number of address - # load the corresponding memory map offset - mov _memory_map_write(, %ecx, 4), %ecx - test %ecx, %ecx # see if it's NULL - jz ext_store_u32 # if so perform an extended write - and $0x7FFF, %eax # isolate the lower 15bits of the address - mov %edx, (%eax, %ecx) # store the value - # check for self-modifying code - testl $0xFFFFFFFF, -32768(%eax, %ecx) +# 32bit ext memory routines + +ext_store_iwram32: + and $0x7FFF, %eax # wrap around address + mov %edx, (_iwram+0x8000)(%eax) # perform store + cmpl $0, _iwram(%eax) # Check SMC mirror jne smc_write - ret # return it + ret -# 32bit ext memory routines +ext_store_ewram32: + and $0x3FFFF, %eax # wrap around address + mov %edx, _ewram(%eax) # perform store + cmpl $0, (_ewram+0x40000)(%eax) # Check SMC mirror + jne smc_write + ret ext_store_io32: and $0x3FF, %eax # wrap around address @@ -451,8 +441,8 @@ ext_store_oam32: ext_store_u32_jtable: .long ext_store_ignore # 0x00 BIOS, ignore .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_ignore # 0x02 EWRAM, should have been hit already - .long ext_store_ignore # 0x03 IWRAM, should have been hit already + .long ext_store_ewram32 # 0x02 EWRAM + .long ext_store_iwram32 # 0x03 IWRAM .long ext_store_io32 # 0x04 I/O registers .long ext_store_palette32 # 0x05 Palette RAM .long ext_store_vram32 # 0x06 VRAM @@ -466,7 +456,9 @@ ext_store_u32_jtable: .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -ext_store_u32: +_execute_store_u32: + mov %ecx, REG_PC(%ebx) # write out the PC + and $~0x03, %eax # fix alignment mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 cmp $15, %ecx @@ -507,8 +499,8 @@ smc_write: call _flush_translation_cache_ram lookup_pc: - add $4, %esp - movl $0, CHANGED_PC_STATUS(%ebx) + add $4, %esp # Can't return, discard addr + movl $0, CHANGED_PC_STATUS(%ebx) # Lookup new block and jump to it mov REG_PC(%ebx), %eax testl $0x20, REG_CPSR(%ebx) jz lookup_pc_arm @@ -577,7 +569,5 @@ _reg_mode: _memory_map_read: .space 0x8000 -_memory_map_write: - .space 0x8000 -- cgit v1.2.3 From ff510e7f7a0c04c7862e598e8bfc75747f3bf7d1 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:47:51 +0100 Subject: Move caches to stub files to get around gcc 10 Seems that using the __atribute__ magic for sections is not the best way of doing this, since it injects some default atributtes that collide with the user defined ones. Using assembly is far easier in this case. Reworked definitions a bit to make it easier to import from assembly. Also wrapped stuff around macros for easy and less verbose implementation of the symbol prefix issue. --- x86/x86_stub.S | 85 +++++++++++++++++++++++++--------------------------------- 1 file changed, 37 insertions(+), 48 deletions(-) (limited to 'x86') diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 595a789..9dd3fdd 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -16,21 +16,18 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#include "../gpsp_config.h" + .align 4 +#define defsymbl(symbol) \ +.global symbol ; \ +.global _##symbol ; \ +symbol: \ +_##symbol: + #ifndef _WIN32 -#define _x86_update_gba x86_update_gba -#define _x86_indirect_branch_arm x86_indirect_branch_arm -#define _x86_indirect_branch_thumb x86_indirect_branch_thumb -#define _x86_indirect_branch_dual x86_indirect_branch_dual -#define _execute_store_u8 execute_store_u8 -#define _execute_store_u16 execute_store_u16 -#define _execute_store_u32 execute_store_u32 -#define _execute_store_cpsr execute_store_cpsr -#define _execute_arm_translate execute_arm_translate -#define _memory_map_read memory_map_read -#define _reg reg -#define _reg_mode reg_mode +# External symbols (data + functions) #define _oam_update oam_update #define _iwram iwram #define _ewram ewram @@ -38,7 +35,6 @@ #define _oam_ram oam_ram #define _bios_rom bios_rom #define _io_registers io_registers -#define _spsr spsr #define _update_gba update_gba #define _block_lookup_address_arm block_lookup_address_arm @@ -47,8 +43,6 @@ #define _write_io_register8 write_io_register8 #define _write_io_register16 write_io_register16 #define _write_io_register32 write_io_register32 -#define _palette_ram palette_ram -#define _palette_ram_converted palette_ram_converted #define _flush_translation_cache_ram flush_translation_cache_ram #define _write_eeprom write_eeprom #define _write_backup write_backup @@ -56,25 +50,7 @@ #define _execute_store_cpsr_body execute_store_cpsr_body #endif -.global _x86_update_gba -.global _x86_indirect_branch_arm -.global _x86_indirect_branch_thumb -.global _x86_indirect_branch_dual -.global _execute_store_u8 -.global _execute_store_u16 -.global _execute_store_u32 -.global _execute_store_cpsr -.global _execute_arm_translate - -.global _memory_map_read -.global _reg -.global _reg_mode -.global _spsr -.global _palette_ram -.global _palette_ram_converted - .global _oam_update - .global _iwram .global _ewram .global _vram @@ -147,7 +123,7 @@ st: .asciz "u\n" -_x86_update_gba: +defsymbl(x86_update_gba) mov %eax, REG_PC(%ebx) # current PC = eax collapse_flags # update cpsr, trashes ecx and edx @@ -171,14 +147,14 @@ _x86_update_gba: # eax: GBA address to branch to # edi: Cycle counter -_x86_indirect_branch_arm: +defsymbl(x86_indirect_branch_arm) call _block_lookup_address_arm jmp *%eax # For indirect branches that'll definitely go to Thumb. In # Thumb mode any indirect branches except for BX. -_x86_indirect_branch_thumb: +defsymbl(x86_indirect_branch_thumb) call _block_lookup_address_thumb jmp *%eax @@ -186,7 +162,7 @@ _x86_indirect_branch_thumb: # mainly BX (also data processing to PC with S bit set, be # sure to adjust the target with a 1 in the lowest bit for this) -_x86_indirect_branch_dual: +defsymbl(x86_indirect_branch_dual) call _block_lookup_address_dual jmp *%eax @@ -297,7 +273,7 @@ ext_store_u8_jtable: # edx: value to write # ecx: current pc -_execute_store_u8: +defsymbl(execute_store_u8) mov %ecx, REG_PC(%ebx) # write out the PC mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 @@ -383,7 +359,7 @@ ext_store_u16_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -_execute_store_u16: +defsymbl(execute_store_u16) mov %ecx, REG_PC(%ebx) # write out the PC and $~0x01, %eax # fix alignment mov %eax, %ecx # ecx = address @@ -400,6 +376,7 @@ ext_store_iwram32: and $0x7FFF, %eax # wrap around address mov %edx, (_iwram+0x8000)(%eax) # perform store cmpl $0, _iwram(%eax) # Check SMC mirror + jne smc_write ret @@ -456,7 +433,7 @@ ext_store_u32_jtable: .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -_execute_store_u32: +defsymbl(execute_store_u32) mov %ecx, REG_PC(%ebx) # write out the PC and $~0x03, %eax # fix alignment mov %eax, %ecx # ecx = address @@ -470,7 +447,7 @@ _execute_store_u32: # %eax = new_cpsr # %edx = store_mask -_execute_store_cpsr: +defsymbl(execute_store_cpsr) mov %edx, REG_SAVE(%ebx) # save store_mask mov %ecx, REG_SAVE2(%ebx) # save PC too @@ -515,7 +492,7 @@ lookup_pc_arm: # eax: cycle counter -_execute_arm_translate: +defsymbl(execute_arm_translate) # Save main context, since we need to return gracefully pushl %ebx pushl %esi @@ -556,18 +533,30 @@ return_to_main: .data .align 64 -_reg: +defsymbl(reg) .space 0x100, 0 -_palette_ram: +defsymbl(palette_ram) .space 0x400 -_palette_ram_converted: +defsymbl(palette_ram_converted) .space 0x400 -_spsr: +defsymbl(spsr) .space 24 -_reg_mode: +defsymbl(reg_mode) .space 196 -_memory_map_read: +defsymbl(memory_map_read) .space 0x8000 +#if !defined(HAVE_MMAP) + +# Make this section executable! +.text +.section .jit,"awx",%nobits +.align 4 +defsymbl(rom_translation_cache) + .space ROM_TRANSLATION_CACHE_SIZE +defsymbl(ram_translation_cache) + .space RAM_TRANSLATION_CACHE_SIZE + +#endif -- cgit v1.2.3 From a494a3f00ee3bd35ee9ab76f8cd4f164da080113 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Thu, 25 Mar 2021 21:02:06 +0100 Subject: Move OAM update flag to a register Fix a small bug in MIPS dynarec that affects non -G0 targets --- x86/x86_stub.S | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'x86') diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 9dd3fdd..948572c 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -28,7 +28,6 @@ _##symbol: #ifndef _WIN32 # External symbols (data + functions) -#define _oam_update oam_update #define _iwram iwram #define _ewram ewram #define _vram vram @@ -50,7 +49,6 @@ _##symbol: #define _execute_store_cpsr_body execute_store_cpsr_body #endif -.global _oam_update .global _iwram .global _ewram .global _vram @@ -75,6 +73,7 @@ _##symbol: .equ CPU_HALT_STATE, (30 * 4) .equ CHANGED_PC_STATUS, (31 * 4) .equ COMPLETED_FRAME, (32 * 4) +.equ OAM_UPDATED, (33 * 4) # destroys ecx and edx @@ -241,7 +240,7 @@ ext_store_vram8b: ret ext_store_oam8: - movl $1, _oam_update # flag OAM update + movl $1, OAM_UPDATED(%ebx) # flag OAM update and $0x3FE, %eax # wrap around address and align to 16bits mov %dl, %dh # copy lower 8bits of value into full 16bits mov %dx, _oam_ram(%eax) # perform 16bit store @@ -332,7 +331,7 @@ ext_store_vram16b: ret ext_store_oam16: - movl $1, _oam_update # flag OAM update + movl $1, OAM_UPDATED(%ebx) # flag OAM update and $0x3FF, %eax # wrap around address mov %dx, _oam_ram(%eax) # perform 16bit store ret @@ -410,7 +409,7 @@ ext_store_vram32b: ret ext_store_oam32: - movl $1, _oam_update # flag OAM update + movl $1, OAM_UPDATED(%ebx) # flag OAM update and $0x3FF, %eax # wrap around address mov %edx, _oam_ram(%eax) # perform 32bit store ret -- cgit v1.2.3 From 7ea6c5e247a742af6f7acfbf215c23264410451f Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Thu, 25 Mar 2021 23:01:20 +0100 Subject: Move OAM RAM to stubs also Makes accesses more efficient for MIPS. Make accesses also fast for palette reads. --- x86/x86_stub.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'x86') diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 948572c..333c8fd 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -538,6 +538,8 @@ defsymbl(palette_ram) .space 0x400 defsymbl(palette_ram_converted) .space 0x400 +defsymbl(oam_ram) + .space 0x400 defsymbl(spsr) .space 24 defsymbl(reg_mode) -- cgit v1.2.3