From 11ec213c99d5d22905ff82cf3fb26ba6a8adf290 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:05:35 +0100 Subject: Make ewram memory lineal This saves a few cycles in MIPS and simplifies a bit the core. Removed the write map, only affects interpreter performance very minimally. Rewired ARM and x86 handlers to support direct access to I/EWRAM (and VRAM on ARM) to compensate. Overall performance is slightly better but code is cleaner and allows for further improvements in the dynarecs. --- psp/mips_emit.h | 80 ++++++++++++++++----------------------------------------- 1 file changed, 22 insertions(+), 58 deletions(-) (limited to 'psp/mips_emit.h') diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 8d1d8d8..b75f7f5 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -1010,47 +1010,10 @@ u32 generate_load_rm_sh_##flags_op(u32 rm) \ { \ u32 _address = (u32)(address); \ u32 _address_hi = (_address + 0x8000) >> 16; \ - generate_load_imm(ireg, address); \ mips_emit_lui(ireg, _address_hi >> 16) \ generate_load_memory_##type(ireg, _address - (_address_hi << 16)); \ } \ -#define generate_known_address_load_builder(type) \ - u32 generate_known_address_load_##type(u32 rd, u32 address) \ - { \ - switch(address >> 24) \ - { \ - /* Read from the BIOS ROM, can be converted to an immediate load. \ - Only really possible to do this from the BIOS but should be okay \ - to allow it everywhere */ \ - case 0x00: \ - u32 imm = read_memory_constant_##type(address); \ - generate_load_imm(arm_to_mips_reg[rd], imm); \ - return 1; \ - \ - /* Read from RAM, can be converted to a load */ \ - case 0x02: \ - generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)ewram + \ - (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000); \ - return 1; \ - \ - case 0x03: \ - generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)iwram + \ - (address & 0x7FFF) + 0x8000); \ - return 1; \ - \ - /* Read from gamepak ROM, this has to be an immediate load because \ - it might not actually be in memory anymore when we get to it. */ \ - case 0x08: \ - u32 imm = read_memory_constant_##type(address); \ - generate_load_imm(arm_to_mips_reg[rd], imm); \ - return 1; \ - \ - default: \ - return 0; \ - } \ - } \ - #define generate_block_extra_vars() \ u32 stored_pc = pc; \ u8 *update_trampoline \ @@ -1060,12 +1023,6 @@ u32 generate_load_rm_sh_##flags_op(u32 rm) \ generate_load_rm_sh_builder(flags); \ generate_load_rm_sh_builder(no_flags); \ \ -/* generate_known_address_load_builder(u8); \ - generate_known_address_load_builder(u16); \ - generate_known_address_load_builder(u32); \ - generate_known_address_load_builder(s8); \ - generate_known_address_load_builder(s16); */ \ - \ u32 generate_load_offset_sh(u32 rm) \ { \ switch((opcode >> 5) & 0x03) \ @@ -2787,12 +2744,13 @@ static void emit_pmemld_stub( mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); if (region == 2) { - // EWRAM is a bit special + // Can't do EWRAM with an `andi` instruction (18 bits mask) + mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff + if (!aligned && alignment != 0) { + mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + } // Need to insert a zero in the addr (due to how it's mapped) - mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) - mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18) - mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 - mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr } else if (region == 6) { // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 @@ -2862,12 +2820,13 @@ static void emit_pmemst_stub( } if (region == 2) { - // EWRAM is a bit special + // Can't do EWRAM with an `andi` instruction (18 bits mask) + mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff + if (!aligned && realsize != 0) { + mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + } // Need to insert a zero in the addr (due to how it's mapped) - mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) - mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18) - mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 - mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr } else if (region == 6) { // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 @@ -2888,7 +2847,12 @@ static void emit_pmemst_stub( // Generate SMC write and tracking // TODO: Should we have SMC checks here also for aligned? if (meminfo->check_smc && !aligned) { - mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer + if (region == 2) { + mips_emit_lui(reg_temp, 0x40000 >> 16); + mips_emit_addu(reg_temp, reg_rv, reg_temp); // SMC lives after the ewram + } else { + mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer + } if (realsize == 2) { mips_emit_lw(reg_temp, reg_temp, base_addr); } else if (realsize == 1) { @@ -3272,8 +3236,8 @@ void init_emitter() { const t_stub_meminfo ldinfo [] = { { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom }, // 1 Open load / Ignore store - { emit_pmemld_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] }, - { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above + { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram }, // memsize wrong on purpose + { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers }, { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram }, { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case @@ -3308,8 +3272,8 @@ void init_emitter() { } const t_stub_meminfo stinfo [] = { - { emit_pmemst_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] }, - { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above + { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram }, + { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // I/O is special and mapped with a function call { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram }, { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case -- cgit v1.2.3 From ff510e7f7a0c04c7862e598e8bfc75747f3bf7d1 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:47:51 +0100 Subject: Move caches to stub files to get around gcc 10 Seems that using the __atribute__ magic for sections is not the best way of doing this, since it injects some default atributtes that collide with the user defined ones. Using assembly is far easier in this case. Reworked definitions a bit to make it easier to import from assembly. Also wrapped stuff around macros for easy and less verbose implementation of the symbol prefix issue. --- psp/mips_emit.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'psp/mips_emit.h') diff --git a/psp/mips_emit.h b/psp/mips_emit.h index b75f7f5..b996f2b 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2618,11 +2618,7 @@ static void emit_mem_access_loadop( #define genccall(fn) mips_emit_jal(((u32)fn) >> 2); #endif -// Stub memory map: -// 0 .. 63 First patch handler [#0] -// 448 .. 511 Last patch handler [#7] -// 512+ smc_write handler -#define SMC_WRITE_OFF32 160 +#define SMC_WRITE_OFF32 (10*16) /* 10 handlers (16 insts) */ // Describes a "plain" memory are, that is, an area that is just accessed // as normal memory (with some caveats tho). @@ -2862,8 +2858,7 @@ static void emit_pmemst_stub( } // If the data is non zero, we just wrote over code // Local-jump to the smc_write (which lives at offset:0) - unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1)); - mips_emit_b(bne, reg_zero, reg_temp, instoffset); + mips_emit_b(bne, reg_zero, reg_temp, branch_offset(&stub_arena[SMC_WRITE_OFF32])); } // Store the data (delay slot from the SMC branch) -- cgit v1.2.3 From a494a3f00ee3bd35ee9ab76f8cd4f164da080113 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Thu, 25 Mar 2021 21:02:06 +0100 Subject: Move OAM update flag to a register Fix a small bug in MIPS dynarec that affects non -G0 targets --- psp/mips_emit.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'psp/mips_emit.h') diff --git a/psp/mips_emit.h b/psp/mips_emit.h index b996f2b..d24c174 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2512,7 +2512,8 @@ u8 swi_hle_handle[256] = #define ReOff_SaveR1 (21*4) // 3 save scratch regs #define ReOff_SaveR2 (22*4) #define ReOff_SaveR3 (23*4) -#define ReOff_GP_Save (32*4) // GP_SAVE +#define ReOff_OamUpd (33*4) // OAM_UPDATED +#define ReOff_GP_Save (34*4) // GP_SAVE // Saves all regs to their right slot and loads gp #define emit_save_regs(save_a2) { \ @@ -2873,9 +2874,8 @@ static void emit_pmemst_stub( // Post processing store: // Signal that OAM was updated if (region == 7) { - u32 palcaddr = (u32)&oam_update; - mips_emit_lui(reg_temp, ((palcaddr + 0x8000) >> 16)); - mips_emit_sw(reg_base, reg_temp, palcaddr & 0xffff); // Write any nonzero data + // Write any nonzero data + mips_emit_sw(reg_base, reg_base, ReOff_OamUpd); generate_function_return_swap_delay(); } else { -- cgit v1.2.3 From 7ea6c5e247a742af6f7acfbf215c23264410451f Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Thu, 25 Mar 2021 23:01:20 +0100 Subject: Move OAM RAM to stubs also Makes accesses more efficient for MIPS. Make accesses also fast for palette reads. --- psp/mips_emit.h | 50 ++++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 22 deletions(-) (limited to 'psp/mips_emit.h') diff --git a/psp/mips_emit.h b/psp/mips_emit.h index d24c174..818b724 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2630,6 +2630,7 @@ typedef struct { bool check_smc; // Whether the memory can contain code bool bus16; // Whether it can only be accessed at 16bit u32 baseptr; // Memory base address. + u32 baseoff; // Offset from base_reg } t_stub_meminfo; // Generates the stub to access memory for a given region, access type, @@ -2738,7 +2739,11 @@ static void emit_pmemld_stub( } else { // Generate upper bits of the addr and do addr mirroring // (The address hi16 is rounded up since load uses signed offset) - mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + if (!meminfo->baseoff) { + mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + } else { + base_addr = meminfo->baseoff; + } if (region == 2) { // Can't do EWRAM with an `andi` instruction (18 bits mask) @@ -2761,8 +2766,9 @@ static void emit_pmemld_stub( mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset } else { // Generate regular (<=32KB) mirroring - mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring) - mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr + mips_reg_number breg = (meminfo->baseoff ? reg_base : reg_rv); + mips_emit_andi(reg_temp, reg_a0, memmask); // Clear upper bits (mirroring) + mips_emit_addu(reg_rv, breg, reg_temp); // Adds to base addr } } @@ -3154,7 +3160,7 @@ static void emit_phand( mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7) } - unsigned tbloff = 256 + 2048 + 220 + 4 * toff; // Skip regs and palettes + unsigned tbloff = 256 + 3*1024 + 220 + 4 * toff; // Skip regs and RAMs mips_emit_addu(reg_rv, reg_temp, reg_base); // Add to the base_reg the table offset mips_emit_lw(reg_rv, reg_rv, tbloff); // Read addr from table mips_emit_sll(reg_temp, reg_rv, 4); // 26 bit immediate to the MSB @@ -3229,21 +3235,21 @@ void init_emitter() { // Generate memory handlers const t_stub_meminfo ldinfo [] = { - { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom }, + { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom, 0}, // 1 Open load / Ignore store - { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram }, // memsize wrong on purpose - { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, - { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers }, - { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram }, - { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case - { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram }, - { emit_pmemld_stub, 8, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 9, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 10, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 11, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 12, 0x8000, false, false, 0 }, + { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram, 0 }, // memsize wrong on purpose + { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 }, + { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers, 0 }, + { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram, 0x100 }, + { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case + { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 }, + { emit_pmemld_stub, 8, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 9, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 10, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 11, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 12, 0x8000, false, false, 0, 0 }, // 13 is EEPROM mapped already (a bit special) - { emit_pmemld_stub, 14, 0, false, false, 0 }, // Mapped via function call + { emit_pmemld_stub, 14, 0, false, false, 0, 0 }, // Mapped via function call // 15 Open load / Ignore store }; @@ -3267,12 +3273,12 @@ void init_emitter() { } const t_stub_meminfo stinfo [] = { - { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram }, - { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, + { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram, 0 }, + { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 }, // I/O is special and mapped with a function call - { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram }, - { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case - { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram }, + { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram, 0x100 }, + { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case + { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 }, }; // Store only for "regular"-ish mem regions -- cgit v1.2.3