From 5ffd2832e8b3fc8391a99a53d24788fb736d28c6 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Wed, 3 Mar 2021 01:38:09 +0100 Subject: Rewrite of the MIPS dynarec stubs This allows us to emit the handlers directly in a more efficient manner. At the same time it allows for an easy fix to emit PIC code, which is necessary for libretro. This also enables more platform specific optimizations and variations, perhaps even run-time multiplatform support. --- Makefile | 4 +- cpu.h | 23 +- cpu_threaded.c | 4 + gba_memory.c | 4 +- gba_memory.h | 5 + main.c | 1 + psp/mips_emit.h | 882 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- psp/mips_stub.S | 77 +++-- 8 files changed, 948 insertions(+), 52 deletions(-) diff --git a/Makefile b/Makefile index 20de2e5..85465e0 100644 --- a/Makefile +++ b/Makefile @@ -403,8 +403,8 @@ ifeq ($(DEBUG), 1) OPTIMIZE_SAFE := -O0 -g OPTIMIZE := -O0 -g else - OPTIMIZE_SAFE := -O2 -DNDEBUG - OPTIMIZE := -O3 -DNDEBUG + OPTIMIZE_SAFE := -O2 -DNDEBUG -g + OPTIMIZE := -O3 -DNDEBUG -g endif diff --git a/cpu.h b/cpu.h index 0d7553a..66d24d3 100644 --- a/cpu.h +++ b/cpu.h @@ -122,21 +122,19 @@ s32 translate_block_thumb(u32 pc, translation_region_type translation_region, u32 smc_enable); #if defined(PSP) - -#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) -#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) -#define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128) -#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) - + #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) + #define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128) + #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) #else - -#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5) -#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2) -#define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128 * 2) -#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) - + #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 
* 2) + #define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128 * 2) + #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) #endif +#define STUB_ARENA_SIZE (4*1024) + #if defined(HAVE_MMAP) extern u8* rom_translation_cache; extern u8* ram_translation_cache; @@ -157,6 +155,7 @@ extern int sceBlock; extern u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]; extern u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE]; extern u8 bios_translation_cache[BIOS_TRANSLATION_CACHE_SIZE]; +extern u32 stub_arena[STUB_ARENA_SIZE]; #endif extern u8 *rom_translation_ptr; extern u8 *ram_translation_ptr; diff --git a/cpu_threaded.c b/cpu_threaded.c index 80a6b4a..4d93e55 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -62,6 +62,8 @@ __asm__(".section .jit,\"awx\",%progbits"); __asm__(".section .jit,\"awx\",%nobits"); #endif +u32 stub_arena[STUB_ARENA_SIZE] + __attribute__ ((aligned(4),section(".jit"))); u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE] __attribute__ ((aligned(4),section(".jit"))); u8 *rom_translation_ptr = rom_translation_cache; @@ -3773,3 +3775,5 @@ void dump_translation_cache(void) bios_translation_ptr - bios_translation_cache, fd); fclose(fd); } + + diff --git a/gba_memory.c b/gba_memory.c index 0727279..a01bac5 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -427,7 +427,7 @@ u32 eeprom_address = 0; s32 eeprom_counter = 0; u8 eeprom_buffer[8]; -void function_cc write_eeprom(u32 address, u32 value) +void function_cc write_eeprom(u32 unused_address, u32 value) { switch(eeprom_mode) { @@ -749,6 +749,7 @@ static cpu_alert_type trigger_dma(u32 dma_number, u32 value) cpu_alert_type function_cc write_io_register8(u32 address, u32 value) { + value &= 0xff; switch(address) { case 0x00: @@ -1165,6 +1166,7 @@ cpu_alert_type function_cc write_io_register8(u32 address, u32 value) cpu_alert_type function_cc write_io_register16(u32 address, u32 value) { + value &= 0xffff; switch(address) { case 0x00: diff --git a/gba_memory.h b/gba_memory.h index a37de47..1b332ed 100644 --- 
a/gba_memory.h +++ b/gba_memory.h @@ -163,6 +163,11 @@ u32 function_cc read_memory32(u32 address); cpu_alert_type function_cc write_memory8(u32 address, u8 value); cpu_alert_type function_cc write_memory16(u32 address, u16 value); cpu_alert_type function_cc write_memory32(u32 address, u32 value); +u32 function_cc read_eeprom(void); +void function_cc write_eeprom(u32 address, u32 value); +u8 read_backup(u32 address); +void function_cc write_backup(u32 address, u32 value); +void function_cc write_rtc(u32 address, u32 value); extern u8 *memory_regions[16]; extern u32 memory_limits[16]; diff --git a/main.c b/main.c index 06c3725..ae359f7 100644 --- a/main.c +++ b/main.c @@ -117,6 +117,7 @@ void init_main(void) flush_translation_cache_rom(); flush_translation_cache_ram(); flush_translation_cache_bios(); + init_emitter(); #endif } diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 7c69091..48ed630 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -20,6 +20,19 @@ #ifndef MIPS_EMIT_H #define MIPS_EMIT_H +// Pointers to default handlers. 
+// Use IWRAM as default, assume aligned by default too +#define execute_load_u8 tmemld[0][3] +#define execute_load_s8 tmemld[1][3] +#define execute_load_u16 tmemld[2][3] +#define execute_load_s16 tmemld[4][3] +#define execute_load_u32 tmemld[6][3] +#define execute_aligned_load32 tmemld[10][3] +#define execute_store_u8 tmemst[0][3] +#define execute_store_u16 tmemst[1][3] +#define execute_store_u32 tmemst[2][3] +#define execute_aligned_store32 tmemst[3][3] + u32 mips_update_gba(u32 pc); // Although these are defined as a function, don't call them as @@ -44,9 +57,6 @@ u32 execute_lsr_flags_reg(u32 value, u32 shift); u32 execute_asr_flags_reg(u32 value, u32 shift); u32 execute_ror_flags_reg(u32 value, u32 shift); -void execute_aligned_store32(u32 address, u32 value); -u32 execute_aligned_load32(u32 address); - void reg_check(); typedef enum @@ -97,6 +107,7 @@ typedef enum mips_special_jalr = 0x09, mips_special_movz = 0x0A, mips_special_movn = 0x0B, + mips_special_sync = 0x0F, mips_special_mfhi = 0x10, mips_special_mthi = 0x11, mips_special_mflo = 0x12, @@ -116,7 +127,9 @@ typedef enum mips_special_xor = 0x26, mips_special_nor = 0x27, mips_special_slt = 0x2A, - mips_special_sltu = 0x2B + mips_special_sltu = 0x2B, + mips_special_max = 0x2C, + mips_special_min = 0x2D, } mips_function_special; typedef enum @@ -126,10 +139,18 @@ typedef enum mips_special3_bshfl = 0x20 } mips_function_special3; +typedef enum +{ + mips_bshfl_seb = 0x10, + mips_bshfl_seh = 0x18, + mips_bshfl_wsbh = 0x02, +} mips_function_bshfl; + typedef enum { mips_regimm_bltz = 0x00, - mips_regimm_bltzal = 0x10 + mips_regimm_bltzal = 0x10, + mips_regimm_synci = 0x1F } mips_function_regimm; typedef enum @@ -163,8 +184,14 @@ typedef enum mips_opcode_sb = 0x28, mips_opcode_sh = 0x29, mips_opcode_sw = 0x2B, + mips_opcode_cache = 0x2F, } mips_opcode; +#define mips_emit_cache(operation, rs, immediate) \ + *((u32 *)translation_ptr) = (mips_opcode_cache << 26) | \ + (rs << 21) | (operation << 16) | (immediate & 
0xFFFF); \ + translation_ptr += 4 \ + #define mips_emit_reg(opcode, rs, rt, rd, shift, function) \ *((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \ (rs << 21) | (rt << 16) | (rd << 11) | (shift << 6) | function; \ @@ -184,12 +211,12 @@ typedef enum #define mips_emit_imm(opcode, rs, rt, immediate) \ *((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \ - (rs << 21) | (rt << 16) | (immediate & 0xFFFF); \ + (rs << 21) | (rt << 16) | ((immediate) & 0xFFFF); \ translation_ptr += 4 \ #define mips_emit_regimm(function, rs, immediate) \ *((u32 *)translation_ptr) = (mips_opcode_regimm << 26) | \ - (rs << 21) | (mips_regimm_##function << 16) | (immediate & 0xFFFF); \ + (rs << 21) | (mips_regimm_##function << 16) | ((immediate) & 0xFFFF); \ translation_ptr += 4 \ #define mips_emit_jump(opcode, offset) \ @@ -203,6 +230,12 @@ typedef enum #define mips_absolute_offset(offset) \ ((u32)offset / 4) \ +#define mips_emit_max(rd, rs, rt) \ + mips_emit_special(max, rs, rt, rd, 0) \ + +#define mips_emit_min(rd, rs, rt) \ + mips_emit_special(min, rs, rt, rd, 0) \ + #define mips_emit_addu(rd, rs, rt) \ mips_emit_special(addu, rs, rt, rd, 0) \ @@ -293,6 +326,9 @@ typedef enum #define mips_emit_movz(rd, rs, rt) \ mips_emit_special(movz, rs, rt, rd, 0) \ +#define mips_emit_sync() \ + mips_emit_special(sync, 0, 0, 0, 0) \ + #define mips_emit_lb(rt, rs, offset) \ mips_emit_imm(lb, rs, rt, offset) \ @@ -344,6 +380,12 @@ typedef enum #define mips_emit_ins(rt, rs, pos, size) \ mips_emit_special3(ins, rs, rt, (pos + size - 1), pos) \ +#define mips_emit_seb(rt, rd) \ + mips_emit_special3(bshfl, 0, rt, rd, mips_bshfl_seb) \ + +#define mips_emit_seh(rt, rd) \ + mips_emit_special3(bshfl, 0, rt, rd, mips_bshfl_seh) \ + // Breaks down if the backpatch offset is greater than 16bits, take care // when using (should be okay if limited to conditional instructions) @@ -369,9 +411,15 @@ typedef enum #define mips_emit_jr(rs) \ mips_emit_special(jr, rs, 0, 0, 0) \ +#define mips_emit_synci(rs, 
offset) \ + mips_emit_regimm(synci, rs, offset) \ + #define mips_emit_bltzal(rs, offset) \ mips_emit_regimm(bltzal, rs, offset) \ +#define mips_emit_bltz(rs, offset) \ + mips_emit_regimm(bltz, rs, offset) \ + #define mips_emit_nop() \ mips_emit_sll(reg_zero, reg_zero, 0) \ @@ -566,6 +614,15 @@ u32 arm_to_mips_reg[] = translation_ptr += 4; \ } \ +#define generate_function_return_swap_delay() \ +{ \ + u32 delay_instruction = address32(translation_ptr, -4); \ + translation_ptr -= 4; \ + mips_emit_jr(mips_reg_ra); \ + address32(translation_ptr, 0) = delay_instruction; \ + translation_ptr += 4; \ +} \ + #define generate_swap_delay() \ { \ u32 delay_instruction = address32(translation_ptr, -8); \ @@ -2468,4 +2525,815 @@ u8 swi_hle_handle[256] = generate_load_pc(reg_a0, pc); \ mips_emit_sw(reg_a0, reg_base, (REG_PC * 4)) \ + +// Register save layout as follows: +#define ReOff_RegPC (15*4) // REG_PC +#define ReOff_CPSR (20*4) // REG_CPSR +#define ReOff_SaveR1 (21*4) // 3 save scratch regs +#define ReOff_SaveR2 (22*4) +#define ReOff_SaveR3 (23*4) +#define ReOff_GP_Save (32*4) // GP_SAVE + +// Saves all regs to their right slot and loads gp +#define emit_save_regs(save_a2) \ + for (unsigned i = 0; i < 15; i++) { \ + mips_emit_sw(arm_to_mips_reg[i], reg_base, 4 * i); \ + } \ + if (save_a2) { \ + mips_emit_sw(reg_a2, reg_base, ReOff_SaveR2); \ + } \ + /* Load the gp pointer, used by C code */ \ + mips_emit_lw(mips_reg_gp, reg_base, ReOff_GP_Save); \ + +// Restores the registers from their slot +#define emit_restore_regs(restore_a2) \ + if (restore_a2) { \ + mips_emit_lw(reg_a2, reg_base, ReOff_SaveR2); \ + } \ + for (unsigned i = 0; i < 15; i++) { \ + mips_emit_lw(arm_to_mips_reg[i], reg_base, 4 * i); \ + } \ + +// Emits a function call for a read or a write (for special stuff like flash) +#define emit_mem_call_ds(fnptr, mask) \ + mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); \ + emit_save_regs(true); \ + mips_emit_jal(((u32)(fnptr)) >> 2); \ + mips_emit_andi(reg_a0, 
reg_a0, (mask)); \ + emit_restore_regs(true); \ + mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); \ + mips_emit_jr(mips_reg_ra); + +#define emit_mem_call(fnptr, mask) \ + emit_mem_call_ds(fnptr, mask) \ + mips_emit_nop(); + +// Pointer table to stubs, indexed by type and region +// Caution! This is not really a ptr table, but contains pre-encoed JALs +extern u32 tmemld[11][16]; +extern u32 tmemst[ 4][16]; +void mips_lookup_pc(); +cpu_alert_type write_io_register8 (u32 address, u32 value); +cpu_alert_type write_io_register16(u32 address, u32 value); +cpu_alert_type write_io_register32(u32 address, u32 value); +void write_io_epilogue(); + +// This is a pointer table to the open load stubs, used by the BIOS (optimization) +u32* openld_core_ptrs[11]; + +const u8 ldhldrtbl[11] = {0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5}; +#define ld_phndlr_branch(memop) \ + (((u32*)&stub_arena[ldhldrtbl[(memop)] * 16]) - ((u32*)translation_ptr + 1)) + +#define st_phndlr_branch(memop) \ + (((u32*)&stub_arena[((memop) + 6) * 16]) - ((u32*)translation_ptr + 1)) + +#define branch_handlerid(phndlrid) \ + (((u32*)&stub_arena[(phndlrid) * 16]) - ((u32*)translation_ptr + 1)) + +#define branch_offset(ptr) \ + (((u32*)ptr) - ((u32*)translation_ptr + 1)) + +static void emit_mem_access_loadop( + u8 *translation_ptr, + u32 base_addr, unsigned size, unsigned alignment, bool signext) +{ + switch (size) { + case 2: + mips_emit_lw(reg_rv, reg_rv, (base_addr & 0xffff)); + break; + case 1: + if (signext) { + // Load 16 with sign extension is essentially a load byte + if (alignment) { + mips_emit_lb(reg_rv, reg_rv, (base_addr & 0xffff)); + } else { + mips_emit_lh(reg_rv, reg_rv, (base_addr & 0xffff)); + } + } else { + mips_emit_lhu(reg_rv, reg_rv, (base_addr & 0xffff)); + } + break; + default: + if (signext) { + mips_emit_lb(reg_rv, reg_rv, (base_addr & 0xffff)); + } else { + mips_emit_lbu(reg_rv, reg_rv, (base_addr & 0xffff)); + } + break; + }; +} + +// Stub memory map: +// 0 .. 
63 First patch handler [#0] +// 576 .. 639 Last patch handler [#9] +// 640+ smc_write handler (SMC_WRITE_OFF32 below is counted in 32 bit words) +#define SMC_WRITE_OFF32 160 + +// Describes a "plain" memory area, that is, an area that is just accessed +// as normal memory (with some caveats tho). +typedef struct { + void *emitter; + unsigned region; // Region ID (top 8 bits) + unsigned memsize; // Area size in bytes (memsize - 1 is used as mirroring mask) + bool check_smc; // Whether the memory can contain code + bool bus16; // Whether it can only be accessed at 16bit + u32 baseptr; // Memory base address. +} t_stub_meminfo; + +// Generates the stub to access memory for a given region, access type, +// size and misalignment. +// Handles "special" cases like weirdly mapped memory +static void emit_pmemld_stub( + unsigned memop_number, const t_stub_meminfo *meminfo, + bool signext, unsigned size, + unsigned alignment, bool aligned, + u8 **tr_ptr) +{ + u8 *translation_ptr = *tr_ptr; + unsigned region = meminfo->region; + u32 base_addr = meminfo->baseptr; + + if (region >= 9 && region <= 11) { + // Use the same handler for these regions (just replicas) + tmemld[memop_number][region] = tmemld[memop_number][8]; + return; + } + + // Clean up one or two bits (to align access). It might already be aligned!
+ u32 memmask = (meminfo->memsize - 1); + memmask = (memmask >> size) << size; // Clear 1 or 2 (or none) bits + + // Add the stub to the table (add the JAL instruction encoded already) + tmemld[memop_number][region] = (u32)translation_ptr; + + // Size: 0 (8 bits), 1 (16 bits), 2 (32 bits) + // First check we are in the right memory region + unsigned regionbits = 8; + unsigned regioncheck = region; + if (region == 8) { + // This is an optimization for ROM regions + // For region 8-11 we reuse the same code (and have a more generic check) + // Region 12 is harder to cover without changing the check (shift + xor) + regionbits = 6; + regioncheck >>= 2; // Ignore the two LSB, don't care + } + + // Address checking: jumps to handler if bad region/alignment + mips_emit_srl(reg_temp, reg_a0, (32 - regionbits)); + if (!aligned && size != 0) { // u8 or aligned u32 dont need to check alignment bits + mips_emit_ins(reg_temp, reg_a0, regionbits, size); // Add 1 or 2 bits of alignment + } + if (regioncheck || alignment) { // If region and alignment are zero, can skip + mips_emit_xori(reg_temp, reg_temp, regioncheck | (alignment << regionbits)); + } + + // The patcher to use depends on ld/st, access size, and sign extension + // (so there's 10 of them). They live in the top stub addresses. + mips_emit_b(bne, reg_zero, reg_temp, ld_phndlr_branch(memop_number)); + + // BIOS region requires extra checks for protected reads + if (region == 0) { + // BIOS is *not* mirrored, check that + mips_emit_srl(reg_rv, reg_a0, 14); + unsigned joff = (openld_core_ptrs[memop_number] - ((u32*)translation_ptr + 1)); + mips_emit_b(bne, reg_zero, reg_rv, joff); // Jumps to read open + + // Check whether the read is allowed. Only within BIOS! + // TODO: FIX THIS! This should be a protected read, not an open one! 
+ mips_emit_srl(reg_temp, reg_a1, 14); + unsigned jof2 = (openld_core_ptrs[memop_number] - ((u32*)translation_ptr + 1)); + mips_emit_b(bne, reg_zero, reg_temp, jof2); + } + + if (region >= 8 && region <= 12) { + u8 *jmppatch; + // ROM area: might need to load the ROM on-demand + mips_emit_srl(reg_rv, reg_a0, 15); // 32KB page number + mips_emit_sll(reg_rv, reg_rv, 2); // (word indexed) + mips_emit_addu(reg_rv, reg_rv, reg_base); // base + offset + + mips_emit_lw(reg_rv, reg_rv, 0x8000); // base[offset-0x8000] + mips_emit_b_filler(bne, reg_rv, reg_zero, jmppatch); // if not null, can skip load page + mips_emit_andi(reg_temp, reg_a0, memmask); // Get the lowest 15 bits [delay] + + // This code call the C routine to map the relevant ROM page + emit_save_regs(aligned); + mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); + mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff + mips_emit_jal(((u32)&load_gamepak_page) >> 2); + mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1); + + mips_emit_lw(reg_temp, reg_base, ReOff_SaveR1); + emit_restore_regs(aligned); + mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3); + + generate_branch_patch_conditional(jmppatch, translation_ptr); + // Now we can proceed to load, place addr in the right register + mips_emit_addu(reg_rv, reg_rv, reg_temp); + } else if (region == 14) { + // Read from flash, is a bit special, fn call + emit_mem_call_ds(&read_backup, 0xFFFF); + if (!size && signext) { + mips_emit_seb(reg_rv, reg_rv); + } else if (size == 1 && alignment) { + mips_emit_seb(reg_rv, reg_rv); + } else if (size == 2) { + mips_emit_rotr(reg_rv, reg_rv, 8 * alignment); + } else { + mips_emit_nop(); + } + *tr_ptr = translation_ptr; + return; + } else { + // Generate upper bits of the addr and do addr mirroring + // (The address hi16 is rounded up since load uses signed offset) + mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + + if (region == 2) { + // EWRAM is a bit special + // Need to insert a zero in the addr (due to 
how it's mapped) + mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) + mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18) + mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 + mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + } else if (region == 6) { + // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous + mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 + mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block) + if (!aligned && alignment != 0) { + mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + } + mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block + mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay] + mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block) + mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset + } else { + // Generate regular (<=32KB) mirroring + mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring) + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr + } + } + + // Aligned accesses (or the weird s16u1 case) are just one inst + if (alignment == 0 || (size == 1 && signext)) { + emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); // Delay slot + translation_ptr += 4; + } + else { + // Unaligned accesses (require rotation) need two insts + emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); + translation_ptr += 4; + mips_emit_rotr(reg_rv, reg_rv, alignment * 8); // Delay slot + } + + generate_function_return_swap_delay(); // Return. 
Move prev inst to delay slot + *tr_ptr = translation_ptr; +} + +// Generates the stub to store memory for a given region and size +// Handles "special" cases like weirdly mapped memory +static void emit_pmemst_stub( + unsigned memop_number, const t_stub_meminfo *meminfo, + unsigned size, bool aligned, u8 **tr_ptr) +{ + u8 *translation_ptr = *tr_ptr; + unsigned region = meminfo->region; + u32 base_addr = meminfo->baseptr; + + // Palette, VRAM and OAM cannot be really byte accessed (use a 16 bit store) + bool doubleaccess = (size == 0 && meminfo->bus16); + unsigned realsize = size; + if (doubleaccess) + realsize = 1; + + // Clean up one or two bits (to align access). It might already be aligned! + u32 memmask = (meminfo->memsize - 1); + memmask = (memmask >> realsize) << realsize; + + // Add the stub to the table (add the JAL instruction encoded already) + tmemst[memop_number][region] = (u32)translation_ptr; + + // First check we are in the right memory region (same as loads) + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_temp, reg_temp, region); + mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number)); + + mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + + if (doubleaccess) { + mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8) + } + + if (region == 2) { + // EWRAM is a bit special + // Need to insert a zero in the addr (due to how it's mapped) + mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) + mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18) + mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 + mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + } else if (region == 6) { + // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous + mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 + mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block) + if (!aligned && realsize != 0) 
{ + mips_emit_ins(reg_a0, reg_zero, 0, realsize);// addr & ~1/2 (align to size) + } + mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block + mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay] + mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block) + mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset + } else { + // Generate regular (<=32KB) mirroring + mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring) + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr + } + + // Generate SMC write and tracking + // TODO: Should we have SMC checks here also for aligned? + if (meminfo->check_smc && !aligned) { + mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer + if (realsize == 2) { + mips_emit_lw(reg_temp, reg_temp, base_addr); + } else if (realsize == 1) { + mips_emit_lh(reg_temp, reg_temp, base_addr); + } else { + mips_emit_lb(reg_temp, reg_temp, base_addr); + } + // If the data is non zero, we just wrote over code + // Local-jump to the smc_write (which lives at offset:0) + unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1)); + mips_emit_b(bne, reg_zero, reg_temp, instoffset); + } + + // Store the data (delay slot from the SMC branch) + if (realsize == 2) { + mips_emit_sw(reg_a1, reg_rv, base_addr); + } else if (realsize == 1) { + mips_emit_sh(reg_a1, reg_rv, base_addr); + } else { + mips_emit_sb(reg_a1, reg_rv, base_addr); + } + + // Post processing store: + // Signal that OAM was updated + if (region == 7) { + u32 palcaddr = (u32)&oam_update; + mips_emit_lui(reg_temp, ((palcaddr + 0x8000) >> 16)); + mips_emit_sw(reg_base, reg_temp, palcaddr & 0xffff); // Write any nonzero data + generate_function_return_swap_delay(); + } + else { + mips_emit_jr(mips_reg_ra); + mips_emit_nop(); + } + + *tr_ptr = translation_ptr; +} + +// Palette is accessed differently and stored in a decoded manner +static void 
emit_palette_hdl( + unsigned memop_number, const t_stub_meminfo *meminfo, + unsigned size, bool aligned, u8 **tr_ptr) +{ + u8 *translation_ptr = *tr_ptr; + + // Palette cannot be accessed at byte level + unsigned realsize = size ? size : 1; + u32 memmask = (meminfo->memsize - 1); + memmask = (memmask >> realsize) << realsize; + + // Add the stub address to the store table (region 5) + tmemst[memop_number][5] = (u32)translation_ptr; + + // First check we are in the right memory region (same as loads) + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_temp, reg_temp, 5); + mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number)); + mips_emit_andi(reg_rv, reg_a0, memmask); // Clear upper bits (mirroring) [delay] + if (size == 0) { + mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8) + } + mips_emit_addu(reg_rv, reg_rv, reg_base); // rv = base + mirrored offset + + // Store the raw data at its mirrored offset (rv + 0x100), not always at entry 0 + if (realsize == 2) { + mips_emit_sw(reg_a1, reg_rv, 0x100); + } else if (realsize == 1) { + mips_emit_sh(reg_a1, reg_rv, 0x100); + } + + mips_emit_sll(reg_temp, reg_a1, 1); + mips_emit_andi(reg_temp, reg_temp, 0xFFC0); + mips_emit_ins(reg_temp, reg_a1, 0, 5); + mips_emit_sh(reg_temp, reg_rv, 0x500); // Converted copy lives 0x400 past the raw one + + if (size == 2) { + // Convert the second half-word also + mips_emit_srl(reg_a1, reg_a1, 16); + mips_emit_sll(reg_temp, reg_a1, 1); + mips_emit_andi(reg_temp, reg_temp, 0xFFC0); + mips_emit_ins(reg_temp, reg_a1, 0, 5); + mips_emit_sh(reg_temp, reg_rv, 0x502); + } + generate_function_return_swap_delay(); + + *tr_ptr = translation_ptr; +} + +// This emits stubs for regions where writes have no side-effects +static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) { + u8 *translation_ptr = *tr_ptr; + + // Region 0-1 (BIOS and ignore) + tmemst[size][0] = tmemst[size][1] = (u32)translation_ptr; + mips_emit_srl(reg_temp, reg_a0, 25); // Check 7 MSB to be zero + mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size)); +
mips_emit_nop(); + mips_emit_jr(mips_reg_ra); + mips_emit_nop(); + + // Region 8-B + tmemst[size][ 8] = tmemst[size][ 9] = + tmemst[size][10] = tmemst[size][11] = (u32)translation_ptr; + + mips_emit_srl(reg_temp, reg_a0, 26); // Check 6 MSB to be 0x02 + mips_emit_xori(reg_temp, reg_temp, 0x02); + mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size)); + mips_emit_nop(); + mips_emit_jr(mips_reg_ra); + mips_emit_nop(); + + // Region C or F (or bigger!) + tmemst[size][12] = tmemst[size][15] = (u32)translation_ptr; + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_sltiu(reg_rv, reg_temp, 0x0F); + mips_emit_b(beq, reg_rv, reg_zero, 3); // If 15 or bigger, ignore store + mips_emit_xori(reg_rv, reg_temp, 0x0C); // rv is zero only for region 0x0C [delay] + mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(size)); // Test the xor result, reg_temp is never zero here + mips_emit_nop(); + mips_emit_jr(mips_reg_ra); + mips_emit_nop(); + + *tr_ptr = translation_ptr; +} + +// Stubs for regions with EEPROM or flash/SRAM +static void emit_saveaccess_stub(u8 **tr_ptr) { + u8 *translation_ptr = *tr_ptr; + const u8 opmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 10}, {10, 11} }; + + // Writes to region 8 are directed to RTC (only 16 bit ones though) + tmemst[1][8] = (u32)translation_ptr; // Store table: this stub calls write_rtc + emit_mem_call(&write_rtc, 0xFE); // NOTE(review): stub has no region check before the call + + // These are for region 0xD where EEPROM is mapped. Addr is ignored + // Value is limited to one bit (both reading and writing!) + u32 *read_hndlr = (u32*)translation_ptr; + emit_mem_call(&read_eeprom, 0x3FF); + u32 *write_hndlr = (u32*)translation_ptr; + emit_mem_call(&write_eeprom, 0x3FF); + + // Map loads to the read handler. + for (unsigned opt = 0; opt < 6; opt++) { + // Unalignment is not relevant here, so map them all to the same handler.
+ for (unsigned i = opmap[opt][0]; i < opmap[opt][1]; i++) + tmemld[i][13] = (u32)translation_ptr; + // Emit just a check + patch jump + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_rv, reg_temp, 0x0D); + mips_emit_b(bne, reg_rv, reg_zero, branch_handlerid(opt)); + mips_emit_nop(); + mips_emit_b(beq, reg_zero, reg_zero, branch_offset(read_hndlr)); // Delay slot: whatever is emitted next (an srl into reg_temp) + } + // This is for stores + for (unsigned strop = 0; strop <= 3; strop++) { + tmemst[strop][13] = (u32)translation_ptr; + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_rv, reg_temp, 0x0D); + mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop)); + mips_emit_nop(); + mips_emit_b(beq, reg_zero, reg_zero, branch_offset(write_hndlr)); // Delay slot: whatever is emitted next (an srl into reg_temp) + } + + // Flash/SRAM/Backup writes are only 8 bit supported (other sizes are ignored) + for (unsigned strop = 0; strop <= 3; strop++) { + tmemst[strop][14] = (u32)translation_ptr; + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_rv, reg_temp, 0x0E); + mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop)); + if (strop == 0) { + emit_mem_call(&write_backup, 0xFFFF); + } else { + mips_emit_nop(); + mips_emit_jr(mips_reg_ra); // Does nothing in this case + mips_emit_nop(); + } + } + + // Region 4 writes + // I/O writes are also a bit special, they can trigger things like DMA, IRQs...
+ // Also: aligned (strop==3) accesses do not trigger IRQs + const u32 iowrtbl[] = { + (u32)&write_io_register8, (u32)&write_io_register16, + (u32)&write_io_register32, (u32)&write_io_register32 }; + const u32 amsk[] = {0x3FF, 0x3FE, 0x3FC, 0x3FC}; + for (unsigned strop = 0; strop <= 3; strop++) { + tmemst[strop][4] = (u32)translation_ptr; + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_temp, reg_temp, 0x04); + mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(strop)); + + mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); // Store the return addr + emit_save_regs(strop == 3); + mips_emit_andi(reg_a0, reg_a0, amsk[strop]); + mips_emit_jal(iowrtbl[strop] >> 2); + + if (strop < 3) { + mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Save PC (delay) + mips_emit_j(((u32)&write_io_epilogue) >> 2); + mips_emit_nop(); + } else { + mips_emit_nop(); + mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3); + emit_restore_regs(true); + generate_function_return_swap_delay(); + } + } + + *tr_ptr = translation_ptr; +} + +// Emits openload store memory region stub +static void emit_openload_stub( + unsigned memopn, bool signext, unsigned size, + unsigned alignment, bool aligned, u8 **tr_ptr +) { + u8 *translation_ptr = *tr_ptr; + + // This affects regions 1 and 15 + tmemld[memopn][ 1] = (u32)translation_ptr; + tmemld[memopn][15] = (u32)translation_ptr; + + // We need to repatch if: alignment is different or + // if we are accessing a non-ignore region (1 and 15) + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_sltiu(reg_rv, reg_temp, 0x0F); + mips_emit_addiu(reg_temp, reg_temp, -1); + mips_emit_sltu(reg_temp, reg_zero, reg_temp); + mips_emit_and(reg_temp, reg_temp, reg_rv); + + if (!aligned && size != 0) { + // Also check and aggregate alignment + mips_emit_ext(reg_rv, reg_a0, 0, size); + mips_emit_xori(reg_rv, reg_rv, alignment); + mips_emit_or(reg_temp, reg_rv, reg_temp); + } + + // Jump to patch handler + mips_emit_b(bne, reg_zero, reg_temp, 
ld_phndlr_branch(memopn)); + + // BIOS can jump here to do open loads + openld_core_ptrs[memopn] = (u32*)translation_ptr; + + // Proceed with open load by reading data at PC (previous data in the bus) + mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR + mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit + + emit_save_regs(aligned); + mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); + + switch (size) { + case 0: + mips_emit_b(beq, reg_zero, reg_rv, 2); // Depends on CPU mode + mips_emit_andi(reg_a0, reg_a0, 0x3); // ARM: Isolate two LSB + mips_emit_andi(reg_a0, reg_a0, 0x1); // Thb: Isolate one LSB + mips_emit_jal(((u32)&read_memory8) >> 2); + mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay) + break; + case 1: + mips_emit_b(beq, reg_zero, reg_rv, 2); + mips_emit_andi(reg_a0, reg_a0, 0x2); // ARM: Isolate bit 1 + mips_emit_andi(reg_a0, reg_a0, 0x0); // Thb: Ignore low bits at all + mips_emit_jal(((u32)&read_memory16) >> 2); + mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay) + break; + default: + mips_emit_b(beq, reg_zero, reg_rv, 5); + mips_emit_addu(reg_a0, reg_zero, reg_a1); // Move PC to arg0 + + mips_emit_jal(((u32)&read_memory16) >> 2); + mips_emit_nop(); + mips_emit_b(beq, reg_zero, reg_zero, 3); + mips_emit_ins(reg_rv, reg_rv, 16, 16); // res = res | (res << 16) [delay] + + mips_emit_jal(((u32)&read_memory32) >> 2); + mips_emit_nop(); + break; + }; + + mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); + emit_restore_regs(aligned); + + // Same behaviour as reading from region14 really (8 bit bus) + if (!size && signext) { + mips_emit_seb(reg_rv, reg_rv); + } else if (size == 1 && alignment) { + mips_emit_seb(reg_rv, reg_rv); + } else if (size == 2) { + mips_emit_rotr(reg_rv, reg_rv, 8 * alignment); + } + generate_function_return_swap_delay(); + + *tr_ptr = translation_ptr; +} + +typedef void (*sthldr_t)( + unsigned memop_number, const t_stub_meminfo *meminfo, + unsigned size, bool aligned, u8 **tr_ptr); 
+ +typedef void (*ldhldr_t)( + unsigned memop_number, const t_stub_meminfo *meminfo, + bool signext, unsigned size, + unsigned alignment, bool aligned, + u8 **tr_ptr); + +// Generates a patch handler for a given access size +// It will detect the access alignment and memory region and load +// the corresponding handler from the table (at the right offset) +// and patch the jal instruction from where it was called. +static void emit_phand( + u8 **tr_ptr, unsigned size, unsigned toff, + bool check_alignment) +{ + u8 *translation_ptr = *tr_ptr; + + mips_emit_srl(reg_temp, reg_a0, 24); + #ifdef PSP + mips_emit_addiu(reg_rv, reg_zero, 15*4); // Table limit (max) + mips_emit_sll(reg_temp, reg_temp, 2); // Table is word indexed + mips_emit_min(reg_temp, reg_temp, reg_rv);// Do not overflow table + #else + mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // Check for addr 0x1XXX.. 0xFXXX + mips_emit_b(bne, reg_zero, reg_rv, 2); // Skip two insts (well, cant skip ds) + mips_emit_sll(reg_temp, reg_temp, 2); // Table is word indexed + mips_emit_addiu(reg_temp, reg_zero, 15*4);// Simulate ld/st to 0x0FXXX (open/ignore) + #endif + + // Stores or byte-accesses do not care about alignment + if (check_alignment) { + // Move alignment bits for the table lookup + mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7) + } + + unsigned tbloff = 256 + 2048 + 220 + 4 * toff; // Skip regs and palettes + mips_emit_addu(reg_rv, reg_temp, reg_base); // Add to the base_reg the table offset + mips_emit_lw(reg_rv, reg_rv, tbloff); // Read addr from table + mips_emit_sll(reg_temp, reg_rv, 4); // 26 bit immediate to the MSB + mips_emit_ori(reg_temp, reg_temp, 0x3); // JAL opcode + mips_emit_rotr(reg_temp, reg_temp, 6); // Swap opcode and immediate + mips_emit_sw(reg_temp, mips_reg_ra, -8); // Patch instruction! 
+ + mips_emit_cache(0x1A, mips_reg_ra, -8); + mips_emit_jr(reg_rv); // Jump directly to target for speed + mips_emit_cache(0x08, mips_reg_ra, -8); + + // Round up handlers to 16 instructions for easy addressing :) + while (translation_ptr - *tr_ptr < 64) { + mips_emit_nop(); + } + + *tr_ptr = translation_ptr; +} + +// This function emits the following stubs: +// - smc_write: Jumps to C code to trigger a cache flush +// - memop patcher: Patches a memop whenever it accesses the wrong mem region +// - mem stubs: There's stubs for load & store, and every memory region +// and possible operand size and misaligment (+sign extensions) +void init_emitter() { + // Initialize memory to a debuggable state + memset(stub_arena, 0, sizeof(stub_arena)); // nop + + // Generates the trampoline and helper stubs that we need + u8 *translation_ptr = (u8*)&stub_arena[0]; + + // Generate first the patch handlers + // We have 6+4 patchers, one per mem type (6 or 4) + + // Calculate the offset into tmemld[10][XX]; + emit_phand(&translation_ptr, 0, 0 * 16, false); // ld u8 + emit_phand(&translation_ptr, 0, 1 * 16, false); // ld s8 + emit_phand(&translation_ptr, 1, 2 * 16, true); // ld u16 + u16u1 + emit_phand(&translation_ptr, 1, 4 * 16, true); // ld s16 + s16u1 + emit_phand(&translation_ptr, 2, 6 * 16, true); // ld u32 (0/1/2/3u) + emit_phand(&translation_ptr, 2, 10 * 16, false); // ld aligned 32 + // Store table is immediately after + emit_phand(&translation_ptr, 0, 11 * 16, false); // st u8 + emit_phand(&translation_ptr, 1, 12 * 16, false); // st u16 + emit_phand(&translation_ptr, 2, 13 * 16, false); // st u32 + emit_phand(&translation_ptr, 2, 14 * 16, false); // st aligned 32 + + // Generate SMC write handler, with the lookup machinery + // Call out the flushing routine (save PC) + emit_save_regs(false); + mips_emit_jal(((u32)&flush_translation_cache_ram) >> 2); + mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Delay slot + + mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR + 
mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit + mips_emit_b(beq, reg_rv, reg_zero, 3); // Skip to ARM mode + mips_emit_lw(reg_a0, reg_base, ReOff_RegPC); // arg0=pc + // Lookup thumb PC and execute + mips_emit_jal(((u32)&block_lookup_address_thumb) >> 2); + mips_emit_addiu(mips_reg_ra, mips_reg_ra, 8); // Skip 2 insts on return! + // Lookup arm PC and execute + mips_emit_jal(((u32)&block_lookup_address_arm) >> 2); + mips_emit_nop(); + // Epiloge (restore and jump) + emit_restore_regs(false); + mips_emit_jr(reg_rv); // Go execute the code + mips_emit_nop(); + + // Generate the openload handlers (for accesses to unmapped mem) + emit_openload_stub(0, false, 0, 0, false, &translation_ptr); // ld u8 + emit_openload_stub(1, true, 0, 0, false, &translation_ptr); // ld s8 + emit_openload_stub(2, false, 1, 0, false, &translation_ptr); // ld u16 + emit_openload_stub(3, false, 1, 1, false, &translation_ptr); // ld u16u1 + emit_openload_stub(4, true, 1, 0, false, &translation_ptr); // ld s16 + emit_openload_stub(5, true, 1, 1, false, &translation_ptr); // ld s16u1 + emit_openload_stub(6, false, 2, 0, false, &translation_ptr); // ld u32 + emit_openload_stub(7, false, 2, 1, false, &translation_ptr); // ld u32u1 + emit_openload_stub(8, false, 2, 2, false, &translation_ptr); // ld u32u2 + emit_openload_stub(9, false, 2, 3, false, &translation_ptr); // ld u32u3 + emit_openload_stub(10,false, 2, 0, true, &translation_ptr); // ld aligned 32 + + // Here we emit the ignore store area, just checks and does nothing + for (unsigned i = 0; i < 4; i++) + emit_ignorestore_stub(i, &translation_ptr); + + // Here go the save game handlers + emit_saveaccess_stub(&translation_ptr); + + // Generate memory handlers + const t_stub_meminfo ldinfo [] = { + { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom }, + // 1 Open load / Ignore store + { emit_pmemld_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] }, + { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize 
wrong on purpose, see above + { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers }, + { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram }, + { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case + { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram }, + { emit_pmemld_stub, 8, 0x8000, false, false, 0 }, + { emit_pmemld_stub, 9, 0x8000, false, false, 0 }, + { emit_pmemld_stub, 10, 0x8000, false, false, 0 }, + { emit_pmemld_stub, 11, 0x8000, false, false, 0 }, + { emit_pmemld_stub, 12, 0x8000, false, false, 0 }, + // 13 is EEPROM mapped already (a bit special) + { emit_pmemld_stub, 14, 0, false, false, 0 }, // Mapped via function call + // 15 Open load / Ignore store + }; + + for (unsigned i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) { + ldhldr_t handler = (ldhldr_t)ldinfo[i].emitter; + /* region info signext sz al isaligned */ + handler(0, &ldinfo[i], false, 0, 0, false, &translation_ptr); // ld u8 + handler(1, &ldinfo[i], true, 0, 0, false, &translation_ptr); // ld s8 + + handler(2, &ldinfo[i], false, 1, 0, false, &translation_ptr); // ld u16 + handler(3, &ldinfo[i], false, 1, 1, false, &translation_ptr); // ld u16u1 + handler(4, &ldinfo[i], true, 1, 0, false, &translation_ptr); // ld s16 + handler(5, &ldinfo[i], true, 1, 1, false, &translation_ptr); // ld s16u1 + + handler(6, &ldinfo[i], false, 2, 0, false, &translation_ptr); // ld u32 + handler(7, &ldinfo[i], false, 2, 1, false, &translation_ptr); // ld u32u1 + handler(8, &ldinfo[i], false, 2, 2, false, &translation_ptr); // ld u32u2 + handler(9, &ldinfo[i], false, 2, 3, false, &translation_ptr); // ld u32u3 + + handler(10,&ldinfo[i], false, 2, 0, true, &translation_ptr); // aligned ld u32 + } + + const t_stub_meminfo stinfo [] = { + { emit_pmemst_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] }, + { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above + // I/O is special and mapped with a function 
call + { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram }, + { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case + { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram }, + }; + + // Store only for "regular"-ish mem regions + // + for (unsigned i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) { + sthldr_t handler = (sthldr_t)stinfo[i].emitter; + handler(0, &stinfo[i], 0, false, &translation_ptr); // st u8 + handler(1, &stinfo[i], 1, false, &translation_ptr); // st u16 + handler(2, &stinfo[i], 2, false, &translation_ptr); // st u32 + handler(3, &stinfo[i], 2, true, &translation_ptr); // st aligned 32 + } +} + #endif + + diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 1b24b0d..a14085b 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -23,16 +23,16 @@ .global mips_indirect_branch_arm .global mips_indirect_branch_thumb .global mips_indirect_branch_dual -.global execute_load_u8 -.global execute_load_u16 -.global execute_load_u32 -.global execute_load_s8 -.global execute_load_s16 -.global execute_store_u8 -.global execute_store_u16 -.global execute_store_u32 -.global execute_aligned_load32 -.global execute_aligned_store32 +#.global execute_load_u8 +#.global execute_load_u16 +#.global execute_load_u32 +#.global execute_load_s8 +#.global execute_load_s16 +#.global execute_store_u8 +#.global execute_store_u16 +#.global execute_store_u32 +#.global execute_aligned_load32 +#.global execute_aligned_store32 .global execute_read_cpsr .global execute_read_spsr .global execute_swi @@ -48,9 +48,15 @@ .global reg_check .global palette_ram .global palette_ram_converted +.global init_emitter +.global mips_lookup_pc +.global write_io_epilogue .global memory_map_read .global memory_map_write +.global tmemld +.global tmemst +.global tmemst .global reg .global spsr .global reg_mode @@ -105,7 +111,6 @@ .equ REG_R12, (12 * 4) .equ REG_R13, (13 * 4) .equ REG_R14, (14 * 4) -.equ REG_LR, (14 * 4) .equ REG_PC, (15 * 4) .equ REG_N_FLAG, 
(16 * 4) .equ REG_Z_FLAG, (17 * 4) @@ -1004,7 +1009,7 @@ execute_load_ewram_u8: # Put the generic address over the handler you want to be default # IWRAM is typically the most frequently read and written to. -execute_load_u8: +# execute_load_u8: execute_load_iwram_u8: translate_region 3, patch_load_u8, (iwram + 0x8000), 0x7FFF load_u8 (iwram + 0x8000) @@ -1107,7 +1112,7 @@ execute_load_ewram_s8: translate_region_ewram patch_load_s8 load_s8 (ewram + 0x8000) -execute_load_s8: +#execute_load_s8: execute_load_iwram_s8: translate_region 3, patch_load_s8, (iwram + 0x8000), 0x7FFF load_s8 (iwram + 0x8000) @@ -1209,7 +1214,7 @@ execute_load_ewram_u16: translate_region_ewram_load_align 1, 0, patch_load_u16 load_u16 (ewram + 0x8000) -execute_load_u16: +#execute_load_u16: execute_load_iwram_u16: translate_region_align 3, 1, 0, patch_load_u16, (iwram + 0x8000), 0x7FFF load_u16 (iwram + 0x8000) @@ -1408,7 +1413,7 @@ execute_load_ewram_s16: translate_region_ewram_load_align 1, 0, patch_load_s16 load_s16 (ewram + 0x8000) -execute_load_s16: +#execute_load_s16: execute_load_iwram_s16: translate_region_align 3, 1, 0, patch_load_s16, (iwram + 0x8000), 0x7FFF load_s16 (iwram + 0x8000) @@ -1607,7 +1612,7 @@ execute_load_ewram_u32: translate_region_ewram_load_align 2, 0, patch_load_u32 load_u32 (ewram + 0x8000) -execute_load_u32: +#execute_load_u32: execute_load_iwram_u32: translate_region_align 3, 2, 0, patch_load_u32, (iwram + 0x8000), 0x7FFF load_u32 (iwram + 0x8000) @@ -1993,7 +1998,7 @@ execute_load_ewram_u32a: translate_region_ewram patch_load_u32a load_u32 (ewram + 0x8000) -execute_aligned_load32: +#execute_aligned_load32: execute_load_iwram_u32a: translate_region 3, patch_load_u32a, (iwram + 0x8000), 0x7FFF load_u32 (iwram + 0x8000) @@ -2078,7 +2083,7 @@ execute_store_ewram_u8: translate_region_ewram patch_store_u8 store_u8_smc (ewram + 0x8000) -execute_store_u8: +#execute_store_u8: execute_store_iwram_u8: translate_region 3, patch_store_u8, (iwram + 0x8000), 0x7FFF store_u8_smc 
(iwram + 0x8000) @@ -2175,7 +2180,7 @@ execute_store_ewram_u16: translate_region_ewram_store_align16 patch_store_u16 store_u16_smc (ewram + 0x8000) -execute_store_u16: +#execute_store_u16: execute_store_iwram_u16: translate_region 3, patch_store_u16, (iwram + 0x8000), 0x7FFE store_u16_smc (iwram + 0x8000) @@ -2274,7 +2279,7 @@ execute_store_ewram_u32: translate_region_ewram_store_align32 patch_store_u32 store_u32_smc (ewram + 0x8000) -execute_store_u32: +#execute_store_u32: execute_store_iwram_u32: translate_region 3, patch_store_u32, (iwram + 0x8000), 0x7FFC store_u32_smc (iwram + 0x8000) @@ -2380,7 +2385,7 @@ execute_store_ewram_u32a: translate_region_ewram_store_align32 patch_store_u32a store_u32 (ewram + 0x8000) -execute_aligned_store32: +#execute_aligned_store32: execute_store_iwram_u32a: translate_region 3, patch_store_u32a, (iwram + 0x8000), 0x7FFC store_u32 (iwram + 0x8000) @@ -2529,6 +2534,7 @@ smc_write: jal flush_translation_cache_ram # flush translation cache sw $6, REG_PC($16) # save PC (delay slot) +mips_lookup_pc: lookup_pc: lw $2, REG_CPSR($16) # $2 = cpsr andi $2, $2, 0x20 # isolate mode bit @@ -2624,8 +2630,7 @@ execute_store_cpsr: and $2, $2, $4 # $2 = (cpsr & (~store_mask)) or $1, $1, $2 # $1 = new cpsr combined with old extract_flags_body # extract flags from $1 - addiu $sp, $sp, -4 - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) save_registers jal execute_store_cpsr_body # do the dirty work in this C function addu $4, $1, $0 # load the new CPSR (delay slot) @@ -2635,16 +2640,16 @@ execute_store_cpsr: restore_registers - lw $ra, ($sp) + lw $ra, REG_SAVE3($16) jr $ra - addiu $sp, $sp, 4 + nop changed_pc_cpsr: jal block_lookup_address_arm # GBA address is in $4 addu $4, $2, $0 # load new address in $4 (delay slot) restore_registers # restore registers jr $2 # jump to the new address - addiu $sp, $sp, 4 # get rid of the old ra (delay slot) + nop # $4: new spsr @@ -2797,11 +2802,14 @@ execute_arm_translate: .data .align 6 +memory_map_write: + .space 
0x8000 + memory_map_read: .space 0x8000 -# This must be between memory_map_read and memory_map_write because it's used -# to calculate their addresses elsewhere in this file. +# memory_map_read is immediately before arm_reg on purpose (offset used +# to access it, via lw op). We do not use write though. reg: .space 0x100 @@ -2815,5 +2823,14 @@ spsr: reg_mode: .space 196 # u32[7][7]; -memory_map_write: - .space 0x8000 +# Here we store: +# void *tmemld[11][16]; # 10 types of loads +# void *tmemst[ 4][16]; # 3 types of stores +# Essentially a list of pointers to the different mem load handlers +# Keep them close for a fast patcher. +tmemld: + .space 704 +tmemst: + .space 256 + + -- cgit v1.2.3 From 80be1e3447f26376b07a1154c98258fb4a124500 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 16 Mar 2021 01:06:30 +0100 Subject: Remove old handlers from mips/stub --- psp/mips_stub.S | 2203 ------------------------------------------------------- 1 file changed, 2203 deletions(-) diff --git a/psp/mips_stub.S b/psp/mips_stub.S index a14085b..a427e89 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -23,16 +23,6 @@ .global mips_indirect_branch_arm .global mips_indirect_branch_thumb .global mips_indirect_branch_dual -#.global execute_load_u8 -#.global execute_load_u16 -#.global execute_load_u32 -#.global execute_load_s8 -#.global execute_load_s16 -#.global execute_store_u8 -#.global execute_store_u16 -#.global execute_store_u32 -#.global execute_aligned_load32 -#.global execute_aligned_store32 .global execute_read_cpsr .global execute_read_spsr .global execute_swi @@ -290,2199 +280,6 @@ mips_indirect_branch_dual: nop -# $4: address to write to -# $5: current PC - -# Will patch the return address with a call to the correct handler as -# listed in the given table. 
- -# Value will be set to force_open if it's open - -.macro patch_handler ftable, force_open - srl $1, $4, 24 # $1 = address region - sltu $2, $1, 0x0F # check if the value is open - bne $2, $0, 1f - sll $1, $1, 2 # make address word indexed (delay) - - addiu $1, $0, (\force_open * 4) - -1: - lui $2, %hi(\ftable) - addu $2, $2, $1 - lw $2, %lo(\ftable)($2) # new function handler is in $2 - sll $1, $2, 4 # shift left by 4 (6 LSB are zero) - ori $1, $1, 3 # Insert the opcode in the LSB - ror $1, $1, 6 # Rotate to the opcode is now in the MSB - - sw $1, -8($ra) # Overwrite jal instruction w/ new handler - - cache 0x1a, -8($ra) # hit writeback dcache line - cache 0x08, -8($ra) # hit invalidate icache line - jr $2 # Jump to new handler directly - nop - -.endm - - -# Like the above, but will use the table of the proper alignment, -# The tables should be ordered by alignment - -.macro patch_handler_align ftable, alignment - srl $1, $4, 24 # $1 = address region - sltu $2, $1, 0x0F # check if the value is open - bne $2, $0, 1f - sll $1, $1, 2 # make address word indexed (delay) - - addiu $1, $0, 4 # force address to 0x1 (open) - -1: - ins $1, $4, 6, \alignment # place alignment bits into offset - lui $2, %hi(\ftable) - - addu $2, $2, $1 - lw $2, %lo(\ftable)($2) # new function handler is in $2 - - sll $1, $2, 4 # Build the new JAL instruction - ori $1, $1, 3 # same as above. 
- ror $1, $1, 6 - - sw $1, -8($ra) # modify to call new handler - - cache 0x1a, -8($ra) # hit writeback dcache line - cache 0x08, -8($ra) # hit invalidate icache line - jr $2 # Jump to new handler - nop - -.endm - - -.macro region_check region, patch_handler - srl $1, $4, 24 # check upper 8bits of address - xor $1, $1, \region # see if it is the given region - bne $1, $0, \patch_handler # if not repatch/try again -.endm - -.macro region_check_open patch_handler - srl $1, $4, 24 # check upper 8bits of address - sltiu $2, $1, 0x0F # true if it is a low address - addiu $1, $1, -1 # non-zero if it is not a low open - sltu $1, $0, $1 # true if lower bits != 1 - and $1, $1, $2 # true if low address and not open - bne $1, $0, \patch_handler # if above is true, patch -.endm - - -.macro region_check_align region, align_bits, alignment, patch_handler - srl $1, $4, 24 # check upper 8bits of address - ins $1, $4, 8, \align_bits # look at lower bits of address too - # See if it is the given region and alignment - xori $1, $1, (\region | (\alignment << 8)) - bne $1, $0, \patch_handler # if not repatch/try again -.endm - -.macro region_check_open_align align_bits, alignment, patch_handler - srl $1, $4, 24 # check upper 8bits of address - sltiu $2, $1, 0x0F # true if it is a low address - addiu $1, $1, -1 # non-zero if it is not a low open - sltu $1, $0, $1 # true if $1 != 0 - and $1, $1, $2 # true if low address and not open - ext $2, $4, 0, \align_bits # $2 = low bits of 4 - xori $2, $2, \alignment # true if alignment doesn't match - or $1, $1, $2 # align failure will trigger too - bne $1, $0, \patch_handler # if above is true, patch -.endm - - -.macro ignore_region region, patch_handler - region_check \region, \patch_handler - nop - jr $ra - nop -.endm - -.macro ignore_high patch_handler - srl $1, $4, 24 # check upper 8bits of address - sltiu $1, $1, 0x0F # see if it is not high - bne $1, $0, \patch_handler # if not repatch/try again - nop - jr $ra - nop -.endm - - -.macro 
translate_region_core base, size - lui $2, %hi(\base) # generate upper address - andi $4, $4, \size # generate offset - addu $2, $2, $4 # add ptr upper and offset -.endm - -.macro translate_region region, patch_handler, base, size - region_check \region, \patch_handler - translate_region_core \base, \size -.endm - -# I refuse to have > 80 char lines, and GAS has a problem with the param -# list spilling over (grumble) - -.macro translate_region_align region, a_b, alignment, p_h, base, size - region_check_align \region, \a_b, \alignment, \p_h - translate_region_core \base, \size -.endm - - -.macro translate_region_ewram_core mask - lui $2, %hi(ewram + 0x8000) # generate upper address (delay) - andi $1, $4, \mask # generate 15bit offset - ext $4, $4, 15, 3 # isolate top 3 bits of offset - ins $1, $4, 16, 3 # reinsert into top 4 bits - addu $2, $2, $1 -.endm - -.macro translate_region_ewram patch_handler - region_check 2, \patch_handler - translate_region_ewram_core 0x7FFF -.endm - -.macro translate_region_ewram_load_align align_bits, alignment, patch_handler - region_check_align 2, \align_bits, \alignment, \patch_handler - translate_region_ewram_core 0x7FFF -.endm - -.macro translate_region_ewram_load_align16 align_bits, alignment, patch_handler - region_check_align 2, \align_bits, \alignment, \patch_handler - translate_region_ewram_core 0x7FFE -.endm - -.macro translate_region_ewram_load_align32 align_bits, alignment, patch_handler - region_check_align 2, \align_bits, \alignment, \patch_handler - translate_region_ewram_core 0x7FFC -.endm - -.macro translate_region_ewram_store_align16 patch_handler - region_check 2, \patch_handler - translate_region_ewram_core 0x7FFE -.endm - -.macro translate_region_ewram_store_align32 patch_handler - region_check 2, \patch_handler - translate_region_ewram_core 0x7FFC -.endm - - -.macro translate_region_vram_core - addiu $2, $2, -3 # see if it's 3 - ext $4, $4, 0, 17 # generate 17bit offset - bne $2, $0, 1f - lui $1, %hi(vram) # 
start loading vram address (delay) - - addiu $4, $4, -0x8000 # move address into VRAM region - -1: - addu $2, $1, $4 # $2 = (hi)vram + address -.endm - -.macro translate_region_vram patch_handler - region_check 6, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - translate_region_vram_core -.endm - -.macro translate_region_vram_load_align align_bits, alignment, patch_handler - region_check_align 6, \align_bits, \alignment, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - translate_region_vram_core -.endm - -.macro translate_region_vram_load_align16 align_bits, alignment, patch_handler - region_check_align 6, \align_bits, \alignment, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - ins $4, $0, 0, 1 # mask out lower bit of address - translate_region_vram_core -.endm - -.macro translate_region_vram_load_align32 align_bits, alignment, patch_handler - region_check_align 6, \align_bits, \alignment, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - ins $4, $0, 0, 2 # mask out lower two bits of address - translate_region_vram_core -.endm - -.macro translate_region_vram_store_align16 patch_handler - region_check 6, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - ins $4, $0, 0, 1 # mask out lower bit of address - translate_region_vram_core -.endm - -.macro translate_region_vram_store_align32 patch_handler - region_check 6, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - ins $4, $0, 0, 2 # mask out lower two bits of address - translate_region_vram_core -.endm - - - -.macro translate_region_gamepak_core mask - srl $2, $4, 15 # $2 = page number of address (delay) - sll $2, $2, 2 # adjust to word index - addu $2, $2, $16 # $2 = memory_map_read[address >> 15] - lw $2, -32768($2) - bne $2, $0, 1f # if it's non-NULL continue - andi $1, $4, \mask # $1 = low 15bits of address (delay slot) - - sw $ra, 
REG_SAVE2($16) # save return address - - save_registers # save the registers - ext $4, $4, 15, 10 # $4 = (address >> 15) & 0x3FF - - jal load_gamepak_page # get page in $2 - sw $1, REG_SAVE($16) # save offset (delay) - lw $1, REG_SAVE($16) # restore offset (delay) - - restore_registers # restore the other registers - - lw $ra, REG_SAVE2($16) # restore return address - -1: - addu $2, $2, $1 # add the memory map offset -.endm - -.macro translate_region_gamepak region, patch_handler - region_check \region, \patch_handler - translate_region_gamepak_core 0x7FFF -.endm - -.macro translate_region_gamepak_align region, a_b, alignment, patch_handler - region_check_align \region, \a_b, \alignment, \patch_handler - translate_region_gamepak_core 0x7FFF -.endm - -.macro translate_region_gamepak_align16 region, a_b, alignment, patch_handler - region_check_align \region, \a_b, \alignment, \patch_handler - translate_region_gamepak_core 0x7FFE -.endm - -.macro translate_region_gamepak_align32 region, a_b, alignment, patch_handler - region_check_align \region, \a_b, \alignment, \patch_handler - translate_region_gamepak_core 0x7FFC -.endm - - -.macro translate_region_gamepak_a region, patch_handler - region_check \region, \patch_handler - srl $2, $4, 15 # $2 = page number of address (delay) - sll $2, $2, 2 # adjust to word index - addu $2, $2, $16 # $2 = memory_map_read[address >> 15] - lw $2, -32768($2) - bne $2, $0, 1f # if it's non-NULL continue - andi $1, $4, 0x7FFF # $1 = low 15bits of address (delay slot) - - sw $ra, REG_SAVE2($16) # save return address - sw $6, REG_SAVE3($16) # save a2 - - save_registers # save the registers - ext $4, $4, 15, 10 # $4 = (address >> 15) & 0x3FF - - jal load_gamepak_page # get page in $2 - sw $1, REG_SAVE($16) # save offset (delay) - lw $1, REG_SAVE($16) # restore offset (delay) - - restore_registers # restore the other registers - - lw $ra, REG_SAVE2($16) # restore return address - lw $6, REG_SAVE3($16) # restore a2 - -1: - addu $2, $2, $1 # add 
the memory map offset -.endm - - -.macro eeprom_load_a patch_handler - region_check 0xD, \patch_handler - - sw $ra, REG_SAVE($16) # save the return address (delay) - sw $6, REG_SAVE2($16) # save a2 - - save_registers # save the registers - - jal read_eeprom # get eeprom value in $2 - nop - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - lw $6, REG_SAVE2($16) # restore a2 -.endm - - -.macro eeprom_load_core - sw $ra, REG_SAVE($16) # save the return address (delay) - - save_registers # save the registers - - jal read_eeprom # get eeprom value in $2 - nop - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - nop -.endm - -.macro eeprom_load patch_handler - region_check 0xD, \patch_handler - eeprom_load_core -.endm - -.macro eeprom_load_align align_bits, alignment, patch_handler - region_check_align 0xD, \align_bits, \alignment, \patch_handler - eeprom_load_core -.endm - -.macro eeprom_load_align16 align_bits, alignment, patch_handler - eeprom_load_align \align_bits, \alignment, \patch_handler -.endm - -.macro eeprom_load_align32 align_bits, alignment, patch_handler - eeprom_load_align \align_bits, \alignment, \patch_handler -.endm - - -.macro backup_load_core - save_registers # save the registers - - jal read_backup # get backup value in $2 - ext $4, $4, 0, 16 # address &= 0xFFFF - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return -.endm - -.macro backup_load_a patch_handler - region_check 0xE, \patch_handler - sw $ra, REG_SAVE($16) # save return address (delay) - sw $6, REG_SAVE2($16) # save a2 - - save_registers # save the registers - - jal read_backup # get backup value in $2 - ext $4, $4, 0, 16 # address &= 0xFFFF - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - lw $6, REG_SAVE2($16) 
# restore a2 -.endm - - -.macro backup_load patch_handler - region_check 0xE, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - backup_load_core -.endm - -.macro backup_load_align align_bits, alignment, patch_handler - region_check_align 0xE, \align_bits, \alignment, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - backup_load_core -.endm - -.macro backup_load_align16 align_bits, alignment, patch_handler - region_check_align 0xE, \align_bits, \alignment, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - ins $4, $0, 0, 1 # mask out lower bit - backup_load_core -.endm - -.macro backup_load_align32 align_bits, alignment, patch_handler - region_check_align 0xE, \align_bits, \alignment, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - ins $4, $0, 0, 2 # mask out lower two bits - backup_load_core -.endm - - -.macro open_load8_core - lw $2, REG_CPSR($16) # $2 = CPSR (delay) - andi $2, $2, 0x20 # test T bit - beq $2, $0, 1f # branch if ARM mode - andi $4, $4, 0x03 # isolate lower 3bits from address (delay) - - andi $4, $4, 0x01 # in Thumb mode, isolate one more bit - -1: - sw $ra, REG_SAVE($16) # save the return address (delay) - save_registers # save the registers - - jal read_memory8 # get instruction at PC - addu $4, $5, $4 # a0 = PC + low bits of address - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return -.endm - -.macro open_load8 patch_handler - region_check_open \patch_handler - open_load8_core -.endm - - - -.macro open_load16_core - lw $2, REG_CPSR($16) # $2 = CPSR (delay) - andi $2, $2, 0x20 # test T bit - beq $2, $0, 1f # branch if ARM mode - andi $4, $4, 0x02 # isolate bit 1 from address (delay) - - addu $4, $0, $0 # zero out address bit - -1: - sw $ra, REG_SAVE($16) # save the return address (delay) - save_registers # save the registers - - jal read_memory16 # get instruction at PC - addu $4, 
$5, $4 # a0 = PC + low bits of address - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return -.endm - -.macro open_load16_align align_bits, alignment, patch_handler - region_check_open_align \align_bits, \alignment, \patch_handler - open_load16_core -.endm - -.macro open_load16_align16 align_bits, alignment, patch_handler - open_load16_align \align_bits, \alignment, \patch_handler -.endm - - - -.macro open_load32_core - lw $2, REG_CPSR($16) # $2 = CPSR (delay) - andi $2, $2, 0x20 # test T bit - - save_registers # save the registers - - beq $2, $0, 1f # branch if ARM mode - sw $ra, REG_SAVE($16) # save the return address (delay) - - jal read_memory16 # get instruction at PC - addu $4, $5, $0 # a0 = PC - - j 2f - ins $2, $2, 16, 16 # result = (result << 16) | result (delay) - -1: - jal read_memory32 # get instruction at PC - addu $4, $5, $4 # a0 = PC - -2: # join point - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return -.endm - -.macro open_load32_a patch_handler - region_check_open \patch_handler - - lw $2, REG_CPSR($16) # $2 = CPSR (delay) - andi $2, $2, 0x20 # test T bit - - save_registers # save the registers - sw $6, REG_SAVE2($16) # save a2 - - beq $2, $0, 1f # branch if ARM mode - sw $ra, REG_SAVE($16) # save the return address (delay) - - jal read_memory16 # get instruction at PC - addu $4, $5, $0 # a0 = PC - - j 2f - ins $2, $2, 16, 16 # result = (result << 16) | result (delay) - -1: - jal read_memory32 # get instruction at PC - addu $4, $5, $4 # a0 = PC - -2: - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - lw $6, REG_SAVE2($16) # restore a2 (delay) -.endm - -.macro open_load32_align align_bits, alignment, patch_handler - region_check_open_align \align_bits, \alignment, \patch_handler - open_load32_core -.endm - -.macro open_load32_align32 align_bits, 
alignment, patch_handler - open_load32_align \align_bits, \alignment, \patch_handler -.endm - - -.macro store_function function, region, patch_handler, mask - region_check \region, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - - save_registers # save the registers - - jal \function # store value out - andi $4, $4, \mask # mask address - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - nop -.endm - - -.macro store_function_a function, region, patch_handler, mask - region_check \region, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - - save_registers # save the registers - - jal \function # store value out - andi $4, $4, \mask # mask address - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - nop -.endm - - - -.macro load_u8 base - jr $ra # return - lbu $2, %lo(\base)($2) # return base[offset] -.endm - -.macro load_s8 base - jr $ra # return - lb $2, %lo(\base)($2) # return base[offset] -.endm - -.macro load_u16 base - jr $ra # return - lhu $2, %lo(\base)($2) # return base[offset] -.endm - -.macro load_s16 base - jr $ra # return - lh $2, %lo(\base)($2) # return base[offset] -.endm - -.macro load_u32 base - jr $ra # return - lw $2, %lo(\base)($2) # return base[offset] -.endm - - -# 16bit unaligned load will always have a 1 in the LSB; -# should have already been taken care of in indexing. - -.macro load_u16_unaligned base - lhu $2, %lo(\base)($2) # load base[offset] - jr $ra # return - ror $2, $2, 8 # rotate value by 8bits -.endm - -# This is technically the same as load_s8, but kept to -# avoid confusion. 
- -.macro load_s16_unaligned base - jr $ra # return - lb $2, %lo(\base)($2) # return base[offset] -.endm - -# Unalignment must be known statically (use the tables to -# patch correctly) - -.macro load_u32_unaligned base, alignment - lw $2, %lo(\base)($2) # load base[offset] - jr $ra # return - ror $2, $2, (\alignment * 8) # rotate value by 8bits -.endm - - -.macro store_u8 base - jr $ra # return - sb $5, %lo(\base)($2) # store value at base[offset] -.endm - -.macro store_u16 base - jr $ra # return - sh $5, %lo(\base)($2) # store value at base[offset] -.endm - -.macro store_u32 base - jr $ra # return - sw $5, %lo(\base)($2) # store value at base[offset] -.endm - - -# Store the value double mirrored (u16) - -.macro store_u8_double base - ins $5, $5, 8, 8 # value = (value << 8) | value - jr $ra # return - sh $5, %lo(\base)($2) # store value at base[offset] -.endm - - -# Store the values and check if it overwrote code there - -.macro store_u8_smc base - addiu $2, $2, %lo(\base) # offset the address - lb $1, -32768($2) # load the SMC status - bne $1, $0, smc_write # is there code there? - sb $5, ($2) # store value at base[offset] (delay) - jr $ra # return - nop -.endm - -.macro store_u16_smc base - addiu $2, $2, %lo(\base) # offset the address - lh $1, -32768($2) # load the SMC status - bne $1, $0, smc_write # is there code there? - sh $5, ($2) # store value at base[offset] (delay) - jr $ra # return - nop -.endm - -.macro store_u32_smc base - addiu $2, $2, %lo(\base) # offset the address - lw $1, -32768($2) # load the SMC status - bne $1, $0, smc_write # is there code there? 
- sw $5, ($2) # store value at base[offset] (delay) - jr $ra # return - nop -.endm - - - -# Unsigned 8bit load handlers - -execute_load_bios_u8: - region_check 0, patch_load_u8 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_u8 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ins $2, $4, 0, 2 # lower 2 bits address contributes - load_u8 bios_read_protect - -2: - open_load8_core - nop - - -execute_load_ewram_u8: - translate_region_ewram patch_load_u8 - load_u8 (ewram + 0x8000) - -# Put the generic address over the handler you want to be default -# IWRAM is typically the most frequently read and written to. - -# execute_load_u8: -execute_load_iwram_u8: - translate_region 3, patch_load_u8, (iwram + 0x8000), 0x7FFF - load_u8 (iwram + 0x8000) - -execute_load_io_u8: - translate_region 4, patch_load_u8, io_registers, 0x3FF - load_u8 io_registers - -execute_load_palette_u8: - translate_region 5, patch_load_u8, palette_ram, 0x3FF - load_u8 palette_ram - -execute_load_vram_u8: - translate_region_vram patch_load_u8 - load_u8 vram - -execute_load_oam_u8: - translate_region 7, patch_load_u8, oam_ram, 0x3FF - load_u8 oam_ram - -execute_load_gamepak8_u8: - translate_region_gamepak 8, patch_load_u8 - load_u8 0 - -execute_load_gamepak9_u8: - translate_region_gamepak 9, patch_load_u8 - load_u8 0 - -execute_load_gamepakA_u8: - translate_region_gamepak 10, patch_load_u8 - load_u8 0 - -execute_load_gamepakB_u8: - translate_region_gamepak 11, patch_load_u8 - load_u8 0 - -execute_load_gamepakC_u8: - translate_region_gamepak 12, patch_load_u8 - load_u8 0 - -execute_load_eeprom_u8: - eeprom_load patch_load_u8 - -execute_load_backup_u8: - backup_load patch_load_u8 - nop - 
-execute_load_open_u8: - open_load8 patch_load_u8 - nop - -load_u8_ftable: - .long execute_load_bios_u8 # 0x00 BIOS - .long execute_load_open_u8 # 0x01 open address - .long execute_load_ewram_u8 # 0x02 EWRAM - .long execute_load_iwram_u8 # 0x03 IWRAM - .long execute_load_io_u8 # 0x04 I/O registers - .long execute_load_palette_u8 # 0x05 Palette RAM - .long execute_load_vram_u8 # 0x06 VRAM - .long execute_load_oam_u8 # 0x07 OAM RAM - .long execute_load_gamepak8_u8 # 0x08 gamepak - .long execute_load_gamepak9_u8 # 0x09 gamepak - .long execute_load_gamepakA_u8 # 0x0A gamepak - .long execute_load_gamepakB_u8 # 0x0B gamepak - .long execute_load_gamepakC_u8 # 0x0C gamepak - .long execute_load_eeprom_u8 # 0x0D gamepak/eeprom - .long execute_load_backup_u8 # 0x0E Flash ROM/SRAM - .long execute_load_open_u8 # 0x0F open address - -patch_load_u8: - patch_handler load_u8_ftable, 0x01 - - - -# Signed 8bit load handlers - -execute_load_bios_s8: - region_check 0, patch_load_s8 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_s8 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ins $2, $4, 0, 2 # lower 2 bits contribute - load_s8 bios_read_protect - -2: - open_load8_core - seb $2, $2 - - -execute_load_ewram_s8: - translate_region_ewram patch_load_s8 - load_s8 (ewram + 0x8000) - -#execute_load_s8: -execute_load_iwram_s8: - translate_region 3, patch_load_s8, (iwram + 0x8000), 0x7FFF - load_s8 (iwram + 0x8000) - -execute_load_io_s8: - translate_region 4, patch_load_s8, io_registers, 0x3FF - load_s8 io_registers - -execute_load_palette_s8: - translate_region 5, patch_load_s8, palette_ram, 0x3FF - load_s8 palette_ram - -execute_load_vram_s8: - translate_region_vram patch_load_s8 - load_s8 vram - -execute_load_oam_s8: 
- translate_region 7, patch_load_s8, oam_ram, 0x3FF - load_s8 oam_ram - -execute_load_gamepak8_s8: - translate_region_gamepak 8, patch_load_s8 - load_s8 0 - -execute_load_gamepak9_s8: - translate_region_gamepak 9, patch_load_s8 - load_s8 0 - -execute_load_gamepakA_s8: - translate_region_gamepak 10, patch_load_s8 - load_s8 0 - -execute_load_gamepakB_s8: - translate_region_gamepak 11, patch_load_s8 - load_s8 0 - -execute_load_gamepakC_s8: - translate_region_gamepak 12, patch_load_s8 - load_s8 0 - -execute_load_eeprom_s8: - eeprom_load patch_load_s8 - -execute_load_backup_s8: - backup_load patch_load_s8 - seb $2, $2 # sign extend result (delay) - -execute_load_open_s8: - open_load8 patch_load_s8 - seb $2, $2 # sign extend result (delay) - -load_s8_ftable: - .long execute_load_bios_s8 # 0x00 BIOS - .long execute_load_open_s8 # 0x01 open address - .long execute_load_ewram_s8 # 0x02 EWRAM - .long execute_load_iwram_s8 # 0x03 IWRAM - .long execute_load_io_s8 # 0x04 I/O registers - .long execute_load_palette_s8 # 0x05 Palette RAM - .long execute_load_vram_s8 # 0x06 VRAM - .long execute_load_oam_s8 # 0x07 OAM RAM - .long execute_load_gamepak8_s8 # 0x08 gamepak - .long execute_load_gamepak9_s8 # 0x09 gamepak - .long execute_load_gamepakA_s8 # 0x0A gamepak - .long execute_load_gamepakB_s8 # 0x0B gamepak - .long execute_load_gamepakC_s8 # 0x0C gamepak - .long execute_load_eeprom_s8 # 0x0D gamepak/eeprom - .long execute_load_backup_s8 # 0x0E Flash ROM/SRAM - .long execute_load_open_s8 # 0x0F open address - -patch_load_s8: - patch_handler load_s8_ftable, 1 - - - -# Unsigned aligned 16bit load handlers - -execute_load_bios_u16: - region_check_align 0, 1, 0, patch_load_u16 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, 
$2, $4 - load_u16 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ins $2, $4, 0, 2 # bit 1 contributes - load_u16 bios_read_protect - -2: - open_load16_core - nop - -execute_load_ewram_u16: - translate_region_ewram_load_align 1, 0, patch_load_u16 - load_u16 (ewram + 0x8000) - -#execute_load_u16: -execute_load_iwram_u16: - translate_region_align 3, 1, 0, patch_load_u16, (iwram + 0x8000), 0x7FFF - load_u16 (iwram + 0x8000) - -execute_load_io_u16: - translate_region_align 4, 1, 0, patch_load_u16, io_registers, 0x3FF - load_u16 io_registers - -execute_load_palette_u16: - translate_region_align 5, 1, 0, patch_load_u16, palette_ram, 0x3FF - load_u16 palette_ram - -execute_load_vram_u16: - translate_region_vram_load_align 1, 0, patch_load_u16 - load_u16 vram - -execute_load_oam_u16: - translate_region_align 7, 1, 0, patch_load_u16, oam_ram, 0x3FF - load_u16 oam_ram - -execute_load_gamepak8_u16: - translate_region_gamepak_align 8, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_gamepak9_u16: - translate_region_gamepak_align 9, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_gamepakA_u16: - translate_region_gamepak_align 10, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_gamepakB_u16: - translate_region_gamepak_align 11, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_gamepakC_u16: - translate_region_gamepak_align 12, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_eeprom_u16: - eeprom_load_align 1, 0, patch_load_u16 - -execute_load_backup_u16: - backup_load_align 1, 0, patch_load_u16 - nop - -execute_load_open_u16: - open_load16_align 1, 0, patch_load_u16 - nop - - -# Unsigned unaligned 16bit load handlers - -execute_load_bios_u16u: - region_check_align 0, 1, 1, patch_load_u16 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - 
andi $4, $4, 0x3FFE # generate offset - addu $2, $2, $4 - load_u16_unaligned bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ext $1, $4, 1, 1 - ins $2, $1, 1, 1 # bit 1 contributes - load_u16_unaligned bios_read_protect - -2: - open_load16_core - ror $2, $2, 8 - - -execute_load_ewram_u16u: - translate_region_ewram_load_align16 1, 1, patch_load_u16 - load_u16_unaligned (ewram + 0x8000) - -execute_load_iwram_u16u: - translate_region_align 3, 1, 1, patch_load_u16, (iwram + 0x8000), 0x7FFE - load_u16_unaligned (iwram + 0x8000) - -execute_load_io_u16u: - translate_region_align 4, 1, 1, patch_load_u16, io_registers, 0x3FE - load_u16_unaligned io_registers - -execute_load_palette_u16u: - translate_region_align 5, 1, 1, patch_load_u16, palette_ram, 0x3FE - load_u16_unaligned palette_ram - -execute_load_vram_u16u: - translate_region_vram_load_align16 1, 1, patch_load_u16 - load_u16_unaligned vram - -execute_load_oam_u16u: - translate_region_align 7, 1, 1, patch_load_u16, oam_ram, 0x3FE - load_u16_unaligned oam_ram - -execute_load_gamepak8_u16u: - translate_region_gamepak_align16 8, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_gamepak9_u16u: - translate_region_gamepak_align16 9, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_gamepakA_u16u: - translate_region_gamepak_align16 10, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_gamepakB_u16u: - translate_region_gamepak_align16 11, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_gamepakC_u16u: - translate_region_gamepak_align16 12, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_eeprom_u16u: - eeprom_load_align16 1, 1, patch_load_u16 - -execute_load_backup_u16u: - backup_load_align16 1, 1, patch_load_u16 - ror $2, $2, 8 # rotate value by 8bits - -execute_load_open_u16u: - open_load16_align16 1, 1, patch_load_u16 - ror $2, $2, 8 # rotate value by 8bits - -load_u16_ftable: - .long execute_load_bios_u16 # 0x00 BIOS - .long 
execute_load_open_u16 # 0x01 open address - .long execute_load_ewram_u16 # 0x02 EWRAM - .long execute_load_iwram_u16 # 0x03 IWRAM - .long execute_load_io_u16 # 0x04 I/O registers - .long execute_load_palette_u16 # 0x05 Palette RAM - .long execute_load_vram_u16 # 0x06 VRAM - .long execute_load_oam_u16 # 0x07 OAM RAM - .long execute_load_gamepak8_u16 # 0x08 gamepak - .long execute_load_gamepak9_u16 # 0x09 gamepak - .long execute_load_gamepakA_u16 # 0x0A gamepak - .long execute_load_gamepakB_u16 # 0x0B gamepak - .long execute_load_gamepakC_u16 # 0x0C gamepak - .long execute_load_eeprom_u16 # 0x0D gamepak/eeprom - .long execute_load_backup_u16 # 0x0E Flash ROM/SRAM - .long execute_load_open_u16 # 0x0F open - - .long execute_load_bios_u16u # 0x00 BIOS unaligned - .long execute_load_open_u16u # 0x01 open address unaligned - .long execute_load_ewram_u16u # 0x02 EWRAM unaligned - .long execute_load_iwram_u16u # 0x03 IWRAM unaligned - .long execute_load_io_u16u # 0x04 I/O registers unaligned - .long execute_load_palette_u16u # 0x05 Palette RAM unaligned - .long execute_load_vram_u16u # 0x06 VRAM unaligned - .long execute_load_oam_u16u # 0x07 OAM RAM unaligned - .long execute_load_gamepak8_u16u# 0x08 gamepak unaligned - .long execute_load_gamepak9_u16u# 0x09 gamepak unaligned - .long execute_load_gamepakA_u16u# 0x0A gamepak unaligned - .long execute_load_gamepakB_u16u# 0x0B gamepak unaligned - .long execute_load_gamepakC_u16u# 0x0C gamepak unaligned - .long execute_load_eeprom_u16u # 0x0D gamepak/eeprom unaligned - .long execute_load_backup_u16u # 0x0E Flash ROM/SRAM unaligned - .long execute_load_open_u16u # 0x0F open unaligned - -patch_load_u16: - patch_handler_align load_u16_ftable, 1 - -# Signed aligned 16bit load handlers - -execute_load_bios_s16: - region_check_align 0, 1, 0, patch_load_s16 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, 
perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_s16 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ins $2, $4, 0, 2 # bit 1 contributes - load_s16 bios_read_protect - -2: - open_load16_core - seh $2, $2 - - -execute_load_ewram_s16: - translate_region_ewram_load_align 1, 0, patch_load_s16 - load_s16 (ewram + 0x8000) - -#execute_load_s16: -execute_load_iwram_s16: - translate_region_align 3, 1, 0, patch_load_s16, (iwram + 0x8000), 0x7FFF - load_s16 (iwram + 0x8000) - -execute_load_io_s16: - translate_region_align 4, 1, 0, patch_load_s16, io_registers, 0x3FF - load_s16 io_registers - -execute_load_palette_s16: - translate_region_align 5, 1, 0, patch_load_s16, palette_ram, 0x3FF - load_s16 palette_ram - -execute_load_vram_s16: - translate_region_vram_load_align 1, 0, patch_load_s16 - load_s16 vram - -execute_load_oam_s16: - translate_region_align 7, 1, 0, patch_load_s16, oam_ram, 0x3FF - load_s16 oam_ram - -execute_load_gamepak8_s16: - translate_region_gamepak_align 8, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_gamepak9_s16: - translate_region_gamepak_align 9, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_gamepakA_s16: - translate_region_gamepak_align 10, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_gamepakB_s16: - translate_region_gamepak_align 11, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_gamepakC_s16: - translate_region_gamepak_align 12, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_eeprom_s16: - eeprom_load_align 1, 0, patch_load_s16 - -execute_load_backup_s16: - backup_load_align 1, 0, patch_load_s16 - nop - -execute_load_open_s16: - open_load16_align 1, 0, patch_load_s16 - nop - - -# Signed unaligned 16bit load handlers - -execute_load_bios_s16u: - region_check_align 0, 1, 1, patch_load_s16 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # 
check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFE # generate offset - addu $2, $1, $4 - load_s16_unaligned bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ext $1, $4, 1, 1 - ins $2, $1, 1, 1 # bit 1 contributes - load_s16_unaligned bios_read_protect - -2: - open_load16_core - seb $2, $2 - -execute_load_ewram_s16u: - translate_region_ewram_load_align16 1, 1, patch_load_s16 - load_s16_unaligned (ewram + 0x8000) - -execute_load_iwram_s16u: - translate_region_align 3, 1, 1, patch_load_s16, (iwram + 0x8000), 0x7FFE - load_s16_unaligned (iwram + 0x8000) - -execute_load_io_s16u: - translate_region_align 4, 1, 1, patch_load_s16, io_registers, 0x3FE - load_s16_unaligned io_registers - -execute_load_palette_s16u: - translate_region_align 5, 1, 1, patch_load_s16, palette_ram, 0x3FE - load_s16_unaligned palette_ram - -execute_load_vram_s16u: - translate_region_vram_load_align16 1, 1, patch_load_s16 - load_s16_unaligned vram - -execute_load_oam_s16u: - translate_region_align 7, 1, 1, patch_load_s16, oam_ram, 0x3FE - load_s16_unaligned oam_ram - -execute_load_gamepak8_s16u: - translate_region_gamepak_align16 8, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_gamepak9_s16u: - translate_region_gamepak_align16 9, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_gamepakA_s16u: - translate_region_gamepak_align16 10, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_gamepakB_s16u: - translate_region_gamepak_align16 11, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_gamepakC_s16u: - translate_region_gamepak_align16 12, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_eeprom_s16u: - eeprom_load_align 1, 1, patch_load_s16 - -execute_load_backup_s16u: - backup_load_align 1, 1, patch_load_s16 - seb $2, $2 # sign extend result from 8bits - -execute_load_open_s16u: - open_load16_align 1, 1, 
patch_load_s16 - seb $2, $2 # sign extend result from 8bits - -load_s16_ftable: - .long execute_load_bios_s16 # 0x00 BIOS - .long execute_load_open_s16 # 0x01 open address - .long execute_load_ewram_s16 # 0x02 EWRAM - .long execute_load_iwram_s16 # 0x03 IWRAM - .long execute_load_io_s16 # 0x04 I/O registers - .long execute_load_palette_s16 # 0x05 Palette RAM - .long execute_load_vram_s16 # 0x06 VRAM - .long execute_load_oam_s16 # 0x07 OAM RAM - .long execute_load_gamepak8_s16 # 0x08 gamepak - .long execute_load_gamepak9_s16 # 0x09 gamepak - .long execute_load_gamepakA_s16 # 0x0A gamepak - .long execute_load_gamepakB_s16 # 0x0B gamepak - .long execute_load_gamepakC_s16 # 0x0C gamepak - .long execute_load_eeprom_s16 # 0x0D gamepak/eeprom - .long execute_load_backup_s16 # 0x0E Flash ROM/SRAM - .long execute_load_open_s16 # 0x0F open unaligned - - .long execute_load_bios_s16u # 0x00 BIOS unaligned - .long execute_load_open_s16u # 0x01 open address unaligned - .long execute_load_ewram_s16u # 0x02 EWRAM unaligned - .long execute_load_iwram_s16u # 0x03 IWRAM unaligned - .long execute_load_io_s16u # 0x04 I/O registers unaligned - .long execute_load_palette_s16u # 0x05 Palette RAM unaligned - .long execute_load_vram_s16u # 0x06 VRAM unaligned - .long execute_load_oam_s16u # 0x07 OAM RAM unaligned - .long execute_load_gamepak8_s16u# 0x08 gamepak unaligned - .long execute_load_gamepak9_s16u# 0x09 gamepak unaligned - .long execute_load_gamepakA_s16u# 0x0A gamepak unaligned - .long execute_load_gamepakB_s16u# 0x0B gamepak unaligned - .long execute_load_gamepakC_s16u# 0x0C gamepak unaligned - .long execute_load_eeprom_s16u # 0x0D gamepak/eeprom unaligned - .long execute_load_backup_s16u # 0x0E Flash ROM/SRAM unaligned - .long execute_load_open_s16u # 0x0F open unaligned - -patch_load_s16: - patch_handler_align load_s16_ftable, 1 - - - -# Unsigned aligned 32bit load handlers - -execute_load_bios_u32: - region_check_align 0, 2, 0, patch_load_u32 - srl $2, $4, 14 # check if address 
is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_u32 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32 bios_read_protect - -2: - open_load32_core - nop - - -execute_load_ewram_u32: - translate_region_ewram_load_align 2, 0, patch_load_u32 - load_u32 (ewram + 0x8000) - -#execute_load_u32: -execute_load_iwram_u32: - translate_region_align 3, 2, 0, patch_load_u32, (iwram + 0x8000), 0x7FFF - load_u32 (iwram + 0x8000) - -execute_load_io_u32: - translate_region_align 4, 2, 0, patch_load_u32, io_registers, 0x3FF - load_u32 io_registers - -execute_load_palette_u32: - translate_region_align 5, 2, 0, patch_load_u32, palette_ram, 0x3FF - load_u32 palette_ram - -execute_load_vram_u32: - translate_region_vram_load_align 2, 0, patch_load_u32 - load_u32 vram - -execute_load_oam_u32: - translate_region_align 7, 2, 0, patch_load_u32, oam_ram, 0x3FF - load_u32 oam_ram - -execute_load_gamepak8_u32: - translate_region_gamepak_align 8, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_gamepak9_u32: - translate_region_gamepak_align 9, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_gamepakA_u32: - translate_region_gamepak_align 10, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_gamepakB_u32: - translate_region_gamepak_align 11, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_gamepakC_u32: - translate_region_gamepak_align 12, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_eeprom_u32: - eeprom_load_align 2, 0, patch_load_u32 - -execute_load_backup_u32: - backup_load_align 2, 0, patch_load_u32 - nop - -execute_load_open_u32: - open_load32_align 2, 0, patch_load_u32 - nop - - -# Unsigned unaligned (by 1) 32bit load handlers - -execute_load_bios_u32u1: - region_check_align 0, 2, 1, patch_load_u32 - srl $2, $4, 
14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFC # generate offset - addu $2, $2, $4 - load_u32_unaligned bios_rom, 1 - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32_unaligned bios_read_protect, 1 - -2: - open_load32_core - ror $2, $2, 8 - -execute_load_ewram_u32u1: - translate_region_ewram_load_align32 2, 1, patch_load_u32 - load_u32_unaligned (ewram + 0x8000), 1 - -execute_load_iwram_u32u1: - translate_region_align 3, 2, 1, patch_load_u32, (iwram + 0x8000), 0x7FFC - load_u32_unaligned (iwram + 0x8000), 1 - -execute_load_io_u32u1: - translate_region_align 4, 2, 1, patch_load_u32, io_registers, 0x3FC - load_u32_unaligned io_registers, 1 - -execute_load_palette_u32u1: - translate_region_align 5, 2, 1, patch_load_u32, palette_ram, 0x3FC - load_u32_unaligned palette_ram, 1 - -execute_load_vram_u32u1: - translate_region_vram_load_align32 2, 1, patch_load_u32 - load_u32_unaligned vram, 1 - -execute_load_oam_u32u1: - translate_region_align 7, 2, 1, patch_load_u32, oam_ram, 0x3FC - load_u32_unaligned oam_ram, 1 - -execute_load_gamepak8_u32u1: - translate_region_gamepak_align32 8, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_gamepak9_u32u1: - translate_region_gamepak_align32 9, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_gamepakA_u32u1: - translate_region_gamepak_align32 10, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_gamepakB_u32u1: - translate_region_gamepak_align32 11, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_gamepakC_u32u1: - translate_region_gamepak_align32 12, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_eeprom_u32u1: - eeprom_load_align32 2, 1, patch_load_u32 - -execute_load_backup_u32u1: - backup_load_align32 2, 1, 
patch_load_u32 - ror $2, $2, 8 # rotate value by 8bits - -execute_load_open_u32u1: - open_load32_align32 2, 1, patch_load_u32 - ror $2, $2, 8 # rotate value by 8bits - - -# Unsigned unaligned (by 2) 32bit load handlers - -execute_load_bios_u32u2: - region_check_align 0, 2, 2, patch_load_u32 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFC # generate offset - addu $2, $2, $4 - load_u32_unaligned bios_rom, 2 - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32_unaligned bios_read_protect, 2 - -2: - open_load32_core - ror $2, $2, 16 - -execute_load_ewram_u32u2: - translate_region_ewram_load_align32 2, 2, patch_load_u32 - load_u32_unaligned (ewram + 0x8000), 2 - -execute_load_iwram_u32u2: - translate_region_align 3, 2, 2, patch_load_u32, (iwram + 0x8000), 0x7FFC - load_u32_unaligned (iwram + 0x8000), 2 - -execute_load_io_u32u2: - translate_region_align 4, 2, 2, patch_load_u32, io_registers, 0x3FC - load_u32_unaligned io_registers, 2 - -execute_load_palette_u32u2: - translate_region_align 5, 2, 2, patch_load_u32, palette_ram, 0x3FC - load_u32_unaligned palette_ram, 2 - -execute_load_vram_u32u2: - translate_region_vram_load_align32 2, 2, patch_load_u32 - load_u32_unaligned vram, 2 - -execute_load_oam_u32u2: - translate_region_align 7, 2, 2, patch_load_u32, oam_ram, 0x3FC - load_u32_unaligned oam_ram, 2 - -execute_load_gamepak8_u32u2: - translate_region_gamepak_align32 8, 2, 2, patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_gamepak9_u32u2: - translate_region_gamepak_align32 9, 2, 2, patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_gamepakA_u32u2: - translate_region_gamepak_align32 10, 2, 2, patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_gamepakB_u32u2: - translate_region_gamepak_align32 11, 
2, 2, patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_gamepakC_u32u2: - translate_region_gamepak_align32 12, 2, 2, patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_eeprom_u32u2: - eeprom_load_align32 2, 2, patch_load_u32 - -execute_load_backup_u32u2: - backup_load_align32 2, 2, patch_load_u32 - ror $2, $2, 16 # rotate value by 16bits - -execute_load_open_u32u2: - open_load32_align32 2, 2, patch_load_u32 - ror $2, $2, 16 # rotate value by 16bits - -# Unsigned unaligned (by 1) 32bit load handlers - -execute_load_bios_u32u3: - region_check_align 0, 2, 3, patch_load_u32 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFC # generate offset - addu $2, $2, $4 - load_u32_unaligned bios_rom, 3 - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32_unaligned bios_read_protect, 3 - -2: - open_load32_core - ror $2, $2, 24 - -execute_load_ewram_u32u3: - translate_region_ewram_load_align32 2, 3, patch_load_u32 - load_u32_unaligned (ewram + 0x8000), 3 - -execute_load_iwram_u32u3: - translate_region_align 3, 2, 3, patch_load_u32, (iwram + 0x8000), 0x7FFC - load_u32_unaligned (iwram + 0x8000), 3 - -execute_load_io_u32u3: - translate_region_align 4, 2, 3, patch_load_u32, io_registers, 0x3FC - load_u32_unaligned io_registers, 3 - -execute_load_palette_u32u3: - translate_region_align 5, 2, 3, patch_load_u32, palette_ram, 0x3FC - load_u32_unaligned palette_ram, 3 - -execute_load_vram_u32u3: - translate_region_vram_load_align32 2, 3, patch_load_u32 - load_u32_unaligned vram, 3 - -execute_load_oam_u32u3: - translate_region_align 7, 2, 3, patch_load_u32, oam_ram, 0x3FC - load_u32_unaligned oam_ram, 3 - -execute_load_gamepak8_u32u3: - translate_region_gamepak_align32 8, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - 
-execute_load_gamepak9_u32u3: - translate_region_gamepak_align32 9, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - -execute_load_gamepakA_u32u3: - translate_region_gamepak_align32 10, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - -execute_load_gamepakB_u32u3: - translate_region_gamepak_align32 11, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - -execute_load_gamepakC_u32u3: - translate_region_gamepak_align32 12, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - -execute_load_eeprom_u32u3: - eeprom_load_align32 2, 3, patch_load_u32 - -execute_load_backup_u32u3: - backup_load_align32 2, 3, patch_load_u32 - ror $2, $2, 24 # rotate value by 24bits - -execute_load_open_u32u3: - open_load32_align32 2, 3, patch_load_u32 - ror $2, $2, 24 # rotate value by 24bits - - -load_u32_ftable: - .long execute_load_bios_u32 # 0x00 BIOS - .long execute_load_open_u32 # 0x01 open address - .long execute_load_ewram_u32 # 0x02 EWRAM - .long execute_load_iwram_u32 # 0x03 IWRAM - .long execute_load_io_u32 # 0x04 I/O registers - .long execute_load_palette_u32 # 0x05 Palette RAM - .long execute_load_vram_u32 # 0x06 VRAM - .long execute_load_oam_u32 # 0x07 OAM RAM - .long execute_load_gamepak8_u32 # 0x08 gamepak - .long execute_load_gamepak9_u32 # 0x09 gamepak - .long execute_load_gamepakA_u32 # 0x0A gamepak - .long execute_load_gamepakB_u32 # 0x0B gamepak - .long execute_load_gamepakC_u32 # 0x0C gamepak - .long execute_load_eeprom_u32 # 0x0D gamepak/eeprom - .long execute_load_backup_u32 # 0x0E Flash ROM/SRAM - .long execute_load_open_u32 # 0x0F open - - .long execute_load_bios_u32u1 # 0x00 BIOS unaligned (1b) - .long execute_load_open_u32u1 # 0x01 open address unaligned (1b) - .long execute_load_ewram_u32u1 # 0x02 EWRAM unaligned (1b) - .long execute_load_iwram_u32u1 # 0x03 IWRAM unaligned (1b) - .long execute_load_io_u32u1 # 0x04 I/O registers unaligned (1b) - .long execute_load_palette_u32u1 # 0x05 Palette RAM unaligned (1b) - .long execute_load_vram_u32u1 # 0x06 VRAM unaligned (1b) - 
.long execute_load_oam_u32u1 # 0x07 OAM RAM unaligned (1b) - .long execute_load_gamepak8_u32u1 # 0x08 gamepak unaligned (1b) - .long execute_load_gamepak9_u32u1 # 0x09 gamepak unaligned (1b) - .long execute_load_gamepakA_u32u1 # 0x0A gamepak unaligned (1b) - .long execute_load_gamepakB_u32u1 # 0x0B gamepak unaligned (1b) - .long execute_load_gamepakC_u32u1 # 0x0C gamepak unaligned (1b) - .long execute_load_eeprom_u32u1 # 0x0D gamepak/eeprom unaligned (1b) - .long execute_load_backup_u32u1 # 0x0E Flash ROM/SRAM unaligned (1b) - .long execute_load_open_u32u1 # 0x0F open unaligned (1b) - - .long execute_load_bios_u32u2 # 0x00 BIOS unaligned (2b) - .long execute_load_open_u32u2 # 0x01 open address unaligned (2b) - .long execute_load_ewram_u32u2 # 0x02 EWRAM unaligned (2b) - .long execute_load_iwram_u32u2 # 0x03 IWRAM unaligned (2b) - .long execute_load_io_u32u2 # 0x04 I/O registers unaligned (2b) - .long execute_load_palette_u32u2 # 0x05 Palette RAM unaligned (2b) - .long execute_load_vram_u32u2 # 0x06 VRAM unaligned (2b) - .long execute_load_oam_u32u2 # 0x07 OAM RAM unaligned (2b) - .long execute_load_gamepak8_u32u2 # 0x08 gamepak unaligned (2b) - .long execute_load_gamepak9_u32u2 # 0x09 gamepak unaligned (2b) - .long execute_load_gamepakA_u32u2 # 0x0A gamepak unaligned (2b) - .long execute_load_gamepakB_u32u2 # 0x0B gamepak unaligned (2b) - .long execute_load_gamepakC_u32u2 # 0x0C gamepak unaligned (2b) - .long execute_load_eeprom_u32u2 # 0x0D gamepak/eeprom unaligned (2b) - .long execute_load_backup_u32u2 # 0x0E Flash ROM/SRAM unaligned (2b) - .long execute_load_open_u32u2 # 0x0F open unaligned (2b) - - .long execute_load_bios_u32u3 # 0x00 BIOS unaligned (3b) - .long execute_load_open_u32u3 # 0x01 open address unaligned (3b) - .long execute_load_ewram_u32u3 # 0x02 EWRAM unaligned (3b) - .long execute_load_iwram_u32u3 # 0x03 IWRAM unaligned (3b) - .long execute_load_io_u32u3 # 0x04 I/O registers unaligned (3b) - .long execute_load_palette_u32u3 # 0x05 Palette RAM 
unaligned (3b) - .long execute_load_vram_u32u3 # 0x06 VRAM unaligned (3b) - .long execute_load_oam_u32u3 # 0x07 OAM RAM unaligned (3b) - .long execute_load_gamepak8_u32u3 # 0x08 gamepak unaligned (3b) - .long execute_load_gamepak9_u32u3 # 0x09 gamepak unaligned (3b) - .long execute_load_gamepakA_u32u3 # 0x0A gamepak unaligned (3b) - .long execute_load_gamepakB_u32u3 # 0x0B gamepak unaligned (3b) - .long execute_load_gamepakC_u32u3 # 0x0C gamepak unaligned (3b) - .long execute_load_eeprom_u32u3 # 0x0D gamepak/eeprom unaligned (3b) - .long execute_load_backup_u32u3 # 0x0E Flash ROM/SRAM unaligned (3b) - .long execute_load_open_u32u3 # 0x0F open unaligned (3b) - -patch_load_u32: - patch_handler_align load_u32_ftable, 2 - - - -# Unsigned always aligned 32bit load handlers - -execute_load_bios_u32a: - region_check 0, patch_load_u32a - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_u32 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32 bios_read_protect - -2: - open_load32_core - nop - -execute_load_ewram_u32a: - translate_region_ewram patch_load_u32a - load_u32 (ewram + 0x8000) - -#execute_aligned_load32: -execute_load_iwram_u32a: - translate_region 3, patch_load_u32a, (iwram + 0x8000), 0x7FFF - load_u32 (iwram + 0x8000) - -execute_load_io_u32a: - translate_region 4, patch_load_u32a, io_registers, 0x3FF - load_u32 io_registers - -execute_load_palette_u32a: - translate_region 5, patch_load_u32a, palette_ram, 0x3FF - load_u32 palette_ram - -execute_load_vram_u32a: - translate_region_vram patch_load_u32a - load_u32 vram - -execute_load_oam_u32a: - translate_region 7, patch_load_u32a, oam_ram, 0x3FF - load_u32 oam_ram - -execute_load_gamepak8_u32a: - 
translate_region_gamepak_a 8, patch_load_u32a - load_u32 0 - -execute_load_gamepak9_u32a: - translate_region_gamepak_a 9, patch_load_u32a - load_u32 0 - -execute_load_gamepakA_u32a: - translate_region_gamepak_a 10, patch_load_u32a - load_u32 0 - -execute_load_gamepakB_u32a: - translate_region_gamepak_a 11, patch_load_u32a - load_u32 0 - -execute_load_gamepakC_u32a: - translate_region_gamepak_a 12, patch_load_u32a - load_u32 0 - -execute_load_eeprom_u32a: - eeprom_load_a patch_load_u32a - -execute_load_backup_u32a: - backup_load_a patch_load_u32a - nop - -execute_load_open_u32a: - open_load32_a patch_load_u32a - -load_u32a_ftable: - .long execute_load_bios_u32a # 0x00 BIOS unaligned (3b) - .long execute_load_open_u32a # 0x01 open address unaligned (3b) - .long execute_load_ewram_u32a # 0x02 EWRAM unaligned (3b) - .long execute_load_iwram_u32a # 0x03 IWRAM unaligned (3b) - .long execute_load_io_u32a # 0x04 I/O registers unaligned (3b) - .long execute_load_palette_u32a # 0x05 Palette RAM unaligned (3b) - .long execute_load_vram_u32a # 0x06 VRAM unaligned (3b) - .long execute_load_oam_u32a # 0x07 OAM RAM unaligned (3b) - .long execute_load_gamepak8_u32a # 0x08 gamepak unaligned (3b) - .long execute_load_gamepak9_u32a # 0x09 gamepak unaligned (3b) - .long execute_load_gamepakA_u32a # 0x0A gamepak unaligned (3b) - .long execute_load_gamepakB_u32a # 0x0B gamepak unaligned (3b) - .long execute_load_gamepakC_u32a # 0x0C gamepak unaligned (3b) - .long execute_load_eeprom_u32a # 0x0D gamepak/eeprom unaligned (3b) - .long execute_load_backup_u32a # 0x0E Flash ROM/SRAM unaligned (3b) - .long execute_load_open_u32a # 0x0F open unaligned (3b) - -patch_load_u32a: - patch_handler load_u32a_ftable, 1 - - -# Unsigned 8bit store handlers - -execute_store_ignore0_u8: - ignore_region 0, patch_store_u8 - -execute_store_ignore1_u8: - ignore_region 1, patch_store_u8 - -execute_store_ewram_u8: - translate_region_ewram patch_store_u8 - store_u8_smc (ewram + 0x8000) - -#execute_store_u8: 
-execute_store_iwram_u8: - translate_region 3, patch_store_u8, (iwram + 0x8000), 0x7FFF - store_u8_smc (iwram + 0x8000) - -execute_store_io_u8: - region_check 4, patch_store_u8 - andi $5, $5, 0xFF # make value 8bit - andi $4, $4, 0x3FF # wrap around address - sw $ra, REG_SAVE3($16) # preserve $ra - - save_registers - jal write_io_register8 # write the value out - sw $6, REG_PC($16) # save the PC (delay slot) - j write_io_epilogue # handle any state changes - nop - -execute_store_palette_u8: - region_check 5, patch_store_u8 - andi $2, $4, 0x3FE # align palette address - ins $5, $5, 8, 8 # double value - addu $2, $2, $16 - sh $5, 0x100($2) # palette_ram[address] = value - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - jr $ra # return - sh $1, 0x500($2) - -execute_store_vram_u8: - translate_region_vram_store_align16 patch_store_u8 - store_u8_double vram - -execute_store_oam_u8: - translate_region 7, patch_store_u8, oam_ram, 0x3FE - lui $1, %hi(oam_update) # write non-zero to oam_update - sw $1, %lo(oam_update)($1) # cheap, but this is non-zero - store_u8_double oam_ram - -execute_store_ignore8_u8: - ignore_region 8, patch_store_u8 - -execute_store_ignore9_u8: - ignore_region 9, patch_store_u8 - -execute_store_ignoreA_u8: - ignore_region 10, patch_store_u8 - -execute_store_ignoreB_u8: - ignore_region 11, patch_store_u8 - -execute_store_ignoreC_u8: - ignore_region 12, patch_store_u8 - -execute_store_eeprom_u8: - store_function write_eeprom, 13, patch_store_u8, 0x3FF - -execute_store_backup_u8: - store_function write_backup, 14, patch_store_u8, 0xFFFF - -execute_store_ignoreF_u8: - ignore_high patch_store_u8 - -store_u8_ftable: - .long execute_store_ignore0_u8 # 0x00 BIOS - .long execute_store_ignore1_u8 # 0x01 open address - .long execute_store_ewram_u8 # 0x02 EWRAM - .long execute_store_iwram_u8 # 0x03 IWRAM - .long execute_store_io_u8 # 0x04 I/O registers - .long execute_store_palette_u8 # 0x05 
Palette RAM - .long execute_store_vram_u8 # 0x06 VRAM - .long execute_store_oam_u8 # 0x07 OAM RAM - .long execute_store_ignore8_u8 # 0x08 gamepak - .long execute_store_ignore9_u8 # 0x09 gamepak - .long execute_store_ignoreA_u8 # 0x0A gamepak - .long execute_store_ignoreB_u8 # 0x0B gamepak - .long execute_store_ignoreC_u8 # 0x0C gamepak - .long execute_store_eeprom_u8 # 0x0D gamepak/eeprom - .long execute_store_backup_u8 # 0x0E Flash ROM/SRAM - .long execute_store_ignoreF_u8 # 0x0F open address - -patch_store_u8: - patch_handler store_u8_ftable, 0x0F - - -# Unsigned 16bit store handlers - -execute_store_ignore0_u16: - ignore_region 0, patch_store_u16 - -execute_store_ignore1_u16: - ignore_region 1, patch_store_u16 - -execute_store_ewram_u16: - translate_region_ewram_store_align16 patch_store_u16 - store_u16_smc (ewram + 0x8000) - -#execute_store_u16: -execute_store_iwram_u16: - translate_region 3, patch_store_u16, (iwram + 0x8000), 0x7FFE - store_u16_smc (iwram + 0x8000) - -execute_store_io_u16: - region_check 4, patch_store_u16 - andi $5, $5, 0xFFFF # make value 16bit - andi $4, $4, 0x3FE # wrap around/align address - sw $ra, REG_SAVE3($16) # preserve $ra - - save_registers - jal write_io_register16 # write the value out - sw $6, REG_PC($16) # save the PC (delay slot) - j write_io_epilogue # handle any state changes - nop - -execute_store_palette_u16: - region_check 5, patch_store_u16 - andi $2, $4, 0x3FE # wrap/align palette address - addu $2, $2, $16 - sh $5, 0x100($2) # palette_ram[address] = value - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - jr $ra # return - sh $1, 0x500($2) - -execute_store_vram_u16: - translate_region_vram_store_align16 patch_store_u16 - store_u16 vram - -execute_store_oam_u16: - translate_region 7, patch_store_u16, oam_ram, 0x3FE - lui $1, %hi(oam_update) # write non-zero to oam_update - sw $1, %lo(oam_update)($1) # cheap, but this is non-zero - store_u16 oam_ram 
- -execute_store_rtc_u16: - store_function write_rtc, 8, patch_store_u16, 0xFE - -execute_store_ignore9_u16: - ignore_region 9, patch_store_u16 - -execute_store_ignoreA_u16: - ignore_region 10, patch_store_u16 - -execute_store_ignoreB_u16: - ignore_region 11, patch_store_u16 - -execute_store_ignoreC_u16: - ignore_region 12, patch_store_u16 - -execute_store_eeprom_u16: - store_function write_eeprom, 13, patch_store_u16, 0x3FE - -execute_store_ignoreE_u16: - ignore_region 14, patch_store_u16 - -execute_store_ignoreF_u16: - ignore_high patch_store_u16 - -store_u16_ftable: - .long execute_store_ignore0_u16 # 0x00 BIOS - .long execute_store_ignore1_u16 # 0x01 open address - .long execute_store_ewram_u16 # 0x02 EWRAM - .long execute_store_iwram_u16 # 0x03 IWRAM - .long execute_store_io_u16 # 0x04 I/O registers - .long execute_store_palette_u16 # 0x05 Palette RAM - .long execute_store_vram_u16 # 0x06 VRAM - .long execute_store_oam_u16 # 0x07 OAM RAM - .long execute_store_rtc_u16 # 0x08 gamepak - .long execute_store_ignore9_u16 # 0x09 gamepak - .long execute_store_ignoreA_u16 # 0x0A gamepak - .long execute_store_ignoreB_u16 # 0x0B gamepak - .long execute_store_ignoreC_u16 # 0x0C gamepak - .long execute_store_eeprom_u16 # 0x0D gamepak/eeprom - .long execute_store_ignoreE_u16 # 0x0E Flash ROM/SRAM - .long execute_store_ignoreF_u16 # 0x0F open address - - -patch_store_u16: - patch_handler store_u16_ftable, 0x0F - - - - -# Unsigned 32bit store handlers - -execute_store_ignore0_u32: - ignore_region 0, patch_store_u32 - -execute_store_ignore1_u32: - ignore_region 1, patch_store_u32 - -execute_store_ewram_u32: - translate_region_ewram_store_align32 patch_store_u32 - store_u32_smc (ewram + 0x8000) - -#execute_store_u32: -execute_store_iwram_u32: - translate_region 3, patch_store_u32, (iwram + 0x8000), 0x7FFC - store_u32_smc (iwram + 0x8000) - -execute_store_io_u32: - region_check 4, patch_store_u32 - nop - andi $4, $4, 0x3FC # wrap around/align address - sw $ra, REG_SAVE3($16) # 
preserve $ra - - save_registers - jal write_io_register32 # write the value out - sw $6, REG_PC($16) # save the PC (delay slot) - j write_io_epilogue # handle any state changes - nop - -execute_store_palette_u32: - region_check 5, patch_store_u32 - andi $2, $4, 0x3FC # wrap/align palette address - addu $2, $2, $16 - sw $5, 0x100($2) # palette_ram[address] = value - - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - sh $1, 0x500($2) - - srl $5, $5, 16 # shift down to next palette value - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - - jr $ra # return - sh $1, 0x502($2) - -execute_store_vram_u32: - translate_region_vram_store_align32 patch_store_u32 - store_u32 vram - -execute_store_oam_u32: - translate_region 7, patch_store_u32, oam_ram, 0x3FC - lui $1, %hi(oam_update) # write non-zero to oam_update - sw $1, %lo(oam_update)($1) # cheap, but this is non-zero - store_u32 oam_ram - -execute_store_ignore8_u32: - ignore_region 8, patch_store_u32 - -execute_store_ignore9_u32: - ignore_region 9, patch_store_u32 - -execute_store_ignoreA_u32: - ignore_region 10, patch_store_u32 - -execute_store_ignoreB_u32: - ignore_region 11, patch_store_u32 - -execute_store_ignoreC_u32: - ignore_region 12, patch_store_u32 - -execute_store_eeprom_u32: - store_function write_eeprom, 13, patch_store_u32, 0x3FC - -execute_store_ignoreE_u32: - ignore_region 14, patch_store_u32 - -execute_store_ignoreF_u32: - ignore_high patch_store_u32 - -store_u32_ftable: - .long execute_store_ignore0_u32 # 0x00 BIOS - .long execute_store_ignore1_u32 # 0x01 open address - .long execute_store_ewram_u32 # 0x02 EWRAM - .long execute_store_iwram_u32 # 0x03 IWRAM - .long execute_store_io_u32 # 0x04 I/O registers - .long execute_store_palette_u32 # 0x05 Palette RAM - .long execute_store_vram_u32 # 0x06 VRAM - .long execute_store_oam_u32 # 0x07 OAM RAM - .long 
execute_store_ignore8_u32 # 0x08 gamepak - .long execute_store_ignore9_u32 # 0x09 gamepak - .long execute_store_ignoreA_u32 # 0x0A gamepak - .long execute_store_ignoreB_u32 # 0x0B gamepak - .long execute_store_ignoreC_u32 # 0x0C gamepak - .long execute_store_eeprom_u32 # 0x0D gamepak/eeprom - .long execute_store_ignoreE_u32 # 0x0E Flash ROM/SRAM - .long execute_store_ignoreF_u32 # 0x0F open address - - -patch_store_u32: - patch_handler store_u32_ftable, 0x0F - - - -# Unsigned always aligned, a2 safe 32bit store handlers - -execute_store_ignore0_u32a: - ignore_region 0, patch_store_u32a - -execute_store_ignore1_u32a: - ignore_region 1, patch_store_u32a - -execute_store_ewram_u32a: - translate_region_ewram_store_align32 patch_store_u32a - store_u32 (ewram + 0x8000) - -#execute_aligned_store32: -execute_store_iwram_u32a: - translate_region 3, patch_store_u32a, (iwram + 0x8000), 0x7FFC - store_u32 (iwram + 0x8000) - -execute_store_io_u32a: - region_check 4, patch_store_u32a - nop - sw $6, REG_SAVE($16) # save a2 - sw $ra, REG_SAVE2($16) # save ra - - andi $4, $4, 0x3FC # wrap around/align address - - save_registers - jal write_io_register32 # write the value out - nop - - restore_registers - - lw $ra, REG_SAVE2($16) # restore ra - jr $ra - lw $6, REG_SAVE($16) # restore a2 - -execute_store_palette_u32a: - region_check 5, patch_store_u32a - andi $2, $4, 0x3FC # wrap/align palette address - addu $2, $2, $16 - sw $5, 0x100($2) # palette_ram[address] = value - - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - sh $1, 0x500($2) - - srl $5, $5, 16 # shift down to next palette value - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - - jr $ra # return - sh $1, 0x502($2) - -execute_store_vram_u32a: - translate_region_vram_store_align32 patch_store_u32a - store_u32 vram - -execute_store_oam_u32a: - translate_region 7, patch_store_u32a, 
oam_ram, 0x3FC - lui $1, %hi(oam_update) # write non-zero to oam_update - sw $1, %lo(oam_update)($1) # cheap, but this is non-zero - store_u32 oam_ram - -execute_store_ignore8_u32a: - ignore_region 8, patch_store_u32a - -execute_store_ignore9_u32a: - ignore_region 9, patch_store_u32a - -execute_store_ignoreA_u32a: - ignore_region 10, patch_store_u32a - -execute_store_ignoreB_u32a: - ignore_region 11, patch_store_u32a - -execute_store_ignoreC_u32a: - ignore_region 12, patch_store_u32a - -execute_store_eeprom_u32a: - store_function_a write_eeprom, 13, patch_store_u32a, 0x3FC - -execute_store_ignoreE_u32a: - ignore_region 14, patch_store_u32a - -execute_store_ignoreF_u32a: - ignore_high patch_store_u32a - -store_u32a_ftable: - .long execute_store_ignore0_u32a# 0x00 BIOS - .long execute_store_ignore1_u32a# 0x01 open address - .long execute_store_ewram_u32a # 0x02 EWRAM - .long execute_store_iwram_u32a # 0x03 IWRAM - .long execute_store_io_u32a # 0x04 I/O registers - .long execute_store_palette_u32a# 0x05 Palette RAM - .long execute_store_vram_u32a # 0x06 VRAM - .long execute_store_oam_u32a # 0x07 OAM RAM - .long execute_store_ignore8_u32a# 0x08 gamepak - .long execute_store_ignore9_u32a# 0x09 gamepak - .long execute_store_ignoreA_u32a# 0x0A gamepak - .long execute_store_ignoreB_u32a# 0x0B gamepak - .long execute_store_ignoreC_u32a# 0x0C gamepak - .long execute_store_eeprom_u32a # 0x0D gamepak/eeprom - .long execute_store_ignoreE_u32a# 0x0E Flash ROM/SRAM - .long execute_store_ignoreF_u32a# 0x0F open address - -patch_store_u32a: - patch_handler store_u32a_ftable, 0x0F - - write_io_epilogue: beq $2, $0, no_alert # 0 means nothing happened addiu $4, $2, -2 # see if return value is 2 (delay slot) -- cgit v1.2.3 From 6b503667ec074c55dbcd689595d8fe03aa17e4a4 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 16 Mar 2021 19:02:11 +0100 Subject: Add Dingux support Uses a different cache primitive and a different madd(u) encoding.
Also added a flag for BGR vs RGB color output (since PSP is assuming to be BGR for speed). Aside from that the ABI required some special function calls for PIC. --- Makefile | 10 ++-- psp/mips_emit.h | 156 ++++++++++++++++++++++++++++++++++++-------------------- psp/mips_stub.S | 100 +++++++++++++++++++++--------------- 3 files changed, 165 insertions(+), 101 deletions(-) diff --git a/Makefile b/Makefile index 85465e0..eddfe36 100644 --- a/Makefile +++ b/Makefile @@ -193,7 +193,7 @@ else ifeq ($(platform), psp1) TARGET := $(TARGET_NAME)_libretro_$(platform).a CC = psp-gcc$(EXE_EXT) AR = psp-ar$(EXE_EXT) - CFLAGS += -DPSP -G0 + CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT CFLAGS += -I$(shell psp-config --pspsdk-path)/include CFLAGS += -march=allegrex -mfp32 -mgp32 -mlong32 -mabi=eabi CFLAGS += -fomit-frame-pointer -ffast-math @@ -374,8 +374,10 @@ else ifeq ($(platform), gcw0) CXX = /opt/gcw0-toolchain/usr/bin/mipsel-linux-g++ AR = /opt/gcw0-toolchain/usr/bin/mipsel-linux-ar SHARED := -shared -nostdlib -Wl,--version-script=link.T - fpic := -fPIC + fpic := -fPIC -DPIC CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float + HAVE_DYNAREC := 1 + CPU_ARCH := mips # Windows else @@ -403,8 +405,8 @@ ifeq ($(DEBUG), 1) OPTIMIZE_SAFE := -O0 -g OPTIMIZE := -O0 -g else - OPTIMIZE_SAFE := -O2 -DNDEBUG -g - OPTIMIZE := -O3 -DNDEBUG -g + OPTIMIZE_SAFE := -O2 -DNDEBUG + OPTIMIZE := -O3 -DNDEBUG endif diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 48ed630..506c440 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -132,6 +132,12 @@ typedef enum mips_special_min = 0x2D, } mips_function_special; +typedef enum +{ + mips_special2_madd = 0x00, + mips_special2_maddu = 0x01, +} mips_function_special2; + typedef enum { mips_special3_ext = 0x00, @@ -203,6 +209,12 @@ typedef enum mips_special_##function; \ translation_ptr += 4 \ +#define mips_emit_special2(function, rs, rt, rd, shift) \ + *((u32 *)translation_ptr) = (mips_opcode_special2 << 26) | \ + (rs << 
21) | (rt << 16) | (rd << 11) | (shift << 6) | \ + mips_special2_##function; \ + translation_ptr += 4 \ + #define mips_emit_special3(function, rs, rt, imm_a, imm_b) \ *((u32 *)translation_ptr) = (mips_opcode_special3 << 26) | \ (rs << 21) | (rt << 16) | (imm_a << 11) | (imm_b << 6) | \ @@ -314,11 +326,19 @@ typedef enum #define mips_emit_divu(rs, rt) \ mips_emit_special(divu, rs, rt, 0, 0) \ -#define mips_emit_madd(rs, rt) \ - mips_emit_special(madd, rs, rt, 0, 0) \ +#ifdef PSP + #define mips_emit_madd(rs, rt) \ + mips_emit_special(madd, rs, rt, 0, 0) \ -#define mips_emit_maddu(rs, rt) \ - mips_emit_special(maddu, rs, rt, 0, 0) \ + #define mips_emit_maddu(rs, rt) \ + mips_emit_special(maddu, rs, rt, 0, 0) +#else + #define mips_emit_madd(rs, rt) \ + mips_emit_special2(madd, rs, rt, 0, 0) \ + + #define mips_emit_maddu(rs, rt) \ + mips_emit_special2(maddu, rs, rt, 0, 0) +#endif #define mips_emit_movn(rd, rs, rt) \ mips_emit_special(movn, rs, rt, rd, 0) \ @@ -411,6 +431,9 @@ typedef enum #define mips_emit_jr(rs) \ mips_emit_special(jr, rs, 0, 0, 0) \ +#define mips_emit_jalr(rs) \ + mips_emit_special(jalr, rs, 0, 31, 0) \ + #define mips_emit_synci(rs, offset) \ mips_emit_regimm(synci, rs, offset) \ @@ -2535,8 +2558,9 @@ u8 swi_hle_handle[256] = #define ReOff_GP_Save (32*4) // GP_SAVE // Saves all regs to their right slot and loads gp -#define emit_save_regs(save_a2) \ - for (unsigned i = 0; i < 15; i++) { \ +#define emit_save_regs(save_a2) { \ + int i; \ + for (i = 0; i < 15; i++) { \ mips_emit_sw(arm_to_mips_reg[i], reg_base, 4 * i); \ } \ if (save_a2) { \ @@ -2544,21 +2568,24 @@ u8 swi_hle_handle[256] = } \ /* Load the gp pointer, used by C code */ \ mips_emit_lw(mips_reg_gp, reg_base, ReOff_GP_Save); \ +} // Restores the registers from their slot -#define emit_restore_regs(restore_a2) \ +#define emit_restore_regs(restore_a2) { \ + int i; \ if (restore_a2) { \ mips_emit_lw(reg_a2, reg_base, ReOff_SaveR2); \ } \ - for (unsigned i = 0; i < 15; i++) { \ + for (i = 0; i < 
15; i++) { \ mips_emit_lw(arm_to_mips_reg[i], reg_base, 4 * i); \ } \ +} // Emits a function call for a read or a write (for special stuff like flash) #define emit_mem_call_ds(fnptr, mask) \ mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); \ emit_save_regs(true); \ - mips_emit_jal(((u32)(fnptr)) >> 2); \ + genccall(fnptr); \ mips_emit_andi(reg_a0, reg_a0, (mask)); \ emit_restore_regs(true); \ mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); \ @@ -2569,10 +2596,10 @@ u8 swi_hle_handle[256] = mips_emit_nop(); // Pointer table to stubs, indexed by type and region -// Caution! This is not really a ptr table, but contains pre-encoed JALs extern u32 tmemld[11][16]; extern u32 tmemst[ 4][16]; void mips_lookup_pc(); +void smc_write(); cpu_alert_type write_io_register8 (u32 address, u32 value); cpu_alert_type write_io_register16(u32 address, u32 value); cpu_alert_type write_io_register32(u32 address, u32 value); @@ -2624,6 +2651,15 @@ static void emit_mem_access_loadop( }; } +#ifdef PIC + #define genccall(fn) \ + mips_emit_lui(mips_reg_t9, ((u32)fn) >> 16); \ + mips_emit_ori(mips_reg_t9, mips_reg_t9, ((u32)fn)); \ + mips_emit_jalr(mips_reg_t9); +#else + #define genccall(fn) mips_emit_jal(((u32)fn) >> 2); +#endif + // Stub memory map: // 0 .. 63 First patch handler [#0] // 448 .. 
511 Last patch handler [#7] @@ -2721,7 +2757,7 @@ static void emit_pmemld_stub( emit_save_regs(aligned); mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff - mips_emit_jal(((u32)&load_gamepak_page) >> 2); + genccall(&load_gamepak_page); mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1); mips_emit_lw(reg_temp, reg_base, ReOff_SaveR1); @@ -2891,6 +2927,21 @@ static void emit_pmemst_stub( *tr_ptr = translation_ptr; } +#ifdef USE_BGR_FORMAT + /* 0BGR to BGR565, for PSP */ + #define palette_convert() \ + mips_emit_sll(reg_temp, reg_a1, 1); \ + mips_emit_andi(reg_temp, reg_temp, 0xFFC0); \ + mips_emit_ins(reg_temp, reg_a1, 0, 5); +#else + /* 0BGR to RGB565 (clobbers a0!) */ + #define palette_convert() \ + mips_emit_ext(reg_temp, reg_a1, 10, 5); \ + mips_emit_ins(reg_temp, reg_a1, 11, 5); \ + mips_emit_ext(reg_a0, reg_a1, 5, 5); \ + mips_emit_ins(reg_temp, reg_a0, 6, 5); +#endif + // Palette is accessed differently and stored in a decoded manner static void emit_palette_hdl( unsigned memop_number, const t_stub_meminfo *meminfo, @@ -2923,17 +2974,13 @@ static void emit_palette_hdl( mips_emit_sh(reg_a1, reg_base, 0x100); } - mips_emit_sll(reg_temp, reg_a1, 1); - mips_emit_andi(reg_temp, reg_temp, 0xFFC0); - mips_emit_ins(reg_temp, reg_a1, 0, 5); + palette_convert(); mips_emit_sh(reg_temp, reg_rv, 0x500); if (size == 2) { // Convert the second half-word also mips_emit_srl(reg_a1, reg_a1, 16); - mips_emit_sll(reg_temp, reg_a1, 1); - mips_emit_andi(reg_temp, reg_temp, 0xFFC0); - mips_emit_ins(reg_temp, reg_a1, 0, 5); + palette_convert(); mips_emit_sh(reg_temp, reg_rv, 0x502); } generate_function_return_swap_delay(); @@ -2980,6 +3027,7 @@ static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) { // Stubs for regions with EEPROM or flash/SRAM static void emit_saveaccess_stub(u8 **tr_ptr) { + unsigned opt, i, strop; u8 *translation_ptr = *tr_ptr; const u8 opmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 
10}, {10, 11} }; @@ -2995,9 +3043,9 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { emit_mem_call(&write_eeprom, 0x3FF); // Map loads to the read handler. - for (unsigned opt = 0; opt < 6; opt++) { + for (opt = 0; opt < 6; opt++) { // Unalignment is not relevant here, so map them all to the same handler. - for (unsigned i = opmap[opt][0]; i < opmap[opt][1]; i++) + for (i = opmap[opt][0]; i < opmap[opt][1]; i++) tmemld[i][13] = (u32)translation_ptr; // Emit just a check + patch jump mips_emit_srl(reg_temp, reg_a0, 24); @@ -3007,7 +3055,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { mips_emit_b(beq, reg_zero, reg_zero, branch_offset(read_hndlr)); } // This is for stores - for (unsigned strop = 0; strop <= 3; strop++) { + for (strop = 0; strop <= 3; strop++) { tmemst[strop][13] = (u32)translation_ptr; mips_emit_srl(reg_temp, reg_a0, 24); mips_emit_xori(reg_rv, reg_temp, 0x0D); @@ -3017,7 +3065,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { } // Flash/SRAM/Backup writes are only 8 byte supported - for (unsigned strop = 0; strop <= 3; strop++) { + for (strop = 0; strop <= 3; strop++) { tmemst[strop][14] = (u32)translation_ptr; mips_emit_srl(reg_temp, reg_a0, 24); mips_emit_xori(reg_rv, reg_temp, 0x0E); @@ -3038,7 +3086,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { (u32)&write_io_register8, (u32)&write_io_register16, (u32)&write_io_register32, (u32)&write_io_register32 }; const u32 amsk[] = {0x3FF, 0x3FE, 0x3FC, 0x3FC}; - for (unsigned strop = 0; strop <= 3; strop++) { + for (strop = 0; strop <= 3; strop++) { tmemst[strop][4] = (u32)translation_ptr; mips_emit_srl(reg_temp, reg_a0, 24); mips_emit_xori(reg_temp, reg_temp, 0x04); @@ -3047,7 +3095,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); // Store the return addr emit_save_regs(strop == 3); mips_emit_andi(reg_a0, reg_a0, amsk[strop]); - mips_emit_jal(iowrtbl[strop] >> 2); + genccall(iowrtbl[strop]); if (strop < 3) { mips_emit_sw(reg_a2, 
reg_base, ReOff_RegPC); // Save PC (delay) @@ -3069,6 +3117,7 @@ static void emit_openload_stub( unsigned memopn, bool signext, unsigned size, unsigned alignment, bool aligned, u8 **tr_ptr ) { + u8 *jmp1, *jmp2; u8 *translation_ptr = *tr_ptr; // This affects regions 1 and 15 @@ -3105,30 +3154,31 @@ static void emit_openload_stub( switch (size) { case 0: - mips_emit_b(beq, reg_zero, reg_rv, 2); // Depends on CPU mode mips_emit_andi(reg_a0, reg_a0, 0x3); // ARM: Isolate two LSB - mips_emit_andi(reg_a0, reg_a0, 0x1); // Thb: Isolate one LSB - mips_emit_jal(((u32)&read_memory8) >> 2); + mips_emit_andi(reg_temp, reg_a0, 0x1); // Thb: Isolate one LSB + mips_emit_movn(reg_a0, reg_temp, reg_rv); // Pick thumb or ARM + genccall(&read_memory8); mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay) break; case 1: - mips_emit_b(beq, reg_zero, reg_rv, 2); mips_emit_andi(reg_a0, reg_a0, 0x2); // ARM: Isolate bit 1 - mips_emit_andi(reg_a0, reg_a0, 0x0); // Thb: Ignore low bits at all - mips_emit_jal(((u32)&read_memory16) >> 2); + mips_emit_movn(reg_a0, reg_zero, reg_rv); // Thumb: ignore all low bits + genccall(&read_memory16); mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay) break; default: - mips_emit_b(beq, reg_zero, reg_rv, 5); + mips_emit_b_filler(beq, reg_zero, reg_rv, jmp1); mips_emit_addu(reg_a0, reg_zero, reg_a1); // Move PC to arg0 - mips_emit_jal(((u32)&read_memory16) >> 2); + genccall(&read_memory16); mips_emit_nop(); - mips_emit_b(beq, reg_zero, reg_zero, 3); + mips_emit_b_filler(beq, reg_zero, reg_zero, jmp2); mips_emit_ins(reg_rv, reg_rv, 16, 16); // res = res | (res << 16) [delay] - - mips_emit_jal(((u32)&read_memory32) >> 2); + + generate_branch_patch_conditional(jmp1, translation_ptr); + genccall(&read_memory32); mips_emit_nop(); + generate_branch_patch_conditional(jmp2, translation_ptr); break; }; @@ -3194,9 +3244,14 @@ static void emit_phand( mips_emit_rotr(reg_temp, reg_temp, 6); // Swap opcode and immediate 
mips_emit_sw(reg_temp, mips_reg_ra, -8); // Patch instruction! + #ifdef PSP mips_emit_cache(0x1A, mips_reg_ra, -8); mips_emit_jr(reg_rv); // Jump directly to target for speed mips_emit_cache(0x08, mips_reg_ra, -8); + #else + mips_emit_jr(reg_rv); + mips_emit_synci(mips_reg_ra, -8); + #endif // Round up handlers to 16 instructions for easy addressing :) while (translation_ptr - *tr_ptr < 64) { @@ -3212,6 +3267,7 @@ static void emit_phand( // - mem stubs: There's stubs for load & store, and every memory region // and possible operand size and misaligment (+sign extensions) void init_emitter() { + int i; // Initialize memory to a debuggable state memset(stub_arena, 0, sizeof(stub_arena)); // nop @@ -3234,25 +3290,8 @@ void init_emitter() { emit_phand(&translation_ptr, 2, 13 * 16, false); // st u32 emit_phand(&translation_ptr, 2, 14 * 16, false); // st aligned 32 - // Generate SMC write handler, with the lookup machinery - // Call out the flushing routine (save PC) - emit_save_regs(false); - mips_emit_jal(((u32)&flush_translation_cache_ram) >> 2); - mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Delay slot - - mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR - mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit - mips_emit_b(beq, reg_rv, reg_zero, 3); // Skip to ARM mode - mips_emit_lw(reg_a0, reg_base, ReOff_RegPC); // arg0=pc - // Lookup thumb PC and execute - mips_emit_jal(((u32)&block_lookup_address_thumb) >> 2); - mips_emit_addiu(mips_reg_ra, mips_reg_ra, 8); // Skip 2 insts on return! 
- // Lookup arm PC and execute - mips_emit_jal(((u32)&block_lookup_address_arm) >> 2); - mips_emit_nop(); - // Epiloge (restore and jump) - emit_restore_regs(false); - mips_emit_jr(reg_rv); // Go execute the code + // This is just a trampoline (for the SMC branches) + mips_emit_j(((u32)&smc_write) >> 2); mips_emit_nop(); // Generate the openload handlers (for accesses to unmapped mem) @@ -3269,7 +3308,7 @@ void init_emitter() { emit_openload_stub(10,false, 2, 0, true, &translation_ptr); // ld aligned 32 // Here we emit the ignore store area, just checks and does nothing - for (unsigned i = 0; i < 4; i++) + for (i = 0; i < 4; i++) emit_ignorestore_stub(i, &translation_ptr); // Here go the save game handlers @@ -3295,7 +3334,7 @@ void init_emitter() { // 15 Open load / Ignore store }; - for (unsigned i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) { + for (i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) { ldhldr_t handler = (ldhldr_t)ldinfo[i].emitter; /* region info signext sz al isaligned */ handler(0, &ldinfo[i], false, 0, 0, false, &translation_ptr); // ld u8 @@ -3325,7 +3364,7 @@ void init_emitter() { // Store only for "regular"-ish mem regions // - for (unsigned i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) { + for (i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) { sthldr_t handler = (sthldr_t)stinfo[i].emitter; handler(0, &stinfo[i], 0, false, &translation_ptr); // st u8 handler(1, &stinfo[i], 1, false, &translation_ptr); // st u16 @@ -3334,6 +3373,11 @@ void init_emitter() { } } +u32 execute_arm_translate_internal(u32 cycles, void *regptr); +u32 function_cc execute_arm_translate(u32 cycles) { + return execute_arm_translate_internal(cycles, &reg[0]); +} + #endif diff --git a/psp/mips_stub.S b/psp/mips_stub.S index a427e89..2d40bf8 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -33,13 +33,14 @@ .global execute_lsr_flags_reg .global execute_asr_flags_reg .global execute_ror_flags_reg -.global execute_arm_translate +.global
execute_arm_translate_internal .global icache_region_sync .global reg_check .global palette_ram .global palette_ram_converted .global init_emitter .global mips_lookup_pc +.global smc_write .global write_io_epilogue .global memory_map_read @@ -120,6 +121,7 @@ .equ REGMODE_BASE, (0x900 + 24) .equ SUPERVISOR_SPSR, (3 * 4 + SPSR_BASE) .equ SUPERVISOR_LR, ((3 * (7 * 4)) + (6 * 4) + REGMODE_BASE) +.equ FNPTRS_BASE, (0x900 + 220 + 960) .set noat .set noreorder @@ -196,6 +198,22 @@ lw $30, REG_R14($16) .endm +# PIC ABI mandates to jump to target via $t9 + +#ifdef PIC +.macro cfncall target, targetid + lw $t9, (FNPTRS_BASE + \targetid * 4)($16) + jalr $t9 + nop +.endm +#else +.macro cfncall target, targetid + jal \target + nop +.endm +#endif + + # Process a hardware event. Since an interrupt might be # raised we have to check if the PC has changed. @@ -213,8 +231,8 @@ mips_update_gba: sw $ra, REG_SAVE2($16) # save return addr collapse_flags # update cpsr save_registers # save registers - jal update_gba # process the next event sw $0, CHANGED_PC_STATUS($16) + cfncall update_gba, 0 # process the next event lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame bne $1, $0, return_to_main # Return to main thread now @@ -257,26 +275,24 @@ return_to_main: mips_indirect_branch_arm: save_registers - jal block_lookup_address_arm # $2 = MIPS address to jump to - nop + cfncall block_lookup_address_arm, 1 restore_registers - jr $2 # jump to it + jr $2 # $2 = value returned nop mips_indirect_branch_thumb: save_registers - jal block_lookup_address_thumb # $2 = MIPS address to jump to - nop + cfncall block_lookup_address_thumb, 2 restore_registers - jr $2 # jump to it + jr $2 # $2 = value returned nop mips_indirect_branch_dual: save_registers - jal block_lookup_address_dual # $2 = MIPS address to jump to + cfncall block_lookup_address_dual, 3 nop restore_registers - jr $2 # jump to it + jr $2 # $2 = value returned nop @@ -293,8 +309,7 @@ write_io_epilogue: alert_loop: - jal 
update_gba # process the next event - nop + cfncall update_gba, 0 # process the next event lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame bne $1, $0, return_to_main # Return to main thread now @@ -321,15 +336,14 @@ no_alert: nop smc_dma: - jal flush_translation_cache_ram # flush translation cache - nop + cfncall flush_translation_cache_ram, 4 j lookup_pc nop smc_write: save_registers - jal flush_translation_cache_ram # flush translation cache - sw $6, REG_PC($16) # save PC (delay slot) + sw $6, REG_PC($16) # save PC + cfncall flush_translation_cache_ram, 4 mips_lookup_pc: lookup_pc: @@ -339,17 +353,17 @@ lookup_pc: nop lookup_pc_thumb: - jal block_lookup_address_thumb # get Thumb address - lw $4, REG_PC($16) # load PC as arg 0 (delay slot) + lw $4, REG_PC($16) # load PC as arg 0 + cfncall block_lookup_address_thumb, 2 # get Thumb address restore_registers - jr $2 # jump to result + jr $2 # jump to result nop lookup_pc_arm: - jal block_lookup_address_arm # get ARM address - lw $4, REG_PC($16) # load PC as arg 0 (delay slot) + lw $4, REG_PC($16) # load PC as arg 0 + cfncall block_lookup_address_arm, 1 # get ARM address restore_registers - jr $2 # jump to result + jr $2 # jump to result nop # Return the current cpsr @@ -381,8 +395,8 @@ execute_swi: ori $2, 0x13 # set mode to supervisor sw $2, REG_CPSR($16) # write back CPSR save_registers - jal set_cpu_mode # set the CPU mode to supervisor - li $4, 3 # 3 is supervisor mode (delay slot) + li $4, 3 # 3 is supervisor mode + cfncall set_cpu_mode, 5 # set the CPU mode to supervisor restore_registers lw $ra, ($sp) # pop $ra jr $ra # return @@ -404,8 +418,7 @@ execute_spsr_restore: addiu $sp, $sp, -4 sw $ra, ($sp) save_registers - jal execute_spsr_restore_body # do the dirty work in this C function - nop + cfncall execute_spsr_restore_body, 6 # do the dirty work in this C function restore_registers addu $4, $2, $0 # move return value to $4 lw $ra, ($sp) @@ -429,8 +442,8 @@ execute_store_cpsr: 
extract_flags_body # extract flags from $1 sw $ra, REG_SAVE3($16) save_registers - jal execute_store_cpsr_body # do the dirty work in this C function - addu $4, $1, $0 # load the new CPSR (delay slot) + addu $4, $1, $0 # load the new CPSR + cfncall execute_store_cpsr_body, 7 # do the dirty work in this C function bne $2, $0, changed_pc_cpsr # this could have changed the pc nop @@ -442,10 +455,10 @@ execute_store_cpsr: nop changed_pc_cpsr: - jal block_lookup_address_arm # GBA address is in $4 - addu $4, $2, $0 # load new address in $4 (delay slot) - restore_registers # restore registers - jr $2 # jump to the new address + addu $4, $2, $0 # load new address in $4 + cfncall block_lookup_address_arm, 1 # GBA address is in $4 + restore_registers # restore registers + jr $2 # jump to the new address nop @@ -549,8 +562,9 @@ ror_zero_shift: rotrv $4, $4, $5 # return (value ror shift) delay # $4: cycle counter argument +# $5: pointer to reg -execute_arm_translate: +execute_arm_translate_internal: add $sp, $sp, -48 # Store the main thread context sw $s0, 0($sp) sw $s1, 4($sp) @@ -563,9 +577,7 @@ execute_arm_translate: sw $fp, 32($sp) sw $ra, 36($sp) - lui $16, %hi(reg) # load reg address into base reg - addiu $16, %lo(reg) - + move $16, $5 sw $28, GP_SAVE($16) addu $17, $4, $0 # load cycle counter register @@ -582,15 +594,13 @@ execute_arm_translate: bne $1, $0, 1f lw $4, REG_PC($16) # load PC into $4 (delay) - jal block_lookup_address_arm # lookup initial jump address - nop + cfncall block_lookup_address_arm, 1 restore_registers # load initial register values jr $2 # jump to return nop 1: - jal block_lookup_address_thumb # lookup initial jump address - nop + cfncall block_lookup_address_thumb, 2 restore_registers # load initial register values jr $2 # jump to return nop @@ -629,5 +639,13 @@ tmemld: .space 704 tmemst: .space 256 - +fnptrs: + .long update_gba # 0 + .long block_lookup_address_arm # 1 + .long block_lookup_address_thumb # 2 + .long block_lookup_address_dual # 3 
+ .long flush_translation_cache_ram # 4 + .long set_cpu_mode # 5 + .long execute_spsr_restore_body # 6 + .long execute_store_cpsr_body # 7 -- cgit v1.2.3 From 34e672ed25f96c3558534ac24523103f3711e58a Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 16 Mar 2021 22:57:45 +0100 Subject: Simplify open load handling for MIPS and fix other arches Also rewrite a bit memory handlers for smaller functions. --- arm/arm_emit.h | 2 + gba_memory.c | 37 +++++++--------- gba_memory.h | 2 + psp/mips_emit.h | 128 ++++++++++++++++++-------------------------------------- x86/x86_emit.h | 2 + 5 files changed, 63 insertions(+), 108 deletions(-) diff --git a/arm/arm_emit.h b/arm/arm_emit.h index 669d422..a5dc930 100644 --- a/arm/arm_emit.h +++ b/arm/arm_emit.h @@ -1933,4 +1933,6 @@ void execute_swi_hle_div_c(void) generate_update_pc(pc); \ generate_indirect_branch_no_cycle_update(type) \ +void init_emitter(void) {} + #endif diff --git a/gba_memory.c b/gba_memory.c index a01bac5..c59aab1 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -1969,6 +1969,10 @@ u8 function_cc read_memory8(u32 address) return value; } +u32 read_memory8s(u32 address) { + return (u32)((s8)read_memory8(address)); +} + u16 function_cc read_memory16_signed(u32 address) { u16 value; @@ -1981,22 +1985,21 @@ u16 function_cc read_memory16_signed(u32 address) return value; } +u32 read_memory16s(u32 address) { + return (u32)((s16)read_memory16_signed(address)); +} + // unaligned reads are actually 32bit u32 function_cc read_memory16(u32 address) { u32 value; - - if(address & 0x01) - { - address &= ~0x01; - read_memory(16); + bool unaligned = (address & 0x01); + address &= ~0x01; + read_memory(16); + if (unaligned) { ror(value, value, 8); } - else - { - read_memory(16); - } return value; } @@ -2005,18 +2008,10 @@ u32 function_cc read_memory16(u32 address) u32 function_cc read_memory32(u32 address) { u32 value; - if(address & 0x03) - { - u32 rotate = (address & 0x03) * 8; - address &= ~0x03; - read_memory(32); 
- ror(value, value, rotate); - } - else - { - read_memory(32); - } - + u32 rotate = (address & 0x03) * 8; + address &= ~0x03; + read_memory(32); + ror(value, value, rotate); return value; } diff --git a/gba_memory.h b/gba_memory.h index 1b332ed..07d5fd0 100644 --- a/gba_memory.h +++ b/gba_memory.h @@ -157,8 +157,10 @@ typedef enum } flash_manufacturer_id_type; u8 function_cc read_memory8(u32 address); +u32 read_memory8s(u32 address); u32 function_cc read_memory16(u32 address); u16 function_cc read_memory16_signed(u32 address); +u32 read_memory16s(u32 address); u32 function_cc read_memory32(u32 address); cpu_alert_type function_cc write_memory8(u32 address, u8 value); cpu_alert_type function_cc write_memory16(u32 address, u16 value); diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 506c440..8d1d8d8 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2608,6 +2608,7 @@ void write_io_epilogue(); // This is a pointer table to the open load stubs, used by the BIOS (optimization) u32* openld_core_ptrs[11]; +const u8 ldopmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 10}, {10, 11} }; const u8 ldhldrtbl[11] = {0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5}; #define ld_phndlr_branch(memop) \ (((u32*)&stub_arena[ldhldrtbl[(memop)] * 16]) - ((u32*)translation_ptr + 1)) @@ -2732,14 +2733,13 @@ static void emit_pmemld_stub( if (region == 0) { // BIOS is *not* mirrored, check that mips_emit_srl(reg_rv, reg_a0, 14); - unsigned joff = (openld_core_ptrs[memop_number] - ((u32*)translation_ptr + 1)); - mips_emit_b(bne, reg_zero, reg_rv, joff); // Jumps to read open - - // Check whether the read is allowed. Only within BIOS! - // TODO: FIX THIS! This should be a protected read, not an open one! - mips_emit_srl(reg_temp, reg_a1, 14); - unsigned jof2 = (openld_core_ptrs[memop_number] - ((u32*)translation_ptr + 1)); - mips_emit_b(bne, reg_zero, reg_temp, jof2); + mips_emit_b(bne, reg_zero, reg_rv, branch_offset(openld_core_ptrs[memop_number])); + + // Check whether the read is allowed. 
Only within BIOS! (Ignore aligned, bad a1) + if (!aligned) { + mips_emit_srl(reg_temp, reg_a1, 14); + mips_emit_b(bne, reg_zero, reg_temp, branch_offset(openld_core_ptrs[memop_number])); + } } if (region >= 8 && region <= 12) { @@ -3029,7 +3029,6 @@ static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) { static void emit_saveaccess_stub(u8 **tr_ptr) { unsigned opt, i, strop; u8 *translation_ptr = *tr_ptr; - const u8 opmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 10}, {10, 11} }; // Writes to region 8 are directed to RTC (only 16 bit ones though) tmemld[1][8] = (u32)translation_ptr; @@ -3045,7 +3044,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { // Map loads to the read handler. for (opt = 0; opt < 6; opt++) { // Unalignment is not relevant here, so map them all to the same handler. - for (i = opmap[opt][0]; i < opmap[opt][1]; i++) + for (i = ldopmap[opt][0]; i < ldopmap[opt][1]; i++) tmemld[i][13] = (u32)translation_ptr; // Emit just a check + patch jump mips_emit_srl(reg_temp, reg_a0, 24); @@ -3112,87 +3111,47 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { *tr_ptr = translation_ptr; } -// Emits openload store memory region stub -static void emit_openload_stub( - unsigned memopn, bool signext, unsigned size, - unsigned alignment, bool aligned, u8 **tr_ptr -) { - u8 *jmp1, *jmp2; +// Emits openload stub +// These are used for reading unmapped regions, we just make them go +// through the slow handler since this should rarely happen.
+static void emit_openload_stub(unsigned opt, bool signext, unsigned size, u8 **tr_ptr) { + int i; + const u32 hndreadtbl[] = { + (u32)&read_memory8, (u32)&read_memory16, (u32)&read_memory32, + (u32)&read_memory8s, (u32)&read_memory16s, (u32)&read_memory32 }; u8 *translation_ptr = *tr_ptr; // This affects regions 1 and 15 - tmemld[memopn][ 1] = (u32)translation_ptr; - tmemld[memopn][15] = (u32)translation_ptr; + for (i = ldopmap[opt][0]; i < ldopmap[opt][1]; i++) + tmemld[i][ 1] = tmemld[i][15] = (u32)translation_ptr; - // We need to repatch if: alignment is different or - // if we are accessing a non-ignore region (1 and 15) + // Alignment is ignored since the handlers do the magic for us + // Only check region match: if we are accessing a non-ignore region mips_emit_srl(reg_temp, reg_a0, 24); mips_emit_sltiu(reg_rv, reg_temp, 0x0F); mips_emit_addiu(reg_temp, reg_temp, -1); mips_emit_sltu(reg_temp, reg_zero, reg_temp); mips_emit_and(reg_temp, reg_temp, reg_rv); - if (!aligned && size != 0) { - // Also check and aggregate alignment - mips_emit_ext(reg_rv, reg_a0, 0, size); - mips_emit_xori(reg_rv, reg_rv, alignment); - mips_emit_or(reg_temp, reg_rv, reg_temp); - } - // Jump to patch handler - mips_emit_b(bne, reg_zero, reg_temp, ld_phndlr_branch(memopn)); + mips_emit_b(bne, reg_zero, reg_temp, branch_handlerid(opt)); // BIOS can jump here to do open loads - openld_core_ptrs[memopn] = (u32*)translation_ptr; - - // Proceed with open load by reading data at PC (previous data in the bus) - mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR - mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit - - emit_save_regs(aligned); - mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); - - switch (size) { - case 0: - mips_emit_andi(reg_a0, reg_a0, 0x3); // ARM: Isolate two LSB - mips_emit_andi(reg_temp, reg_a0, 0x1); // Thb: Isolate one LSB - mips_emit_movn(reg_a0, reg_temp, reg_rv); // Pick thumb or ARM - genccall(&read_memory8); - mips_emit_addu(reg_a0, reg_a0, reg_a1); // 
Add low bits to addr (delay) - break; - case 1: - mips_emit_andi(reg_a0, reg_a0, 0x2); // ARM: Isolate bit 1 - mips_emit_movn(reg_a0, reg_zero, reg_rv); // Thumb: ignore all low bits - genccall(&read_memory16); - mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay) - break; - default: - mips_emit_b_filler(beq, reg_zero, reg_rv, jmp1); - mips_emit_addu(reg_a0, reg_zero, reg_a1); // Move PC to arg0 - - genccall(&read_memory16); - mips_emit_nop(); - mips_emit_b_filler(beq, reg_zero, reg_zero, jmp2); - mips_emit_ins(reg_rv, reg_rv, 16, 16); // res = res | (res << 16) [delay] - - generate_branch_patch_conditional(jmp1, translation_ptr); - genccall(&read_memory32); + for (i = ldopmap[opt][0]; i < ldopmap[opt][1]; i++) + openld_core_ptrs[i] = (u32*)translation_ptr; + + emit_save_regs(true); + mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); // Delay slot + genccall(hndreadtbl[size + (signext ? 3 : 0)]); + if (opt < 5) { + mips_emit_sw(reg_a1, reg_base, ReOff_RegPC); // Save current PC + } else { + // Aligned loads do not hold PC in a1 (imprecision) mips_emit_nop(); - generate_branch_patch_conditional(jmp2, translation_ptr); - break; - }; - - mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); - emit_restore_regs(aligned); - - // Same behaviour as reading from region14 really (8 bit bus) - if (!size && signext) { - mips_emit_seb(reg_rv, reg_rv); - } else if (size == 1 && alignment) { - mips_emit_seb(reg_rv, reg_rv); - } else if (size == 2) { - mips_emit_rotr(reg_rv, reg_rv, 8 * alignment); } + + mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); + emit_restore_regs(true); generate_function_return_swap_delay(); *tr_ptr = translation_ptr; @@ -3295,17 +3254,12 @@ void init_emitter() { mips_emit_nop(); // Generate the openload handlers (for accesses to unmapped mem) - emit_openload_stub(0, false, 0, 0, false, &translation_ptr); // ld u8 - emit_openload_stub(1, true, 0, 0, false, &translation_ptr); // ld s8 - emit_openload_stub(2, false, 1, 0, false, 
&translation_ptr); // ld u16 - emit_openload_stub(3, false, 1, 1, false, &translation_ptr); // ld u16u1 - emit_openload_stub(4, true, 1, 0, false, &translation_ptr); // ld s16 - emit_openload_stub(5, true, 1, 1, false, &translation_ptr); // ld s16u1 - emit_openload_stub(6, false, 2, 0, false, &translation_ptr); // ld u32 - emit_openload_stub(7, false, 2, 1, false, &translation_ptr); // ld u32u1 - emit_openload_stub(8, false, 2, 2, false, &translation_ptr); // ld u32u2 - emit_openload_stub(9, false, 2, 3, false, &translation_ptr); // ld u32u3 - emit_openload_stub(10,false, 2, 0, true, &translation_ptr); // ld aligned 32 + emit_openload_stub(0, false, 0, &translation_ptr); // ld u8 + emit_openload_stub(1, true, 0, &translation_ptr); // ld s8 + emit_openload_stub(2, false, 1, &translation_ptr); // ld u16 + emit_openload_stub(3, true, 1, &translation_ptr); // ld s16 + emit_openload_stub(4, false, 2, &translation_ptr); // ld u32 + emit_openload_stub(5, false, 2, &translation_ptr); // ld a32 // Here we emit the ignore store area, just checks and does nothing for (i = 0; i < 4; i++) diff --git a/x86/x86_emit.h b/x86/x86_emit.h index 4839618..67a3dc2 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -2299,4 +2299,6 @@ void function_cc swi_hle_div(void) generate_update_pc(pc); \ generate_indirect_branch_no_cycle_update(type) \ +void init_emitter(void) {} + #endif -- cgit v1.2.3