diff options
author | Autechre | 2021-03-17 07:20:55 +0100 |
---|---|---|
committer | GitHub | 2021-03-17 07:20:55 +0100 |
commit | 85ba903b19f9ef39c80906680dd210f259b5160c (patch) | |
tree | f9adf1475cbaa9621bb22eb02c7452a3b6c62922 | |
parent | b9ac4536757c4c24afaa86b6e3131ec21e407e80 (diff) | |
parent | 34e672ed25f96c3558534ac24523103f3711e58a (diff) | |
download | picogpsp-85ba903b19f9ef39c80906680dd210f259b5160c.tar.gz picogpsp-85ba903b19f9ef39c80906680dd210f259b5160c.tar.bz2 picogpsp-85ba903b19f9ef39c80906680dd210f259b5160c.zip |
Merge pull request #110 from davidgfnet/master
Rewrite the MIPS stub backend to add OpenDingux
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | arm/arm_emit.h | 2 | ||||
-rw-r--r-- | cpu.h | 23 | ||||
-rw-r--r-- | cpu_threaded.c | 4 | ||||
-rw-r--r-- | gba_memory.c | 41 | ||||
-rw-r--r-- | gba_memory.h | 7 | ||||
-rw-r--r-- | main.c | 1 | ||||
-rw-r--r-- | psp/mips_emit.h | 888 | ||||
-rw-r--r-- | psp/mips_stub.S | 2338 | ||||
-rw-r--r-- | x86/x86_emit.h | 2 |
10 files changed, 1012 insertions, 2300 deletions
@@ -193,7 +193,7 @@ else ifeq ($(platform), psp1) TARGET := $(TARGET_NAME)_libretro_$(platform).a CC = psp-gcc$(EXE_EXT) AR = psp-ar$(EXE_EXT) - CFLAGS += -DPSP -G0 + CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT CFLAGS += -I$(shell psp-config --pspsdk-path)/include CFLAGS += -march=allegrex -mfp32 -mgp32 -mlong32 -mabi=eabi CFLAGS += -fomit-frame-pointer -ffast-math @@ -374,8 +374,10 @@ else ifeq ($(platform), gcw0) CXX = /opt/gcw0-toolchain/usr/bin/mipsel-linux-g++ AR = /opt/gcw0-toolchain/usr/bin/mipsel-linux-ar SHARED := -shared -nostdlib -Wl,--version-script=link.T - fpic := -fPIC + fpic := -fPIC -DPIC CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float + HAVE_DYNAREC := 1 + CPU_ARCH := mips # Windows else diff --git a/arm/arm_emit.h b/arm/arm_emit.h index 669d422..a5dc930 100644 --- a/arm/arm_emit.h +++ b/arm/arm_emit.h @@ -1933,4 +1933,6 @@ void execute_swi_hle_div_c(void) generate_update_pc(pc); \ generate_indirect_branch_no_cycle_update(type) \ +void init_emitter(void) {} + #endif @@ -122,21 +122,19 @@ s32 translate_block_thumb(u32 pc, translation_region_type translation_region, u32 smc_enable); #if defined(PSP) - -#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) -#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) -#define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128) -#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) - + #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) + #define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128) + #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) #else - -#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5) -#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2) -#define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128 * 2) -#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) - + #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2) + #define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128 * 2) + #define 
TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) #endif +#define STUB_ARENA_SIZE (4*1024) + #if defined(HAVE_MMAP) extern u8* rom_translation_cache; extern u8* ram_translation_cache; @@ -157,6 +155,7 @@ extern int sceBlock; extern u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]; extern u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE]; extern u8 bios_translation_cache[BIOS_TRANSLATION_CACHE_SIZE]; +extern u32 stub_arena[STUB_ARENA_SIZE]; #endif extern u8 *rom_translation_ptr; extern u8 *ram_translation_ptr; diff --git a/cpu_threaded.c b/cpu_threaded.c index 80a6b4a..4d93e55 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -62,6 +62,8 @@ __asm__(".section .jit,\"awx\",%progbits"); __asm__(".section .jit,\"awx\",%nobits"); #endif +u32 stub_arena[STUB_ARENA_SIZE] + __attribute__ ((aligned(4),section(".jit"))); u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE] __attribute__ ((aligned(4),section(".jit"))); u8 *rom_translation_ptr = rom_translation_cache; @@ -3773,3 +3775,5 @@ void dump_translation_cache(void) bios_translation_ptr - bios_translation_cache, fd); fclose(fd); } + + diff --git a/gba_memory.c b/gba_memory.c index 0727279..c59aab1 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -427,7 +427,7 @@ u32 eeprom_address = 0; s32 eeprom_counter = 0; u8 eeprom_buffer[8]; -void function_cc write_eeprom(u32 address, u32 value) +void function_cc write_eeprom(u32 unused_address, u32 value) { switch(eeprom_mode) { @@ -749,6 +749,7 @@ static cpu_alert_type trigger_dma(u32 dma_number, u32 value) cpu_alert_type function_cc write_io_register8(u32 address, u32 value) { + value &= 0xff; switch(address) { case 0x00: @@ -1165,6 +1166,7 @@ cpu_alert_type function_cc write_io_register8(u32 address, u32 value) cpu_alert_type function_cc write_io_register16(u32 address, u32 value) { + value &= 0xffff; switch(address) { case 0x00: @@ -1967,6 +1969,10 @@ u8 function_cc read_memory8(u32 address) return value; } +u32 read_memory8s(u32 address) { + return 
(u32)((s8)read_memory8(address)); +} + u16 function_cc read_memory16_signed(u32 address) { u16 value; @@ -1979,22 +1985,21 @@ u16 function_cc read_memory16_signed(u32 address) return value; } +u32 read_memory16s(u32 address) { + return (u32)((s16)read_memory16_signed(address)); +} + // unaligned reads are actually 32bit u32 function_cc read_memory16(u32 address) { u32 value; - - if(address & 0x01) - { - address &= ~0x01; - read_memory(16); + bool unaligned = (address & 0x01); + address &= ~0x01; + read_memory(16); + if (unaligned) { ror(value, value, 8); } - else - { - read_memory(16); - } return value; } @@ -2003,18 +2008,10 @@ u32 function_cc read_memory16(u32 address) u32 function_cc read_memory32(u32 address) { u32 value; - if(address & 0x03) - { - u32 rotate = (address & 0x03) * 8; - address &= ~0x03; - read_memory(32); - ror(value, value, rotate); - } - else - { - read_memory(32); - } - + u32 rotate = (address & 0x03) * 8; + address &= ~0x03; + read_memory(32); + ror(value, value, rotate); return value; } diff --git a/gba_memory.h b/gba_memory.h index a37de47..07d5fd0 100644 --- a/gba_memory.h +++ b/gba_memory.h @@ -157,12 +157,19 @@ typedef enum } flash_manufacturer_id_type; u8 function_cc read_memory8(u32 address); +u32 read_memory8s(u32 address); u32 function_cc read_memory16(u32 address); u16 function_cc read_memory16_signed(u32 address); +u32 read_memory16s(u32 address); u32 function_cc read_memory32(u32 address); cpu_alert_type function_cc write_memory8(u32 address, u8 value); cpu_alert_type function_cc write_memory16(u32 address, u16 value); cpu_alert_type function_cc write_memory32(u32 address, u32 value); +u32 function_cc read_eeprom(void); +void function_cc write_eeprom(u32 address, u32 value); +u8 read_backup(u32 address); +void function_cc write_backup(u32 address, u32 value); +void function_cc write_rtc(u32 address, u32 value); extern u8 *memory_regions[16]; extern u32 memory_limits[16]; @@ -117,6 +117,7 @@ void init_main(void) 
flush_translation_cache_rom(); flush_translation_cache_ram(); flush_translation_cache_bios(); + init_emitter(); #endif } diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 7c69091..8d1d8d8 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -20,6 +20,19 @@ #ifndef MIPS_EMIT_H #define MIPS_EMIT_H +// Pointers to default handlers. +// Use IWRAM as default, assume aligned by default too +#define execute_load_u8 tmemld[0][3] +#define execute_load_s8 tmemld[1][3] +#define execute_load_u16 tmemld[2][3] +#define execute_load_s16 tmemld[4][3] +#define execute_load_u32 tmemld[6][3] +#define execute_aligned_load32 tmemld[10][3] +#define execute_store_u8 tmemst[0][3] +#define execute_store_u16 tmemst[1][3] +#define execute_store_u32 tmemst[2][3] +#define execute_aligned_store32 tmemst[3][3] + u32 mips_update_gba(u32 pc); // Although these are defined as a function, don't call them as @@ -44,9 +57,6 @@ u32 execute_lsr_flags_reg(u32 value, u32 shift); u32 execute_asr_flags_reg(u32 value, u32 shift); u32 execute_ror_flags_reg(u32 value, u32 shift); -void execute_aligned_store32(u32 address, u32 value); -u32 execute_aligned_load32(u32 address); - void reg_check(); typedef enum @@ -97,6 +107,7 @@ typedef enum mips_special_jalr = 0x09, mips_special_movz = 0x0A, mips_special_movn = 0x0B, + mips_special_sync = 0x0F, mips_special_mfhi = 0x10, mips_special_mthi = 0x11, mips_special_mflo = 0x12, @@ -116,11 +127,19 @@ typedef enum mips_special_xor = 0x26, mips_special_nor = 0x27, mips_special_slt = 0x2A, - mips_special_sltu = 0x2B + mips_special_sltu = 0x2B, + mips_special_max = 0x2C, + mips_special_min = 0x2D, } mips_function_special; typedef enum { + mips_special2_madd = 0x00, + mips_special2_maddu = 0x01, +} mips_function_special2; + +typedef enum +{ mips_special3_ext = 0x00, mips_special3_ins = 0x04, mips_special3_bshfl = 0x20 @@ -128,8 +147,16 @@ typedef enum typedef enum { + mips_bshfl_seb = 0x10, + mips_bshfl_seh = 0x18, + mips_bshfl_wsbh = 0x02, +} mips_function_bshfl; + 
+typedef enum +{ mips_regimm_bltz = 0x00, - mips_regimm_bltzal = 0x10 + mips_regimm_bltzal = 0x10, + mips_regimm_synci = 0x1F } mips_function_regimm; typedef enum @@ -163,8 +190,14 @@ typedef enum mips_opcode_sb = 0x28, mips_opcode_sh = 0x29, mips_opcode_sw = 0x2B, + mips_opcode_cache = 0x2F, } mips_opcode; +#define mips_emit_cache(operation, rs, immediate) \ + *((u32 *)translation_ptr) = (mips_opcode_cache << 26) | \ + (rs << 21) | (operation << 16) | (immediate & 0xFFFF); \ + translation_ptr += 4 \ + #define mips_emit_reg(opcode, rs, rt, rd, shift, function) \ *((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \ (rs << 21) | (rt << 16) | (rd << 11) | (shift << 6) | function; \ @@ -176,6 +209,12 @@ typedef enum mips_special_##function; \ translation_ptr += 4 \ +#define mips_emit_special2(function, rs, rt, rd, shift) \ + *((u32 *)translation_ptr) = (mips_opcode_special2 << 26) | \ + (rs << 21) | (rt << 16) | (rd << 11) | (shift << 6) | \ + mips_special2_##function; \ + translation_ptr += 4 \ + #define mips_emit_special3(function, rs, rt, imm_a, imm_b) \ *((u32 *)translation_ptr) = (mips_opcode_special3 << 26) | \ (rs << 21) | (rt << 16) | (imm_a << 11) | (imm_b << 6) | \ @@ -184,12 +223,12 @@ typedef enum #define mips_emit_imm(opcode, rs, rt, immediate) \ *((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \ - (rs << 21) | (rt << 16) | (immediate & 0xFFFF); \ + (rs << 21) | (rt << 16) | ((immediate) & 0xFFFF); \ translation_ptr += 4 \ #define mips_emit_regimm(function, rs, immediate) \ *((u32 *)translation_ptr) = (mips_opcode_regimm << 26) | \ - (rs << 21) | (mips_regimm_##function << 16) | (immediate & 0xFFFF); \ + (rs << 21) | (mips_regimm_##function << 16) | ((immediate) & 0xFFFF); \ translation_ptr += 4 \ #define mips_emit_jump(opcode, offset) \ @@ -203,6 +242,12 @@ typedef enum #define mips_absolute_offset(offset) \ ((u32)offset / 4) \ +#define mips_emit_max(rd, rs, rt) \ + mips_emit_special(max, rs, rt, rd, 0) \ + +#define mips_emit_min(rd, rs, 
rt) \ + mips_emit_special(min, rs, rt, rd, 0) \ + #define mips_emit_addu(rd, rs, rt) \ mips_emit_special(addu, rs, rt, rd, 0) \ @@ -281,11 +326,19 @@ typedef enum #define mips_emit_divu(rs, rt) \ mips_emit_special(divu, rs, rt, 0, 0) \ -#define mips_emit_madd(rs, rt) \ - mips_emit_special(madd, rs, rt, 0, 0) \ +#ifdef PSP + #define mips_emit_madd(rs, rt) \ + mips_emit_special(madd, rs, rt, 0, 0) \ -#define mips_emit_maddu(rs, rt) \ - mips_emit_special(maddu, rs, rt, 0, 0) \ + #define mips_emit_maddu(rs, rt) \ + mips_emit_special(maddu, rs, rt, 0, 0) +#else + #define mips_emit_madd(rs, rt) \ + mips_emit_special2(madd, rs, rt, 0, 0) \ + + #define mips_emit_maddu(rs, rt) \ + mips_emit_special2(maddu, rs, rt, 0, 0) +#endif #define mips_emit_movn(rd, rs, rt) \ mips_emit_special(movn, rs, rt, rd, 0) \ @@ -293,6 +346,9 @@ typedef enum #define mips_emit_movz(rd, rs, rt) \ mips_emit_special(movz, rs, rt, rd, 0) \ +#define mips_emit_sync() \ + mips_emit_special(sync, 0, 0, 0, 0) \ + #define mips_emit_lb(rt, rs, offset) \ mips_emit_imm(lb, rs, rt, offset) \ @@ -344,6 +400,12 @@ typedef enum #define mips_emit_ins(rt, rs, pos, size) \ mips_emit_special3(ins, rs, rt, (pos + size - 1), pos) \ +#define mips_emit_seb(rt, rd) \ + mips_emit_special3(bshfl, 0, rt, rd, mips_bshfl_seb) \ + +#define mips_emit_seh(rt, rd) \ + mips_emit_special3(bshfl, 0, rt, rd, mips_bshfl_seh) \ + // Breaks down if the backpatch offset is greater than 16bits, take care // when using (should be okay if limited to conditional instructions) @@ -369,9 +431,18 @@ typedef enum #define mips_emit_jr(rs) \ mips_emit_special(jr, rs, 0, 0, 0) \ +#define mips_emit_jalr(rs) \ + mips_emit_special(jalr, rs, 0, 31, 0) \ + +#define mips_emit_synci(rs, offset) \ + mips_emit_regimm(synci, rs, offset) \ + #define mips_emit_bltzal(rs, offset) \ mips_emit_regimm(bltzal, rs, offset) \ +#define mips_emit_bltz(rs, offset) \ + mips_emit_regimm(bltz, rs, offset) \ + #define mips_emit_nop() \ mips_emit_sll(reg_zero, reg_zero, 0) \ 
@@ -566,6 +637,15 @@ u32 arm_to_mips_reg[] = translation_ptr += 4; \ } \ +#define generate_function_return_swap_delay() \ +{ \ + u32 delay_instruction = address32(translation_ptr, -4); \ + translation_ptr -= 4; \ + mips_emit_jr(mips_reg_ra); \ + address32(translation_ptr, 0) = delay_instruction; \ + translation_ptr += 4; \ +} \ + #define generate_swap_delay() \ { \ u32 delay_instruction = address32(translation_ptr, -8); \ @@ -2468,4 +2548,790 @@ u8 swi_hle_handle[256] = generate_load_pc(reg_a0, pc); \ mips_emit_sw(reg_a0, reg_base, (REG_PC * 4)) \ + +// Register save layout as follows: +#define ReOff_RegPC (15*4) // REG_PC +#define ReOff_CPSR (20*4) // REG_CPSR +#define ReOff_SaveR1 (21*4) // 3 save scratch regs +#define ReOff_SaveR2 (22*4) +#define ReOff_SaveR3 (23*4) +#define ReOff_GP_Save (32*4) // GP_SAVE + +// Saves all regs to their right slot and loads gp +#define emit_save_regs(save_a2) { \ + int i; \ + for (i = 0; i < 15; i++) { \ + mips_emit_sw(arm_to_mips_reg[i], reg_base, 4 * i); \ + } \ + if (save_a2) { \ + mips_emit_sw(reg_a2, reg_base, ReOff_SaveR2); \ + } \ + /* Load the gp pointer, used by C code */ \ + mips_emit_lw(mips_reg_gp, reg_base, ReOff_GP_Save); \ +} + +// Restores the registers from their slot +#define emit_restore_regs(restore_a2) { \ + int i; \ + if (restore_a2) { \ + mips_emit_lw(reg_a2, reg_base, ReOff_SaveR2); \ + } \ + for (i = 0; i < 15; i++) { \ + mips_emit_lw(arm_to_mips_reg[i], reg_base, 4 * i); \ + } \ +} + +// Emits a function call for a read or a write (for special stuff like flash) +#define emit_mem_call_ds(fnptr, mask) \ + mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); \ + emit_save_regs(true); \ + genccall(fnptr); \ + mips_emit_andi(reg_a0, reg_a0, (mask)); \ + emit_restore_regs(true); \ + mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); \ + mips_emit_jr(mips_reg_ra); + +#define emit_mem_call(fnptr, mask) \ + emit_mem_call_ds(fnptr, mask) \ + mips_emit_nop(); + +// Pointer table to stubs, indexed by type and region 
+extern u32 tmemld[11][16]; +extern u32 tmemst[ 4][16]; +void mips_lookup_pc(); +void smc_write(); +cpu_alert_type write_io_register8 (u32 address, u32 value); +cpu_alert_type write_io_register16(u32 address, u32 value); +cpu_alert_type write_io_register32(u32 address, u32 value); +void write_io_epilogue(); + +// This is a pointer table to the open load stubs, used by the BIOS (optimization) +u32* openld_core_ptrs[11]; + +const u8 ldopmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 10}, {10, 11} }; +const u8 ldhldrtbl[11] = {0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5}; +#define ld_phndlr_branch(memop) \ + (((u32*)&stub_arena[ldhldrtbl[(memop)] * 16]) - ((u32*)translation_ptr + 1)) + +#define st_phndlr_branch(memop) \ + (((u32*)&stub_arena[((memop) + 6) * 16]) - ((u32*)translation_ptr + 1)) + +#define branch_handlerid(phndlrid) \ + (((u32*)&stub_arena[(phndlrid) * 16]) - ((u32*)translation_ptr + 1)) + +#define branch_offset(ptr) \ + (((u32*)ptr) - ((u32*)translation_ptr + 1)) + +static void emit_mem_access_loadop( + u8 *translation_ptr, + u32 base_addr, unsigned size, unsigned alignment, bool signext) +{ + switch (size) { + case 2: + mips_emit_lw(reg_rv, reg_rv, (base_addr & 0xffff)); + break; + case 1: + if (signext) { + // Load 16 with sign extension is essentially a load byte + if (alignment) { + mips_emit_lb(reg_rv, reg_rv, (base_addr & 0xffff)); + } else { + mips_emit_lh(reg_rv, reg_rv, (base_addr & 0xffff)); + } + } else { + mips_emit_lhu(reg_rv, reg_rv, (base_addr & 0xffff)); + } + break; + default: + if (signext) { + mips_emit_lb(reg_rv, reg_rv, (base_addr & 0xffff)); + } else { + mips_emit_lbu(reg_rv, reg_rv, (base_addr & 0xffff)); + } + break; + }; +} + +#ifdef PIC + #define genccall(fn) \ + mips_emit_lui(mips_reg_t9, ((u32)fn) >> 16); \ + mips_emit_ori(mips_reg_t9, mips_reg_t9, ((u32)fn)); \ + mips_emit_jalr(mips_reg_t9); +#else + #define genccall(fn) mips_emit_jal(((u32)fn) >> 2); #endif + +// Stub memory map: +// 0 .. 63 First patch handler [#0] +// 448 .. 
511 Last patch handler [#7] +// 512+ smc_write handler +#define SMC_WRITE_OFF32 160 + +// Describes a "plain" memory are, that is, an area that is just accessed +// as normal memory (with some caveats tho). +typedef struct { + void *emitter; + unsigned region; // Region ID (top 8 bits) + unsigned memsize; // 0 byte, 1 halfword, 2 word + bool check_smc; // Whether the memory can contain code + bool bus16; // Whether it can only be accessed at 16bit + u32 baseptr; // Memory base address. +} t_stub_meminfo; + +// Generates the stub to access memory for a given region, access type, +// size and misalignment. +// Handles "special" cases like weirdly mapped memory +static void emit_pmemld_stub( + unsigned memop_number, const t_stub_meminfo *meminfo, + bool signext, unsigned size, + unsigned alignment, bool aligned, + u8 **tr_ptr) +{ + u8 *translation_ptr = *tr_ptr; + unsigned region = meminfo->region; + u32 base_addr = meminfo->baseptr; + + if (region >= 9 && region <= 11) { + // Use the same handler for these regions (just replicas) + tmemld[memop_number][region] = tmemld[memop_number][8]; + return; + } + + // Clean up one or two bits (to align access). It might already be aligned! 
+ u32 memmask = (meminfo->memsize - 1); + memmask = (memmask >> size) << size; // Clear 1 or 2 (or none) bits + + // Add the stub to the table (add the JAL instruction encoded already) + tmemld[memop_number][region] = (u32)translation_ptr; + + // Size: 0 (8 bits), 1 (16 bits), 2 (32 bits) + // First check we are in the right memory region + unsigned regionbits = 8; + unsigned regioncheck = region; + if (region == 8) { + // This is an optimization for ROM regions + // For region 8-11 we reuse the same code (and have a more generic check) + // Region 12 is harder to cover without changing the check (shift + xor) + regionbits = 6; + regioncheck >>= 2; // Ignore the two LSB, don't care + } + + // Address checking: jumps to handler if bad region/alignment + mips_emit_srl(reg_temp, reg_a0, (32 - regionbits)); + if (!aligned && size != 0) { // u8 or aligned u32 dont need to check alignment bits + mips_emit_ins(reg_temp, reg_a0, regionbits, size); // Add 1 or 2 bits of alignment + } + if (regioncheck || alignment) { // If region and alignment are zero, can skip + mips_emit_xori(reg_temp, reg_temp, regioncheck | (alignment << regionbits)); + } + + // The patcher to use depends on ld/st, access size, and sign extension + // (so there's 10 of them). They live in the top stub addresses. + mips_emit_b(bne, reg_zero, reg_temp, ld_phndlr_branch(memop_number)); + + // BIOS region requires extra checks for protected reads + if (region == 0) { + // BIOS is *not* mirrored, check that + mips_emit_srl(reg_rv, reg_a0, 14); + mips_emit_b(bne, reg_zero, reg_rv, branch_offset(openld_core_ptrs[memop_number])); + + // Check whether the read is allowed. Only within BIOS! 
(Ignore aligned, bad a1) + if (!aligned) { + mips_emit_srl(reg_temp, reg_a1, 14); + mips_emit_b(bne, reg_zero, reg_temp, branch_offset(openld_core_ptrs[memop_number])); + } + } + + if (region >= 8 && region <= 12) { + u8 *jmppatch; + // ROM area: might need to load the ROM on-demand + mips_emit_srl(reg_rv, reg_a0, 15); // 32KB page number + mips_emit_sll(reg_rv, reg_rv, 2); // (word indexed) + mips_emit_addu(reg_rv, reg_rv, reg_base); // base + offset + + mips_emit_lw(reg_rv, reg_rv, 0x8000); // base[offset-0x8000] + mips_emit_b_filler(bne, reg_rv, reg_zero, jmppatch); // if not null, can skip load page + mips_emit_andi(reg_temp, reg_a0, memmask); // Get the lowest 15 bits [delay] + + // This code call the C routine to map the relevant ROM page + emit_save_regs(aligned); + mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); + mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff + genccall(&load_gamepak_page); + mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1); + + mips_emit_lw(reg_temp, reg_base, ReOff_SaveR1); + emit_restore_regs(aligned); + mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3); + + generate_branch_patch_conditional(jmppatch, translation_ptr); + // Now we can proceed to load, place addr in the right register + mips_emit_addu(reg_rv, reg_rv, reg_temp); + } else if (region == 14) { + // Read from flash, is a bit special, fn call + emit_mem_call_ds(&read_backup, 0xFFFF); + if (!size && signext) { + mips_emit_seb(reg_rv, reg_rv); + } else if (size == 1 && alignment) { + mips_emit_seb(reg_rv, reg_rv); + } else if (size == 2) { + mips_emit_rotr(reg_rv, reg_rv, 8 * alignment); + } else { + mips_emit_nop(); + } + *tr_ptr = translation_ptr; + return; + } else { + // Generate upper bits of the addr and do addr mirroring + // (The address hi16 is rounded up since load uses signed offset) + mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + + if (region == 2) { + // EWRAM is a bit special + // Need to insert a zero in the addr (due to how it's 
mapped) + mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) + mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18) + mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 + mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + } else if (region == 6) { + // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous + mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 + mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block) + if (!aligned && alignment != 0) { + mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + } + mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block + mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay] + mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block) + mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset + } else { + // Generate regular (<=32KB) mirroring + mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring) + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr + } + } + + // Aligned accesses (or the weird s16u1 case) are just one inst + if (alignment == 0 || (size == 1 && signext)) { + emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); // Delay slot + translation_ptr += 4; + } + else { + // Unaligned accesses (require rotation) need two insts + emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); + translation_ptr += 4; + mips_emit_rotr(reg_rv, reg_rv, alignment * 8); // Delay slot + } + + generate_function_return_swap_delay(); // Return. 
Move prev inst to delay slot + *tr_ptr = translation_ptr; +} + +// Generates the stub to store memory for a given region and size +// Handles "special" cases like weirdly mapped memory +static void emit_pmemst_stub( + unsigned memop_number, const t_stub_meminfo *meminfo, + unsigned size, bool aligned, u8 **tr_ptr) +{ + u8 *translation_ptr = *tr_ptr; + unsigned region = meminfo->region; + u32 base_addr = meminfo->baseptr; + + // Palette, VRAM and OAM cannot be really byte accessed (use a 16 bit store) + bool doubleaccess = (size == 0 && meminfo->bus16); + unsigned realsize = size; + if (doubleaccess) + realsize = 1; + + // Clean up one or two bits (to align access). It might already be aligned! + u32 memmask = (meminfo->memsize - 1); + memmask = (memmask >> realsize) << realsize; + + // Add the stub to the table (add the JAL instruction encoded already) + tmemst[memop_number][region] = (u32)translation_ptr; + + // First check we are in the right memory region (same as loads) + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_temp, reg_temp, region); + mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number)); + + mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + + if (doubleaccess) { + mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8) + } + + if (region == 2) { + // EWRAM is a bit special + // Need to insert a zero in the addr (due to how it's mapped) + mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB) + mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18) + mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16 + mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr + } else if (region == 6) { + // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous + mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 + mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block) + if (!aligned && realsize != 0) 
{ + mips_emit_ins(reg_a0, reg_zero, 0, realsize);// addr & ~1/2 (align to size) + } + mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block + mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay] + mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block) + mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset + } else { + // Generate regular (<=32KB) mirroring + mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring) + mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr + } + + // Generate SMC write and tracking + // TODO: Should we have SMC checks here also for aligned? + if (meminfo->check_smc && !aligned) { + mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer + if (realsize == 2) { + mips_emit_lw(reg_temp, reg_temp, base_addr); + } else if (realsize == 1) { + mips_emit_lh(reg_temp, reg_temp, base_addr); + } else { + mips_emit_lb(reg_temp, reg_temp, base_addr); + } + // If the data is non zero, we just wrote over code + // Local-jump to the smc_write (which lives at offset:0) + unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1)); + mips_emit_b(bne, reg_zero, reg_temp, instoffset); + } + + // Store the data (delay slot from the SMC branch) + if (realsize == 2) { + mips_emit_sw(reg_a1, reg_rv, base_addr); + } else if (realsize == 1) { + mips_emit_sh(reg_a1, reg_rv, base_addr); + } else { + mips_emit_sb(reg_a1, reg_rv, base_addr); + } + + // Post processing store: + // Signal that OAM was updated + if (region == 7) { + u32 palcaddr = (u32)&oam_update; + mips_emit_lui(reg_temp, ((palcaddr + 0x8000) >> 16)); + mips_emit_sw(reg_base, reg_temp, palcaddr & 0xffff); // Write any nonzero data + generate_function_return_swap_delay(); + } + else { + mips_emit_jr(mips_reg_ra); + mips_emit_nop(); + } + + *tr_ptr = translation_ptr; +} + +#ifdef USE_BGR_FORMAT + /* 0BGR to BGR565, for PSP */ + #define palette_convert() \ + 
mips_emit_sll(reg_temp, reg_a1, 1); \ + mips_emit_andi(reg_temp, reg_temp, 0xFFC0); \ + mips_emit_ins(reg_temp, reg_a1, 0, 5); +#else + /* 0BGR to RGB565 (clobbers a0!) */ + #define palette_convert() \ + mips_emit_ext(reg_temp, reg_a1, 10, 5); \ + mips_emit_ins(reg_temp, reg_a1, 11, 5); \ + mips_emit_ext(reg_a0, reg_a1, 5, 5); \ + mips_emit_ins(reg_temp, reg_a0, 6, 5); +#endif + +// Palette is accessed differently and stored in a decoded manner +static void emit_palette_hdl( + unsigned memop_number, const t_stub_meminfo *meminfo, + unsigned size, bool aligned, u8 **tr_ptr) +{ + u8 *translation_ptr = *tr_ptr; + + // Palette cannot be accessed at byte level + unsigned realsize = size ? size : 1; + u32 memmask = (meminfo->memsize - 1); + memmask = (memmask >> realsize) << realsize; + + // Add the stub to the table (add the JAL instruction encoded already) + tmemst[memop_number][5] = (u32)translation_ptr; + + // First check we are in the right memory region (same as loads) + mips_emit_srl(reg_temp, reg_a0, 24); + mips_emit_xori(reg_temp, reg_temp, 5); + mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number)); + mips_emit_andi(reg_rv, reg_a0, memmask); // Clear upper bits (mirroring) + if (size == 0) { + mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8) + } + mips_emit_addu(reg_rv, reg_rv, reg_base); + + // Store the data (delay slot from the SMC branch) + if (realsize == 2) { + mips_emit_sw(reg_a1, reg_base, 0x100); + } else if (realsize == 1) { + mips_emit_sh(reg_a1, reg_base, 0x100); + } + + palette_convert(); + mips_emit_sh(reg_temp, reg_rv, 0x500); + + if (size == 2) { + // Convert the second half-word also + mips_emit_srl(reg_a1, reg_a1, 16); + palette_convert(); + mips_emit_sh(reg_temp, reg_rv, 0x502); + } + generate_function_return_swap_delay(); + + *tr_ptr = translation_ptr; +} + +// This emits stubs for regions where writes have no side-effects +static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) { + u8 
*translation_ptr = *tr_ptr;
+
+  // Region 0-1 (BIOS and ignore)
+  tmemst[size][0] = tmemst[size][1] = (u32)translation_ptr;
+  mips_emit_srl(reg_temp, reg_a0, 25);     // Check 7 MSB to be zero
+  mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
+  mips_emit_nop();
+  mips_emit_jr(mips_reg_ra);
+  mips_emit_nop();
+
+  // Region 8-B
+  tmemst[size][ 8] = tmemst[size][ 9] =
+  tmemst[size][10] = tmemst[size][11] = (u32)translation_ptr;
+
+  mips_emit_srl(reg_temp, reg_a0, 26);     // Check 6 MSB to be 0x02
+  mips_emit_xori(reg_temp, reg_temp, 0x02);
+  mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
+  mips_emit_nop();
+  mips_emit_jr(mips_reg_ra);
+  mips_emit_nop();
+
+  // Region C or F (or bigger!)
+  tmemst[size][12] = tmemst[size][15] = (u32)translation_ptr;
+  mips_emit_srl(reg_temp, reg_a0, 24);
+  mips_emit_sltiu(reg_rv, reg_temp, 0x0F);
+  mips_emit_b(beq, reg_rv, reg_zero, 3);    // If 15 or bigger, ignore store
+  mips_emit_xori(reg_rv, reg_temp, 0x0C);
+  // NOTE(review): the branch below tests reg_temp (the region number) and not
+  // reg_rv (the XOR result computed just above); looks unintended - confirm.
+  mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
+  mips_emit_nop();
+  mips_emit_jr(mips_reg_ra);
+  mips_emit_nop();
+
+  *tr_ptr = translation_ptr;
+}
+
+// Stubs for regions with EEPROM or flash/SRAM
+// Emits, starting at *tr_ptr, the handlers listed below and registers them in
+// the tmemld/tmemst dispatch tables. *tr_ptr is advanced past the emitted code.
+//  - region 0x8, 16 bit stores -> write_rtc
+//  - region 0xD, loads/stores  -> read_eeprom / write_eeprom
+//  - region 0xE, stores        -> write_backup (8 bit stores only)
+//  - region 0x4, stores        -> write_io_register8/16/32
+static void emit_saveaccess_stub(u8 **tr_ptr) {
+  unsigned opt, i, strop;
+  u8 *translation_ptr = *tr_ptr;
+
+  // Writes to region 8 are directed to RTC (only 16 bit ones though)
+  // This is a store handler, so it has to live in the store table (tmemst):
+  // the [1][8] slot of the load table belongs to a ROM load stub.
+  // Like the other store stubs below, it first checks the region and jumps
+  // to the patch handler whenever the address is not in region 8.
+  tmemst[1][8] = (u32)translation_ptr;
+  mips_emit_srl(reg_temp, reg_a0, 24);
+  mips_emit_xori(reg_rv, reg_temp, 0x08);
+  mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(1));
+  mips_emit_nop();                          // Keep the delay slot harmless
+  emit_mem_call(&write_rtc, 0xFE);
+
+  // These are for region 0xD where EEPROM is mapped. Addr is ignored
+  // Value is limited to one bit (both reading and writing!)
+  u32 *read_hndlr = (u32*)translation_ptr;
+  emit_mem_call(&read_eeprom, 0x3FF);
+  u32 *write_hndlr = (u32*)translation_ptr;
+  emit_mem_call(&write_eeprom, 0x3FF);
+
+  // Map loads to the read handler.
+  for (opt = 0; opt < 6; opt++) {
+    // Unalignment is not relevant here, so map them all to the same handler.
+    for (i = ldopmap[opt][0]; i < ldopmap[opt][1]; i++)
+      tmemld[i][13] = (u32)translation_ptr;
+    // Emit just a check + patch jump
+    mips_emit_srl(reg_temp, reg_a0, 24);
+    mips_emit_xori(reg_rv, reg_temp, 0x0D);
+    mips_emit_b(bne, reg_rv, reg_zero, branch_handlerid(opt));
+    mips_emit_nop();
+    mips_emit_b(beq, reg_zero, reg_zero, branch_offset(read_hndlr));
+  }
+  // This is for stores
+  for (strop = 0; strop <= 3; strop++) {
+    tmemst[strop][13] = (u32)translation_ptr;
+    mips_emit_srl(reg_temp, reg_a0, 24);
+    mips_emit_xori(reg_rv, reg_temp, 0x0D);
+    mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop));
+    mips_emit_nop();
+    mips_emit_b(beq, reg_zero, reg_zero, branch_offset(write_hndlr));
+  }
+
+  // Flash/SRAM/Backup writes are only supported for 8 bit accesses
+  // (strop == 0), any other store width to region 0xE just returns.
+  for (strop = 0; strop <= 3; strop++) {
+    tmemst[strop][14] = (u32)translation_ptr;
+    mips_emit_srl(reg_temp, reg_a0, 24);
+    mips_emit_xori(reg_rv, reg_temp, 0x0E);
+    mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop));
+    if (strop == 0) {
+      emit_mem_call(&write_backup, 0xFFFF);
+    } else {
+      mips_emit_nop();
+      mips_emit_jr(mips_reg_ra);   // Does nothing in this case
+      mips_emit_nop();
+    }
+  }
+
+  // Region 4 writes
+  // I/O writes are also a bit special, they can trigger things like DMA, IRQs...
+  // Also: aligned (strop==3) accesses do not trigger IRQs
+  // iowrtbl/amsk are indexed by strop: C handler and address mask per width.
+  const u32 iowrtbl[] = {
+    (u32)&write_io_register8, (u32)&write_io_register16,
+    (u32)&write_io_register32, (u32)&write_io_register32 };
+  const u32 amsk[] = {0x3FF, 0x3FE, 0x3FC, 0x3FC};
+  for (strop = 0; strop <= 3; strop++) {
+    tmemst[strop][4] = (u32)translation_ptr;
+    mips_emit_srl(reg_temp, reg_a0, 24);
+    mips_emit_xori(reg_temp, reg_temp, 0x04);
+    mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(strop));
+
+    mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); // Store the return addr
+    emit_save_regs(strop == 3);
+    mips_emit_andi(reg_a0, reg_a0, amsk[strop]);
+    genccall(iowrtbl[strop]);
+
+    if (strop < 3) {
+      mips_emit_sw(reg_a2, reg_base, ReOff_RegPC);  // Save PC (delay)
+      mips_emit_j(((u32)&write_io_epilogue) >> 2);
+      mips_emit_nop();
+    } else {
+      mips_emit_nop();
+      mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3);
+      emit_restore_regs(true);
+      generate_function_return_swap_delay();
+    }
+  }
+
+  *tr_ptr = translation_ptr;
+}
+
+// Emits openload stub
+// These are used for reading unmapped regions, we just make them go
+// through the slow handler since it should rarely happen.
+static void emit_openload_stub(unsigned opt, bool signext, unsigned size, u8 **tr_ptr) {
+  // opt:     load kind (0..5). Selects the tmemld rows, from ldopmap[opt][0]
+  //          up to (not including) ldopmap[opt][1], that get mapped to this
+  //          stub, and the patch handler to branch to on a region mismatch.
+  // signext: selects the sign-extending readers (second half of hndreadtbl).
+  // size:    access width (0: 8 bit, 1: 16 bit, 2: 32 bit reader).
+  // tr_ptr:  in/out emission cursor, advanced past the generated stub.
+  int i;
+  const u32 hndreadtbl[] = {
+    (u32)&read_memory8,  (u32)&read_memory16,  (u32)&read_memory32,
+    (u32)&read_memory8s, (u32)&read_memory16s, (u32)&read_memory32 };
+  u8 *translation_ptr = *tr_ptr;
+
+  // This affects regions 1 and 15
+  for (i = ldopmap[opt][0]; i < ldopmap[opt][1]; i++)
+    tmemld[i][ 1] = tmemld[i][15] = (u32)translation_ptr;
+
+  // Alignment is ignored since the handlers do the magic for us
+  // Only check region match: if we are accessing a non-ignore region
+  // (reg_temp ends up non-zero when the region is below 0x0F and is not 1)
+  mips_emit_srl(reg_temp, reg_a0, 24);
+  mips_emit_sltiu(reg_rv, reg_temp, 0x0F);
+  mips_emit_addiu(reg_temp, reg_temp, -1);
+  mips_emit_sltu(reg_temp, reg_zero, reg_temp);
+  mips_emit_and(reg_temp, reg_temp, reg_rv);
+
+  // Jump to patch handler
+  mips_emit_b(bne, reg_zero, reg_temp, branch_handlerid(opt));
+
+  // BIOS can jump here to do open loads
+  for (i = ldopmap[opt][0]; i < ldopmap[opt][1]; i++)
+    openld_core_ptrs[i] = (u32*)translation_ptr;
+
+  emit_save_regs(true);
+  mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1);  // Delay slot
+  genccall(hndreadtbl[size + (signext ? 3 : 0)]);
+  if (opt < 5) {
+    mips_emit_sw(reg_a1, reg_base, ReOff_RegPC);  // Save current PC
+  } else {
+    // Aligned loads do not hold PC in a1 (imprecision)
+    mips_emit_nop();
+  }
+
+  mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1);
+  emit_restore_regs(true);
+  generate_function_return_swap_delay();
+
+  *tr_ptr = translation_ptr;
+}
+
+// Signature of the store stub emitters (see stinfo in init_emitter)
+typedef void (*sthldr_t)(
+  unsigned memop_number, const t_stub_meminfo *meminfo,
+  unsigned size, bool aligned, u8 **tr_ptr);
+
+// Signature of the load stub emitters (see ldinfo in init_emitter)
+typedef void (*ldhldr_t)(
+  unsigned memop_number, const t_stub_meminfo *meminfo,
+  bool signext, unsigned size,
+  unsigned alignment, bool aligned,
+  u8 **tr_ptr);
+
+// Generates a patch handler for a given access size
+// It will detect the access alignment and memory region and load
+// the corresponding handler from the table (at the right offset)
+// and patch the jal instruction from where it was called.
+//  tr_ptr:          in/out emission cursor, advanced by 64 bytes at least
+//  size:            number of low address bits used as alignment index
+//                   (only used when check_alignment is set)
+//  toff:            offset (in words) of the handler table to use
+//  check_alignment: whether the table is also indexed by alignment
+static void emit_phand(
+  u8 **tr_ptr, unsigned size, unsigned toff,
+  bool check_alignment)
+{
+  u8 *translation_ptr = *tr_ptr;
+
+  mips_emit_srl(reg_temp, reg_a0, 24);
+  #ifdef PSP
+    mips_emit_addiu(reg_rv, reg_zero, 15*4);  // Table limit (max)
+    mips_emit_sll(reg_temp, reg_temp, 2);     // Table is word indexed
+    mips_emit_min(reg_temp, reg_temp, reg_rv);// Do not overflow table
+  #else
+    mips_emit_sltiu(reg_rv, reg_temp, 0x0F);  // Check for addr 0x1XXX.. 0xFXXX
+    mips_emit_b(bne, reg_zero, reg_rv, 2);    // Skip two insts (well, cant skip ds)
+    mips_emit_sll(reg_temp, reg_temp, 2);     // Table is word indexed
+    mips_emit_addiu(reg_temp, reg_zero, 15*4);// Simulate ld/st to 0x0FXXX (open/ignore)
+  #endif
+
+  // Stores or byte-accesses do not care about alignment
+  if (check_alignment) {
+    // Move alignment bits for the table lookup
+    mips_emit_ins(reg_temp, reg_a0, 6, size);   // Alignment bits (1 or 2), to bits 6 (and 7)
+  }
+
+  unsigned tbloff = 256 + 2048 + 220 + 4 * toff;  // Skip regs and palettes
+  mips_emit_addu(reg_rv, reg_temp, reg_base); // Add to the base_reg the table offset
+  mips_emit_lw(reg_rv, reg_rv, tbloff);       // Read addr from table
+  mips_emit_sll(reg_temp, reg_rv, 4);         // 26 bit immediate to the MSB
+  mips_emit_ori(reg_temp, reg_temp, 0x3);     // JAL opcode
+  mips_emit_rotr(reg_temp, reg_temp, 6);      // Swap opcode and immediate
+  mips_emit_sw(reg_temp, mips_reg_ra, -8);    // Patch instruction!
+
+  #ifdef PSP
+    mips_emit_cache(0x1A, mips_reg_ra, -8);
+    mips_emit_jr(reg_rv);                     // Jump directly to target for speed
+    mips_emit_cache(0x08, mips_reg_ra, -8);
+  #else
+    mips_emit_jr(reg_rv);
+    mips_emit_synci(mips_reg_ra, -8);
+  #endif
+
+  // Round up handlers to 16 instructions for easy addressing :)
+  while (translation_ptr - *tr_ptr < 64) {
+    mips_emit_nop();
+  }
+
+  *tr_ptr = translation_ptr;
+}
+
+// This function emits the following stubs:
+// - smc_write: Jumps to C code to trigger a cache flush
+// - memop patcher: Patches a memop whenever it accesses the wrong mem region
+// - mem stubs: There's stubs for load & store, and every memory region
+//    and possible operand size and misalignment (+sign extensions)
+// Everything is emitted sequentially into stub_arena.
+void init_emitter(void) {
+  unsigned i;
+  // Initialize memory to a debuggable state
+  memset(stub_arena, 0, sizeof(stub_arena));  // nop
+
+  // Generates the trampoline and helper stubs that we need
+  u8 *translation_ptr = (u8*)&stub_arena[0];
+
+  // Generate first the patch handlers
+  // We have 6+4 patchers, one per mem type (6 or 4)
+
+  // Calculate the offset into tmemld[10][XX];
+  emit_phand(&translation_ptr, 0,  0 * 16, false); // ld u8
+  emit_phand(&translation_ptr, 0,  1 * 16, false); // ld s8
+  emit_phand(&translation_ptr, 1,  2 * 16, true);  // ld u16 + u16u1
+  emit_phand(&translation_ptr, 1,  4 * 16, true);  // ld s16 + s16u1
+  emit_phand(&translation_ptr, 2,  6 * 16, true);  // ld u32 (0/1/2/3u)
+  emit_phand(&translation_ptr, 2, 10 * 16, false); // ld aligned 32
+  // Store table is immediately after
+  emit_phand(&translation_ptr, 0, 11 * 16, false); // st u8
+  emit_phand(&translation_ptr, 1, 12 * 16, false); // st u16
+  emit_phand(&translation_ptr, 2, 13 * 16, false); // st u32
+  emit_phand(&translation_ptr, 2, 14 * 16, false); // st aligned 32
+
+  // This is just a trampoline (for the SMC branches)
+  mips_emit_j(((u32)&smc_write) >> 2);
+  mips_emit_nop();
+
+  // Generate the openload handlers (for accesses to unmapped mem)
+  emit_openload_stub(0, false, 0, &translation_ptr); // ld u8
+  emit_openload_stub(1, true,  0, &translation_ptr); // ld s8
+  emit_openload_stub(2, false, 1, &translation_ptr); // ld u16
+  emit_openload_stub(3, true,  1, &translation_ptr); // ld s16
+  emit_openload_stub(4, false, 2, &translation_ptr); // ld u32
+  emit_openload_stub(5, false, 2, &translation_ptr); // ld a32
+
+  // Here we emit the ignore store area, just checks and does nothing
+  for (i = 0; i < 4; i++)
+    emit_ignorestore_stub(i, &translation_ptr);
+
+  // Here go the save game handlers
+  emit_saveaccess_stub(&translation_ptr);
+
+  // Generate memory handlers
+  const t_stub_meminfo ldinfo [] = {
+    { emit_pmemld_stub,  0, 0x4000, false, false, (u32)bios_rom },
+    // 1 Open load / Ignore store
+    { emit_pmemld_stub,  2, 0x8000, true,  false, (u32)&ewram[0x8000] },
+    { emit_pmemld_stub,  3, 0x8000, true,  false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above
+    { emit_pmemld_stub,  4,  0x400, false, false, (u32)io_registers },
+    { emit_pmemld_stub,  5,  0x400, false, true,  (u32)palette_ram },
+    { emit_pmemld_stub,  6,    0x0, false, true,  (u32)vram },           // same, vram is a special case
+    { emit_pmemld_stub,  7,  0x400, false, true,  (u32)oam_ram },
+    { emit_pmemld_stub,  8, 0x8000, false, false, 0 },
+    { emit_pmemld_stub,  9, 0x8000, false, false, 0 },
+    { emit_pmemld_stub, 10, 0x8000, false, false, 0 },
+    { emit_pmemld_stub, 11, 0x8000, false, false, 0 },
+    { emit_pmemld_stub, 12, 0x8000, false, false, 0 },
+    // 13 is EEPROM mapped already (a bit special)
+    { emit_pmemld_stub, 14,      0, false, false, 0 }, // Mapped via function call
+    // 15 Open load / Ignore store
+  };
+
+  for (i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) {
+    ldhldr_t handler = (ldhldr_t)ldinfo[i].emitter;
+    /*      region  info      signext sz al  isaligned */
+    handler(0, &ldinfo[i], false, 0, 0, false, &translation_ptr);  // ld u8
+    handler(1, &ldinfo[i], true,  0, 0, false, &translation_ptr);  // ld s8
+
+    handler(2, &ldinfo[i], false, 1, 0, false, &translation_ptr);  // ld u16
+    handler(3, &ldinfo[i], false, 1, 1, false, &translation_ptr);  // ld u16u1
+    handler(4, &ldinfo[i], true,  1, 0, false, &translation_ptr);  // ld s16
+    handler(5, &ldinfo[i], true,  1, 1, false, &translation_ptr);  // ld s16u1
+
+    handler(6, &ldinfo[i], false, 2, 0, false, &translation_ptr);  // ld u32
+    handler(7, &ldinfo[i], false, 2, 1, false, &translation_ptr);  // ld u32u1
+    handler(8, &ldinfo[i], false, 2, 2, false, &translation_ptr);  // ld u32u2
+    handler(9, &ldinfo[i], false, 2, 3, false, &translation_ptr);  // ld u32u3
+
+    handler(10,&ldinfo[i], false, 2, 0, true, &translation_ptr);   // aligned ld u32
+  }
+
+  const t_stub_meminfo stinfo [] = {
+    { emit_pmemst_stub, 2, 0x8000, true,  false, (u32)&ewram[0x8000] },
+    { emit_pmemst_stub, 3, 0x8000, true,  false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above
+    // I/O is special and mapped with a function call
+    { emit_palette_hdl, 5,  0x400, false, true,  (u32)palette_ram },
+    { emit_pmemst_stub, 6,    0x0, false, true,  (u32)vram },           // same, vram is a special case
+    { emit_pmemst_stub, 7,  0x400, false, true,  (u32)oam_ram },
+  };
+
+  // Store only for "regular"-ish mem regions
+  //
+  for (i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) {
+    sthldr_t handler = (sthldr_t)stinfo[i].emitter;
+    handler(0, &stinfo[i], 0, false, &translation_ptr);  // st u8
+    handler(1, &stinfo[i], 1, false, &translation_ptr);  // st u16
+    handler(2, &stinfo[i], 2, false, &translation_ptr);  // st u32
+    handler(3, &stinfo[i], 2, true,  &translation_ptr);  // st aligned 32
+  }
+}
+
+// C entry point: forwards to the assembly routine, passing the base of the
+// emulated register file (reg) along with the cycle count.
+u32 execute_arm_translate_internal(u32 cycles, void *regptr);
+u32 function_cc execute_arm_translate(u32 cycles) {
+  return execute_arm_translate_internal(cycles, &reg[0]);
+}
+
+#endif
+
+
diff --git a/psp/mips_stub.S b/psp/mips_stub.S
index 1b24b0d..2d40bf8 100644
--- a/psp/mips_stub.S
+++ b/psp/mips_stub.S
@@ -23,16 +23,6 @@
 .global mips_indirect_branch_arm
 .global mips_indirect_branch_thumb
 .global mips_indirect_branch_dual
-.global execute_load_u8
-.global execute_load_u16
-.global execute_load_u32
-.global execute_load_s8
-.global execute_load_s16
-.global execute_store_u8
-.global execute_store_u16
-.global execute_store_u32
-.global execute_aligned_load32
-.global execute_aligned_store32
 .global execute_read_cpsr
 .global execute_read_spsr
 .global execute_swi
@@ -43,14 +33,21 @@
 .global execute_lsr_flags_reg
 .global execute_asr_flags_reg
 .global execute_ror_flags_reg
-.global execute_arm_translate
+.global execute_arm_translate_internal
 .global icache_region_sync
 .global reg_check
 .global palette_ram
 .global palette_ram_converted
+.global init_emitter
+.global mips_lookup_pc
+.global smc_write
+.global write_io_epilogue
 .global memory_map_read
 .global memory_map_write
+.global tmemld
+.global tmemst
+.global tmemst
 .global reg
 .global spsr
 .global reg_mode
@@ -105,7 +102,6 @@
 .equ REG_R12, (12 * 4)
 .equ REG_R13, (13 * 4)
 .equ REG_R14, (14 * 4)
-.equ REG_LR, (14 * 4)
 .equ REG_PC, (15 * 4)
 .equ REG_N_FLAG, (16 * 4)
 .equ REG_Z_FLAG, (17 * 4)
@@ -125,6 +121,7 @@
 .equ
REGMODE_BASE, (0x900 + 24) .equ SUPERVISOR_SPSR, (3 * 4 + SPSR_BASE) .equ SUPERVISOR_LR, ((3 * (7 * 4)) + (6 * 4) + REGMODE_BASE) +.equ FNPTRS_BASE, (0x900 + 220 + 960) .set noat .set noreorder @@ -201,6 +198,22 @@ lw $30, REG_R14($16) .endm +# PIC ABI mandates to jump to target via $t9 + +#ifdef PIC +.macro cfncall target, targetid + lw $t9, (FNPTRS_BASE + \targetid * 4)($16) + jalr $t9 + nop +.endm +#else +.macro cfncall target, targetid + jal \target + nop +.endm +#endif + + # Process a hardware event. Since an interrupt might be # raised we have to check if the PC has changed. @@ -218,8 +231,8 @@ mips_update_gba: sw $ra, REG_SAVE2($16) # save return addr collapse_flags # update cpsr save_registers # save registers - jal update_gba # process the next event sw $0, CHANGED_PC_STATUS($16) + cfncall update_gba, 0 # process the next event lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame bne $1, $0, return_to_main # Return to main thread now @@ -262,2220 +275,25 @@ return_to_main: mips_indirect_branch_arm: save_registers - jal block_lookup_address_arm # $2 = MIPS address to jump to - nop + cfncall block_lookup_address_arm, 1 restore_registers - jr $2 # jump to it + jr $2 # $2 = value returned nop mips_indirect_branch_thumb: save_registers - jal block_lookup_address_thumb # $2 = MIPS address to jump to - nop + cfncall block_lookup_address_thumb, 2 restore_registers - jr $2 # jump to it + jr $2 # $2 = value returned nop mips_indirect_branch_dual: save_registers - jal block_lookup_address_dual # $2 = MIPS address to jump to + cfncall block_lookup_address_dual, 3 nop restore_registers - jr $2 # jump to it - nop - - -# $4: address to write to -# $5: current PC - -# Will patch the return address with a call to the correct handler as -# listed in the given table. 
- -# Value will be set to force_open if it's open - -.macro patch_handler ftable, force_open - srl $1, $4, 24 # $1 = address region - sltu $2, $1, 0x0F # check if the value is open - bne $2, $0, 1f - sll $1, $1, 2 # make address word indexed (delay) - - addiu $1, $0, (\force_open * 4) - -1: - lui $2, %hi(\ftable) - addu $2, $2, $1 - lw $2, %lo(\ftable)($2) # new function handler is in $2 - sll $1, $2, 4 # shift left by 4 (6 LSB are zero) - ori $1, $1, 3 # Insert the opcode in the LSB - ror $1, $1, 6 # Rotate to the opcode is now in the MSB - - sw $1, -8($ra) # Overwrite jal instruction w/ new handler - - cache 0x1a, -8($ra) # hit writeback dcache line - cache 0x08, -8($ra) # hit invalidate icache line - jr $2 # Jump to new handler directly - nop - -.endm - - -# Like the above, but will use the table of the proper alignment, -# The tables should be ordered by alignment - -.macro patch_handler_align ftable, alignment - srl $1, $4, 24 # $1 = address region - sltu $2, $1, 0x0F # check if the value is open - bne $2, $0, 1f - sll $1, $1, 2 # make address word indexed (delay) - - addiu $1, $0, 4 # force address to 0x1 (open) - -1: - ins $1, $4, 6, \alignment # place alignment bits into offset - lui $2, %hi(\ftable) - - addu $2, $2, $1 - lw $2, %lo(\ftable)($2) # new function handler is in $2 - - sll $1, $2, 4 # Build the new JAL instruction - ori $1, $1, 3 # same as above. 
- ror $1, $1, 6 - - sw $1, -8($ra) # modify to call new handler - - cache 0x1a, -8($ra) # hit writeback dcache line - cache 0x08, -8($ra) # hit invalidate icache line - jr $2 # Jump to new handler - nop - -.endm - - -.macro region_check region, patch_handler - srl $1, $4, 24 # check upper 8bits of address - xor $1, $1, \region # see if it is the given region - bne $1, $0, \patch_handler # if not repatch/try again -.endm - -.macro region_check_open patch_handler - srl $1, $4, 24 # check upper 8bits of address - sltiu $2, $1, 0x0F # true if it is a low address - addiu $1, $1, -1 # non-zero if it is not a low open - sltu $1, $0, $1 # true if lower bits != 1 - and $1, $1, $2 # true if low address and not open - bne $1, $0, \patch_handler # if above is true, patch -.endm - - -.macro region_check_align region, align_bits, alignment, patch_handler - srl $1, $4, 24 # check upper 8bits of address - ins $1, $4, 8, \align_bits # look at lower bits of address too - # See if it is the given region and alignment - xori $1, $1, (\region | (\alignment << 8)) - bne $1, $0, \patch_handler # if not repatch/try again -.endm - -.macro region_check_open_align align_bits, alignment, patch_handler - srl $1, $4, 24 # check upper 8bits of address - sltiu $2, $1, 0x0F # true if it is a low address - addiu $1, $1, -1 # non-zero if it is not a low open - sltu $1, $0, $1 # true if $1 != 0 - and $1, $1, $2 # true if low address and not open - ext $2, $4, 0, \align_bits # $2 = low bits of 4 - xori $2, $2, \alignment # true if alignment doesn't match - or $1, $1, $2 # align failure will trigger too - bne $1, $0, \patch_handler # if above is true, patch -.endm - - -.macro ignore_region region, patch_handler - region_check \region, \patch_handler - nop - jr $ra - nop -.endm - -.macro ignore_high patch_handler - srl $1, $4, 24 # check upper 8bits of address - sltiu $1, $1, 0x0F # see if it is not high - bne $1, $0, \patch_handler # if not repatch/try again - nop - jr $ra - nop -.endm - - -.macro 
translate_region_core base, size - lui $2, %hi(\base) # generate upper address - andi $4, $4, \size # generate offset - addu $2, $2, $4 # add ptr upper and offset -.endm - -.macro translate_region region, patch_handler, base, size - region_check \region, \patch_handler - translate_region_core \base, \size -.endm - -# I refuse to have > 80 char lines, and GAS has a problem with the param -# list spilling over (grumble) - -.macro translate_region_align region, a_b, alignment, p_h, base, size - region_check_align \region, \a_b, \alignment, \p_h - translate_region_core \base, \size -.endm - - -.macro translate_region_ewram_core mask - lui $2, %hi(ewram + 0x8000) # generate upper address (delay) - andi $1, $4, \mask # generate 15bit offset - ext $4, $4, 15, 3 # isolate top 3 bits of offset - ins $1, $4, 16, 3 # reinsert into top 4 bits - addu $2, $2, $1 -.endm - -.macro translate_region_ewram patch_handler - region_check 2, \patch_handler - translate_region_ewram_core 0x7FFF -.endm - -.macro translate_region_ewram_load_align align_bits, alignment, patch_handler - region_check_align 2, \align_bits, \alignment, \patch_handler - translate_region_ewram_core 0x7FFF -.endm - -.macro translate_region_ewram_load_align16 align_bits, alignment, patch_handler - region_check_align 2, \align_bits, \alignment, \patch_handler - translate_region_ewram_core 0x7FFE -.endm - -.macro translate_region_ewram_load_align32 align_bits, alignment, patch_handler - region_check_align 2, \align_bits, \alignment, \patch_handler - translate_region_ewram_core 0x7FFC -.endm - -.macro translate_region_ewram_store_align16 patch_handler - region_check 2, \patch_handler - translate_region_ewram_core 0x7FFE -.endm - -.macro translate_region_ewram_store_align32 patch_handler - region_check 2, \patch_handler - translate_region_ewram_core 0x7FFC -.endm - - -.macro translate_region_vram_core - addiu $2, $2, -3 # see if it's 3 - ext $4, $4, 0, 17 # generate 17bit offset - bne $2, $0, 1f - lui $1, %hi(vram) # 
start loading vram address (delay) - - addiu $4, $4, -0x8000 # move address into VRAM region - -1: - addu $2, $1, $4 # $2 = (hi)vram + address -.endm - -.macro translate_region_vram patch_handler - region_check 6, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - translate_region_vram_core -.endm - -.macro translate_region_vram_load_align align_bits, alignment, patch_handler - region_check_align 6, \align_bits, \alignment, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - translate_region_vram_core -.endm - -.macro translate_region_vram_load_align16 align_bits, alignment, patch_handler - region_check_align 6, \align_bits, \alignment, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - ins $4, $0, 0, 1 # mask out lower bit of address - translate_region_vram_core -.endm - -.macro translate_region_vram_load_align32 align_bits, alignment, patch_handler - region_check_align 6, \align_bits, \alignment, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - ins $4, $0, 0, 2 # mask out lower two bits of address - translate_region_vram_core -.endm - -.macro translate_region_vram_store_align16 patch_handler - region_check 6, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - ins $4, $0, 0, 1 # mask out lower bit of address - translate_region_vram_core -.endm - -.macro translate_region_vram_store_align32 patch_handler - region_check 6, \patch_handler - ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay) - ins $4, $0, 0, 2 # mask out lower two bits of address - translate_region_vram_core -.endm - - - -.macro translate_region_gamepak_core mask - srl $2, $4, 15 # $2 = page number of address (delay) - sll $2, $2, 2 # adjust to word index - addu $2, $2, $16 # $2 = memory_map_read[address >> 15] - lw $2, -32768($2) - bne $2, $0, 1f # if it's non-NULL continue - andi $1, $4, \mask # $1 = low 15bits of address (delay slot) - - sw $ra, 
REG_SAVE2($16) # save return address - - save_registers # save the registers - ext $4, $4, 15, 10 # $4 = (address >> 15) & 0x3FF - - jal load_gamepak_page # get page in $2 - sw $1, REG_SAVE($16) # save offset (delay) - lw $1, REG_SAVE($16) # restore offset (delay) - - restore_registers # restore the other registers - - lw $ra, REG_SAVE2($16) # restore return address - -1: - addu $2, $2, $1 # add the memory map offset -.endm - -.macro translate_region_gamepak region, patch_handler - region_check \region, \patch_handler - translate_region_gamepak_core 0x7FFF -.endm - -.macro translate_region_gamepak_align region, a_b, alignment, patch_handler - region_check_align \region, \a_b, \alignment, \patch_handler - translate_region_gamepak_core 0x7FFF -.endm - -.macro translate_region_gamepak_align16 region, a_b, alignment, patch_handler - region_check_align \region, \a_b, \alignment, \patch_handler - translate_region_gamepak_core 0x7FFE -.endm - -.macro translate_region_gamepak_align32 region, a_b, alignment, patch_handler - region_check_align \region, \a_b, \alignment, \patch_handler - translate_region_gamepak_core 0x7FFC -.endm - - -.macro translate_region_gamepak_a region, patch_handler - region_check \region, \patch_handler - srl $2, $4, 15 # $2 = page number of address (delay) - sll $2, $2, 2 # adjust to word index - addu $2, $2, $16 # $2 = memory_map_read[address >> 15] - lw $2, -32768($2) - bne $2, $0, 1f # if it's non-NULL continue - andi $1, $4, 0x7FFF # $1 = low 15bits of address (delay slot) - - sw $ra, REG_SAVE2($16) # save return address - sw $6, REG_SAVE3($16) # save a2 - - save_registers # save the registers - ext $4, $4, 15, 10 # $4 = (address >> 15) & 0x3FF - - jal load_gamepak_page # get page in $2 - sw $1, REG_SAVE($16) # save offset (delay) - lw $1, REG_SAVE($16) # restore offset (delay) - - restore_registers # restore the other registers - - lw $ra, REG_SAVE2($16) # restore return address - lw $6, REG_SAVE3($16) # restore a2 - -1: - addu $2, $2, $1 # add 
the memory map offset -.endm - - -.macro eeprom_load_a patch_handler - region_check 0xD, \patch_handler - - sw $ra, REG_SAVE($16) # save the return address (delay) - sw $6, REG_SAVE2($16) # save a2 - - save_registers # save the registers - - jal read_eeprom # get eeprom value in $2 - nop - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - lw $6, REG_SAVE2($16) # restore a2 -.endm - - -.macro eeprom_load_core - sw $ra, REG_SAVE($16) # save the return address (delay) - - save_registers # save the registers - - jal read_eeprom # get eeprom value in $2 - nop - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - nop -.endm - -.macro eeprom_load patch_handler - region_check 0xD, \patch_handler - eeprom_load_core -.endm - -.macro eeprom_load_align align_bits, alignment, patch_handler - region_check_align 0xD, \align_bits, \alignment, \patch_handler - eeprom_load_core -.endm - -.macro eeprom_load_align16 align_bits, alignment, patch_handler - eeprom_load_align \align_bits, \alignment, \patch_handler -.endm - -.macro eeprom_load_align32 align_bits, alignment, patch_handler - eeprom_load_align \align_bits, \alignment, \patch_handler -.endm - - -.macro backup_load_core - save_registers # save the registers - - jal read_backup # get backup value in $2 - ext $4, $4, 0, 16 # address &= 0xFFFF - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return -.endm - -.macro backup_load_a patch_handler - region_check 0xE, \patch_handler - sw $ra, REG_SAVE($16) # save return address (delay) - sw $6, REG_SAVE2($16) # save a2 - - save_registers # save the registers - - jal read_backup # get backup value in $2 - ext $4, $4, 0, 16 # address &= 0xFFFF - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - lw $6, REG_SAVE2($16) 
# restore a2 -.endm - - -.macro backup_load patch_handler - region_check 0xE, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - backup_load_core -.endm - -.macro backup_load_align align_bits, alignment, patch_handler - region_check_align 0xE, \align_bits, \alignment, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - backup_load_core -.endm - -.macro backup_load_align16 align_bits, alignment, patch_handler - region_check_align 0xE, \align_bits, \alignment, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - ins $4, $0, 0, 1 # mask out lower bit - backup_load_core -.endm - -.macro backup_load_align32 align_bits, alignment, patch_handler - region_check_align 0xE, \align_bits, \alignment, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - ins $4, $0, 0, 2 # mask out lower two bits - backup_load_core -.endm - - -.macro open_load8_core - lw $2, REG_CPSR($16) # $2 = CPSR (delay) - andi $2, $2, 0x20 # test T bit - beq $2, $0, 1f # branch if ARM mode - andi $4, $4, 0x03 # isolate lower 3bits from address (delay) - - andi $4, $4, 0x01 # in Thumb mode, isolate one more bit - -1: - sw $ra, REG_SAVE($16) # save the return address (delay) - save_registers # save the registers - - jal read_memory8 # get instruction at PC - addu $4, $5, $4 # a0 = PC + low bits of address - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return -.endm - -.macro open_load8 patch_handler - region_check_open \patch_handler - open_load8_core -.endm - - - -.macro open_load16_core - lw $2, REG_CPSR($16) # $2 = CPSR (delay) - andi $2, $2, 0x20 # test T bit - beq $2, $0, 1f # branch if ARM mode - andi $4, $4, 0x02 # isolate bit 1 from address (delay) - - addu $4, $0, $0 # zero out address bit - -1: - sw $ra, REG_SAVE($16) # save the return address (delay) - save_registers # save the registers - - jal read_memory16 # get instruction at PC - addu $4, 
$5, $4 # a0 = PC + low bits of address - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return -.endm - -.macro open_load16_align align_bits, alignment, patch_handler - region_check_open_align \align_bits, \alignment, \patch_handler - open_load16_core -.endm - -.macro open_load16_align16 align_bits, alignment, patch_handler - open_load16_align \align_bits, \alignment, \patch_handler -.endm - - - -.macro open_load32_core - lw $2, REG_CPSR($16) # $2 = CPSR (delay) - andi $2, $2, 0x20 # test T bit - - save_registers # save the registers - - beq $2, $0, 1f # branch if ARM mode - sw $ra, REG_SAVE($16) # save the return address (delay) - - jal read_memory16 # get instruction at PC - addu $4, $5, $0 # a0 = PC - - j 2f - ins $2, $2, 16, 16 # result = (result << 16) | result (delay) - -1: - jal read_memory32 # get instruction at PC - addu $4, $5, $4 # a0 = PC - -2: # join point - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return -.endm - -.macro open_load32_a patch_handler - region_check_open \patch_handler - - lw $2, REG_CPSR($16) # $2 = CPSR (delay) - andi $2, $2, 0x20 # test T bit - - save_registers # save the registers - sw $6, REG_SAVE2($16) # save a2 - - beq $2, $0, 1f # branch if ARM mode - sw $ra, REG_SAVE($16) # save the return address (delay) - - jal read_memory16 # get instruction at PC - addu $4, $5, $0 # a0 = PC - - j 2f - ins $2, $2, 16, 16 # result = (result << 16) | result (delay) - -1: - jal read_memory32 # get instruction at PC - addu $4, $5, $4 # a0 = PC - -2: - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - lw $6, REG_SAVE2($16) # restore a2 (delay) -.endm - -.macro open_load32_align align_bits, alignment, patch_handler - region_check_open_align \align_bits, \alignment, \patch_handler - open_load32_core -.endm - -.macro open_load32_align32 align_bits, 
alignment, patch_handler - open_load32_align \align_bits, \alignment, \patch_handler -.endm - - -.macro store_function function, region, patch_handler, mask - region_check \region, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - - save_registers # save the registers - - jal \function # store value out - andi $4, $4, \mask # mask address - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - nop -.endm - - -.macro store_function_a function, region, patch_handler, mask - region_check \region, \patch_handler - sw $ra, REG_SAVE($16) # save the return address (delay) - - save_registers # save the registers - - jal \function # store value out - andi $4, $4, \mask # mask address - - restore_registers # restore the other registers - - lw $ra, REG_SAVE($16) # restore return address - jr $ra # return - nop -.endm - - - -.macro load_u8 base - jr $ra # return - lbu $2, %lo(\base)($2) # return base[offset] -.endm - -.macro load_s8 base - jr $ra # return - lb $2, %lo(\base)($2) # return base[offset] -.endm - -.macro load_u16 base - jr $ra # return - lhu $2, %lo(\base)($2) # return base[offset] -.endm - -.macro load_s16 base - jr $ra # return - lh $2, %lo(\base)($2) # return base[offset] -.endm - -.macro load_u32 base - jr $ra # return - lw $2, %lo(\base)($2) # return base[offset] -.endm - - -# 16bit unaligned load will always have a 1 in the LSB; -# should have already been taken care of in indexing. - -.macro load_u16_unaligned base - lhu $2, %lo(\base)($2) # load base[offset] - jr $ra # return - ror $2, $2, 8 # rotate value by 8bits -.endm - -# This is technically the same as load_s8, but kept to -# avoid confusion. 
- -.macro load_s16_unaligned base - jr $ra # return - lb $2, %lo(\base)($2) # return base[offset] -.endm - -# Unalignment must be known statically (use the tables to -# patch correctly) - -.macro load_u32_unaligned base, alignment - lw $2, %lo(\base)($2) # load base[offset] - jr $ra # return - ror $2, $2, (\alignment * 8) # rotate value by 8bits -.endm - - -.macro store_u8 base - jr $ra # return - sb $5, %lo(\base)($2) # store value at base[offset] -.endm - -.macro store_u16 base - jr $ra # return - sh $5, %lo(\base)($2) # store value at base[offset] -.endm - -.macro store_u32 base - jr $ra # return - sw $5, %lo(\base)($2) # store value at base[offset] -.endm - - -# Store the value double mirrored (u16) - -.macro store_u8_double base - ins $5, $5, 8, 8 # value = (value << 8) | value - jr $ra # return - sh $5, %lo(\base)($2) # store value at base[offset] -.endm - - -# Store the values and check if it overwrote code there - -.macro store_u8_smc base - addiu $2, $2, %lo(\base) # offset the address - lb $1, -32768($2) # load the SMC status - bne $1, $0, smc_write # is there code there? - sb $5, ($2) # store value at base[offset] (delay) - jr $ra # return - nop -.endm - -.macro store_u16_smc base - addiu $2, $2, %lo(\base) # offset the address - lh $1, -32768($2) # load the SMC status - bne $1, $0, smc_write # is there code there? - sh $5, ($2) # store value at base[offset] (delay) - jr $ra # return - nop -.endm - -.macro store_u32_smc base - addiu $2, $2, %lo(\base) # offset the address - lw $1, -32768($2) # load the SMC status - bne $1, $0, smc_write # is there code there? 
- sw $5, ($2) # store value at base[offset] (delay) - jr $ra # return - nop -.endm - - - -# Unsigned 8bit load handlers - -execute_load_bios_u8: - region_check 0, patch_load_u8 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_u8 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ins $2, $4, 0, 2 # lower 2 bits address contributes - load_u8 bios_read_protect - -2: - open_load8_core - nop - - -execute_load_ewram_u8: - translate_region_ewram patch_load_u8 - load_u8 (ewram + 0x8000) - -# Put the generic address over the handler you want to be default -# IWRAM is typically the most frequently read and written to. - -execute_load_u8: -execute_load_iwram_u8: - translate_region 3, patch_load_u8, (iwram + 0x8000), 0x7FFF - load_u8 (iwram + 0x8000) - -execute_load_io_u8: - translate_region 4, patch_load_u8, io_registers, 0x3FF - load_u8 io_registers - -execute_load_palette_u8: - translate_region 5, patch_load_u8, palette_ram, 0x3FF - load_u8 palette_ram - -execute_load_vram_u8: - translate_region_vram patch_load_u8 - load_u8 vram - -execute_load_oam_u8: - translate_region 7, patch_load_u8, oam_ram, 0x3FF - load_u8 oam_ram - -execute_load_gamepak8_u8: - translate_region_gamepak 8, patch_load_u8 - load_u8 0 - -execute_load_gamepak9_u8: - translate_region_gamepak 9, patch_load_u8 - load_u8 0 - -execute_load_gamepakA_u8: - translate_region_gamepak 10, patch_load_u8 - load_u8 0 - -execute_load_gamepakB_u8: - translate_region_gamepak 11, patch_load_u8 - load_u8 0 - -execute_load_gamepakC_u8: - translate_region_gamepak 12, patch_load_u8 - load_u8 0 - -execute_load_eeprom_u8: - eeprom_load patch_load_u8 - -execute_load_backup_u8: - backup_load patch_load_u8 - nop - 
-execute_load_open_u8: - open_load8 patch_load_u8 - nop - -load_u8_ftable: - .long execute_load_bios_u8 # 0x00 BIOS - .long execute_load_open_u8 # 0x01 open address - .long execute_load_ewram_u8 # 0x02 EWRAM - .long execute_load_iwram_u8 # 0x03 IWRAM - .long execute_load_io_u8 # 0x04 I/O registers - .long execute_load_palette_u8 # 0x05 Palette RAM - .long execute_load_vram_u8 # 0x06 VRAM - .long execute_load_oam_u8 # 0x07 OAM RAM - .long execute_load_gamepak8_u8 # 0x08 gamepak - .long execute_load_gamepak9_u8 # 0x09 gamepak - .long execute_load_gamepakA_u8 # 0x0A gamepak - .long execute_load_gamepakB_u8 # 0x0B gamepak - .long execute_load_gamepakC_u8 # 0x0C gamepak - .long execute_load_eeprom_u8 # 0x0D gamepak/eeprom - .long execute_load_backup_u8 # 0x0E Flash ROM/SRAM - .long execute_load_open_u8 # 0x0F open address - -patch_load_u8: - patch_handler load_u8_ftable, 0x01 - - - -# Signed 8bit load handlers - -execute_load_bios_s8: - region_check 0, patch_load_s8 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_s8 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ins $2, $4, 0, 2 # lower 2 bits contribute - load_s8 bios_read_protect - -2: - open_load8_core - seb $2, $2 - - -execute_load_ewram_s8: - translate_region_ewram patch_load_s8 - load_s8 (ewram + 0x8000) - -execute_load_s8: -execute_load_iwram_s8: - translate_region 3, patch_load_s8, (iwram + 0x8000), 0x7FFF - load_s8 (iwram + 0x8000) - -execute_load_io_s8: - translate_region 4, patch_load_s8, io_registers, 0x3FF - load_s8 io_registers - -execute_load_palette_s8: - translate_region 5, patch_load_s8, palette_ram, 0x3FF - load_s8 palette_ram - -execute_load_vram_s8: - translate_region_vram patch_load_s8 - load_s8 vram - -execute_load_oam_s8: 
- translate_region 7, patch_load_s8, oam_ram, 0x3FF - load_s8 oam_ram - -execute_load_gamepak8_s8: - translate_region_gamepak 8, patch_load_s8 - load_s8 0 - -execute_load_gamepak9_s8: - translate_region_gamepak 9, patch_load_s8 - load_s8 0 - -execute_load_gamepakA_s8: - translate_region_gamepak 10, patch_load_s8 - load_s8 0 - -execute_load_gamepakB_s8: - translate_region_gamepak 11, patch_load_s8 - load_s8 0 - -execute_load_gamepakC_s8: - translate_region_gamepak 12, patch_load_s8 - load_s8 0 - -execute_load_eeprom_s8: - eeprom_load patch_load_s8 - -execute_load_backup_s8: - backup_load patch_load_s8 - seb $2, $2 # sign extend result (delay) - -execute_load_open_s8: - open_load8 patch_load_s8 - seb $2, $2 # sign extend result (delay) - -load_s8_ftable: - .long execute_load_bios_s8 # 0x00 BIOS - .long execute_load_open_s8 # 0x01 open address - .long execute_load_ewram_s8 # 0x02 EWRAM - .long execute_load_iwram_s8 # 0x03 IWRAM - .long execute_load_io_s8 # 0x04 I/O registers - .long execute_load_palette_s8 # 0x05 Palette RAM - .long execute_load_vram_s8 # 0x06 VRAM - .long execute_load_oam_s8 # 0x07 OAM RAM - .long execute_load_gamepak8_s8 # 0x08 gamepak - .long execute_load_gamepak9_s8 # 0x09 gamepak - .long execute_load_gamepakA_s8 # 0x0A gamepak - .long execute_load_gamepakB_s8 # 0x0B gamepak - .long execute_load_gamepakC_s8 # 0x0C gamepak - .long execute_load_eeprom_s8 # 0x0D gamepak/eeprom - .long execute_load_backup_s8 # 0x0E Flash ROM/SRAM - .long execute_load_open_s8 # 0x0F open address - -patch_load_s8: - patch_handler load_s8_ftable, 1 - - - -# Unsigned aligned 16bit load handlers - -execute_load_bios_u16: - region_check_align 0, 1, 0, patch_load_u16 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, 
$2, $4 - load_u16 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ins $2, $4, 0, 2 # bit 1 contributes - load_u16 bios_read_protect - -2: - open_load16_core - nop - -execute_load_ewram_u16: - translate_region_ewram_load_align 1, 0, patch_load_u16 - load_u16 (ewram + 0x8000) - -execute_load_u16: -execute_load_iwram_u16: - translate_region_align 3, 1, 0, patch_load_u16, (iwram + 0x8000), 0x7FFF - load_u16 (iwram + 0x8000) - -execute_load_io_u16: - translate_region_align 4, 1, 0, patch_load_u16, io_registers, 0x3FF - load_u16 io_registers - -execute_load_palette_u16: - translate_region_align 5, 1, 0, patch_load_u16, palette_ram, 0x3FF - load_u16 palette_ram - -execute_load_vram_u16: - translate_region_vram_load_align 1, 0, patch_load_u16 - load_u16 vram - -execute_load_oam_u16: - translate_region_align 7, 1, 0, patch_load_u16, oam_ram, 0x3FF - load_u16 oam_ram - -execute_load_gamepak8_u16: - translate_region_gamepak_align 8, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_gamepak9_u16: - translate_region_gamepak_align 9, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_gamepakA_u16: - translate_region_gamepak_align 10, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_gamepakB_u16: - translate_region_gamepak_align 11, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_gamepakC_u16: - translate_region_gamepak_align 12, 1, 0, patch_load_u16 - load_u16 0 - -execute_load_eeprom_u16: - eeprom_load_align 1, 0, patch_load_u16 - -execute_load_backup_u16: - backup_load_align 1, 0, patch_load_u16 - nop - -execute_load_open_u16: - open_load16_align 1, 0, patch_load_u16 - nop - - -# Unsigned unaligned 16bit load handlers - -execute_load_bios_u16u: - region_check_align 0, 1, 1, patch_load_u16 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - 
andi $4, $4, 0x3FFE # generate offset - addu $2, $2, $4 - load_u16_unaligned bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ext $1, $4, 1, 1 - ins $2, $1, 1, 1 # bit 1 contributes - load_u16_unaligned bios_read_protect - -2: - open_load16_core - ror $2, $2, 8 - - -execute_load_ewram_u16u: - translate_region_ewram_load_align16 1, 1, patch_load_u16 - load_u16_unaligned (ewram + 0x8000) - -execute_load_iwram_u16u: - translate_region_align 3, 1, 1, patch_load_u16, (iwram + 0x8000), 0x7FFE - load_u16_unaligned (iwram + 0x8000) - -execute_load_io_u16u: - translate_region_align 4, 1, 1, patch_load_u16, io_registers, 0x3FE - load_u16_unaligned io_registers - -execute_load_palette_u16u: - translate_region_align 5, 1, 1, patch_load_u16, palette_ram, 0x3FE - load_u16_unaligned palette_ram - -execute_load_vram_u16u: - translate_region_vram_load_align16 1, 1, patch_load_u16 - load_u16_unaligned vram - -execute_load_oam_u16u: - translate_region_align 7, 1, 1, patch_load_u16, oam_ram, 0x3FE - load_u16_unaligned oam_ram - -execute_load_gamepak8_u16u: - translate_region_gamepak_align16 8, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_gamepak9_u16u: - translate_region_gamepak_align16 9, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_gamepakA_u16u: - translate_region_gamepak_align16 10, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_gamepakB_u16u: - translate_region_gamepak_align16 11, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_gamepakC_u16u: - translate_region_gamepak_align16 12, 1, 1, patch_load_u16 - load_u16_unaligned 0 - -execute_load_eeprom_u16u: - eeprom_load_align16 1, 1, patch_load_u16 - -execute_load_backup_u16u: - backup_load_align16 1, 1, patch_load_u16 - ror $2, $2, 8 # rotate value by 8bits - -execute_load_open_u16u: - open_load16_align16 1, 1, patch_load_u16 - ror $2, $2, 8 # rotate value by 8bits - -load_u16_ftable: - .long execute_load_bios_u16 # 0x00 BIOS - .long 
execute_load_open_u16 # 0x01 open address - .long execute_load_ewram_u16 # 0x02 EWRAM - .long execute_load_iwram_u16 # 0x03 IWRAM - .long execute_load_io_u16 # 0x04 I/O registers - .long execute_load_palette_u16 # 0x05 Palette RAM - .long execute_load_vram_u16 # 0x06 VRAM - .long execute_load_oam_u16 # 0x07 OAM RAM - .long execute_load_gamepak8_u16 # 0x08 gamepak - .long execute_load_gamepak9_u16 # 0x09 gamepak - .long execute_load_gamepakA_u16 # 0x0A gamepak - .long execute_load_gamepakB_u16 # 0x0B gamepak - .long execute_load_gamepakC_u16 # 0x0C gamepak - .long execute_load_eeprom_u16 # 0x0D gamepak/eeprom - .long execute_load_backup_u16 # 0x0E Flash ROM/SRAM - .long execute_load_open_u16 # 0x0F open - - .long execute_load_bios_u16u # 0x00 BIOS unaligned - .long execute_load_open_u16u # 0x01 open address unaligned - .long execute_load_ewram_u16u # 0x02 EWRAM unaligned - .long execute_load_iwram_u16u # 0x03 IWRAM unaligned - .long execute_load_io_u16u # 0x04 I/O registers unaligned - .long execute_load_palette_u16u # 0x05 Palette RAM unaligned - .long execute_load_vram_u16u # 0x06 VRAM unaligned - .long execute_load_oam_u16u # 0x07 OAM RAM unaligned - .long execute_load_gamepak8_u16u# 0x08 gamepak unaligned - .long execute_load_gamepak9_u16u# 0x09 gamepak unaligned - .long execute_load_gamepakA_u16u# 0x0A gamepak unaligned - .long execute_load_gamepakB_u16u# 0x0B gamepak unaligned - .long execute_load_gamepakC_u16u# 0x0C gamepak unaligned - .long execute_load_eeprom_u16u # 0x0D gamepak/eeprom unaligned - .long execute_load_backup_u16u # 0x0E Flash ROM/SRAM unaligned - .long execute_load_open_u16u # 0x0F open unaligned - -patch_load_u16: - patch_handler_align load_u16_ftable, 1 - -# Signed aligned 16bit load handlers - -execute_load_bios_s16: - region_check_align 0, 1, 0, patch_load_s16 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, 
perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_s16 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ins $2, $4, 0, 2 # bit 1 contributes - load_s16 bios_read_protect - -2: - open_load16_core - seh $2, $2 - - -execute_load_ewram_s16: - translate_region_ewram_load_align 1, 0, patch_load_s16 - load_s16 (ewram + 0x8000) - -execute_load_s16: -execute_load_iwram_s16: - translate_region_align 3, 1, 0, patch_load_s16, (iwram + 0x8000), 0x7FFF - load_s16 (iwram + 0x8000) - -execute_load_io_s16: - translate_region_align 4, 1, 0, patch_load_s16, io_registers, 0x3FF - load_s16 io_registers - -execute_load_palette_s16: - translate_region_align 5, 1, 0, patch_load_s16, palette_ram, 0x3FF - load_s16 palette_ram - -execute_load_vram_s16: - translate_region_vram_load_align 1, 0, patch_load_s16 - load_s16 vram - -execute_load_oam_s16: - translate_region_align 7, 1, 0, patch_load_s16, oam_ram, 0x3FF - load_s16 oam_ram - -execute_load_gamepak8_s16: - translate_region_gamepak_align 8, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_gamepak9_s16: - translate_region_gamepak_align 9, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_gamepakA_s16: - translate_region_gamepak_align 10, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_gamepakB_s16: - translate_region_gamepak_align 11, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_gamepakC_s16: - translate_region_gamepak_align 12, 1, 0, patch_load_s16 - load_s16 0 - -execute_load_eeprom_s16: - eeprom_load_align 1, 0, patch_load_s16 - -execute_load_backup_s16: - backup_load_align 1, 0, patch_load_s16 - nop - -execute_load_open_s16: - open_load16_align 1, 0, patch_load_s16 - nop - - -# Signed unaligned 16bit load handlers - -execute_load_bios_s16u: - region_check_align 0, 1, 1, patch_load_s16 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # 
check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFE # generate offset - addu $2, $1, $4 - load_s16_unaligned bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - ext $1, $4, 1, 1 - ins $2, $1, 1, 1 # bit 1 contributes - load_s16_unaligned bios_read_protect - -2: - open_load16_core - seb $2, $2 - -execute_load_ewram_s16u: - translate_region_ewram_load_align16 1, 1, patch_load_s16 - load_s16_unaligned (ewram + 0x8000) - -execute_load_iwram_s16u: - translate_region_align 3, 1, 1, patch_load_s16, (iwram + 0x8000), 0x7FFE - load_s16_unaligned (iwram + 0x8000) - -execute_load_io_s16u: - translate_region_align 4, 1, 1, patch_load_s16, io_registers, 0x3FE - load_s16_unaligned io_registers - -execute_load_palette_s16u: - translate_region_align 5, 1, 1, patch_load_s16, palette_ram, 0x3FE - load_s16_unaligned palette_ram - -execute_load_vram_s16u: - translate_region_vram_load_align16 1, 1, patch_load_s16 - load_s16_unaligned vram - -execute_load_oam_s16u: - translate_region_align 7, 1, 1, patch_load_s16, oam_ram, 0x3FE - load_s16_unaligned oam_ram - -execute_load_gamepak8_s16u: - translate_region_gamepak_align16 8, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_gamepak9_s16u: - translate_region_gamepak_align16 9, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_gamepakA_s16u: - translate_region_gamepak_align16 10, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_gamepakB_s16u: - translate_region_gamepak_align16 11, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_gamepakC_s16u: - translate_region_gamepak_align16 12, 1, 1, patch_load_s16 - load_s16_unaligned 0 - -execute_load_eeprom_s16u: - eeprom_load_align 1, 1, patch_load_s16 - -execute_load_backup_s16u: - backup_load_align 1, 1, patch_load_s16 - seb $2, $2 # sign extend result from 8bits - -execute_load_open_s16u: - open_load16_align 1, 1, 
patch_load_s16 - seb $2, $2 # sign extend result from 8bits - -load_s16_ftable: - .long execute_load_bios_s16 # 0x00 BIOS - .long execute_load_open_s16 # 0x01 open address - .long execute_load_ewram_s16 # 0x02 EWRAM - .long execute_load_iwram_s16 # 0x03 IWRAM - .long execute_load_io_s16 # 0x04 I/O registers - .long execute_load_palette_s16 # 0x05 Palette RAM - .long execute_load_vram_s16 # 0x06 VRAM - .long execute_load_oam_s16 # 0x07 OAM RAM - .long execute_load_gamepak8_s16 # 0x08 gamepak - .long execute_load_gamepak9_s16 # 0x09 gamepak - .long execute_load_gamepakA_s16 # 0x0A gamepak - .long execute_load_gamepakB_s16 # 0x0B gamepak - .long execute_load_gamepakC_s16 # 0x0C gamepak - .long execute_load_eeprom_s16 # 0x0D gamepak/eeprom - .long execute_load_backup_s16 # 0x0E Flash ROM/SRAM - .long execute_load_open_s16 # 0x0F open unaligned - - .long execute_load_bios_s16u # 0x00 BIOS unaligned - .long execute_load_open_s16u # 0x01 open address unaligned - .long execute_load_ewram_s16u # 0x02 EWRAM unaligned - .long execute_load_iwram_s16u # 0x03 IWRAM unaligned - .long execute_load_io_s16u # 0x04 I/O registers unaligned - .long execute_load_palette_s16u # 0x05 Palette RAM unaligned - .long execute_load_vram_s16u # 0x06 VRAM unaligned - .long execute_load_oam_s16u # 0x07 OAM RAM unaligned - .long execute_load_gamepak8_s16u# 0x08 gamepak unaligned - .long execute_load_gamepak9_s16u# 0x09 gamepak unaligned - .long execute_load_gamepakA_s16u# 0x0A gamepak unaligned - .long execute_load_gamepakB_s16u# 0x0B gamepak unaligned - .long execute_load_gamepakC_s16u# 0x0C gamepak unaligned - .long execute_load_eeprom_s16u # 0x0D gamepak/eeprom unaligned - .long execute_load_backup_s16u # 0x0E Flash ROM/SRAM unaligned - .long execute_load_open_s16u # 0x0F open unaligned - -patch_load_s16: - patch_handler_align load_s16_ftable, 1 - - - -# Unsigned aligned 32bit load handlers - -execute_load_bios_u32: - region_check_align 0, 2, 0, patch_load_u32 - srl $2, $4, 14 # check if address 
is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_u32 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32 bios_read_protect - -2: - open_load32_core - nop - - -execute_load_ewram_u32: - translate_region_ewram_load_align 2, 0, patch_load_u32 - load_u32 (ewram + 0x8000) - -execute_load_u32: -execute_load_iwram_u32: - translate_region_align 3, 2, 0, patch_load_u32, (iwram + 0x8000), 0x7FFF - load_u32 (iwram + 0x8000) - -execute_load_io_u32: - translate_region_align 4, 2, 0, patch_load_u32, io_registers, 0x3FF - load_u32 io_registers - -execute_load_palette_u32: - translate_region_align 5, 2, 0, patch_load_u32, palette_ram, 0x3FF - load_u32 palette_ram - -execute_load_vram_u32: - translate_region_vram_load_align 2, 0, patch_load_u32 - load_u32 vram - -execute_load_oam_u32: - translate_region_align 7, 2, 0, patch_load_u32, oam_ram, 0x3FF - load_u32 oam_ram - -execute_load_gamepak8_u32: - translate_region_gamepak_align 8, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_gamepak9_u32: - translate_region_gamepak_align 9, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_gamepakA_u32: - translate_region_gamepak_align 10, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_gamepakB_u32: - translate_region_gamepak_align 11, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_gamepakC_u32: - translate_region_gamepak_align 12, 2, 0, patch_load_u32 - load_u32 0 - -execute_load_eeprom_u32: - eeprom_load_align 2, 0, patch_load_u32 - -execute_load_backup_u32: - backup_load_align 2, 0, patch_load_u32 - nop - -execute_load_open_u32: - open_load32_align 2, 0, patch_load_u32 - nop - - -# Unsigned unaligned (by 1) 32bit load handlers - -execute_load_bios_u32u1: - region_check_align 0, 2, 1, patch_load_u32 - srl $2, $4, 14 
# check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFC # generate offset - addu $2, $2, $4 - load_u32_unaligned bios_rom, 1 - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32_unaligned bios_read_protect, 1 - -2: - open_load32_core - ror $2, $2, 8 - -execute_load_ewram_u32u1: - translate_region_ewram_load_align32 2, 1, patch_load_u32 - load_u32_unaligned (ewram + 0x8000), 1 - -execute_load_iwram_u32u1: - translate_region_align 3, 2, 1, patch_load_u32, (iwram + 0x8000), 0x7FFC - load_u32_unaligned (iwram + 0x8000), 1 - -execute_load_io_u32u1: - translate_region_align 4, 2, 1, patch_load_u32, io_registers, 0x3FC - load_u32_unaligned io_registers, 1 - -execute_load_palette_u32u1: - translate_region_align 5, 2, 1, patch_load_u32, palette_ram, 0x3FC - load_u32_unaligned palette_ram, 1 - -execute_load_vram_u32u1: - translate_region_vram_load_align32 2, 1, patch_load_u32 - load_u32_unaligned vram, 1 - -execute_load_oam_u32u1: - translate_region_align 7, 2, 1, patch_load_u32, oam_ram, 0x3FC - load_u32_unaligned oam_ram, 1 - -execute_load_gamepak8_u32u1: - translate_region_gamepak_align32 8, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_gamepak9_u32u1: - translate_region_gamepak_align32 9, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_gamepakA_u32u1: - translate_region_gamepak_align32 10, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_gamepakB_u32u1: - translate_region_gamepak_align32 11, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_gamepakC_u32u1: - translate_region_gamepak_align32 12, 2, 1, patch_load_u32 - load_u32_unaligned 0, 1 - -execute_load_eeprom_u32u1: - eeprom_load_align32 2, 1, patch_load_u32 - -execute_load_backup_u32u1: - backup_load_align32 2, 1, patch_load_u32 
- ror $2, $2, 8 # rotate value by 8bits - -execute_load_open_u32u1: - open_load32_align32 2, 1, patch_load_u32 - ror $2, $2, 8 # rotate value by 8bits - - -# Unsigned unaligned (by 2) 32bit load handlers - -execute_load_bios_u32u2: - region_check_align 0, 2, 2, patch_load_u32 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFC # generate offset - addu $2, $2, $4 - load_u32_unaligned bios_rom, 2 - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32_unaligned bios_read_protect, 2 - -2: - open_load32_core - ror $2, $2, 16 - -execute_load_ewram_u32u2: - translate_region_ewram_load_align32 2, 2, patch_load_u32 - load_u32_unaligned (ewram + 0x8000), 2 - -execute_load_iwram_u32u2: - translate_region_align 3, 2, 2, patch_load_u32, (iwram + 0x8000), 0x7FFC - load_u32_unaligned (iwram + 0x8000), 2 - -execute_load_io_u32u2: - translate_region_align 4, 2, 2, patch_load_u32, io_registers, 0x3FC - load_u32_unaligned io_registers, 2 - -execute_load_palette_u32u2: - translate_region_align 5, 2, 2, patch_load_u32, palette_ram, 0x3FC - load_u32_unaligned palette_ram, 2 - -execute_load_vram_u32u2: - translate_region_vram_load_align32 2, 2, patch_load_u32 - load_u32_unaligned vram, 2 - -execute_load_oam_u32u2: - translate_region_align 7, 2, 2, patch_load_u32, oam_ram, 0x3FC - load_u32_unaligned oam_ram, 2 - -execute_load_gamepak8_u32u2: - translate_region_gamepak_align32 8, 2, 2, patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_gamepak9_u32u2: - translate_region_gamepak_align32 9, 2, 2, patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_gamepakA_u32u2: - translate_region_gamepak_align32 10, 2, 2, patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_gamepakB_u32u2: - translate_region_gamepak_align32 11, 2, 2, 
patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_gamepakC_u32u2: - translate_region_gamepak_align32 12, 2, 2, patch_load_u32 - load_u32_unaligned 0, 2 - -execute_load_eeprom_u32u2: - eeprom_load_align32 2, 2, patch_load_u32 - -execute_load_backup_u32u2: - backup_load_align32 2, 2, patch_load_u32 - ror $2, $2, 16 # rotate value by 16bits - -execute_load_open_u32u2: - open_load32_align32 2, 2, patch_load_u32 - ror $2, $2, 16 # rotate value by 16bits - -# Unsigned unaligned (by 1) 32bit load handlers - -execute_load_bios_u32u3: - region_check_align 0, 2, 3, patch_load_u32 - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFC # generate offset - addu $2, $2, $4 - load_u32_unaligned bios_rom, 3 - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32_unaligned bios_read_protect, 3 - -2: - open_load32_core - ror $2, $2, 24 - -execute_load_ewram_u32u3: - translate_region_ewram_load_align32 2, 3, patch_load_u32 - load_u32_unaligned (ewram + 0x8000), 3 - -execute_load_iwram_u32u3: - translate_region_align 3, 2, 3, patch_load_u32, (iwram + 0x8000), 0x7FFC - load_u32_unaligned (iwram + 0x8000), 3 - -execute_load_io_u32u3: - translate_region_align 4, 2, 3, patch_load_u32, io_registers, 0x3FC - load_u32_unaligned io_registers, 3 - -execute_load_palette_u32u3: - translate_region_align 5, 2, 3, patch_load_u32, palette_ram, 0x3FC - load_u32_unaligned palette_ram, 3 - -execute_load_vram_u32u3: - translate_region_vram_load_align32 2, 3, patch_load_u32 - load_u32_unaligned vram, 3 - -execute_load_oam_u32u3: - translate_region_align 7, 2, 3, patch_load_u32, oam_ram, 0x3FC - load_u32_unaligned oam_ram, 3 - -execute_load_gamepak8_u32u3: - translate_region_gamepak_align32 8, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - 
-execute_load_gamepak9_u32u3: - translate_region_gamepak_align32 9, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - -execute_load_gamepakA_u32u3: - translate_region_gamepak_align32 10, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - -execute_load_gamepakB_u32u3: - translate_region_gamepak_align32 11, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - -execute_load_gamepakC_u32u3: - translate_region_gamepak_align32 12, 2, 3, patch_load_u32 - load_u32_unaligned 0, 3 - -execute_load_eeprom_u32u3: - eeprom_load_align32 2, 3, patch_load_u32 - -execute_load_backup_u32u3: - backup_load_align32 2, 3, patch_load_u32 - ror $2, $2, 24 # rotate value by 24bits - -execute_load_open_u32u3: - open_load32_align32 2, 3, patch_load_u32 - ror $2, $2, 24 # rotate value by 24bits - - -load_u32_ftable: - .long execute_load_bios_u32 # 0x00 BIOS - .long execute_load_open_u32 # 0x01 open address - .long execute_load_ewram_u32 # 0x02 EWRAM - .long execute_load_iwram_u32 # 0x03 IWRAM - .long execute_load_io_u32 # 0x04 I/O registers - .long execute_load_palette_u32 # 0x05 Palette RAM - .long execute_load_vram_u32 # 0x06 VRAM - .long execute_load_oam_u32 # 0x07 OAM RAM - .long execute_load_gamepak8_u32 # 0x08 gamepak - .long execute_load_gamepak9_u32 # 0x09 gamepak - .long execute_load_gamepakA_u32 # 0x0A gamepak - .long execute_load_gamepakB_u32 # 0x0B gamepak - .long execute_load_gamepakC_u32 # 0x0C gamepak - .long execute_load_eeprom_u32 # 0x0D gamepak/eeprom - .long execute_load_backup_u32 # 0x0E Flash ROM/SRAM - .long execute_load_open_u32 # 0x0F open - - .long execute_load_bios_u32u1 # 0x00 BIOS unaligned (1b) - .long execute_load_open_u32u1 # 0x01 open address unaligned (1b) - .long execute_load_ewram_u32u1 # 0x02 EWRAM unaligned (1b) - .long execute_load_iwram_u32u1 # 0x03 IWRAM unaligned (1b) - .long execute_load_io_u32u1 # 0x04 I/O registers unaligned (1b) - .long execute_load_palette_u32u1 # 0x05 Palette RAM unaligned (1b) - .long execute_load_vram_u32u1 # 0x06 VRAM unaligned (1b) - 
.long execute_load_oam_u32u1 # 0x07 OAM RAM unaligned (1b) - .long execute_load_gamepak8_u32u1 # 0x08 gamepak unaligned (1b) - .long execute_load_gamepak9_u32u1 # 0x09 gamepak unaligned (1b) - .long execute_load_gamepakA_u32u1 # 0x0A gamepak unaligned (1b) - .long execute_load_gamepakB_u32u1 # 0x0B gamepak unaligned (1b) - .long execute_load_gamepakC_u32u1 # 0x0C gamepak unaligned (1b) - .long execute_load_eeprom_u32u1 # 0x0D gamepak/eeprom unaligned (1b) - .long execute_load_backup_u32u1 # 0x0E Flash ROM/SRAM unaligned (1b) - .long execute_load_open_u32u1 # 0x0F open unaligned (1b) - - .long execute_load_bios_u32u2 # 0x00 BIOS unaligned (2b) - .long execute_load_open_u32u2 # 0x01 open address unaligned (2b) - .long execute_load_ewram_u32u2 # 0x02 EWRAM unaligned (2b) - .long execute_load_iwram_u32u2 # 0x03 IWRAM unaligned (2b) - .long execute_load_io_u32u2 # 0x04 I/O registers unaligned (2b) - .long execute_load_palette_u32u2 # 0x05 Palette RAM unaligned (2b) - .long execute_load_vram_u32u2 # 0x06 VRAM unaligned (2b) - .long execute_load_oam_u32u2 # 0x07 OAM RAM unaligned (2b) - .long execute_load_gamepak8_u32u2 # 0x08 gamepak unaligned (2b) - .long execute_load_gamepak9_u32u2 # 0x09 gamepak unaligned (2b) - .long execute_load_gamepakA_u32u2 # 0x0A gamepak unaligned (2b) - .long execute_load_gamepakB_u32u2 # 0x0B gamepak unaligned (2b) - .long execute_load_gamepakC_u32u2 # 0x0C gamepak unaligned (2b) - .long execute_load_eeprom_u32u2 # 0x0D gamepak/eeprom unaligned (2b) - .long execute_load_backup_u32u2 # 0x0E Flash ROM/SRAM unaligned (2b) - .long execute_load_open_u32u2 # 0x0F open unaligned (2b) - - .long execute_load_bios_u32u3 # 0x00 BIOS unaligned (3b) - .long execute_load_open_u32u3 # 0x01 open address unaligned (3b) - .long execute_load_ewram_u32u3 # 0x02 EWRAM unaligned (3b) - .long execute_load_iwram_u32u3 # 0x03 IWRAM unaligned (3b) - .long execute_load_io_u32u3 # 0x04 I/O registers unaligned (3b) - .long execute_load_palette_u32u3 # 0x05 Palette RAM 
unaligned (3b) - .long execute_load_vram_u32u3 # 0x06 VRAM unaligned (3b) - .long execute_load_oam_u32u3 # 0x07 OAM RAM unaligned (3b) - .long execute_load_gamepak8_u32u3 # 0x08 gamepak unaligned (3b) - .long execute_load_gamepak9_u32u3 # 0x09 gamepak unaligned (3b) - .long execute_load_gamepakA_u32u3 # 0x0A gamepak unaligned (3b) - .long execute_load_gamepakB_u32u3 # 0x0B gamepak unaligned (3b) - .long execute_load_gamepakC_u32u3 # 0x0C gamepak unaligned (3b) - .long execute_load_eeprom_u32u3 # 0x0D gamepak/eeprom unaligned (3b) - .long execute_load_backup_u32u3 # 0x0E Flash ROM/SRAM unaligned (3b) - .long execute_load_open_u32u3 # 0x0F open unaligned (3b) - -patch_load_u32: - patch_handler_align load_u32_ftable, 2 - - - -# Unsigned always aligned 32bit load handlers - -execute_load_bios_u32a: - region_check 0, patch_load_u32a - srl $2, $4, 14 # check if address is in BIOS region - bne $2, $0, 2f # if not, perform open read - srl $1, $5, 14 # check if PC is in BIOS region - bne $1, $0, 1f # if not, perform BIOS protected read - lui $2, %hi(bios_rom) # generate upper address (delay) - - andi $4, $4, 0x3FFF # generate offset - addu $2, $2, $4 - load_u32 bios_rom - -1: - lui $2, %hi(bios_read_protect) # generate upper address - load_u32 bios_read_protect - -2: - open_load32_core - nop - -execute_load_ewram_u32a: - translate_region_ewram patch_load_u32a - load_u32 (ewram + 0x8000) - -execute_aligned_load32: -execute_load_iwram_u32a: - translate_region 3, patch_load_u32a, (iwram + 0x8000), 0x7FFF - load_u32 (iwram + 0x8000) - -execute_load_io_u32a: - translate_region 4, patch_load_u32a, io_registers, 0x3FF - load_u32 io_registers - -execute_load_palette_u32a: - translate_region 5, patch_load_u32a, palette_ram, 0x3FF - load_u32 palette_ram - -execute_load_vram_u32a: - translate_region_vram patch_load_u32a - load_u32 vram - -execute_load_oam_u32a: - translate_region 7, patch_load_u32a, oam_ram, 0x3FF - load_u32 oam_ram - -execute_load_gamepak8_u32a: - 
translate_region_gamepak_a 8, patch_load_u32a - load_u32 0 - -execute_load_gamepak9_u32a: - translate_region_gamepak_a 9, patch_load_u32a - load_u32 0 - -execute_load_gamepakA_u32a: - translate_region_gamepak_a 10, patch_load_u32a - load_u32 0 - -execute_load_gamepakB_u32a: - translate_region_gamepak_a 11, patch_load_u32a - load_u32 0 - -execute_load_gamepakC_u32a: - translate_region_gamepak_a 12, patch_load_u32a - load_u32 0 - -execute_load_eeprom_u32a: - eeprom_load_a patch_load_u32a - -execute_load_backup_u32a: - backup_load_a patch_load_u32a - nop - -execute_load_open_u32a: - open_load32_a patch_load_u32a - -load_u32a_ftable: - .long execute_load_bios_u32a # 0x00 BIOS unaligned (3b) - .long execute_load_open_u32a # 0x01 open address unaligned (3b) - .long execute_load_ewram_u32a # 0x02 EWRAM unaligned (3b) - .long execute_load_iwram_u32a # 0x03 IWRAM unaligned (3b) - .long execute_load_io_u32a # 0x04 I/O registers unaligned (3b) - .long execute_load_palette_u32a # 0x05 Palette RAM unaligned (3b) - .long execute_load_vram_u32a # 0x06 VRAM unaligned (3b) - .long execute_load_oam_u32a # 0x07 OAM RAM unaligned (3b) - .long execute_load_gamepak8_u32a # 0x08 gamepak unaligned (3b) - .long execute_load_gamepak9_u32a # 0x09 gamepak unaligned (3b) - .long execute_load_gamepakA_u32a # 0x0A gamepak unaligned (3b) - .long execute_load_gamepakB_u32a # 0x0B gamepak unaligned (3b) - .long execute_load_gamepakC_u32a # 0x0C gamepak unaligned (3b) - .long execute_load_eeprom_u32a # 0x0D gamepak/eeprom unaligned (3b) - .long execute_load_backup_u32a # 0x0E Flash ROM/SRAM unaligned (3b) - .long execute_load_open_u32a # 0x0F open unaligned (3b) - -patch_load_u32a: - patch_handler load_u32a_ftable, 1 - - -# Unsigned 8bit store handlers - -execute_store_ignore0_u8: - ignore_region 0, patch_store_u8 - -execute_store_ignore1_u8: - ignore_region 1, patch_store_u8 - -execute_store_ewram_u8: - translate_region_ewram patch_store_u8 - store_u8_smc (ewram + 0x8000) - -execute_store_u8: 
-execute_store_iwram_u8: - translate_region 3, patch_store_u8, (iwram + 0x8000), 0x7FFF - store_u8_smc (iwram + 0x8000) - -execute_store_io_u8: - region_check 4, patch_store_u8 - andi $5, $5, 0xFF # make value 8bit - andi $4, $4, 0x3FF # wrap around address - sw $ra, REG_SAVE3($16) # preserve $ra - - save_registers - jal write_io_register8 # write the value out - sw $6, REG_PC($16) # save the PC (delay slot) - j write_io_epilogue # handle any state changes - nop - -execute_store_palette_u8: - region_check 5, patch_store_u8 - andi $2, $4, 0x3FE # align palette address - ins $5, $5, 8, 8 # double value - addu $2, $2, $16 - sh $5, 0x100($2) # palette_ram[address] = value - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - jr $ra # return - sh $1, 0x500($2) - -execute_store_vram_u8: - translate_region_vram_store_align16 patch_store_u8 - store_u8_double vram - -execute_store_oam_u8: - translate_region 7, patch_store_u8, oam_ram, 0x3FE - lui $1, %hi(oam_update) # write non-zero to oam_update - sw $1, %lo(oam_update)($1) # cheap, but this is non-zero - store_u8_double oam_ram - -execute_store_ignore8_u8: - ignore_region 8, patch_store_u8 - -execute_store_ignore9_u8: - ignore_region 9, patch_store_u8 - -execute_store_ignoreA_u8: - ignore_region 10, patch_store_u8 - -execute_store_ignoreB_u8: - ignore_region 11, patch_store_u8 - -execute_store_ignoreC_u8: - ignore_region 12, patch_store_u8 - -execute_store_eeprom_u8: - store_function write_eeprom, 13, patch_store_u8, 0x3FF - -execute_store_backup_u8: - store_function write_backup, 14, patch_store_u8, 0xFFFF - -execute_store_ignoreF_u8: - ignore_high patch_store_u8 - -store_u8_ftable: - .long execute_store_ignore0_u8 # 0x00 BIOS - .long execute_store_ignore1_u8 # 0x01 open address - .long execute_store_ewram_u8 # 0x02 EWRAM - .long execute_store_iwram_u8 # 0x03 IWRAM - .long execute_store_io_u8 # 0x04 I/O registers - .long execute_store_palette_u8 # 0x05 
Palette RAM - .long execute_store_vram_u8 # 0x06 VRAM - .long execute_store_oam_u8 # 0x07 OAM RAM - .long execute_store_ignore8_u8 # 0x08 gamepak - .long execute_store_ignore9_u8 # 0x09 gamepak - .long execute_store_ignoreA_u8 # 0x0A gamepak - .long execute_store_ignoreB_u8 # 0x0B gamepak - .long execute_store_ignoreC_u8 # 0x0C gamepak - .long execute_store_eeprom_u8 # 0x0D gamepak/eeprom - .long execute_store_backup_u8 # 0x0E Flash ROM/SRAM - .long execute_store_ignoreF_u8 # 0x0F open address - -patch_store_u8: - patch_handler store_u8_ftable, 0x0F - - -# Unsigned 16bit store handlers - -execute_store_ignore0_u16: - ignore_region 0, patch_store_u16 - -execute_store_ignore1_u16: - ignore_region 1, patch_store_u16 - -execute_store_ewram_u16: - translate_region_ewram_store_align16 patch_store_u16 - store_u16_smc (ewram + 0x8000) - -execute_store_u16: -execute_store_iwram_u16: - translate_region 3, patch_store_u16, (iwram + 0x8000), 0x7FFE - store_u16_smc (iwram + 0x8000) - -execute_store_io_u16: - region_check 4, patch_store_u16 - andi $5, $5, 0xFFFF # make value 16bit - andi $4, $4, 0x3FE # wrap around/align address - sw $ra, REG_SAVE3($16) # preserve $ra - - save_registers - jal write_io_register16 # write the value out - sw $6, REG_PC($16) # save the PC (delay slot) - j write_io_epilogue # handle any state changes - nop - -execute_store_palette_u16: - region_check 5, patch_store_u16 - andi $2, $4, 0x3FE # wrap/align palette address - addu $2, $2, $16 - sh $5, 0x100($2) # palette_ram[address] = value - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - jr $ra # return - sh $1, 0x500($2) - -execute_store_vram_u16: - translate_region_vram_store_align16 patch_store_u16 - store_u16 vram - -execute_store_oam_u16: - translate_region 7, patch_store_u16, oam_ram, 0x3FE - lui $1, %hi(oam_update) # write non-zero to oam_update - sw $1, %lo(oam_update)($1) # cheap, but this is non-zero - store_u16 oam_ram 
- -execute_store_rtc_u16: - store_function write_rtc, 8, patch_store_u16, 0xFE - -execute_store_ignore9_u16: - ignore_region 9, patch_store_u16 - -execute_store_ignoreA_u16: - ignore_region 10, patch_store_u16 - -execute_store_ignoreB_u16: - ignore_region 11, patch_store_u16 - -execute_store_ignoreC_u16: - ignore_region 12, patch_store_u16 - -execute_store_eeprom_u16: - store_function write_eeprom, 13, patch_store_u16, 0x3FE - -execute_store_ignoreE_u16: - ignore_region 14, patch_store_u16 - -execute_store_ignoreF_u16: - ignore_high patch_store_u16 - -store_u16_ftable: - .long execute_store_ignore0_u16 # 0x00 BIOS - .long execute_store_ignore1_u16 # 0x01 open address - .long execute_store_ewram_u16 # 0x02 EWRAM - .long execute_store_iwram_u16 # 0x03 IWRAM - .long execute_store_io_u16 # 0x04 I/O registers - .long execute_store_palette_u16 # 0x05 Palette RAM - .long execute_store_vram_u16 # 0x06 VRAM - .long execute_store_oam_u16 # 0x07 OAM RAM - .long execute_store_rtc_u16 # 0x08 gamepak - .long execute_store_ignore9_u16 # 0x09 gamepak - .long execute_store_ignoreA_u16 # 0x0A gamepak - .long execute_store_ignoreB_u16 # 0x0B gamepak - .long execute_store_ignoreC_u16 # 0x0C gamepak - .long execute_store_eeprom_u16 # 0x0D gamepak/eeprom - .long execute_store_ignoreE_u16 # 0x0E Flash ROM/SRAM - .long execute_store_ignoreF_u16 # 0x0F open address - - -patch_store_u16: - patch_handler store_u16_ftable, 0x0F - - - - -# Unsigned 32bit store handlers - -execute_store_ignore0_u32: - ignore_region 0, patch_store_u32 - -execute_store_ignore1_u32: - ignore_region 1, patch_store_u32 - -execute_store_ewram_u32: - translate_region_ewram_store_align32 patch_store_u32 - store_u32_smc (ewram + 0x8000) - -execute_store_u32: -execute_store_iwram_u32: - translate_region 3, patch_store_u32, (iwram + 0x8000), 0x7FFC - store_u32_smc (iwram + 0x8000) - -execute_store_io_u32: - region_check 4, patch_store_u32 - nop - andi $4, $4, 0x3FC # wrap around/align address - sw $ra, REG_SAVE3($16) # 
preserve $ra - - save_registers - jal write_io_register32 # write the value out - sw $6, REG_PC($16) # save the PC (delay slot) - j write_io_epilogue # handle any state changes - nop - -execute_store_palette_u32: - region_check 5, patch_store_u32 - andi $2, $4, 0x3FC # wrap/align palette address - addu $2, $2, $16 - sw $5, 0x100($2) # palette_ram[address] = value - - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - sh $1, 0x500($2) - - srl $5, $5, 16 # shift down to next palette value - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - - jr $ra # return - sh $1, 0x502($2) - -execute_store_vram_u32: - translate_region_vram_store_align32 patch_store_u32 - store_u32 vram - -execute_store_oam_u32: - translate_region 7, patch_store_u32, oam_ram, 0x3FC - lui $1, %hi(oam_update) # write non-zero to oam_update - sw $1, %lo(oam_update)($1) # cheap, but this is non-zero - store_u32 oam_ram - -execute_store_ignore8_u32: - ignore_region 8, patch_store_u32 - -execute_store_ignore9_u32: - ignore_region 9, patch_store_u32 - -execute_store_ignoreA_u32: - ignore_region 10, patch_store_u32 - -execute_store_ignoreB_u32: - ignore_region 11, patch_store_u32 - -execute_store_ignoreC_u32: - ignore_region 12, patch_store_u32 - -execute_store_eeprom_u32: - store_function write_eeprom, 13, patch_store_u32, 0x3FC - -execute_store_ignoreE_u32: - ignore_region 14, patch_store_u32 - -execute_store_ignoreF_u32: - ignore_high patch_store_u32 - -store_u32_ftable: - .long execute_store_ignore0_u32 # 0x00 BIOS - .long execute_store_ignore1_u32 # 0x01 open address - .long execute_store_ewram_u32 # 0x02 EWRAM - .long execute_store_iwram_u32 # 0x03 IWRAM - .long execute_store_io_u32 # 0x04 I/O registers - .long execute_store_palette_u32 # 0x05 Palette RAM - .long execute_store_vram_u32 # 0x06 VRAM - .long execute_store_oam_u32 # 0x07 OAM RAM - .long 
execute_store_ignore8_u32 # 0x08 gamepak - .long execute_store_ignore9_u32 # 0x09 gamepak - .long execute_store_ignoreA_u32 # 0x0A gamepak - .long execute_store_ignoreB_u32 # 0x0B gamepak - .long execute_store_ignoreC_u32 # 0x0C gamepak - .long execute_store_eeprom_u32 # 0x0D gamepak/eeprom - .long execute_store_ignoreE_u32 # 0x0E Flash ROM/SRAM - .long execute_store_ignoreF_u32 # 0x0F open address - - -patch_store_u32: - patch_handler store_u32_ftable, 0x0F - - - -# Unsigned always aligned, a2 safe 32bit store handlers - -execute_store_ignore0_u32a: - ignore_region 0, patch_store_u32a - -execute_store_ignore1_u32a: - ignore_region 1, patch_store_u32a - -execute_store_ewram_u32a: - translate_region_ewram_store_align32 patch_store_u32a - store_u32 (ewram + 0x8000) - -execute_aligned_store32: -execute_store_iwram_u32a: - translate_region 3, patch_store_u32a, (iwram + 0x8000), 0x7FFC - store_u32 (iwram + 0x8000) - -execute_store_io_u32a: - region_check 4, patch_store_u32a + jr $2 # $2 = value returned nop - sw $6, REG_SAVE($16) # save a2 - sw $ra, REG_SAVE2($16) # save ra - - andi $4, $4, 0x3FC # wrap around/align address - - save_registers - jal write_io_register32 # write the value out - nop - - restore_registers - - lw $ra, REG_SAVE2($16) # restore ra - jr $ra - lw $6, REG_SAVE($16) # restore a2 - -execute_store_palette_u32a: - region_check 5, patch_store_u32a - andi $2, $4, 0x3FC # wrap/align palette address - addu $2, $2, $16 - sw $5, 0x100($2) # palette_ram[address] = value - - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - sh $1, 0x500($2) - - srl $5, $5, 16 # shift down to next palette value - sll $1, $5, 1 # make green 6bits - ins $1, $0, 0, 6 # make bottom bit 0 - ins $1, $5, 0, 5 # insert red channel into $1 - - jr $ra # return - sh $1, 0x502($2) - -execute_store_vram_u32a: - translate_region_vram_store_align32 patch_store_u32a - store_u32 vram - -execute_store_oam_u32a: - 
translate_region 7, patch_store_u32a, oam_ram, 0x3FC - lui $1, %hi(oam_update) # write non-zero to oam_update - sw $1, %lo(oam_update)($1) # cheap, but this is non-zero - store_u32 oam_ram - -execute_store_ignore8_u32a: - ignore_region 8, patch_store_u32a - -execute_store_ignore9_u32a: - ignore_region 9, patch_store_u32a - -execute_store_ignoreA_u32a: - ignore_region 10, patch_store_u32a - -execute_store_ignoreB_u32a: - ignore_region 11, patch_store_u32a - -execute_store_ignoreC_u32a: - ignore_region 12, patch_store_u32a - -execute_store_eeprom_u32a: - store_function_a write_eeprom, 13, patch_store_u32a, 0x3FC - -execute_store_ignoreE_u32a: - ignore_region 14, patch_store_u32a - -execute_store_ignoreF_u32a: - ignore_high patch_store_u32a - -store_u32a_ftable: - .long execute_store_ignore0_u32a# 0x00 BIOS - .long execute_store_ignore1_u32a# 0x01 open address - .long execute_store_ewram_u32a # 0x02 EWRAM - .long execute_store_iwram_u32a # 0x03 IWRAM - .long execute_store_io_u32a # 0x04 I/O registers - .long execute_store_palette_u32a# 0x05 Palette RAM - .long execute_store_vram_u32a # 0x06 VRAM - .long execute_store_oam_u32a # 0x07 OAM RAM - .long execute_store_ignore8_u32a# 0x08 gamepak - .long execute_store_ignore9_u32a# 0x09 gamepak - .long execute_store_ignoreA_u32a# 0x0A gamepak - .long execute_store_ignoreB_u32a# 0x0B gamepak - .long execute_store_ignoreC_u32a# 0x0C gamepak - .long execute_store_eeprom_u32a # 0x0D gamepak/eeprom - .long execute_store_ignoreE_u32a# 0x0E Flash ROM/SRAM - .long execute_store_ignoreF_u32a# 0x0F open address - -patch_store_u32a: - patch_handler store_u32a_ftable, 0x0F write_io_epilogue: @@ -2491,8 +309,7 @@ write_io_epilogue: alert_loop: - jal update_gba # process the next event - nop + cfncall update_gba, 0 # process the next event lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame bne $1, $0, return_to_main # Return to main thread now @@ -2519,16 +336,16 @@ no_alert: nop smc_dma: - jal flush_translation_cache_ram # 
flush translation cache - nop + cfncall flush_translation_cache_ram, 4 j lookup_pc nop smc_write: save_registers - jal flush_translation_cache_ram # flush translation cache - sw $6, REG_PC($16) # save PC (delay slot) + sw $6, REG_PC($16) # save PC + cfncall flush_translation_cache_ram, 4 +mips_lookup_pc: lookup_pc: lw $2, REG_CPSR($16) # $2 = cpsr andi $2, $2, 0x20 # isolate mode bit @@ -2536,17 +353,17 @@ lookup_pc: nop lookup_pc_thumb: - jal block_lookup_address_thumb # get Thumb address - lw $4, REG_PC($16) # load PC as arg 0 (delay slot) + lw $4, REG_PC($16) # load PC as arg 0 + cfncall block_lookup_address_thumb, 2 # get Thumb address restore_registers - jr $2 # jump to result + jr $2 # jump to result nop lookup_pc_arm: - jal block_lookup_address_arm # get ARM address - lw $4, REG_PC($16) # load PC as arg 0 (delay slot) + lw $4, REG_PC($16) # load PC as arg 0 + cfncall block_lookup_address_arm, 1 # get ARM address restore_registers - jr $2 # jump to result + jr $2 # jump to result nop # Return the current cpsr @@ -2578,8 +395,8 @@ execute_swi: ori $2, 0x13 # set mode to supervisor sw $2, REG_CPSR($16) # write back CPSR save_registers - jal set_cpu_mode # set the CPU mode to supervisor - li $4, 3 # 3 is supervisor mode (delay slot) + li $4, 3 # 3 is supervisor mode + cfncall set_cpu_mode, 5 # set the CPU mode to supervisor restore_registers lw $ra, ($sp) # pop $ra jr $ra # return @@ -2601,8 +418,7 @@ execute_spsr_restore: addiu $sp, $sp, -4 sw $ra, ($sp) save_registers - jal execute_spsr_restore_body # do the dirty work in this C function - nop + cfncall execute_spsr_restore_body, 6 # do the dirty work in this C function restore_registers addu $4, $2, $0 # move return value to $4 lw $ra, ($sp) @@ -2624,27 +440,26 @@ execute_store_cpsr: and $2, $2, $4 # $2 = (cpsr & (~store_mask)) or $1, $1, $2 # $1 = new cpsr combined with old extract_flags_body # extract flags from $1 - addiu $sp, $sp, -4 - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) save_registers - jal 
execute_store_cpsr_body # do the dirty work in this C function - addu $4, $1, $0 # load the new CPSR (delay slot) + addu $4, $1, $0 # load the new CPSR + cfncall execute_store_cpsr_body, 7 # do the dirty work in this C function bne $2, $0, changed_pc_cpsr # this could have changed the pc nop restore_registers - lw $ra, ($sp) + lw $ra, REG_SAVE3($16) jr $ra - addiu $sp, $sp, 4 + nop changed_pc_cpsr: - jal block_lookup_address_arm # GBA address is in $4 - addu $4, $2, $0 # load new address in $4 (delay slot) - restore_registers # restore registers - jr $2 # jump to the new address - addiu $sp, $sp, 4 # get rid of the old ra (delay slot) + addu $4, $2, $0 # load new address in $4 + cfncall block_lookup_address_arm, 1 # GBA address is in $4 + restore_registers # restore registers + jr $2 # jump to the new address + nop # $4: new spsr @@ -2747,8 +562,9 @@ ror_zero_shift: rotrv $4, $4, $5 # return (value ror shift) delay # $4: cycle counter argument +# $5: pointer to reg -execute_arm_translate: +execute_arm_translate_internal: add $sp, $sp, -48 # Store the main thread context sw $s0, 0($sp) sw $s1, 4($sp) @@ -2761,9 +577,7 @@ execute_arm_translate: sw $fp, 32($sp) sw $ra, 36($sp) - lui $16, %hi(reg) # load reg address into base reg - addiu $16, %lo(reg) - + move $16, $5 sw $28, GP_SAVE($16) addu $17, $4, $0 # load cycle counter register @@ -2780,15 +594,13 @@ execute_arm_translate: bne $1, $0, 1f lw $4, REG_PC($16) # load PC into $4 (delay) - jal block_lookup_address_arm # lookup initial jump address - nop + cfncall block_lookup_address_arm, 1 restore_registers # load initial register values jr $2 # jump to return nop 1: - jal block_lookup_address_thumb # lookup initial jump address - nop + cfncall block_lookup_address_thumb, 2 restore_registers # load initial register values jr $2 # jump to return nop @@ -2797,11 +609,14 @@ execute_arm_translate: .data .align 6 +memory_map_write: + .space 0x8000 + memory_map_read: .space 0x8000 -# This must be between memory_map_read 
and memory_map_write because it's used -# to calculate their addresses elsewhere in this file. +# memory_map_read is immediately before arm_reg on purpose (offset used +# to access it, via lw op). We do not use write though. reg: .space 0x100 @@ -2815,5 +630,22 @@ spsr: reg_mode: .space 196 # u32[7][7]; -memory_map_write: - .space 0x8000 +# Here we store: +# void *tmemld[11][16]; # 10 types of loads +# void *tmemst[ 4][16]; # 3 types of stores +# Essentially a list of pointers to the different mem load handlers +# Keep them close for a fast patcher. +tmemld: + .space 704 +tmemst: + .space 256 +fnptrs: + .long update_gba # 0 + .long block_lookup_address_arm # 1 + .long block_lookup_address_thumb # 2 + .long block_lookup_address_dual # 3 + .long flush_translation_cache_ram # 4 + .long set_cpu_mode # 5 + .long execute_spsr_restore_body # 6 + .long execute_store_cpsr_body # 7 + diff --git a/x86/x86_emit.h b/x86/x86_emit.h index 4839618..67a3dc2 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -2299,4 +2299,6 @@ void function_cc swi_hle_div(void) generate_update_pc(pc); \ generate_indirect_branch_no_cycle_update(type) \ +void init_emitter(void) {} + #endif |