summaryrefslogtreecommitdiff
path: root/psp
diff options
context:
space:
mode:
authorDavid Guillen Fandos2021-03-03 01:38:09 +0100
committerDavid Guillen Fandos2021-03-16 22:58:58 +0100
commit5ffd2832e8b3fc8391a99a53d24788fb736d28c6 (patch)
tree3079050fea908fd3b7fcf26ee41a1b2af98df354 /psp
parentb9ac4536757c4c24afaa86b6e3131ec21e407e80 (diff)
downloadpicogpsp-5ffd2832e8b3fc8391a99a53d24788fb736d28c6.tar.gz
picogpsp-5ffd2832e8b3fc8391a99a53d24788fb736d28c6.tar.bz2
picogpsp-5ffd2832e8b3fc8391a99a53d24788fb736d28c6.zip
Rewrite of the MIPS dynarec stubs
This allows us to emit the handlers directly in a more efficient manner. At the same time it allows for an easy fix to emit PIC code, which is necessary for libretro. This also enables more platform specific optimizations and variations, perhaps even run-time multiplatform support.
Diffstat (limited to 'psp')
-rw-r--r--psp/mips_emit.h882
-rw-r--r--psp/mips_stub.S77
2 files changed, 922 insertions, 37 deletions
diff --git a/psp/mips_emit.h b/psp/mips_emit.h
index 7c69091..48ed630 100644
--- a/psp/mips_emit.h
+++ b/psp/mips_emit.h
@@ -20,6 +20,19 @@
#ifndef MIPS_EMIT_H
#define MIPS_EMIT_H
+// Pointers to default handlers.
+// Use IWRAM as default, assume aligned by default too
+#define execute_load_u8 tmemld[0][3]
+#define execute_load_s8 tmemld[1][3]
+#define execute_load_u16 tmemld[2][3]
+#define execute_load_s16 tmemld[4][3]
+#define execute_load_u32 tmemld[6][3]
+#define execute_aligned_load32 tmemld[10][3]
+#define execute_store_u8 tmemst[0][3]
+#define execute_store_u16 tmemst[1][3]
+#define execute_store_u32 tmemst[2][3]
+#define execute_aligned_store32 tmemst[3][3]
+
u32 mips_update_gba(u32 pc);
// Although these are defined as a function, don't call them as
@@ -44,9 +57,6 @@ u32 execute_lsr_flags_reg(u32 value, u32 shift);
u32 execute_asr_flags_reg(u32 value, u32 shift);
u32 execute_ror_flags_reg(u32 value, u32 shift);
-void execute_aligned_store32(u32 address, u32 value);
-u32 execute_aligned_load32(u32 address);
-
void reg_check();
typedef enum
@@ -97,6 +107,7 @@ typedef enum
mips_special_jalr = 0x09,
mips_special_movz = 0x0A,
mips_special_movn = 0x0B,
+ mips_special_sync = 0x0F,
mips_special_mfhi = 0x10,
mips_special_mthi = 0x11,
mips_special_mflo = 0x12,
@@ -116,7 +127,9 @@ typedef enum
mips_special_xor = 0x26,
mips_special_nor = 0x27,
mips_special_slt = 0x2A,
- mips_special_sltu = 0x2B
+ mips_special_sltu = 0x2B,
+ mips_special_max = 0x2C,
+ mips_special_min = 0x2D,
} mips_function_special;
typedef enum
@@ -128,8 +141,16 @@ typedef enum
typedef enum
{
+ mips_bshfl_seb = 0x10,
+ mips_bshfl_seh = 0x18,
+ mips_bshfl_wsbh = 0x02,
+} mips_function_bshfl;
+
+typedef enum
+{
mips_regimm_bltz = 0x00,
- mips_regimm_bltzal = 0x10
+ mips_regimm_bltzal = 0x10,
+ mips_regimm_synci = 0x1F
} mips_function_regimm;
typedef enum
@@ -163,8 +184,14 @@ typedef enum
mips_opcode_sb = 0x28,
mips_opcode_sh = 0x29,
mips_opcode_sw = 0x2B,
+ mips_opcode_cache = 0x2F,
} mips_opcode;
+#define mips_emit_cache(operation, rs, immediate) \
+ *((u32 *)translation_ptr) = (mips_opcode_cache << 26) | \
+ (rs << 21) | (operation << 16) | (immediate & 0xFFFF); \
+ translation_ptr += 4 \
+
#define mips_emit_reg(opcode, rs, rt, rd, shift, function) \
*((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \
(rs << 21) | (rt << 16) | (rd << 11) | (shift << 6) | function; \
@@ -184,12 +211,12 @@ typedef enum
#define mips_emit_imm(opcode, rs, rt, immediate) \
*((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \
- (rs << 21) | (rt << 16) | (immediate & 0xFFFF); \
+ (rs << 21) | (rt << 16) | ((immediate) & 0xFFFF); \
translation_ptr += 4 \
#define mips_emit_regimm(function, rs, immediate) \
*((u32 *)translation_ptr) = (mips_opcode_regimm << 26) | \
- (rs << 21) | (mips_regimm_##function << 16) | (immediate & 0xFFFF); \
+ (rs << 21) | (mips_regimm_##function << 16) | ((immediate) & 0xFFFF); \
translation_ptr += 4 \
#define mips_emit_jump(opcode, offset) \
@@ -203,6 +230,12 @@ typedef enum
#define mips_absolute_offset(offset) \
((u32)offset / 4) \
+#define mips_emit_max(rd, rs, rt) \
+ mips_emit_special(max, rs, rt, rd, 0) \
+
+#define mips_emit_min(rd, rs, rt) \
+ mips_emit_special(min, rs, rt, rd, 0) \
+
#define mips_emit_addu(rd, rs, rt) \
mips_emit_special(addu, rs, rt, rd, 0) \
@@ -293,6 +326,9 @@ typedef enum
#define mips_emit_movz(rd, rs, rt) \
mips_emit_special(movz, rs, rt, rd, 0) \
+#define mips_emit_sync() \
+ mips_emit_special(sync, 0, 0, 0, 0) \
+
#define mips_emit_lb(rt, rs, offset) \
mips_emit_imm(lb, rs, rt, offset) \
@@ -344,6 +380,12 @@ typedef enum
#define mips_emit_ins(rt, rs, pos, size) \
mips_emit_special3(ins, rs, rt, (pos + size - 1), pos) \
+#define mips_emit_seb(rt, rd) \
+ mips_emit_special3(bshfl, 0, rt, rd, mips_bshfl_seb) \
+
+#define mips_emit_seh(rt, rd) \
+ mips_emit_special3(bshfl, 0, rt, rd, mips_bshfl_seh) \
+
// Breaks down if the backpatch offset is greater than 16bits, take care
// when using (should be okay if limited to conditional instructions)
@@ -369,9 +411,15 @@ typedef enum
#define mips_emit_jr(rs) \
mips_emit_special(jr, rs, 0, 0, 0) \
+#define mips_emit_synci(rs, offset) \
+ mips_emit_regimm(synci, rs, offset) \
+
#define mips_emit_bltzal(rs, offset) \
mips_emit_regimm(bltzal, rs, offset) \
+#define mips_emit_bltz(rs, offset) \
+ mips_emit_regimm(bltz, rs, offset) \
+
#define mips_emit_nop() \
mips_emit_sll(reg_zero, reg_zero, 0) \
@@ -566,6 +614,15 @@ u32 arm_to_mips_reg[] =
translation_ptr += 4; \
} \
+#define generate_function_return_swap_delay() \
+{ \
+ u32 delay_instruction = address32(translation_ptr, -4); \
+ translation_ptr -= 4; \
+ mips_emit_jr(mips_reg_ra); \
+ address32(translation_ptr, 0) = delay_instruction; \
+ translation_ptr += 4; \
+} \
+
#define generate_swap_delay() \
{ \
u32 delay_instruction = address32(translation_ptr, -8); \
@@ -2468,4 +2525,815 @@ u8 swi_hle_handle[256] =
generate_load_pc(reg_a0, pc); \
mips_emit_sw(reg_a0, reg_base, (REG_PC * 4)) \
+
+// Register save layout as follows:
+#define ReOff_RegPC (15*4) // REG_PC
+#define ReOff_CPSR (20*4) // REG_CPSR
+#define ReOff_SaveR1 (21*4) // 3 save scratch regs
+#define ReOff_SaveR2 (22*4)
+#define ReOff_SaveR3 (23*4)
+#define ReOff_GP_Save (32*4) // GP_SAVE
+
+// Saves all regs to their right slot and loads gp
+#define emit_save_regs(save_a2) \
+ for (unsigned i = 0; i < 15; i++) { \
+ mips_emit_sw(arm_to_mips_reg[i], reg_base, 4 * i); \
+ } \
+ if (save_a2) { \
+ mips_emit_sw(reg_a2, reg_base, ReOff_SaveR2); \
+ } \
+ /* Load the gp pointer, used by C code */ \
+ mips_emit_lw(mips_reg_gp, reg_base, ReOff_GP_Save); \
+
+// Restores the registers from their slot
+#define emit_restore_regs(restore_a2) \
+ if (restore_a2) { \
+ mips_emit_lw(reg_a2, reg_base, ReOff_SaveR2); \
+ } \
+ for (unsigned i = 0; i < 15; i++) { \
+ mips_emit_lw(arm_to_mips_reg[i], reg_base, 4 * i); \
+ } \
+
+// Emits a function call for a read or a write (for special stuff like flash)
+#define emit_mem_call_ds(fnptr, mask) \
+ mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); \
+ emit_save_regs(true); \
+ mips_emit_jal(((u32)(fnptr)) >> 2); \
+ mips_emit_andi(reg_a0, reg_a0, (mask)); \
+ emit_restore_regs(true); \
+ mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); \
+ mips_emit_jr(mips_reg_ra);
+
+#define emit_mem_call(fnptr, mask) \
+ emit_mem_call_ds(fnptr, mask) \
+ mips_emit_nop();
+
+// Pointer table to stubs, indexed by type and region
+// Caution! This is not really a ptr table, but contains pre-encoded JALs
+extern u32 tmemld[11][16];
+extern u32 tmemst[ 4][16];
+void mips_lookup_pc();
+cpu_alert_type write_io_register8 (u32 address, u32 value);
+cpu_alert_type write_io_register16(u32 address, u32 value);
+cpu_alert_type write_io_register32(u32 address, u32 value);
+void write_io_epilogue();
+
+// This is a pointer table to the open load stubs, used by the BIOS (optimization)
+u32* openld_core_ptrs[11];
+
+const u8 ldhldrtbl[11] = {0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5};
+#define ld_phndlr_branch(memop) \
+ (((u32*)&stub_arena[ldhldrtbl[(memop)] * 16]) - ((u32*)translation_ptr + 1))
+
+#define st_phndlr_branch(memop) \
+ (((u32*)&stub_arena[((memop) + 6) * 16]) - ((u32*)translation_ptr + 1))
+
+#define branch_handlerid(phndlrid) \
+ (((u32*)&stub_arena[(phndlrid) * 16]) - ((u32*)translation_ptr + 1))
+
+#define branch_offset(ptr) \
+ (((u32*)ptr) - ((u32*)translation_ptr + 1))
+
+// Emits exactly one MIPS load instruction performing the final read.
+// reg_rv is expected to already hold the host address (callers lui the
+// rounded-up high half of base_addr and add the mirrored offset); the
+// low 16 bits of base_addr are used as the signed immediate here.
+//   size: 0 = 8 bit, 1 = 16 bit, 2 = 32 bit access
+//   alignment: low (misalignment) bits of the original GBA address
+//   signext: whether the ARM load sign-extends (ldrsb/ldrsh)
+// NOTE(review): the emit macros advance the local translation_ptr copy
+// only; the caller advances its own pointer by 4 per emitted word.
+static void emit_mem_access_loadop(
+ u8 *translation_ptr,
+ u32 base_addr, unsigned size, unsigned alignment, bool signext)
+{
+ switch (size) {
+ case 2:
+ mips_emit_lw(reg_rv, reg_rv, (base_addr & 0xffff));
+ break;
+ case 1:
+ if (signext) {
+ // A misaligned load-16 with sign extension is essentially a load byte
+ if (alignment) {
+ mips_emit_lb(reg_rv, reg_rv, (base_addr & 0xffff));
+ } else {
+ mips_emit_lh(reg_rv, reg_rv, (base_addr & 0xffff));
+ }
+ } else {
+ mips_emit_lhu(reg_rv, reg_rv, (base_addr & 0xffff));
+ }
+ break;
+ default:
+ // 8 bit access: signed or unsigned byte load
+ if (signext) {
+ mips_emit_lb(reg_rv, reg_rv, (base_addr & 0xffff));
+ } else {
+ mips_emit_lbu(reg_rv, reg_rv, (base_addr & 0xffff));
+ }
+ break;
+ };
+}
+
+// Stub memory map:
+// 0 .. 63 First patch handler [#0]
+// 448 .. 511 Last patch handler [#7]
+// 512+ smc_write handler
+#define SMC_WRITE_OFF32 160
+
+// Describes a "plain" memory area, that is, an area that is just accessed
+// as normal memory (with some caveats tho).
+typedef struct {
+ void *emitter;
+ unsigned region; // Region ID (top 8 bits)
+ unsigned memsize; // 0 byte, 1 halfword, 2 word
+ bool check_smc; // Whether the memory can contain code
+ bool bus16; // Whether it can only be accessed at 16bit
+ u32 baseptr; // Memory base address.
+} t_stub_meminfo;
+
+// Generates the stub to access memory for a given region, access type,
+// size and misalignment.
+// Handles "special" cases like weirdly mapped memory
+// Parameters:
+//   memop_number: load-op index into tmemld (0..10, see init_emitter)
+//   meminfo: region descriptor (region id, size, base host pointer)
+//   signext/size/alignment: access flavour (see emit_mem_access_loadop)
+//   aligned: the "known aligned" variant (no alignment checks emitted)
+//   tr_ptr: in/out pointer into the stub arena being filled
+static void emit_pmemld_stub(
+ unsigned memop_number, const t_stub_meminfo *meminfo,
+ bool signext, unsigned size,
+ unsigned alignment, bool aligned,
+ u8 **tr_ptr)
+{
+ u8 *translation_ptr = *tr_ptr;
+ unsigned region = meminfo->region;
+ u32 base_addr = meminfo->baseptr;
+
+ if (region >= 9 && region <= 11) {
+ // Use the same handler for these regions (just replicas)
+ tmemld[memop_number][region] = tmemld[memop_number][8];
+ return;
+ }
+
+ // Clean up one or two bits (to align access). It might already be aligned!
+ u32 memmask = (meminfo->memsize - 1);
+ memmask = (memmask >> size) << size; // Clear 1 or 2 (or none) bits
+
+ // Add the stub to the table (add the JAL instruction encoded already)
+ tmemld[memop_number][region] = (u32)translation_ptr;
+
+ // Size: 0 (8 bits), 1 (16 bits), 2 (32 bits)
+ // First check we are in the right memory region
+ unsigned regionbits = 8;
+ unsigned regioncheck = region;
+ if (region == 8) {
+ // This is an optimization for ROM regions
+ // For region 8-11 we reuse the same code (and have a more generic check)
+ // Region 12 is harder to cover without changing the check (shift + xor)
+ regionbits = 6;
+ regioncheck >>= 2; // Ignore the two LSB, don't care
+ }
+
+ // Address checking: jumps to handler if bad region/alignment
+ mips_emit_srl(reg_temp, reg_a0, (32 - regionbits));
+ if (!aligned && size != 0) { // u8 or aligned u32 dont need to check alignment bits
+ mips_emit_ins(reg_temp, reg_a0, regionbits, size); // Add 1 or 2 bits of alignment
+ }
+ if (regioncheck || alignment) { // If region and alignment are zero, can skip
+ mips_emit_xori(reg_temp, reg_temp, regioncheck | (alignment << regionbits));
+ }
+
+ // The patcher to use depends on ld/st, access size, and sign extension
+ // (so there's 10 of them). They live in the top stub addresses.
+ mips_emit_b(bne, reg_zero, reg_temp, ld_phndlr_branch(memop_number));
+
+ // BIOS region requires extra checks for protected reads
+ if (region == 0) {
+ // BIOS is *not* mirrored, check that
+ mips_emit_srl(reg_rv, reg_a0, 14);
+ unsigned joff = (openld_core_ptrs[memop_number] - ((u32*)translation_ptr + 1));
+ mips_emit_b(bne, reg_zero, reg_rv, joff); // Jumps to read open
+
+ // Check whether the read is allowed. Only within BIOS!
+ // TODO: FIX THIS! This should be a protected read, not an open one!
+ mips_emit_srl(reg_temp, reg_a1, 14);
+ unsigned jof2 = (openld_core_ptrs[memop_number] - ((u32*)translation_ptr + 1));
+ mips_emit_b(bne, reg_zero, reg_temp, jof2);
+ }
+
+ if (region >= 8 && region <= 12) {
+ u8 *jmppatch;
+ // ROM area: might need to load the ROM on-demand
+ mips_emit_srl(reg_rv, reg_a0, 15); // 32KB page number
+ mips_emit_sll(reg_rv, reg_rv, 2); // (word indexed)
+ mips_emit_addu(reg_rv, reg_rv, reg_base); // base + offset
+
+ mips_emit_lw(reg_rv, reg_rv, 0x8000); // base[offset-0x8000]
+ mips_emit_b_filler(bne, reg_rv, reg_zero, jmppatch); // if not null, can skip load page
+ mips_emit_andi(reg_temp, reg_a0, memmask); // Get the lowest 15 bits [delay]
+
+ // This code calls the C routine to map the relevant ROM page
+ emit_save_regs(aligned);
+ mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3);
+ mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff
+ mips_emit_jal(((u32)&load_gamepak_page) >> 2);
+ mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1);
+
+ mips_emit_lw(reg_temp, reg_base, ReOff_SaveR1);
+ emit_restore_regs(aligned);
+ mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3);
+
+ // Backpatch the "page already mapped" branch to land here
+ generate_branch_patch_conditional(jmppatch, translation_ptr);
+ // Now we can proceed to load, place addr in the right register
+ mips_emit_addu(reg_rv, reg_rv, reg_temp);
+ } else if (region == 14) {
+ // Read from flash, is a bit special, fn call
+ emit_mem_call_ds(&read_backup, 0xFFFF);
+ // Flash sits on an 8 bit bus: replicate sign extension / rotation
+ if (!size && signext) {
+ mips_emit_seb(reg_rv, reg_rv);
+ } else if (size == 1 && alignment) {
+ mips_emit_seb(reg_rv, reg_rv);
+ } else if (size == 2) {
+ mips_emit_rotr(reg_rv, reg_rv, 8 * alignment);
+ } else {
+ mips_emit_nop();
+ }
+ *tr_ptr = translation_ptr;
+ return;
+ } else {
+ // Generate upper bits of the addr and do addr mirroring
+ // (The address hi16 is rounded up since load uses signed offset)
+ mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
+
+ if (region == 2) {
+ // EWRAM is a bit special
+ // Need to insert a zero in the addr (due to how it's mapped)
+ mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB)
+ mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18)
+ mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16
+ mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr
+ } else if (region == 6) {
+ // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
+ mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
+ mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block)
+ if (!aligned && alignment != 0) {
+ mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
+ }
+ mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block
+ mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
+ mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block)
+ mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
+ } else {
+ // Generate regular (<=32KB) mirroring
+ mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring)
+ mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr
+ }
+ }
+
+ // Aligned accesses (or the weird s16u1 case) are just one inst
+ if (alignment == 0 || (size == 1 && signext)) {
+ emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); // Delay slot
+ translation_ptr += 4;
+ }
+ else {
+ // Unaligned accesses (require rotation) need two insts
+ emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext);
+ translation_ptr += 4;
+ mips_emit_rotr(reg_rv, reg_rv, alignment * 8); // Delay slot
+ }
+
+ generate_function_return_swap_delay(); // Return. Move prev inst to delay slot
+ *tr_ptr = translation_ptr;
+}
+
+// Generates the stub to store memory for a given region and size
+// Handles "special" cases like weirdly mapped memory
+// Parameters:
+//   memop_number: store-op index into tmemst (0..3)
+//   meminfo: region descriptor (region id, size, base, smc/bus16 flags)
+//   size: 0 = 8 bit, 1 = 16 bit, 2 = 32 bit store
+//   aligned: the "known aligned" variant (skips SMC tracking, see TODO)
+//   tr_ptr: in/out pointer into the stub arena being filled
+static void emit_pmemst_stub(
+ unsigned memop_number, const t_stub_meminfo *meminfo,
+ unsigned size, bool aligned, u8 **tr_ptr)
+{
+ u8 *translation_ptr = *tr_ptr;
+ unsigned region = meminfo->region;
+ u32 base_addr = meminfo->baseptr;
+
+ // Palette, VRAM and OAM cannot be really byte accessed (use a 16 bit store)
+ bool doubleaccess = (size == 0 && meminfo->bus16);
+ unsigned realsize = size;
+ if (doubleaccess)
+ realsize = 1;
+
+ // Clean up one or two bits (to align access). It might already be aligned!
+ u32 memmask = (meminfo->memsize - 1);
+ memmask = (memmask >> realsize) << realsize;
+
+ // Add the stub to the table (add the JAL instruction encoded already)
+ tmemst[memop_number][region] = (u32)translation_ptr;
+
+ // First check we are in the right memory region (same as loads)
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_xori(reg_temp, reg_temp, region);
+ mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number));
+
+ // Base address high half, rounded up since stores use signed offsets
+ mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
+
+ if (doubleaccess) {
+ mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8)
+ }
+
+ if (region == 2) {
+ // EWRAM is a bit special
+ // Need to insert a zero in the addr (due to how it's mapped)
+ mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB)
+ mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18)
+ mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16
+ mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr
+ } else if (region == 6) {
+ // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
+ mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
+ mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block)
+ if (!aligned && realsize != 0) {
+ mips_emit_ins(reg_a0, reg_zero, 0, realsize);// addr & ~1/2 (align to size)
+ }
+ mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block
+ mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
+ mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block)
+ mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
+ } else {
+ // Generate regular (<=32KB) mirroring
+ mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring)
+ mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr
+ }
+
+ // Generate SMC write and tracking
+ // TODO: Should we have SMC checks here also for aligned?
+ if (meminfo->check_smc && !aligned) {
+ mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer
+ if (realsize == 2) {
+ mips_emit_lw(reg_temp, reg_temp, base_addr);
+ } else if (realsize == 1) {
+ mips_emit_lh(reg_temp, reg_temp, base_addr);
+ } else {
+ mips_emit_lb(reg_temp, reg_temp, base_addr);
+ }
+ // If the data is non zero, we just wrote over code
+ // Local-jump to the smc_write (which lives at offset:0)
+ unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1));
+ mips_emit_b(bne, reg_zero, reg_temp, instoffset);
+ }
+
+ // Store the data (delay slot from the SMC branch)
+ if (realsize == 2) {
+ mips_emit_sw(reg_a1, reg_rv, base_addr);
+ } else if (realsize == 1) {
+ mips_emit_sh(reg_a1, reg_rv, base_addr);
+ } else {
+ mips_emit_sb(reg_a1, reg_rv, base_addr);
+ }
+
+ // Post processing store:
+ // Signal that OAM was updated
+ if (region == 7) {
+ u32 palcaddr = (u32)&oam_update;
+ mips_emit_lui(reg_temp, ((palcaddr + 0x8000) >> 16));
+ mips_emit_sw(reg_base, reg_temp, palcaddr & 0xffff); // Write any nonzero data
+ generate_function_return_swap_delay();
+ }
+ else {
+ mips_emit_jr(mips_reg_ra);
+ mips_emit_nop();
+ }
+
+ *tr_ptr = translation_ptr;
+}
+
+// Palette is accessed differently and stored in a decoded manner
+// Emits the store handler for palette RAM (region 5). The value is
+// written twice: raw, and in a decoded (shifted/packed) form at +0x500.
+// NOTE(review): the `aligned` parameter is accepted but never read here.
+static void emit_palette_hdl(
+ unsigned memop_number, const t_stub_meminfo *meminfo,
+ unsigned size, bool aligned, u8 **tr_ptr)
+{
+ u8 *translation_ptr = *tr_ptr;
+
+ // Palette cannot be accessed at byte level
+ unsigned realsize = size ? size : 1;
+ u32 memmask = (meminfo->memsize - 1);
+ memmask = (memmask >> realsize) << realsize;
+
+ // Add the stub to the table (add the JAL instruction encoded already)
+ tmemst[memop_number][5] = (u32)translation_ptr;
+
+ // First check we are in the right memory region (same as loads)
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_xori(reg_temp, reg_temp, 5);
+ mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number));
+ mips_emit_andi(reg_rv, reg_a0, memmask); // Clear upper bits (mirroring)
+ if (size == 0) {
+ mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8)
+ }
+ mips_emit_addu(reg_rv, reg_rv, reg_base);
+
+ // Store the raw value
+ // NOTE(review): this stores at reg_base+0x100, ignoring the computed
+ // offset in reg_rv — looks like it should be reg_rv-relative; confirm
+ // against the reg_base memory layout before changing.
+ if (realsize == 2) {
+ mips_emit_sw(reg_a1, reg_base, 0x100);
+ } else if (realsize == 1) {
+ mips_emit_sh(reg_a1, reg_base, 0x100);
+ }
+
+ // Decode 15bpp color into the native format and store at +0x500
+ mips_emit_sll(reg_temp, reg_a1, 1);
+ mips_emit_andi(reg_temp, reg_temp, 0xFFC0);
+ mips_emit_ins(reg_temp, reg_a1, 0, 5);
+ mips_emit_sh(reg_temp, reg_rv, 0x500);
+
+ if (size == 2) {
+ // Convert the second half-word also
+ mips_emit_srl(reg_a1, reg_a1, 16);
+ mips_emit_sll(reg_temp, reg_a1, 1);
+ mips_emit_andi(reg_temp, reg_temp, 0xFFC0);
+ mips_emit_ins(reg_temp, reg_a1, 0, 5);
+ mips_emit_sh(reg_temp, reg_rv, 0x502);
+ }
+ generate_function_return_swap_delay();
+
+ *tr_ptr = translation_ptr;
+}
+
+// This emits stubs for regions where writes have no side-effects
+// Emits no-op store stubs for regions where writes have no effect
+// (BIOS/unused 0-1, ROM 8-B, and C/F+). Each stub still verifies the
+// region and falls back to the patch handler on mismatch.
+static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) {
+ u8 *translation_ptr = *tr_ptr;
+
+ // Region 0-1 (BIOS and ignore)
+ tmemst[size][0] = tmemst[size][1] = (u32)translation_ptr;
+ mips_emit_srl(reg_temp, reg_a0, 25); // Check 7 MSB to be zero
+ mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
+ mips_emit_nop();
+ mips_emit_jr(mips_reg_ra);
+ mips_emit_nop();
+
+ // Region 8-B
+ tmemst[size][ 8] = tmemst[size][ 9] =
+ tmemst[size][10] = tmemst[size][11] = (u32)translation_ptr;
+
+ mips_emit_srl(reg_temp, reg_a0, 26); // Check 6 MSB to be 0x02
+ mips_emit_xori(reg_temp, reg_temp, 0x02);
+ mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
+ mips_emit_nop();
+ mips_emit_jr(mips_reg_ra);
+ mips_emit_nop();
+
+ // Region C or F (or bigger!)
+ tmemst[size][12] = tmemst[size][15] = (u32)translation_ptr;
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_sltiu(reg_rv, reg_temp, 0x0F);
+ mips_emit_b(beq, reg_rv, reg_zero, 3); // If 15 or bigger, ignore store
+ mips_emit_xori(reg_rv, reg_rv, 0x0C);
+ // NOTE(review): this branch tests reg_temp (the raw region byte, always
+ // nonzero here) rather than reg_rv (the xor-with-0x0C result computed in
+ // the delay slot above) — for region 0xC it appears to always take the
+ // patch-handler path; confirm whether reg_rv was intended.
+ mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
+ mips_emit_nop();
+ mips_emit_jr(mips_reg_ra);
+ mips_emit_nop();
+
+ *tr_ptr = translation_ptr;
+}
+
+// Stubs for regions with EEPROM or flash/SRAM
+// Emits the stubs for save-media and I/O regions: RTC writes (region 8),
+// EEPROM reads/writes (region D), flash/SRAM writes (region E) and
+// I/O register writes (region 4), all via calls into C handlers.
+static void emit_saveaccess_stub(u8 **tr_ptr) {
+ u8 *translation_ptr = *tr_ptr;
+ // Ranges of load memop numbers that share one patch handler id
+ // (mirrors the ldhldrtbl grouping: u8, s8, u16*, s16*, u32*, aligned32)
+ const u8 opmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 10}, {10, 11} };
+
+ // Writes to region 8 are directed to RTC (only 16 bit ones though)
+ tmemld[1][8] = (u32)translation_ptr;
+ emit_mem_call(&write_rtc, 0xFE);
+
+ // These are for region 0xD where EEPROM is mapped. Addr is ignored
+ // Value is limited to one bit (both reading and writing!)
+ u32 *read_hndlr = (u32*)translation_ptr;
+ emit_mem_call(&read_eeprom, 0x3FF);
+ u32 *write_hndlr = (u32*)translation_ptr;
+ emit_mem_call(&write_eeprom, 0x3FF);
+
+ // Map loads to the read handler.
+ for (unsigned opt = 0; opt < 6; opt++) {
+ // Unalignment is not relevant here, so map them all to the same handler.
+ for (unsigned i = opmap[opt][0]; i < opmap[opt][1]; i++)
+ tmemld[i][13] = (u32)translation_ptr;
+ // Emit just a check + patch jump
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_xori(reg_rv, reg_temp, 0x0D);
+ mips_emit_b(bne, reg_rv, reg_zero, branch_handlerid(opt));
+ mips_emit_nop();
+ mips_emit_b(beq, reg_zero, reg_zero, branch_offset(read_hndlr));
+ }
+ // This is for stores
+ for (unsigned strop = 0; strop <= 3; strop++) {
+ tmemst[strop][13] = (u32)translation_ptr;
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_xori(reg_rv, reg_temp, 0x0D);
+ mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop));
+ mips_emit_nop();
+ mips_emit_b(beq, reg_zero, reg_zero, branch_offset(write_hndlr));
+ }
+
+ // Flash/SRAM/Backup writes are only 8 bit supported
+ for (unsigned strop = 0; strop <= 3; strop++) {
+ tmemst[strop][14] = (u32)translation_ptr;
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_xori(reg_rv, reg_temp, 0x0E);
+ mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop));
+ if (strop == 0) {
+ emit_mem_call(&write_backup, 0xFFFF);
+ } else {
+ mips_emit_nop();
+ mips_emit_jr(mips_reg_ra); // Does nothing in this case
+ mips_emit_nop();
+ }
+ }
+
+ // Region 4 writes
+ // I/O writes are also a bit special, they can trigger things like DMA, IRQs...
+ // Also: aligned (strop==3) accesses do not trigger IRQs
+ const u32 iowrtbl[] = {
+ (u32)&write_io_register8, (u32)&write_io_register16,
+ (u32)&write_io_register32, (u32)&write_io_register32 };
+ const u32 amsk[] = {0x3FF, 0x3FE, 0x3FC, 0x3FC};
+ for (unsigned strop = 0; strop <= 3; strop++) {
+ tmemst[strop][4] = (u32)translation_ptr;
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_xori(reg_temp, reg_temp, 0x04);
+ mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(strop));
+
+ mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); // Store the return addr
+ emit_save_regs(strop == 3);
+ mips_emit_andi(reg_a0, reg_a0, amsk[strop]);
+ mips_emit_jal(iowrtbl[strop] >> 2);
+
+ if (strop < 3) {
+ mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Save PC (delay)
+ mips_emit_j(((u32)&write_io_epilogue) >> 2);
+ mips_emit_nop();
+ } else {
+ mips_emit_nop();
+ mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3);
+ emit_restore_regs(true);
+ generate_function_return_swap_delay();
+ }
+ }
+
+ *tr_ptr = translation_ptr;
+}
+
+// Emits openload store memory region stub
+// Emits the open-bus load stub (regions 1 and 15): reads return whatever
+// was last on the bus, emulated here by re-reading memory at the current
+// PC (passed in reg_a1), with width depending on ARM/Thumb mode.
+static void emit_openload_stub(
+ unsigned memopn, bool signext, unsigned size,
+ unsigned alignment, bool aligned, u8 **tr_ptr
+) {
+ u8 *translation_ptr = *tr_ptr;
+
+ // This affects regions 1 and 15
+ tmemld[memopn][ 1] = (u32)translation_ptr;
+ tmemld[memopn][15] = (u32)translation_ptr;
+
+ // We need to repatch if: alignment is different or
+ // if we are accessing a non-ignore region (1 and 15)
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // rv = (region < 15)
+ mips_emit_addiu(reg_temp, reg_temp, -1);
+ mips_emit_sltu(reg_temp, reg_zero, reg_temp); // temp = (region != 1)
+ mips_emit_and(reg_temp, reg_temp, reg_rv); // bad if region<15 && !=1
+
+ if (!aligned && size != 0) {
+ // Also check and aggregate alignment
+ mips_emit_ext(reg_rv, reg_a0, 0, size);
+ mips_emit_xori(reg_rv, reg_rv, alignment);
+ mips_emit_or(reg_temp, reg_rv, reg_temp);
+ }
+
+ // Jump to patch handler
+ mips_emit_b(bne, reg_zero, reg_temp, ld_phndlr_branch(memopn));
+
+ // BIOS can jump here to do open loads
+ openld_core_ptrs[memopn] = (u32*)translation_ptr;
+
+ // Proceed with open load by reading data at PC (previous data in the bus)
+ mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR
+ mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit
+
+ emit_save_regs(aligned);
+ mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1);
+
+ switch (size) {
+ case 0:
+ mips_emit_b(beq, reg_zero, reg_rv, 2); // Depends on CPU mode
+ mips_emit_andi(reg_a0, reg_a0, 0x3); // ARM: Isolate two LSB
+ mips_emit_andi(reg_a0, reg_a0, 0x1); // Thb: Isolate one LSB
+ mips_emit_jal(((u32)&read_memory8) >> 2);
+ mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay)
+ break;
+ case 1:
+ mips_emit_b(beq, reg_zero, reg_rv, 2);
+ mips_emit_andi(reg_a0, reg_a0, 0x2); // ARM: Isolate bit 1
+ mips_emit_andi(reg_a0, reg_a0, 0x0); // Thb: Ignore low bits at all
+ mips_emit_jal(((u32)&read_memory16) >> 2);
+ mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay)
+ break;
+ default:
+ // 32 bit: Thumb reads a 16 bit value replicated in both halves
+ mips_emit_b(beq, reg_zero, reg_rv, 5);
+ mips_emit_addu(reg_a0, reg_zero, reg_a1); // Move PC to arg0
+
+ mips_emit_jal(((u32)&read_memory16) >> 2);
+ mips_emit_nop();
+ mips_emit_b(beq, reg_zero, reg_zero, 3);
+ mips_emit_ins(reg_rv, reg_rv, 16, 16); // res = res | (res << 16) [delay]
+
+ mips_emit_jal(((u32)&read_memory32) >> 2);
+ mips_emit_nop();
+ break;
+ };
+
+ mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1);
+ emit_restore_regs(aligned);
+
+ // Same behaviour as reading from region14 really (8 bit bus)
+ if (!size && signext) {
+ mips_emit_seb(reg_rv, reg_rv);
+ } else if (size == 1 && alignment) {
+ mips_emit_seb(reg_rv, reg_rv);
+ } else if (size == 2) {
+ mips_emit_rotr(reg_rv, reg_rv, 8 * alignment);
+ }
+ generate_function_return_swap_delay();
+
+ *tr_ptr = translation_ptr;
+}
+
+typedef void (*sthldr_t)(
+ unsigned memop_number, const t_stub_meminfo *meminfo,
+ unsigned size, bool aligned, u8 **tr_ptr);
+
+typedef void (*ldhldr_t)(
+ unsigned memop_number, const t_stub_meminfo *meminfo,
+ bool signext, unsigned size,
+ unsigned alignment, bool aligned,
+ u8 **tr_ptr);
+
+// Generates a patch handler for a given access size
+// It will detect the access alignment and memory region and load
+// the corresponding handler from the table (at the right offset)
+// and patch the jal instruction from where it was called.
+// Parameters:
+//   tr_ptr: in/out pointer into the stub arena
+//   size: log2 of the access size (0/1/2), used for alignment bits
+//   toff: word offset of this op's 16-entry row in the handler table
+//   check_alignment: fold the address misalignment bits into the index
+// Each emitted handler is padded to exactly 16 instructions (64 bytes)
+// so callers can address them by fixed stride.
+static void emit_phand(
+ u8 **tr_ptr, unsigned size, unsigned toff,
+ bool check_alignment)
+{
+ u8 *translation_ptr = *tr_ptr;
+
+ mips_emit_srl(reg_temp, reg_a0, 24);
+ #ifdef PSP
+ mips_emit_addiu(reg_rv, reg_zero, 15*4); // Table limit (max)
+ mips_emit_sll(reg_temp, reg_temp, 2); // Table is word indexed
+ mips_emit_min(reg_temp, reg_temp, reg_rv);// Do not overflow table
+ #else
+ mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // Check for addr 0x1XXX.. 0xFXXX
+ mips_emit_b(bne, reg_zero, reg_rv, 2); // Skip two insts (well, cant skip ds)
+ mips_emit_sll(reg_temp, reg_temp, 2); // Table is word indexed
+ mips_emit_addiu(reg_temp, reg_zero, 15*4);// Simulate ld/st to 0x0FXXX (open/ignore)
+ #endif
+
+ // Stores or byte-accesses do not care about alignment
+ if (check_alignment) {
+ // Move alignment bits for the table lookup
+ mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7)
+ }
+
+ unsigned tbloff = 256 + 2048 + 220 + 4 * toff; // Skip regs and palettes
+ mips_emit_addu(reg_rv, reg_temp, reg_base); // Add to the base_reg the table offset
+ mips_emit_lw(reg_rv, reg_rv, tbloff); // Read addr from table
+ mips_emit_sll(reg_temp, reg_rv, 4); // 26 bit immediate to the MSB
+ mips_emit_ori(reg_temp, reg_temp, 0x3); // JAL opcode
+ mips_emit_rotr(reg_temp, reg_temp, 6); // Swap opcode and immediate
+ mips_emit_sw(reg_temp, mips_reg_ra, -8); // Patch instruction!
+
+ // Flush D-cache / invalidate I-cache for the patched word
+ mips_emit_cache(0x1A, mips_reg_ra, -8);
+ mips_emit_jr(reg_rv); // Jump directly to target for speed
+ mips_emit_cache(0x08, mips_reg_ra, -8);
+
+ // Round up handlers to 16 instructions for easy addressing :)
+ while (translation_ptr - *tr_ptr < 64) {
+ mips_emit_nop();
+ }
+
+ *tr_ptr = translation_ptr;
+}
+
+// This function emits the following stubs:
+// - smc_write: Jumps to C code to trigger a cache flush
+// - memop patcher: Patches a memop whenever it accesses the wrong mem region
+// - mem stubs: There's stubs for load & store, and every memory region
+// and possible operand size and misalignment (+sign extensions)
+void init_emitter() {
+ // Initialize memory to a debuggable state
+ memset(stub_arena, 0, sizeof(stub_arena)); // nop
+
+ // Generates the trampoline and helper stubs that we need
+ u8 *translation_ptr = (u8*)&stub_arena[0];
+
+ // Generate first the patch handlers
+ // We have 6+4 patchers, one per mem type (6 or 4)
+
+ // Calculate the offset into tmemld[10][XX];
+ emit_phand(&translation_ptr, 0, 0 * 16, false); // ld u8
+ emit_phand(&translation_ptr, 0, 1 * 16, false); // ld s8
+ emit_phand(&translation_ptr, 1, 2 * 16, true); // ld u16 + u16u1
+ emit_phand(&translation_ptr, 1, 4 * 16, true); // ld s16 + s16u1
+ emit_phand(&translation_ptr, 2, 6 * 16, true); // ld u32 (0/1/2/3u)
+ emit_phand(&translation_ptr, 2, 10 * 16, false); // ld aligned 32
+ // Store table is immediately after
+ emit_phand(&translation_ptr, 0, 11 * 16, false); // st u8
+ emit_phand(&translation_ptr, 1, 12 * 16, false); // st u16
+ emit_phand(&translation_ptr, 2, 13 * 16, false); // st u32
+ emit_phand(&translation_ptr, 2, 14 * 16, false); // st aligned 32
+
+ // Generate SMC write handler, with the lookup machinery
+ // Call out the flushing routine (save PC)
+ emit_save_regs(false);
+ mips_emit_jal(((u32)&flush_translation_cache_ram) >> 2);
+ mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Delay slot
+
+ mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR
+ mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit
+ mips_emit_b(beq, reg_rv, reg_zero, 3); // Skip to ARM mode
+ mips_emit_lw(reg_a0, reg_base, ReOff_RegPC); // arg0=pc
+ // Lookup thumb PC and execute
+ mips_emit_jal(((u32)&block_lookup_address_thumb) >> 2);
+ mips_emit_addiu(mips_reg_ra, mips_reg_ra, 8); // Skip 2 insts on return!
+ // Lookup arm PC and execute
+ mips_emit_jal(((u32)&block_lookup_address_arm) >> 2);
+ mips_emit_nop();
+  // Epilogue (restore and jump)
+ emit_restore_regs(false);
+ mips_emit_jr(reg_rv); // Go execute the code
+ mips_emit_nop();
+
+ // Generate the openload handlers (for accesses to unmapped mem)
+ emit_openload_stub(0, false, 0, 0, false, &translation_ptr); // ld u8
+ emit_openload_stub(1, true, 0, 0, false, &translation_ptr); // ld s8
+ emit_openload_stub(2, false, 1, 0, false, &translation_ptr); // ld u16
+ emit_openload_stub(3, false, 1, 1, false, &translation_ptr); // ld u16u1
+ emit_openload_stub(4, true, 1, 0, false, &translation_ptr); // ld s16
+ emit_openload_stub(5, true, 1, 1, false, &translation_ptr); // ld s16u1
+ emit_openload_stub(6, false, 2, 0, false, &translation_ptr); // ld u32
+ emit_openload_stub(7, false, 2, 1, false, &translation_ptr); // ld u32u1
+ emit_openload_stub(8, false, 2, 2, false, &translation_ptr); // ld u32u2
+ emit_openload_stub(9, false, 2, 3, false, &translation_ptr); // ld u32u3
+ emit_openload_stub(10,false, 2, 0, true, &translation_ptr); // ld aligned 32
+
+ // Here we emit the ignore store area, just checks and does nothing
+ for (unsigned i = 0; i < 4; i++)
+ emit_ignorestore_stub(i, &translation_ptr);
+
+ // Here go the save game handlers
+ emit_saveaccess_stub(&translation_ptr);
+
+ // Generate memory handlers
+ const t_stub_meminfo ldinfo [] = {
+ { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom },
+ // 1 Open load / Ignore store
+ { emit_pmemld_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] },
+ { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above
+ { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers },
+ { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram },
+ { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case
+ { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram },
+ { emit_pmemld_stub, 8, 0x8000, false, false, 0 },
+ { emit_pmemld_stub, 9, 0x8000, false, false, 0 },
+ { emit_pmemld_stub, 10, 0x8000, false, false, 0 },
+ { emit_pmemld_stub, 11, 0x8000, false, false, 0 },
+ { emit_pmemld_stub, 12, 0x8000, false, false, 0 },
+ // 13 is EEPROM mapped already (a bit special)
+ { emit_pmemld_stub, 14, 0, false, false, 0 }, // Mapped via function call
+ // 15 Open load / Ignore store
+ };
+
+ for (unsigned i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) {
+ ldhldr_t handler = (ldhldr_t)ldinfo[i].emitter;
+ /* region info signext sz al isaligned */
+ handler(0, &ldinfo[i], false, 0, 0, false, &translation_ptr); // ld u8
+ handler(1, &ldinfo[i], true, 0, 0, false, &translation_ptr); // ld s8
+
+ handler(2, &ldinfo[i], false, 1, 0, false, &translation_ptr); // ld u16
+ handler(3, &ldinfo[i], false, 1, 1, false, &translation_ptr); // ld u16u1
+ handler(4, &ldinfo[i], true, 1, 0, false, &translation_ptr); // ld s16
+ handler(5, &ldinfo[i], true, 1, 1, false, &translation_ptr); // ld s16u1
+
+ handler(6, &ldinfo[i], false, 2, 0, false, &translation_ptr); // ld u32
+ handler(7, &ldinfo[i], false, 2, 1, false, &translation_ptr); // ld u32u1
+ handler(8, &ldinfo[i], false, 2, 2, false, &translation_ptr); // ld u32u2
+ handler(9, &ldinfo[i], false, 2, 3, false, &translation_ptr); // ld u32u3
+
+ handler(10,&ldinfo[i], false, 2, 0, true, &translation_ptr); // aligned ld u32
+ }
+
+ const t_stub_meminfo stinfo [] = {
+ { emit_pmemst_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] },
+ { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above
+ // I/O is special and mapped with a function call
+ { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram },
+ { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case
+ { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram },
+ };
+
+ // Store only for "regular"-ish mem regions
+ //
+ for (unsigned i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) {
+ sthldr_t handler = (sthldr_t)stinfo[i].emitter;
+ handler(0, &stinfo[i], 0, false, &translation_ptr); // st u8
+ handler(1, &stinfo[i], 1, false, &translation_ptr); // st u16
+ handler(2, &stinfo[i], 2, false, &translation_ptr); // st u32
+ handler(3, &stinfo[i], 2, true, &translation_ptr); // st aligned 32
+ }
+}
+
#endif
+
+
diff --git a/psp/mips_stub.S b/psp/mips_stub.S
index 1b24b0d..a14085b 100644
--- a/psp/mips_stub.S
+++ b/psp/mips_stub.S
@@ -23,16 +23,16 @@
.global mips_indirect_branch_arm
.global mips_indirect_branch_thumb
.global mips_indirect_branch_dual
-.global execute_load_u8
-.global execute_load_u16
-.global execute_load_u32
-.global execute_load_s8
-.global execute_load_s16
-.global execute_store_u8
-.global execute_store_u16
-.global execute_store_u32
-.global execute_aligned_load32
-.global execute_aligned_store32
+#.global execute_load_u8
+#.global execute_load_u16
+#.global execute_load_u32
+#.global execute_load_s8
+#.global execute_load_s16
+#.global execute_store_u8
+#.global execute_store_u16
+#.global execute_store_u32
+#.global execute_aligned_load32
+#.global execute_aligned_store32
.global execute_read_cpsr
.global execute_read_spsr
.global execute_swi
@@ -48,9 +48,15 @@
.global reg_check
.global palette_ram
.global palette_ram_converted
+.global init_emitter
+.global mips_lookup_pc
+.global write_io_epilogue
.global memory_map_read
.global memory_map_write
+.global tmemld
+.global tmemst
+.global tmemst
.global reg
.global spsr
.global reg_mode
@@ -105,7 +111,6 @@
.equ REG_R12, (12 * 4)
.equ REG_R13, (13 * 4)
.equ REG_R14, (14 * 4)
-.equ REG_LR, (14 * 4)
.equ REG_PC, (15 * 4)
.equ REG_N_FLAG, (16 * 4)
.equ REG_Z_FLAG, (17 * 4)
@@ -1004,7 +1009,7 @@ execute_load_ewram_u8:
# Put the generic address over the handler you want to be default
# IWRAM is typically the most frequently read and written to.
-execute_load_u8:
+# execute_load_u8:
execute_load_iwram_u8:
translate_region 3, patch_load_u8, (iwram + 0x8000), 0x7FFF
load_u8 (iwram + 0x8000)
@@ -1107,7 +1112,7 @@ execute_load_ewram_s8:
translate_region_ewram patch_load_s8
load_s8 (ewram + 0x8000)
-execute_load_s8:
+#execute_load_s8:
execute_load_iwram_s8:
translate_region 3, patch_load_s8, (iwram + 0x8000), 0x7FFF
load_s8 (iwram + 0x8000)
@@ -1209,7 +1214,7 @@ execute_load_ewram_u16:
translate_region_ewram_load_align 1, 0, patch_load_u16
load_u16 (ewram + 0x8000)
-execute_load_u16:
+#execute_load_u16:
execute_load_iwram_u16:
translate_region_align 3, 1, 0, patch_load_u16, (iwram + 0x8000), 0x7FFF
load_u16 (iwram + 0x8000)
@@ -1408,7 +1413,7 @@ execute_load_ewram_s16:
translate_region_ewram_load_align 1, 0, patch_load_s16
load_s16 (ewram + 0x8000)
-execute_load_s16:
+#execute_load_s16:
execute_load_iwram_s16:
translate_region_align 3, 1, 0, patch_load_s16, (iwram + 0x8000), 0x7FFF
load_s16 (iwram + 0x8000)
@@ -1607,7 +1612,7 @@ execute_load_ewram_u32:
translate_region_ewram_load_align 2, 0, patch_load_u32
load_u32 (ewram + 0x8000)
-execute_load_u32:
+#execute_load_u32:
execute_load_iwram_u32:
translate_region_align 3, 2, 0, patch_load_u32, (iwram + 0x8000), 0x7FFF
load_u32 (iwram + 0x8000)
@@ -1993,7 +1998,7 @@ execute_load_ewram_u32a:
translate_region_ewram patch_load_u32a
load_u32 (ewram + 0x8000)
-execute_aligned_load32:
+#execute_aligned_load32:
execute_load_iwram_u32a:
translate_region 3, patch_load_u32a, (iwram + 0x8000), 0x7FFF
load_u32 (iwram + 0x8000)
@@ -2078,7 +2083,7 @@ execute_store_ewram_u8:
translate_region_ewram patch_store_u8
store_u8_smc (ewram + 0x8000)
-execute_store_u8:
+#execute_store_u8:
execute_store_iwram_u8:
translate_region 3, patch_store_u8, (iwram + 0x8000), 0x7FFF
store_u8_smc (iwram + 0x8000)
@@ -2175,7 +2180,7 @@ execute_store_ewram_u16:
translate_region_ewram_store_align16 patch_store_u16
store_u16_smc (ewram + 0x8000)
-execute_store_u16:
+#execute_store_u16:
execute_store_iwram_u16:
translate_region 3, patch_store_u16, (iwram + 0x8000), 0x7FFE
store_u16_smc (iwram + 0x8000)
@@ -2274,7 +2279,7 @@ execute_store_ewram_u32:
translate_region_ewram_store_align32 patch_store_u32
store_u32_smc (ewram + 0x8000)
-execute_store_u32:
+#execute_store_u32:
execute_store_iwram_u32:
translate_region 3, patch_store_u32, (iwram + 0x8000), 0x7FFC
store_u32_smc (iwram + 0x8000)
@@ -2380,7 +2385,7 @@ execute_store_ewram_u32a:
translate_region_ewram_store_align32 patch_store_u32a
store_u32 (ewram + 0x8000)
-execute_aligned_store32:
+#execute_aligned_store32:
execute_store_iwram_u32a:
translate_region 3, patch_store_u32a, (iwram + 0x8000), 0x7FFC
store_u32 (iwram + 0x8000)
@@ -2529,6 +2534,7 @@ smc_write:
jal flush_translation_cache_ram # flush translation cache
sw $6, REG_PC($16) # save PC (delay slot)
+mips_lookup_pc:
lookup_pc:
lw $2, REG_CPSR($16) # $2 = cpsr
andi $2, $2, 0x20 # isolate mode bit
@@ -2624,8 +2630,7 @@ execute_store_cpsr:
and $2, $2, $4 # $2 = (cpsr & (~store_mask))
or $1, $1, $2 # $1 = new cpsr combined with old
extract_flags_body # extract flags from $1
- addiu $sp, $sp, -4
- sw $ra, ($sp)
+ sw $ra, REG_SAVE3($16)
save_registers
jal execute_store_cpsr_body # do the dirty work in this C function
addu $4, $1, $0 # load the new CPSR (delay slot)
@@ -2635,16 +2640,16 @@ execute_store_cpsr:
restore_registers
- lw $ra, ($sp)
+ lw $ra, REG_SAVE3($16)
jr $ra
- addiu $sp, $sp, 4
+ nop
changed_pc_cpsr:
jal block_lookup_address_arm # GBA address is in $4
addu $4, $2, $0 # load new address in $4 (delay slot)
restore_registers # restore registers
jr $2 # jump to the new address
- addiu $sp, $sp, 4 # get rid of the old ra (delay slot)
+ nop
# $4: new spsr
@@ -2797,11 +2802,14 @@ execute_arm_translate:
.data
.align 6
+memory_map_write:
+ .space 0x8000
+
memory_map_read:
.space 0x8000
-# This must be between memory_map_read and memory_map_write because it's used
-# to calculate their addresses elsewhere in this file.
+# memory_map_read is immediately before arm_reg on purpose (offset used
+# to access it, via lw op). We do not use write though.
reg:
.space 0x100
@@ -2815,5 +2823,14 @@ spsr:
reg_mode:
.space 196 # u32[7][7];
-memory_map_write:
- .space 0x8000
+# Here we store:
+#  void *tmemld[11][16];   # 11 load handler types (incl. unaligned/sign variants)
+#  void *tmemst[ 4][16];   # 4 store handler types
+# Essentially a list of pointers to the different mem load handlers
+# Keep them close for a fast patcher.
+tmemld:
+ .space 704
+tmemst:
+ .space 256
+
+