summaryrefslogtreecommitdiff
path: root/psp
diff options
context:
space:
mode:
authornotaz2009-05-21 18:48:31 +0300
committernotaz2009-05-21 18:48:31 +0300
commit2823a4c8196a02da86ee180cf55586d4e8c91a2f (patch)
tree7e9b3f35b55af9917b3a05dd32de14be9a8c3f3c /psp
downloadpicogpsp-2823a4c8196a02da86ee180cf55586d4e8c91a2f.tar.gz
picogpsp-2823a4c8196a02da86ee180cf55586d4e8c91a2f.tar.bz2
picogpsp-2823a4c8196a02da86ee180cf55586d4e8c91a2f.zip
original source from gpsp09-2xb_src.tar.bz2
Diffstat (limited to 'psp')
-rw-r--r--psp/Makefile26
-rw-r--r--psp/mips_emit.h2531
-rw-r--r--psp/mips_stub.S3427
3 files changed, 5984 insertions, 0 deletions
diff --git a/psp/Makefile b/psp/Makefile
new file mode 100644
index 0000000..9906e6f
--- /dev/null
+++ b/psp/Makefile
@@ -0,0 +1,26 @@
+# -x assembler-with-cpp
+# gpSP makefile
+# Gilead Kutnick - Exophase
+
+# Global definitions
+
+PSPSDK = ${shell psp-config --pspsdk-path}
+PREFIX = ${shell psp-config --psp-prefix}
+
+OBJS = main.o cpu.o video.o memory.o sound.o input.o \
+ cpu_threaded.o gui.o zip.o cheats.o mips_stub.o
+
+TARGET = gpSP
+
+VPATH += ..
+CFLAGS += -O3 -DPSP_BUILD -G0 -funsigned-char
+CFLAGS += ${shell ${PREFIX}/bin/sdl-config --cflags}
+ASFLAGS = ${CFLAGS}
+PSP_EBOOT_TITLE = gpSP
+EXTRA_TARGETS = EBOOT.PBP
+
+LIBS += ${shell ${PREFIX}/bin/sdl-config --libs} -lpsppower \
+ -lz
+
+include ${PSPSDK}/lib/build.mak
+
diff --git a/psp/mips_emit.h b/psp/mips_emit.h
new file mode 100644
index 0000000..8fc95e8
--- /dev/null
+++ b/psp/mips_emit.h
@@ -0,0 +1,2531 @@
+/* gameplaySP
+ *
+ * Copyright (C) 2006 Exophase <exophase@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MIPS_EMIT_H
+#define MIPS_EMIT_H
+
+u32 mips_update_gba(u32 pc);
+
+// Although these are declared as functions, do not call them as
+// such; jump to them instead.
+void mips_indirect_branch_arm(u32 address);
+void mips_indirect_branch_thumb(u32 address);
+void mips_indirect_branch_dual(u32 address);
+
+u32 execute_read_cpsr();
+u32 execute_read_spsr();
+void execute_swi(u32 pc);
+
+u32 execute_spsr_restore(u32 address);
+void execute_store_cpsr(u32 new_cpsr, u32 store_mask);
+void execute_store_spsr(u32 new_spsr, u32 store_mask);
+
+u32 execute_spsr_restore_body(u32 address);
+u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address);
+
+u32 execute_lsl_flags_reg(u32 value, u32 shift);
+u32 execute_lsr_flags_reg(u32 value, u32 shift);
+u32 execute_asr_flags_reg(u32 value, u32 shift);
+u32 execute_ror_flags_reg(u32 value, u32 shift);
+
+void execute_aligned_store32(u32 address, u32 value);
+u32 execute_aligned_load32(u32 address);
+
+void step_debug_mips(u32 pc);
+
+void reg_check();
+
+typedef enum
+{
+ mips_reg_zero,
+ mips_reg_at,
+ mips_reg_v0,
+ mips_reg_v1,
+ mips_reg_a0,
+ mips_reg_a1,
+ mips_reg_a2,
+ mips_reg_a3,
+ mips_reg_t0,
+ mips_reg_t1,
+ mips_reg_t2,
+ mips_reg_t3,
+ mips_reg_t4,
+ mips_reg_t5,
+ mips_reg_t6,
+ mips_reg_t7,
+ mips_reg_s0,
+ mips_reg_s1,
+ mips_reg_s2,
+ mips_reg_s3,
+ mips_reg_s4,
+ mips_reg_s5,
+ mips_reg_s6,
+ mips_reg_s7,
+ mips_reg_t8,
+ mips_reg_t9,
+ mips_reg_k0,
+ mips_reg_k1,
+ mips_reg_gp,
+ mips_reg_sp,
+ mips_reg_fp,
+ mips_reg_ra
+} mips_reg_number;
+
+typedef enum
+{
+ mips_special_sll = 0x00,
+ mips_special_srl = 0x02,
+ mips_special_sra = 0x03,
+ mips_special_sllv = 0x04,
+ mips_special_srlv = 0x06,
+ mips_special_srav = 0x07,
+ mips_special_jr = 0x08,
+ mips_special_jalr = 0x09,
+ mips_special_movz = 0x0A,
+ mips_special_movn = 0x0B,
+ mips_special_mfhi = 0x10,
+ mips_special_mthi = 0x11,
+ mips_special_mflo = 0x12,
+ mips_special_mtlo = 0x13,
+ mips_special_mult = 0x18,
+ mips_special_multu = 0x19,
+ mips_special_div = 0x1A,
+ mips_special_divu = 0x1B,
+ mips_special_madd = 0x1C,
+ mips_special_maddu = 0x1D,
+ mips_special_add = 0x20,
+ mips_special_addu = 0x21,
+ mips_special_sub = 0x22,
+ mips_special_subu = 0x23,
+ mips_special_and = 0x24,
+ mips_special_or = 0x25,
+ mips_special_xor = 0x26,
+ mips_special_nor = 0x27,
+ mips_special_slt = 0x2A,
+ mips_special_sltu = 0x2B
+} mips_function_special;
+
+typedef enum
+{
+ mips_special3_ext = 0x00,
+ mips_special3_ins = 0x04,
+ mips_special3_bshfl = 0x20
+} mips_function_special3;
+
+typedef enum
+{
+ mips_regimm_bltz = 0x00,
+ mips_regimm_bltzal = 0x10
+} mips_function_regimm;
+
+typedef enum
+{
+ mips_opcode_special = 0x00,
+ mips_opcode_regimm = 0x01,
+ mips_opcode_j = 0x02,
+ mips_opcode_jal = 0x03,
+ mips_opcode_beq = 0x04,
+ mips_opcode_bne = 0x05,
+ mips_opcode_blez = 0x06,
+ mips_opcode_bgtz = 0x07,
+ mips_opcode_addi = 0x08,
+ mips_opcode_addiu = 0x09,
+ mips_opcode_slti = 0x0A,
+ mips_opcode_sltiu = 0x0B,
+ mips_opcode_andi = 0x0C,
+ mips_opcode_ori = 0x0D,
+ mips_opcode_xori = 0x0E,
+ mips_opcode_lui = 0x0F,
+ mips_opcode_llo = 0x18,
+ mips_opcode_lhi = 0x19,
+ mips_opcode_trap = 0x1A,
+ mips_opcode_special2 = 0x1C,
+ mips_opcode_special3 = 0x1F,
+ mips_opcode_lb = 0x20,
+ mips_opcode_lh = 0x21,
+ mips_opcode_lw = 0x23,
+ mips_opcode_lbu = 0x24,
+ mips_opcode_lhu = 0x25,
+ mips_opcode_sb = 0x28,
+ mips_opcode_sh = 0x29,
+ mips_opcode_sw = 0x2B,
+} mips_opcode;
+
+// Raw instruction emitters. Each mips_emit_* macro below stores a single
+// 32-bit word at translation_ptr and then advances translation_ptr by 4.
+// They expand to two statements, so use them only where a statement list is
+// valid (not as the unbraced body of an if/else). Macro arguments are
+// parenthesized so that expression arguments keep their own precedence.
+
+// Register-format word with an explicit opcode and raw function field.
+#define mips_emit_reg(opcode, rs, rt, rd, shift, function) \
+  *((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \
+   ((rs) << 21) | ((rt) << 16) | ((rd) << 11) | ((shift) << 6) | (function); \
+  translation_ptr += 4 \
+
+// SPECIAL-opcode word; function names a mips_special_* entry.
+#define mips_emit_special(function, rs, rt, rd, shift) \
+  *((u32 *)translation_ptr) = (mips_opcode_special << 26) | \
+   ((rs) << 21) | ((rt) << 16) | ((rd) << 11) | ((shift) << 6) | \
+   mips_special_##function; \
+  translation_ptr += 4 \
+
+// SPECIAL3-opcode word; imm_a lands in bits 11..15 and imm_b in bits 6..10.
+#define mips_emit_special3(function, rs, rt, imm_a, imm_b) \
+  *((u32 *)translation_ptr) = (mips_opcode_special3 << 26) | \
+   ((rs) << 21) | ((rt) << 16) | ((imm_a) << 11) | ((imm_b) << 6) | \
+   mips_special3_##function; \
+  translation_ptr += 4 \
+
+// Immediate-format word; only the low 16 bits of immediate are kept.
+#define mips_emit_imm(opcode, rs, rt, immediate) \
+  *((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \
+   ((rs) << 21) | ((rt) << 16) | ((immediate) & 0xFFFF); \
+  translation_ptr += 4 \
+
+// REGIMM-opcode word; function names a mips_regimm_* entry (placed in the
+// rt field), and only the low 16 bits of immediate are kept.
+#define mips_emit_regimm(function, rs, immediate) \
+  *((u32 *)translation_ptr) = (mips_opcode_regimm << 26) | \
+   ((rs) << 21) | (mips_regimm_##function << 16) | ((immediate) & 0xFFFF); \
+  translation_ptr += 4 \
+
+// Jump-format word; only the low 26 bits of offset are kept.
+#define mips_emit_jump(opcode, offset) \
+  *((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \
+   ((offset) & 0x3FFFFFF); \
+  translation_ptr += 4 \
+
+// Distance, in 4-byte words, from the word following source to offset.
+#define mips_relative_offset(source, offset) \
+  (((u32)(offset) - ((u32)(source) + 4)) / 4) \
+
+// Address offset expressed in 4-byte words (passed to the j/jal emitters).
+#define mips_absolute_offset(offset) \
+  ((u32)(offset) / 4) \
+
+#define mips_emit_addu(rd, rs, rt) \
+ mips_emit_special(addu, rs, rt, rd, 0) \
+
+#define mips_emit_subu(rd, rs, rt) \
+ mips_emit_special(subu, rs, rt, rd, 0) \
+
+#define mips_emit_xor(rd, rs, rt) \
+ mips_emit_special(xor, rs, rt, rd, 0) \
+
+// Emits ADD (mips_special_add). The previous definition encoded AND here.
+// Note that MIPS ADD raises an integer-overflow exception on signed
+// overflow; code that needs wrap-around arithmetic should use
+// mips_emit_addu instead.
+#define mips_emit_add(rd, rs, rt) \
+  mips_emit_special(add, rs, rt, rd, 0) \
+
+#define mips_emit_sub(rd, rs, rt) \
+ mips_emit_special(sub, rs, rt, rd, 0) \
+
+#define mips_emit_and(rd, rs, rt) \
+ mips_emit_special(and, rs, rt, rd, 0) \
+
+#define mips_emit_or(rd, rs, rt) \
+ mips_emit_special(or, rs, rt, rd, 0) \
+
+#define mips_emit_nor(rd, rs, rt) \
+ mips_emit_special(nor, rs, rt, rd, 0) \
+
+#define mips_emit_slt(rd, rs, rt) \
+ mips_emit_special(slt, rs, rt, rd, 0) \
+
+#define mips_emit_sltu(rd, rs, rt) \
+ mips_emit_special(sltu, rs, rt, rd, 0) \
+
+#define mips_emit_sllv(rd, rt, rs) \
+ mips_emit_special(sllv, rs, rt, rd, 0) \
+
+#define mips_emit_srlv(rd, rt, rs) \
+ mips_emit_special(srlv, rs, rt, rd, 0) \
+
+#define mips_emit_srav(rd, rt, rs) \
+ mips_emit_special(srav, rs, rt, rd, 0) \
+
+#define mips_emit_rotrv(rd, rt, rs) \
+ mips_emit_special(srlv, rs, rt, rd, 1) \
+
+#define mips_emit_sll(rd, rt, shift) \
+ mips_emit_special(sll, 0, rt, rd, shift) \
+
+#define mips_emit_srl(rd, rt, shift) \
+ mips_emit_special(srl, 0, rt, rd, shift) \
+
+#define mips_emit_sra(rd, rt, shift) \
+ mips_emit_special(sra, 0, rt, rd, shift) \
+
+#define mips_emit_rotr(rd, rt, shift) \
+ mips_emit_special(srl, 1, rt, rd, shift) \
+
+#define mips_emit_mfhi(rd) \
+ mips_emit_special(mfhi, 0, 0, rd, 0) \
+
+#define mips_emit_mflo(rd) \
+ mips_emit_special(mflo, 0, 0, rd, 0) \
+
+#define mips_emit_mthi(rs) \
+ mips_emit_special(mthi, rs, 0, 0, 0) \
+
+#define mips_emit_mtlo(rs) \
+ mips_emit_special(mtlo, rs, 0, 0, 0) \
+
+#define mips_emit_mult(rs, rt) \
+ mips_emit_special(mult, rs, rt, 0, 0) \
+
+#define mips_emit_multu(rs, rt) \
+ mips_emit_special(multu, rs, rt, 0, 0) \
+
+#define mips_emit_div(rs, rt) \
+ mips_emit_special(div, rs, rt, 0, 0) \
+
+#define mips_emit_divu(rs, rt) \
+ mips_emit_special(divu, rs, rt, 0, 0) \
+
+#define mips_emit_madd(rs, rt) \
+ mips_emit_special(madd, rs, rt, 0, 0) \
+
+#define mips_emit_maddu(rs, rt) \
+ mips_emit_special(maddu, rs, rt, 0, 0) \
+
+#define mips_emit_movn(rd, rs, rt) \
+ mips_emit_special(movn, rs, rt, rd, 0) \
+
+#define mips_emit_movz(rd, rs, rt) \
+ mips_emit_special(movz, rs, rt, rd, 0) \
+
+#define mips_emit_lb(rt, rs, offset) \
+ mips_emit_imm(lb, rs, rt, offset) \
+
+#define mips_emit_lbu(rt, rs, offset) \
+ mips_emit_imm(lbu, rs, rt, offset) \
+
+#define mips_emit_lh(rt, rs, offset) \
+ mips_emit_imm(lh, rs, rt, offset) \
+
+#define mips_emit_lhu(rt, rs, offset) \
+ mips_emit_imm(lhu, rs, rt, offset) \
+
+#define mips_emit_lw(rt, rs, offset) \
+ mips_emit_imm(lw, rs, rt, offset) \
+
+#define mips_emit_sb(rt, rs, offset) \
+ mips_emit_imm(sb, rs, rt, offset) \
+
+#define mips_emit_sh(rt, rs, offset) \
+ mips_emit_imm(sh, rs, rt, offset) \
+
+#define mips_emit_sw(rt, rs, offset) \
+ mips_emit_imm(sw, rs, rt, offset) \
+
+#define mips_emit_lui(rt, imm) \
+ mips_emit_imm(lui, 0, rt, imm) \
+
+#define mips_emit_addiu(rt, rs, imm) \
+ mips_emit_imm(addiu, rs, rt, imm) \
+
+#define mips_emit_xori(rt, rs, imm) \
+ mips_emit_imm(xori, rs, rt, imm) \
+
+#define mips_emit_ori(rt, rs, imm) \
+ mips_emit_imm(ori, rs, rt, imm) \
+
+#define mips_emit_andi(rt, rs, imm) \
+ mips_emit_imm(andi, rs, rt, imm) \
+
+#define mips_emit_slti(rt, rs, imm) \
+ mips_emit_imm(slti, rs, rt, imm) \
+
+#define mips_emit_sltiu(rt, rs, imm) \
+ mips_emit_imm(sltiu, rs, rt, imm) \
+
+#define mips_emit_ext(rt, rs, pos, size) \
+ mips_emit_special3(ext, rs, rt, (size - 1), pos) \
+
+#define mips_emit_ins(rt, rs, pos, size) \
+ mips_emit_special3(ins, rs, rt, (pos + size - 1), pos) \
+
+// Breaks down if the backpatch offset does not fit in 16 bits; take care
+// when using this (it should be okay if limited to conditional instructions).
+
+#define mips_emit_b_filler(type, rs, rt, writeback_location) \
+ (writeback_location) = translation_ptr; \
+ mips_emit_imm(type, rs, rt, 0) \
+
+// The backpatch code for this has to be handled differently than the above
+
+#define mips_emit_j_filler(writeback_location) \
+ (writeback_location) = translation_ptr; \
+ mips_emit_jump(j, 0) \
+
+#define mips_emit_b(type, rs, rt, offset) \
+ mips_emit_imm(type, rs, rt, offset) \
+
+#define mips_emit_j(offset) \
+ mips_emit_jump(j, offset) \
+
+#define mips_emit_jal(offset) \
+ mips_emit_jump(jal, offset) \
+
+#define mips_emit_jr(rs) \
+ mips_emit_special(jr, rs, 0, 0, 0) \
+
+#define mips_emit_bltzal(rs, offset) \
+ mips_emit_regimm(bltzal, rs, offset) \
+
+#define mips_emit_nop() \
+ mips_emit_sll(reg_zero, reg_zero, 0) \
+
+#define reg_base mips_reg_s0
+#define reg_cycles mips_reg_s1
+#define reg_a0 mips_reg_a0
+#define reg_a1 mips_reg_a1
+#define reg_a2 mips_reg_a2
+#define reg_rv mips_reg_v0
+#define reg_pc mips_reg_s3
+#define reg_temp mips_reg_at
+#define reg_zero mips_reg_zero
+
+#define reg_n_cache mips_reg_s4
+#define reg_z_cache mips_reg_s5
+#define reg_c_cache mips_reg_s6
+#define reg_v_cache mips_reg_s7
+
+#define reg_r0 mips_reg_v1
+#define reg_r1 mips_reg_a3
+#define reg_r2 mips_reg_t0
+#define reg_r3 mips_reg_t1
+#define reg_r4 mips_reg_t2
+#define reg_r5 mips_reg_t3
+#define reg_r6 mips_reg_t4
+#define reg_r7 mips_reg_t5
+#define reg_r8 mips_reg_t6
+#define reg_r9 mips_reg_t7
+#define reg_r10 mips_reg_s2
+#define reg_r11 mips_reg_t8
+#define reg_r12 mips_reg_t9
+#define reg_r13 mips_reg_gp
+#define reg_r14 mips_reg_fp
+
+// Writing to r15 goes straight to a0, to be chained with other ops
+
+u32 arm_to_mips_reg[] =
+{
+ reg_r0,
+ reg_r1,
+ reg_r2,
+ reg_r3,
+ reg_r4,
+ reg_r5,
+ reg_r6,
+ reg_r7,
+ reg_r8,
+ reg_r9,
+ reg_r10,
+ reg_r11,
+ reg_r12,
+ reg_r13,
+ reg_r14,
+ reg_a0,
+ reg_a1,
+ reg_a2,
+ reg_temp
+};
+
+#define arm_reg_a0 15
+#define arm_reg_a1 16
+#define arm_reg_a2 17
+#define arm_reg_temp 18
+
+#define generate_load_reg(ireg, reg_index) \
+ mips_emit_addu(ireg, arm_to_mips_reg[reg_index], reg_zero) \
+
+// Loads the 32-bit constant imm into ireg using the shortest sequence:
+//  - addiu from reg_zero when imm fits a signed 16-bit immediate,
+//  - ori from reg_zero when the upper 16 bits are zero,
+//  - lui, followed by ori only when the lower 16 bits are non-zero.
+// imm is parenthesized everywhere so an expression argument is cast and
+// shifted as a whole. Expands to a bare if/else statement, so invoke it
+// inside braces when it is the body of another if/else.
+#define generate_load_imm(ireg, imm) \
+  if(((s32)(imm) >= -32768) && ((s32)(imm) <= 32767)) \
+  { \
+    mips_emit_addiu(ireg, reg_zero, (imm)); \
+  } \
+  else \
+  { \
+    if(((u32)(imm) >> 16) == 0x0000) \
+    { \
+      mips_emit_ori(ireg, reg_zero, (imm)); \
+    } \
+    else \
+    { \
+      mips_emit_lui(ireg, (imm) >> 16); \
+ \
+      if(((u32)(imm) & 0x0000FFFF) != 0x00000000) \
+      { \
+        mips_emit_ori(ireg, ireg, (imm) & 0xFFFF); \
+      } \
+    } \
+  } \
+
+#define generate_load_pc(ireg, new_pc) \
+{ \
+ s32 pc_delta = new_pc - stored_pc; \
+ if((pc_delta >= -32768) && (pc_delta <= 32767)) \
+ { \
+ mips_emit_addiu(ireg, reg_pc, pc_delta); \
+ } \
+ else \
+ { \
+ generate_load_imm(ireg, new_pc); \
+ } \
+} \
+
+#define generate_store_reg(ireg, reg_index) \
+ mips_emit_addu(arm_to_mips_reg[reg_index], ireg, reg_zero) \
+
+#define generate_shift_left(ireg, imm) \
+ mips_emit_sll(ireg, ireg, imm) \
+
+#define generate_shift_right(ireg, imm) \
+ mips_emit_srl(ireg, ireg, imm) \
+
+#define generate_shift_right_arithmetic(ireg, imm) \
+ mips_emit_sra(ireg, ireg, imm) \
+
+#define generate_rotate_right(ireg, imm) \
+ mips_emit_rotr(ireg, ireg, imm) \
+
+#define generate_add(ireg_dest, ireg_src) \
+ mips_emit_addu(ireg_dest, ireg_dest, ireg_src) \
+
+#define generate_sub(ireg_dest, ireg_src) \
+ mips_emit_subu(ireg_dest, ireg_dest, ireg_src) \
+
+#define generate_or(ireg_dest, ireg_src) \
+ mips_emit_or(ireg_dest, ireg_dest, ireg_src) \
+
+#define generate_xor(ireg_dest, ireg_src) \
+ mips_emit_xor(ireg_dest, ireg_dest, ireg_src) \
+
+// Emits "ireg_dest = ireg_src <op> imm". When imm fits a signed 16-bit
+// immediate the immediate-form instruction (imm_type) is emitted; otherwise
+// the constant is first loaded into reg_temp and the register-form
+// instruction (reg_type) is emitted.
+#define generate_alu_imm(imm_type, reg_type, ireg_dest, ireg_src, imm) \
+  if(((s32)(imm) >= -32768) && ((s32)(imm) <= 32767)) \
+  { \
+    mips_emit_##imm_type(ireg_dest, ireg_src, (imm)); \
+  } \
+  else \
+  { \
+    generate_load_imm(reg_temp, (imm)); \
+    mips_emit_##reg_type(ireg_dest, ireg_src, reg_temp); \
+  } \
+
+// Same as generate_alu_imm, but the immediate form is chosen when imm lies
+// in 0..65535 (used in this file for andi/xori, whose 16-bit immediate is
+// zero-extended). The former "(u32)imm >= 0" test was always true and has
+// been dropped.
+#define generate_alu_immu(imm_type, reg_type, ireg_dest, ireg_src, imm) \
+  if((u32)(imm) <= 65535) \
+  { \
+    mips_emit_##imm_type(ireg_dest, ireg_src, (imm)); \
+  } \
+  else \
+  { \
+    generate_load_imm(reg_temp, (imm)); \
+    mips_emit_##reg_type(ireg_dest, ireg_src, reg_temp); \
+  } \
+
+// ireg += imm. For constants outside the signed 16-bit range the register
+// form is addu: it performs a real addition and, unlike MIPS add, can never
+// raise an integer-overflow exception.
+#define generate_add_imm(ireg, imm) \
+  generate_alu_imm(addiu, addu, ireg, ireg, imm) \
+
+// ireg -= imm, emitted as an addition of the negated constant. imm is
+// parenthesized so that an expression argument is negated as a whole.
+#define generate_sub_imm(ireg, imm) \
+  generate_alu_imm(addiu, addu, ireg, ireg, -(imm)) \
+
+// ireg ^= imm
+#define generate_xor_imm(ireg, imm) \
+  generate_alu_immu(xori, xor, ireg, ireg, imm) \
+
+// ireg_dest = ireg_src + imm (addu in the register form, see above).
+#define generate_add_reg_reg_imm(ireg_dest, ireg_src, imm) \
+  generate_alu_imm(addiu, addu, ireg_dest, ireg_src, imm) \
+
+// ireg &= imm
+#define generate_and_imm(ireg, imm) \
+  generate_alu_immu(andi, and, ireg, ireg, imm) \
+
+#define generate_mov(ireg_dest, ireg_src) \
+ mips_emit_addu(ireg_dest, ireg_src, reg_zero) \
+
+#define generate_multiply_s64() \
+ mips_emit_mult(arm_to_mips_reg[rm], arm_to_mips_reg[rs]) \
+
+#define generate_multiply_u64() \
+ mips_emit_multu(arm_to_mips_reg[rm], arm_to_mips_reg[rs]) \
+
+#define generate_multiply_s64_add() \
+ mips_emit_madd(arm_to_mips_reg[rm], arm_to_mips_reg[rs]) \
+
+#define generate_multiply_u64_add() \
+ mips_emit_maddu(arm_to_mips_reg[rm], arm_to_mips_reg[rs]) \
+
+// Emits a jal to function_location followed by a nop, i.e. the instruction
+// slot right after the jump is left empty.
+#define generate_function_call(function_location) \
+ mips_emit_jal(mips_absolute_offset(function_location)); \
+ mips_emit_nop() \
+
+// Like generate_function_call, but instead of a nop it reuses the most
+// recently emitted instruction: that word is read back from
+// translation_ptr - 4, the pointer is rewound over it, the jal is emitted in
+// its place and the saved word is written right after the jal.
+// (address32 is defined elsewhere; presumably a 32-bit access at
+// base + byte offset -- TODO confirm.)
+#define generate_function_call_swap_delay(function_location) \
+{ \
+ u32 delay_instruction = address32(translation_ptr, -4); \
+ translation_ptr -= 4; \
+ mips_emit_jal(mips_absolute_offset(function_location)); \
+ address32(translation_ptr, 0) = delay_instruction; \
+ translation_ptr += 4; \
+} \
+
+// Swaps the last two emitted words so that the branch (emitted last) comes
+// first and the instruction before it ends up after the branch. Because the
+// branch now sits one word earlier, 1 is added to its low 16 bits (the
+// relative offset field), wrapping within 16 bits.
+#define generate_swap_delay() \
+{ \
+ u32 delay_instruction = address32(translation_ptr, -8); \
+ u32 branch_instruction = address32(translation_ptr, -4); \
+ branch_instruction = (branch_instruction & 0xFFFF0000) | \
+ (((branch_instruction & 0x0000FFFF) + 1) & 0x0000FFFF); \
+ address32(translation_ptr, -8) = branch_instruction; \
+ address32(translation_ptr, -4) = delay_instruction; \
+} \
+
+#define generate_cycle_update() \
+ if(cycle_count != 0) \
+ { \
+ mips_emit_addiu(reg_cycles, reg_cycles, -cycle_count); \
+ cycle_count = 0; \
+ } \
+
+#define generate_cycle_update_force() \
+ mips_emit_addiu(reg_cycles, reg_cycles, -cycle_count); \
+ cycle_count = 0 \
+
+#define generate_branch_patch_conditional(dest, offset) \
+ *((u16 *)(dest)) = mips_relative_offset(dest, offset) \
+
+#define generate_branch_patch_unconditional(dest, offset) \
+ *((u32 *)(dest)) = (mips_opcode_j << 26) | \
+ ((mips_absolute_offset(offset)) & 0x3FFFFFF) \
+
+#define generate_branch_no_cycle_update(writeback_location, new_pc) \
+ if(pc == idle_loop_target_pc) \
+ { \
+ generate_load_pc(reg_a0, new_pc); \
+ generate_function_call_swap_delay(mips_update_gba); \
+ mips_emit_j_filler(writeback_location); \
+ mips_emit_nop(); \
+ } \
+ else \
+ { \
+ generate_load_pc(reg_a0, new_pc); \
+ mips_emit_bltzal(reg_cycles, \
+ mips_relative_offset(translation_ptr, update_trampoline)); \
+ generate_swap_delay(); \
+ mips_emit_j_filler(writeback_location); \
+ mips_emit_nop(); \
+ } \
+
+#define generate_branch_cycle_update(writeback_location, new_pc) \
+ generate_cycle_update(); \
+ generate_branch_no_cycle_update(writeback_location, new_pc) \
+
+#define generate_conditional_branch(ireg_a, ireg_b, type, writeback_location) \
+ generate_branch_filler_##type(ireg_a, ireg_b, writeback_location) \
+
+// a0 holds the destination
+
+#define generate_indirect_branch_cycle_update(type) \
+ mips_emit_j(mips_absolute_offset(mips_indirect_branch_##type)); \
+ generate_cycle_update_force() \
+
+#define generate_indirect_branch_no_cycle_update(type) \
+ mips_emit_j(mips_absolute_offset(mips_indirect_branch_##type)); \
+ mips_emit_nop() \
+
+#define generate_block_prologue() \
+ update_trampoline = translation_ptr; \
+ __asm__ \
+ ( \
+ "cache 8, 0(%0)\n" \
+ "cache 8, 0(%0)" : : "r"(translation_ptr) \
+ ); \
+ \
+ mips_emit_j(mips_absolute_offset(mips_update_gba)); \
+ mips_emit_nop(); \
+ generate_load_imm(reg_pc, stored_pc) \
+
+#define translate_invalidate_dcache() \
+ sceKernelDcacheWritebackAll() \
+
+#define block_prologue_size 8
+
+#define check_generate_n_flag \
+ (flag_status & 0x08) \
+
+#define check_generate_z_flag \
+ (flag_status & 0x04) \
+
+#define check_generate_c_flag \
+ (flag_status & 0x02) \
+
+#define check_generate_v_flag \
+ (flag_status & 0x01) \
+
+#define generate_load_reg_pc(ireg, reg_index, pc_offset) \
+ if(reg_index == REG_PC) \
+ { \
+ generate_load_pc(ireg, (pc + pc_offset)); \
+ } \
+ else \
+ { \
+ generate_load_reg(ireg, reg_index); \
+ } \
+
+#define check_load_reg_pc(arm_reg, reg_index, pc_offset) \
+ if(reg_index == REG_PC) \
+ { \
+ reg_index = arm_reg; \
+ generate_load_pc(arm_to_mips_reg[arm_reg], (pc + pc_offset)); \
+ } \
+
+#define check_store_reg_pc_no_flags(reg_index) \
+ if(reg_index == REG_PC) \
+ { \
+ generate_indirect_branch_arm(); \
+ } \
+
+#define check_store_reg_pc_flags(reg_index) \
+ if(reg_index == REG_PC) \
+ { \
+ generate_function_call(execute_spsr_restore); \
+ generate_indirect_branch_dual(); \
+ } \
+
+#define generate_shift_imm_lsl_no_flags(arm_reg, _rm, _shift) \
+ check_load_reg_pc(arm_reg, _rm, 8); \
+ if(_shift != 0) \
+ { \
+ mips_emit_sll(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ _rm = arm_reg; \
+ } \
+
+#define generate_shift_imm_lsr_no_flags(arm_reg, _rm, _shift) \
+ if(_shift != 0) \
+ { \
+ check_load_reg_pc(arm_reg, _rm, 8); \
+ mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ } \
+ else \
+ { \
+ mips_emit_addu(arm_to_mips_reg[arm_reg], reg_zero, reg_zero); \
+ } \
+ _rm = arm_reg \
+
+#define generate_shift_imm_asr_no_flags(arm_reg, _rm, _shift) \
+ check_load_reg_pc(arm_reg, _rm, 8); \
+ if(_shift != 0) \
+ { \
+ mips_emit_sra(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ } \
+ else \
+ { \
+ mips_emit_sra(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 31); \
+ } \
+ _rm = arm_reg \
+
+#define generate_shift_imm_ror_no_flags(arm_reg, _rm, _shift) \
+ check_load_reg_pc(arm_reg, _rm, 8); \
+ if(_shift != 0) \
+ { \
+ mips_emit_rotr(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ } \
+ else \
+ { \
+ mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 1); \
+ mips_emit_ins(arm_to_mips_reg[arm_reg], reg_c_cache, 31, 1); \
+ } \
+ _rm = arm_reg \
+
+#define generate_shift_imm_lsl_flags(arm_reg, _rm, _shift) \
+ check_load_reg_pc(arm_reg, _rm, 8); \
+ if(_shift != 0) \
+ { \
+ mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (32 - _shift), 1); \
+ mips_emit_sll(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ _rm = arm_reg; \
+ } \
+
+#define generate_shift_imm_lsr_flags(arm_reg, _rm, _shift) \
+ check_load_reg_pc(arm_reg, _rm, 8); \
+ if(_shift != 0) \
+ { \
+ mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
+ mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ } \
+ else \
+ { \
+ mips_emit_srl(reg_c_cache, arm_to_mips_reg[_rm], 31); \
+ mips_emit_addu(arm_to_mips_reg[arm_reg], reg_zero, reg_zero); \
+ } \
+ _rm = arm_reg \
+
+#define generate_shift_imm_asr_flags(arm_reg, _rm, _shift) \
+ check_load_reg_pc(arm_reg, _rm, 8); \
+ if(_shift != 0) \
+ { \
+ mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
+ mips_emit_sra(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ } \
+ else \
+ { \
+ mips_emit_sra(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 31); \
+ mips_emit_andi(reg_c_cache, arm_to_mips_reg[arm_reg], 1); \
+ } \
+ _rm = arm_reg \
+
+#define generate_shift_imm_ror_flags(arm_reg, _rm, _shift) \
+ check_load_reg_pc(arm_reg, _rm, 8); \
+ if(_shift != 0) \
+ { \
+ mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
+ mips_emit_rotr(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ } \
+ else \
+ { \
+ mips_emit_andi(reg_temp, arm_to_mips_reg[_rm], 1); \
+ mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 1); \
+ mips_emit_ins(arm_to_mips_reg[arm_reg], reg_c_cache, 31, 1); \
+ mips_emit_addu(reg_c_cache, reg_temp, reg_zero); \
+ } \
+ _rm = arm_reg \
+
+#define generate_shift_reg_lsl_no_flags(_rm, _rs) \
+ mips_emit_sltiu(reg_temp, arm_to_mips_reg[_rs], 32); \
+ mips_emit_sllv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]); \
+ mips_emit_movz(reg_a0, reg_zero, reg_temp) \
+
+#define generate_shift_reg_lsr_no_flags(_rm, _rs) \
+ mips_emit_sltiu(reg_temp, arm_to_mips_reg[_rs], 32); \
+ mips_emit_srlv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]); \
+ mips_emit_movz(reg_a0, reg_zero, reg_temp) \
+
+#define generate_shift_reg_asr_no_flags(_rm, _rs) \
+ mips_emit_sltiu(reg_temp, arm_to_mips_reg[_rs], 32); \
+ mips_emit_b(bne, reg_temp, reg_zero, 2); \
+ mips_emit_srav(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]); \
+ mips_emit_sra(reg_a0, reg_a0, 31) \
+
+#define generate_shift_reg_ror_no_flags(_rm, _rs) \
+ mips_emit_rotrv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]) \
+
+#define generate_shift_reg_lsl_flags(_rm, _rs) \
+ generate_load_reg_pc(reg_a0, _rm, 12); \
+ generate_load_reg_pc(reg_a1, _rs, 8); \
+ generate_function_call_swap_delay(execute_lsl_flags_reg) \
+
+// LSR by register with flag update: rm is loaded into a0 (PC reads as
+// pc + 12) and rs into a1 (PC reads as pc + 8), then execute_lsr_flags_reg
+// is called with the last emitted load moved behind the jal. Statements are
+// separated explicitly, matching generate_shift_reg_lsl_flags.
+#define generate_shift_reg_lsr_flags(_rm, _rs) \
+  generate_load_reg_pc(reg_a0, _rm, 12); \
+  generate_load_reg_pc(reg_a1, _rs, 8); \
+  generate_function_call_swap_delay(execute_lsr_flags_reg) \
+
+// ASR by register with flag update; same argument setup as the LSR variant,
+// calling execute_asr_flags_reg.
+#define generate_shift_reg_asr_flags(_rm, _rs) \
+  generate_load_reg_pc(reg_a0, _rm, 12); \
+  generate_load_reg_pc(reg_a1, _rs, 8); \
+  generate_function_call_swap_delay(execute_asr_flags_reg) \
+
+#define generate_shift_reg_ror_flags(_rm, _rs) \
+ mips_emit_b(beq, arm_to_mips_reg[_rs], reg_zero, 3); \
+ mips_emit_addiu(reg_temp, arm_to_mips_reg[_rs], -1); \
+ mips_emit_srlv(reg_temp, arm_to_mips_reg[_rm], reg_temp); \
+ mips_emit_andi(reg_c_cache, reg_temp, 1); \
+ mips_emit_rotrv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]) \
+
+#define generate_shift_imm(arm_reg, name, flags_op) \
+ u32 shift = (opcode >> 7) & 0x1F; \
+ generate_shift_imm_##name##_##flags_op(arm_reg, rm, shift) \
+
+#define generate_shift_reg(arm_reg, name, flags_op) \
+ u32 rs = ((opcode >> 8) & 0x0F); \
+ generate_shift_reg_##name##_##flags_op(rm, rs); \
+ rm = arm_reg \
+
+// Made functions due to the macro expansion getting too large.
+// Returns a new rm if it redirects it (which will happen on most of these
+// cases)
+
+#define generate_load_rm_sh_builder(flags_op) \
+u32 generate_load_rm_sh_##flags_op(u32 rm) \
+{ \
+ switch((opcode >> 4) & 0x07) \
+ { \
+ /* LSL imm */ \
+ case 0x0: \
+ { \
+ generate_shift_imm(arm_reg_a0, lsl, flags_op); \
+ break; \
+ } \
+ \
+ /* LSL reg */ \
+ case 0x1: \
+ { \
+ generate_shift_reg(arm_reg_a0, lsl, flags_op); \
+ break; \
+ } \
+ \
+ /* LSR imm */ \
+ case 0x2: \
+ { \
+ generate_shift_imm(arm_reg_a0, lsr, flags_op); \
+ break; \
+ } \
+ \
+ /* LSR reg */ \
+ case 0x3: \
+ { \
+ generate_shift_reg(arm_reg_a0, lsr, flags_op); \
+ break; \
+ } \
+ \
+ /* ASR imm */ \
+ case 0x4: \
+ { \
+ generate_shift_imm(arm_reg_a0, asr, flags_op); \
+ break; \
+ } \
+ \
+ /* ASR reg */ \
+ case 0x5: \
+ { \
+ generate_shift_reg(arm_reg_a0, asr, flags_op); \
+ break; \
+ } \
+ \
+ /* ROR imm */ \
+ case 0x6: \
+ { \
+ generate_shift_imm(arm_reg_a0, ror, flags_op); \
+ break; \
+ } \
+ \
+ /* ROR reg */ \
+ case 0x7: \
+ { \
+ generate_shift_reg(arm_reg_a0, ror, flags_op); \
+ break; \
+ } \
+ } \
+ \
+ return rm; \
+} \
+
+#define read_memory_constant_u8(address) \
+ read_memory8(address) \
+
+#define read_memory_constant_u16(address) \
+ read_memory16(address) \
+
+#define read_memory_constant_u32(address) \
+ read_memory32(address) \
+
+#define read_memory_constant_s8(address) \
+ (s8)read_memory8(address) \
+
+#define read_memory_constant_s16(address) \
+ (s16)read_memory16_signed(address) \
+
+#define generate_load_memory_u8(ireg, offset) \
+ mips_emit_lbu(ireg, ireg, offset) \
+
+#define generate_load_memory_u16(ireg, offset) \
+ mips_emit_lhu(ireg, ireg, offset) \
+
+#define generate_load_memory_u32(ireg, offset) \
+ mips_emit_lw(ireg, ireg, offset) \
+
+#define generate_load_memory_s8(ireg, offset) \
+ mips_emit_lb(ireg, ireg, offset) \
+
+#define generate_load_memory_s16(ireg, offset) \
+ mips_emit_lh(ireg, ireg, offset) \
+
+// Emits a load of the given type from the constant host address `address`
+// into ireg: lui puts the upper half of the address into ireg and the load
+// instruction supplies the lower half as its 16-bit offset. Load offsets are
+// sign-extended, so the upper half is rounded (+0x8000) to compensate when
+// the lower half is 0x8000 or above.
+#define generate_load_memory(type, ireg, address) \
+{ \
+  u32 _address = (u32)(address); \
+  u32 _address_hi = (_address + 0x8000) >> 16; \
+  mips_emit_lui(ireg, _address_hi); \
+  generate_load_memory_##type(ireg, _address - (_address_hi << 16)); \
+} \
+
+// Builds generate_known_address_load_<type>(rd, address), which emits code
+// loading from a GBA address known at translation time into the MIPS
+// register mapped to rd. The region is selected by the top address byte.
+// Returns 1 when code was emitted and 0 for regions it does not handle
+// (presumably so the caller can fall back to a regular load).
+// Cases that declare a local are wrapped in their own block: a declaration
+// may not directly follow a case label, and the two `imm` locals would
+// otherwise collide in the switch's single scope.
+#define generate_known_address_load_builder(type) \
+  u32 generate_known_address_load_##type(u32 rd, u32 address) \
+  { \
+    switch(address >> 24) \
+    { \
+      /* Read from the BIOS ROM, can be converted to an immediate load. \
+         Only really possible to do this from the BIOS but should be okay \
+         to allow it everywhere */ \
+      case 0x00: \
+      { \
+        u32 imm = read_memory_constant_##type(address); \
+        generate_load_imm(arm_to_mips_reg[rd], imm); \
+        return 1; \
+      } \
+ \
+      /* Read from RAM, can be converted to a load */ \
+      case 0x02: \
+        generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)ewram + \
+         (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000); \
+        return 1; \
+ \
+      case 0x03: \
+        generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)iwram + \
+         (address & 0x7FFF) + 0x8000); \
+        return 1; \
+ \
+      /* Read from gamepak ROM, this has to be an immediate load because \
+         it might not actually be in memory anymore when we get to it. */ \
+      case 0x08: \
+      { \
+        u32 imm = read_memory_constant_##type(address); \
+        generate_load_imm(arm_to_mips_reg[rd], imm); \
+        return 1; \
+      } \
+ \
+      default: \
+        return 0; \
+    } \
+  } \
+
+#define generate_block_extra_vars() \
+ u32 stored_pc = pc; \
+ u8 *update_trampoline \
+
+#define generate_block_extra_vars_arm() \
+ generate_block_extra_vars(); \
+ generate_load_rm_sh_builder(flags); \
+ generate_load_rm_sh_builder(no_flags); \
+ \
+/* generate_known_address_load_builder(u8); \
+ generate_known_address_load_builder(u16); \
+ generate_known_address_load_builder(u32); \
+ generate_known_address_load_builder(s8); \
+ generate_known_address_load_builder(s16); */ \
+ \
+ u32 generate_load_offset_sh(u32 rm) \
+ { \
+ switch((opcode >> 5) & 0x03) \
+ { \
+ /* LSL imm */ \
+ case 0x0: \
+ { \
+ generate_shift_imm(arm_reg_a1, lsl, no_flags); \
+ break; \
+ } \
+ \
+ /* LSR imm */ \
+ case 0x1: \
+ { \
+ generate_shift_imm(arm_reg_a1, lsr, no_flags); \
+ break; \
+ } \
+ \
+ /* ASR imm */ \
+ case 0x2: \
+ { \
+ generate_shift_imm(arm_reg_a1, asr, no_flags); \
+ break; \
+ } \
+ \
+ /* ROR imm */ \
+ case 0x3: \
+ { \
+ generate_shift_imm(arm_reg_a1, ror, no_flags); \
+ break; \
+ } \
+ } \
+ \
+ return rm; \
+ } \
+ \
+ void generate_indirect_branch_arm() \
+ { \
+ if(condition == 0x0E) \
+ { \
+ generate_indirect_branch_cycle_update(arm); \
+ } \
+ else \
+ { \
+ generate_indirect_branch_no_cycle_update(arm); \
+ } \
+ } \
+ \
+ void generate_indirect_branch_dual() \
+ { \
+ if(condition == 0x0E) \
+ { \
+ generate_indirect_branch_cycle_update(dual); \
+ } \
+ else \
+ { \
+ generate_indirect_branch_no_cycle_update(dual); \
+ } \
+ } \
+
+#define generate_block_extra_vars_thumb() \
+ generate_block_extra_vars() \
+
+// It should be okay to still generate result flags, spsr will overwrite them.
+// This is pretty infrequent (returning from interrupt handlers, et al) so
+// probably not worth optimizing for.
+
+// Switches to the CPU mode selected by the low five bits of CPSR. If an
+// interrupt is both enabled and flagged (IE & IF), IME is non-zero and CPSR
+// bit 0x80 is clear, IRQ entry is performed: address + 4 is stored in
+// reg_mode[MODE_IRQ][6], the current CPSR is saved to spsr[MODE_IRQ], CPSR
+// becomes 0xD2 and execution is redirected to 0x18 in IRQ mode.
+// Returns the address to continue at, with bit 0 set when CPSR bit 0x20 is
+// set.
+u32 execute_spsr_restore_body(u32 address)
+{
+  set_cpu_mode(cpu_modes[reg[REG_CPSR] & 0x1F]);
+
+  if(io_registers[REG_IE] & io_registers[REG_IF])
+  {
+    if(io_registers[REG_IME] && !(reg[REG_CPSR] & 0x80))
+    {
+      u32 interrupted_cpsr = reg[REG_CPSR];
+
+      reg_mode[MODE_IRQ][6] = address + 4;
+      spsr[MODE_IRQ] = interrupted_cpsr;
+      reg[REG_CPSR] = 0xD2;
+      address = 0x00000018;
+      set_cpu_mode(MODE_IRQ);
+    }
+  }
+
+  return (reg[REG_CPSR] & 0x20) ? (address | 0x01) : address;
+}
+
+typedef enum
+{
+ CONDITION_TRUE,
+ CONDITION_FALSE,
+ CONDITION_EQUAL,
+ CONDITION_NOT_EQUAL
+} condition_check_type;
+
+
+// Condition prologue for "eq"; the other generate_condition_<cc>() macros
+// follow the same pattern. A conditional branch is emitted with its 16-bit
+// offset left as 0 and its location saved in backpatch_address for later
+// patching; the forced cycle-counter update is emitted right after it. Here
+// the branch is taken when reg_z_cache equals zero -- presumably to skip the
+// conditional instruction when the Z flag is clear (TODO confirm at the
+// backpatch site).
+#define generate_condition_eq() \
+ mips_emit_b_filler(beq, reg_z_cache, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_ne() \
+ mips_emit_b_filler(bne, reg_z_cache, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_cs() \
+ mips_emit_b_filler(beq, reg_c_cache, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_cc() \
+ mips_emit_b_filler(bne, reg_c_cache, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_mi() \
+ mips_emit_b_filler(beq, reg_n_cache, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_pl() \
+ mips_emit_b_filler(bne, reg_n_cache, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_vs() \
+ mips_emit_b_filler(beq, reg_v_cache, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_vc() \
+ mips_emit_b_filler(bne, reg_v_cache, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_hi() \
+ mips_emit_xori(reg_temp, reg_c_cache, 1); \
+ mips_emit_or(reg_temp, reg_temp, reg_z_cache); \
+ mips_emit_b_filler(bne, reg_temp, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_ls() \
+ mips_emit_xori(reg_temp, reg_c_cache, 1); \
+ mips_emit_or(reg_temp, reg_temp, reg_z_cache); \
+ mips_emit_b_filler(beq, reg_temp, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_ge() \
+ mips_emit_b_filler(bne, reg_n_cache, reg_v_cache, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_lt() \
+ mips_emit_b_filler(beq, reg_n_cache, reg_v_cache, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_gt() \
+ mips_emit_xor(reg_temp, reg_n_cache, reg_v_cache); \
+ mips_emit_or(reg_temp, reg_temp, reg_z_cache); \
+ mips_emit_b_filler(bne, reg_temp, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+#define generate_condition_le() \
+ mips_emit_xor(reg_temp, reg_n_cache, reg_v_cache); \
+ mips_emit_or(reg_temp, reg_temp, reg_z_cache); \
+ mips_emit_b_filler(beq, reg_temp, reg_zero, backpatch_address); \
+ generate_cycle_update_force() \
+
+// Dispatches on the 4-bit ARM condition field held in `condition` and
+// emits the matching generate_condition_xx() guard. Conditions 0xE and
+// 0xF emit nothing.
+#define generate_condition() \
+ switch(condition) \
+ { \
+ case 0x0: \
+ generate_condition_eq(); \
+ break; \
+ \
+ case 0x1: \
+ generate_condition_ne(); \
+ break; \
+ \
+ case 0x2: \
+ generate_condition_cs(); \
+ break; \
+ \
+ case 0x3: \
+ generate_condition_cc(); \
+ break; \
+ \
+ case 0x4: \
+ generate_condition_mi(); \
+ break; \
+ \
+ case 0x5: \
+ generate_condition_pl(); \
+ break; \
+ \
+ case 0x6: \
+ generate_condition_vs(); \
+ break; \
+ \
+ case 0x7: \
+ generate_condition_vc(); \
+ break; \
+ \
+ case 0x8: \
+ generate_condition_hi(); \
+ break; \
+ \
+ case 0x9: \
+ generate_condition_ls(); \
+ break; \
+ \
+ case 0xA: \
+ generate_condition_ge(); \
+ break; \
+ \
+ case 0xB: \
+ generate_condition_lt(); \
+ break; \
+ \
+ case 0xC: \
+ generate_condition_gt(); \
+ break; \
+ \
+ case 0xD: \
+ generate_condition_le(); \
+ break; \
+ \
+ case 0xE: \
+ break; \
+ \
+ case 0xF: \
+ break; \
+ } \
+
+// Emits a direct branch for the current block exit, using the source and
+// target recorded in block_exits[block_exit_position], then advances
+// block_exit_position. The cycle-updating variant is used only when the
+// instruction is unconditional (condition == 0x0E); otherwise the
+// no-cycle-update variant is used (the condition guards above already
+// force a cycle update).
+#define generate_branch() \
+{ \
+ if(condition == 0x0E) \
+ { \
+ generate_branch_cycle_update( \
+ block_exits[block_exit_position].branch_source, \
+ block_exits[block_exit_position].branch_target); \
+ } \
+ else \
+ { \
+ generate_branch_no_cycle_update( \
+ block_exits[block_exit_position].branch_source, \
+ block_exits[block_exit_position].branch_target); \
+ } \
+ block_exit_position++; \
+} \
+
+// Register-operand ALU emitters that do not update flags. All take MIPS
+// register numbers: _rd = destination, _rn = first operand, _rm = second
+// operand. Several use reg_temp as scratch.
+
+// AND: rd = rn & rm
+#define generate_op_and_reg(_rd, _rn, _rm) \
+ mips_emit_and(_rd, _rn, _rm) \
+
+// ORR: rd = rn | rm
+#define generate_op_orr_reg(_rd, _rn, _rm) \
+ mips_emit_or(_rd, _rn, _rm) \
+
+// EOR: rd = rn ^ rm
+#define generate_op_eor_reg(_rd, _rn, _rm) \
+ mips_emit_xor(_rd, _rn, _rm) \
+
+// BIC: rd = rn & ~rm (nor with reg_zero forms ~rm in reg_temp)
+#define generate_op_bic_reg(_rd, _rn, _rm) \
+ mips_emit_nor(reg_temp, _rm, reg_zero); \
+ mips_emit_and(_rd, _rn, reg_temp) \
+
+// SUB: rd = rn - rm
+#define generate_op_sub_reg(_rd, _rn, _rm) \
+ mips_emit_subu(_rd, _rn, _rm) \
+
+// RSB: rd = rm - rn
+#define generate_op_rsb_reg(_rd, _rn, _rm) \
+ mips_emit_subu(_rd, _rm, _rn) \
+
+// SBC: rd = rn - rm - (C ^ 1)
+#define generate_op_sbc_reg(_rd, _rn, _rm) \
+ mips_emit_subu(_rd, _rn, _rm); \
+ mips_emit_xori(reg_temp, reg_c_cache, 1); \
+ mips_emit_subu(_rd, _rd, reg_temp) \
+
+// RSC: rd = (rm + C - 1) - rn
+#define generate_op_rsc_reg(_rd, _rn, _rm) \
+ mips_emit_addu(reg_temp, _rm, reg_c_cache); \
+ mips_emit_addiu(reg_temp, reg_temp, -1); \
+ mips_emit_subu(_rd, reg_temp, _rn) \
+
+// ADD: rd = rn + rm
+#define generate_op_add_reg(_rd, _rn, _rm) \
+ mips_emit_addu(_rd, _rn, _rm) \
+
+// ADC: rd = rn + (rm + C)
+#define generate_op_adc_reg(_rd, _rn, _rm) \
+ mips_emit_addu(reg_temp, _rm, reg_c_cache); \
+ mips_emit_addu(_rd, _rn, reg_temp) \
+
+// MOV: rd = rm (_rn is unused)
+#define generate_op_mov_reg(_rd, _rn, _rm) \
+ mips_emit_addu(_rd, _rm, reg_zero) \
+
+// MVN: rd = ~rm (_rn is unused)
+#define generate_op_mvn_reg(_rd, _rn, _rm) \
+ mips_emit_nor(_rd, _rm, reg_zero) \
+
+// Implements an immediate-operand op in terms of its register form
+// generate_op_<name>_reg. A nonzero `imm` (taken from the expansion
+// context) is first loaded into reg_a0; a zero immediate uses reg_zero
+// directly and emits no load.
+#define generate_op_imm_wrapper(name, _rd, _rn) \
+ if(imm != 0) \
+ { \
+ generate_load_imm(reg_a0, imm); \
+ generate_op_##name##_reg(_rd, _rn, reg_a0); \
+ } \
+ else \
+ { \
+ generate_op_##name##_reg(_rd, _rn, reg_zero); \
+ } \
+
+// Immediate-operand ALU emitters that do not update flags. `imm` comes
+// from the expansion context. The logical ops and add/sub go through
+// generate_alu_immu / generate_alu_imm, which are given both an
+// immediate-form and a register-form MIPS opcode (presumably selecting
+// between them by immediate size - defined elsewhere).
+
+// AND: rd = rn & imm
+#define generate_op_and_imm(_rd, _rn) \
+ generate_alu_immu(andi, and, _rd, _rn, imm) \
+
+// ORR: rd = rn | imm
+#define generate_op_orr_imm(_rd, _rn) \
+ generate_alu_immu(ori, or, _rd, _rn, imm) \
+
+// EOR: rd = rn ^ imm
+#define generate_op_eor_imm(_rd, _rn) \
+ generate_alu_immu(xori, xor, _rd, _rn, imm) \
+
+// BIC: rd = rn & ~imm
+#define generate_op_bic_imm(_rd, _rn) \
+ generate_alu_immu(andi, and, _rd, _rn, (~imm)) \
+
+// SUB: rd = rn + (-imm)
+#define generate_op_sub_imm(_rd, _rn) \
+ generate_alu_imm(addiu, addu, _rd, _rn, (-imm)) \
+
+// RSB: rd = imm - rn; a zero immediate subtracts from reg_zero.
+#define generate_op_rsb_imm(_rd, _rn) \
+ if(imm != 0) \
+ { \
+ generate_load_imm(reg_temp, imm); \
+ mips_emit_subu(_rd, reg_temp, _rn); \
+ } \
+ else \
+ { \
+ mips_emit_subu(_rd, reg_zero, _rn); \
+ } \
+
+// SBC: register form with the immediate in reg_a0.
+#define generate_op_sbc_imm(_rd, _rn) \
+ generate_op_imm_wrapper(sbc, _rd, _rn) \
+
+// RSC: register form with the immediate in reg_a0.
+#define generate_op_rsc_imm(_rd, _rn) \
+ generate_op_imm_wrapper(rsc, _rd, _rn) \
+
+// ADD: rd = rn + imm
+#define generate_op_add_imm(_rd, _rn) \
+ generate_alu_imm(addiu, addu, _rd, _rn, imm) \
+
+// ADC: register form with the immediate in reg_a0.
+#define generate_op_adc_imm(_rd, _rn) \
+ generate_op_imm_wrapper(adc, _rd, _rn) \
+
+// MOV: rd = imm (_rn is unused)
+#define generate_op_mov_imm(_rd, _rn) \
+ generate_load_imm(_rd, imm) \
+
+// MVN: rd = ~imm (_rn is unused)
+#define generate_op_mvn_imm(_rd, _rn) \
+ generate_load_imm(_rd, (~imm)) \
+
+// Flag-computation helpers. Each flag is emitted only when the matching
+// check_generate_<x>_flag test (defined elsewhere) is true.
+
+// N and Z from a result: N = rd >> 31, Z = (rd < 1 unsigned) = (rd == 0).
+#define generate_op_logic_flags(_rd) \
+ if(check_generate_n_flag) \
+ { \
+ mips_emit_srl(reg_n_cache, _rd, 31); \
+ } \
+ if(check_generate_z_flag) \
+ { \
+ mips_emit_sltiu(reg_z_cache, _rd, 1); \
+ } \
+
+// Run before the subtraction rn - rm, while both operands are intact:
+// C = !(rn < rm) unsigned (no borrow), V is seeded with (rn < rm) signed
+// and completed in the epilogue.
+#define generate_op_sub_flags_prologue(_rn, _rm) \
+ if(check_generate_c_flag) \
+ { \
+ mips_emit_sltu(reg_c_cache, _rn, _rm); \
+ mips_emit_xori(reg_c_cache, reg_c_cache, 1); \
+ } \
+ if(check_generate_v_flag) \
+ { \
+ mips_emit_slt(reg_v_cache, _rn, _rm); \
+ } \
+
+// Run after the subtraction: sets N/Z from the result, then finishes
+// V = (rn < rm signed) ^ N. N is computed here too when V is wanted but
+// generate_op_logic_flags did not emit it.
+#define generate_op_sub_flags_epilogue(_rd) \
+ generate_op_logic_flags(_rd); \
+ if(check_generate_v_flag) \
+ { \
+ if(!check_generate_n_flag) \
+ { \
+ mips_emit_srl(reg_n_cache, _rd, 31); \
+ } \
+ mips_emit_xor(reg_v_cache, reg_v_cache, reg_n_cache); \
+ } \
+
+// Run before the addition rn + rm: saves rn in reg_c_cache (overwriting
+// the cached carry) and seeds V with (rm < 0).
+#define generate_add_flags_prologue(_rn, _rm) \
+ if(check_generate_c_flag | check_generate_v_flag) \
+ { \
+ mips_emit_addu(reg_c_cache, _rn, reg_zero); \
+ } \
+ if(check_generate_v_flag) \
+ { \
+ mips_emit_slt(reg_v_cache, _rm, reg_zero); \
+ } \
+
+// Run after the addition, with the saved rn still in reg_c_cache:
+// V = (rm < 0) ^ (rd < rn signed), C = (rd < rn unsigned), then N/Z.
+// Uses reg_a0 as scratch.
+#define generate_add_flags_epilogue(_rd) \
+ if(check_generate_v_flag) \
+ { \
+ mips_emit_slt(reg_a0, _rd, reg_c_cache); \
+ mips_emit_xor(reg_v_cache, reg_v_cache, reg_a0); \
+ } \
+ if(check_generate_c_flag) \
+ { \
+ mips_emit_sltu(reg_c_cache, _rd, reg_c_cache); \
+ } \
+ generate_op_logic_flags(_rd) \
+
+// Flag-setting register-operand ALU emitters (_rd, _rn, _rm are MIPS
+// register numbers). Logical ops update N/Z only here; arithmetic ops use
+// the sub/add flag prologue and epilogue helpers for C and V as well.
+
+// ANDS: rd = rn & rm
+#define generate_op_ands_reg(_rd, _rn, _rm) \
+ mips_emit_and(_rd, _rn, _rm); \
+ generate_op_logic_flags(_rd) \
+
+// ORRS: rd = rn | rm
+#define generate_op_orrs_reg(_rd, _rn, _rm) \
+ mips_emit_or(_rd, _rn, _rm); \
+ generate_op_logic_flags(_rd) \
+
+// EORS: rd = rn ^ rm
+#define generate_op_eors_reg(_rd, _rn, _rm) \
+ mips_emit_xor(_rd, _rn, _rm); \
+ generate_op_logic_flags(_rd) \
+
+// BICS: rd = rn & ~rm
+#define generate_op_bics_reg(_rd, _rn, _rm) \
+ mips_emit_nor(reg_temp, _rm, reg_zero); \
+ mips_emit_and(_rd, _rn, reg_temp); \
+ generate_op_logic_flags(_rd) \
+
+// SUBS: rd = rn - rm
+#define generate_op_subs_reg(_rd, _rn, _rm) \
+ generate_op_sub_flags_prologue(_rn, _rm); \
+ mips_emit_subu(_rd, _rn, _rm); \
+ generate_op_sub_flags_epilogue(_rd) \
+
+// RSBS: rd = rm - rn
+#define generate_op_rsbs_reg(_rd, _rn, _rm) \
+ generate_op_sub_flags_prologue(_rm, _rn); \
+ mips_emit_subu(_rd, _rm, _rn); \
+ generate_op_sub_flags_epilogue(_rd) \
+
+// SBCS: rd = (rn - rm) - (C ^ 1). The flag prologue only sees the second
+// subtraction (intermediate result minus the inverted carry).
+// NOTE(review): a borrow or overflow produced by the first subu does not
+// appear to reach C/V - confirm whether this approximation is intended.
+#define generate_op_sbcs_reg(_rd, _rn, _rm) \
+ mips_emit_subu(_rd, _rn, _rm); \
+ mips_emit_xori(reg_temp, reg_c_cache, 1); \
+ generate_op_sub_flags_prologue(_rd, reg_temp); \
+ mips_emit_subu(_rd, _rd, reg_temp); \
+ generate_op_sub_flags_epilogue(_rd) \
+
+// RSCS: rd = (rm + C - 1) - rn, flags taken from that final subtraction.
+#define generate_op_rscs_reg(_rd, _rn, _rm) \
+ mips_emit_addu(reg_temp, _rm, reg_c_cache); \
+ mips_emit_addiu(reg_temp, reg_temp, -1); \
+ generate_op_sub_flags_prologue(reg_temp, _rn); \
+ mips_emit_subu(_rd, reg_temp, _rn); \
+ generate_op_sub_flags_epilogue(_rd) \
+
+// ADDS: rd = rn + rm
+#define generate_op_adds_reg(_rd, _rn, _rm) \
+ generate_add_flags_prologue(_rn, _rm); \
+ mips_emit_addu(_rd, _rn, _rm); \
+ generate_add_flags_epilogue(_rd) \
+
+// ADCS: rd = rn + (rm + C). rm + C is formed first because the add
+// prologue overwrites reg_c_cache.
+// NOTE(review): C is derived as (rd < rn unsigned); when rm + C itself
+// wraps to 0 that carry looks like it is lost - confirm.
+#define generate_op_adcs_reg(_rd, _rn, _rm) \
+ mips_emit_addu(reg_temp, _rm, reg_c_cache); \
+ generate_add_flags_prologue(_rn, _rm); \
+ mips_emit_addu(_rd, _rn, reg_temp); \
+ generate_add_flags_epilogue(_rd) \
+
+// MOVS: rd = rm (_rn is unused)
+#define generate_op_movs_reg(_rd, _rn, _rm) \
+ mips_emit_addu(_rd, _rm, reg_zero); \
+ generate_op_logic_flags(_rd) \
+
+// MVNS: rd = ~rm (_rn is unused)
+#define generate_op_mvns_reg(_rd, _rn, _rm) \
+ mips_emit_nor(_rd, _rm, reg_zero); \
+ generate_op_logic_flags(_rd) \
+
+// NEG: rd = 0 - rm with flags (_rn is unused)
+#define generate_op_neg_reg(_rd, _rn, _rm) \
+ generate_op_subs_reg(_rd, reg_zero, _rm) \
+
+// MULS: rd = low 32 bits of rn * rm, N/Z updated
+#define generate_op_muls_reg(_rd, _rn, _rm) \
+ mips_emit_multu(_rn, _rm); \
+ mips_emit_mflo(_rd); \
+ generate_op_logic_flags(_rd) \
+
+// CMP / CMN / TST / TEQ: same as SUBS / ADDS / ANDS / EORS but the result
+// goes to reg_temp, so only the flags are kept (_rd is unused).
+#define generate_op_cmp_reg(_rd, _rn, _rm) \
+ generate_op_subs_reg(reg_temp, _rn, _rm) \
+
+#define generate_op_cmn_reg(_rd, _rn, _rm) \
+ generate_op_adds_reg(reg_temp, _rn, _rm) \
+
+#define generate_op_tst_reg(_rd, _rn, _rm) \
+ generate_op_ands_reg(reg_temp, _rn, _rm) \
+
+#define generate_op_teq_reg(_rd, _rn, _rm) \
+ generate_op_eors_reg(reg_temp, _rn, _rm) \
+
+// Flag-setting immediate-operand ALU emitters. `imm` comes from the
+// expansion context. Logical ops emit the immediate op and then N/Z;
+// arithmetic and compare ops reuse the register forms through
+// generate_op_imm_wrapper.
+
+// ANDS: rd = rn & imm
+#define generate_op_ands_imm(_rd, _rn) \
+ generate_alu_immu(andi, and, _rd, _rn, imm); \
+ generate_op_logic_flags(_rd) \
+
+// ORRS: rd = rn | imm
+#define generate_op_orrs_imm(_rd, _rn) \
+ generate_alu_immu(ori, or, _rd, _rn, imm); \
+ generate_op_logic_flags(_rd) \
+
+// EORS: rd = rn ^ imm
+#define generate_op_eors_imm(_rd, _rn) \
+ generate_alu_immu(xori, xor, _rd, _rn, imm); \
+ generate_op_logic_flags(_rd) \
+
+// BICS: rd = rn & ~imm
+#define generate_op_bics_imm(_rd, _rn) \
+ generate_alu_immu(andi, and, _rd, _rn, (~imm)); \
+ generate_op_logic_flags(_rd) \
+
+// SUBS / RSBS / SBCS / RSCS / ADDS / ADCS: register forms applied to the
+// immediate (loaded into reg_a0, or reg_zero when imm == 0).
+#define generate_op_subs_imm(_rd, _rn) \
+ generate_op_imm_wrapper(subs, _rd, _rn) \
+
+#define generate_op_rsbs_imm(_rd, _rn) \
+ generate_op_imm_wrapper(rsbs, _rd, _rn) \
+
+#define generate_op_sbcs_imm(_rd, _rn) \
+ generate_op_imm_wrapper(sbcs, _rd, _rn) \
+
+#define generate_op_rscs_imm(_rd, _rn) \
+ generate_op_imm_wrapper(rscs, _rd, _rn) \
+
+#define generate_op_adds_imm(_rd, _rn) \
+ generate_op_imm_wrapper(adds, _rd, _rn) \
+
+#define generate_op_adcs_imm(_rd, _rn) \
+ generate_op_imm_wrapper(adcs, _rd, _rn) \
+
+// MOVS: rd = imm, N/Z updated (_rn is unused)
+#define generate_op_movs_imm(_rd, _rn) \
+ generate_load_imm(_rd, imm); \
+ generate_op_logic_flags(_rd) \
+
+// MVNS: rd = ~imm, N/Z updated (_rn is unused)
+#define generate_op_mvns_imm(_rd, _rn) \
+ generate_load_imm(_rd, (~imm)); \
+ generate_op_logic_flags(_rd) \
+
+// CMP / CMN: register compare forms applied to the immediate.
+#define generate_op_cmp_imm(_rd, _rn) \
+ generate_op_imm_wrapper(cmp, _rd, _rn) \
+
+#define generate_op_cmn_imm(_rd, _rn) \
+ generate_op_imm_wrapper(cmn, _rd, _rn) \
+
+// TST / TEQ: ANDS / EORS with the result discarded into reg_temp.
+#define generate_op_tst_imm(_rd, _rn) \
+ generate_op_ands_imm(reg_temp, _rn) \
+
+#define generate_op_teq_imm(_rd, _rn) \
+ generate_op_eors_imm(reg_temp, _rn) \
+
+// Operand-fetch hooks selected by a yes/no suffix.
+
+// Loads rn into reg_a1 through generate_load_reg_pc with offset 8
+// (presumably substituting pc + 8 when rn is the PC - defined elsewhere).
+#define arm_generate_op_load_yes() \
+ generate_load_reg_pc(reg_a1, rn, 8) \
+
+// No operand load.
+#define arm_generate_op_load_no() \
+
+// Applies check_load_reg_pc to rn with arm_reg_a1 and offset 8
+// (presumably handling rn == PC - defined elsewhere). This is the hook
+// the arm_generate_op_* macros select via arm_op_check_##load_op.
+#define arm_op_check_yes() \
+ check_load_reg_pc(arm_reg_a1, rn, 8) \
+
+// No PC check (used when rn is not read).
+#define arm_op_check_no() \
+
+// Decode-and-emit drivers for ARM data-processing instructions. `name`
+// selects the generate_op_<name>_reg / _imm emitter and `load_op`
+// (yes/no) selects arm_op_check_<load_op>. ARM register numbers are
+// mapped to MIPS registers through arm_to_mips_reg[].
+
+// Register operand, flag-setting: the shifted rm is produced by the
+// _flags shifter variant only when the C flag is wanted, otherwise by the
+// _no_flags variant. rm is reassigned to whatever the shifter returns.
+#define arm_generate_op_reg_flags(name, load_op) \
+ arm_decode_data_proc_reg(); \
+ if(check_generate_c_flag) \
+ { \
+ rm = generate_load_rm_sh_flags(rm); \
+ } \
+ else \
+ { \
+ rm = generate_load_rm_sh_no_flags(rm); \
+ } \
+ \
+ arm_op_check_##load_op(); \
+ generate_op_##name##_reg(arm_to_mips_reg[rd], arm_to_mips_reg[rn], \
+ arm_to_mips_reg[rm]) \
+
+// Register operand, no flags: always uses the _no_flags shifter.
+#define arm_generate_op_reg(name, load_op) \
+ arm_decode_data_proc_reg(); \
+ rm = generate_load_rm_sh_no_flags(rm); \
+ arm_op_check_##load_op(); \
+ generate_op_##name##_reg(arm_to_mips_reg[rd], arm_to_mips_reg[rn], \
+ arm_to_mips_reg[rm]) \
+
+// Immediate operand.
+#define arm_generate_op_imm(name, load_op) \
+ arm_decode_data_proc_imm(); \
+ arm_op_check_##load_op(); \
+ generate_op_##name##_imm(arm_to_mips_reg[rd], arm_to_mips_reg[rn]) \
+
+// Top-level ARM data-processing entry points. `type` picks the
+// arm_generate_op_<type> driver; `flags_op` picks the
+// check_store_reg_pc_<flags_op> hook applied to rd afterwards (presumably
+// handling writes to the PC - defined elsewhere).
+
+// Two-operand op (rn is read, so the PC check on rn is enabled).
+#define arm_data_proc(name, type, flags_op) \
+{ \
+ arm_generate_op_##type(name, yes); \
+ check_store_reg_pc_##flags_op(rd); \
+} \
+
+// Compare/test op: no check_store_reg_pc hook is applied.
+#define arm_data_proc_test(name, type) \
+{ \
+ arm_generate_op_##type(name, yes); \
+} \
+
+// Unary op (MOV/MVN style): rn is not read, so its PC check is skipped.
+#define arm_data_proc_unary(name, type, flags_op) \
+{ \
+ arm_generate_op_##type(name, no); \
+ check_store_reg_pc_##flags_op(rd); \
+} \
+
+// 32-bit multiply (MUL / MLA) helpers.
+
+// Flag-setting variant: N/Z from the result.
+#define arm_multiply_flags_yes(_rd) \
+ generate_op_logic_flags(_rd) \
+
+// Non-flag-setting variant: emits nothing.
+#define arm_multiply_flags_no(_rd) \
+
+// MUL: rd = LO
+#define arm_multiply_add_no() \
+ mips_emit_mflo(arm_to_mips_reg[rd]) \
+
+// MLA: rd = LO + rn (LO is staged in reg_temp)
+#define arm_multiply_add_yes() \
+ mips_emit_mflo(reg_temp); \
+ mips_emit_addu(arm_to_mips_reg[rd], reg_temp, arm_to_mips_reg[rn]) \
+
+// Decodes the instruction, emits multu rm, rs, then the accumulate and
+// flag variants selected by add_op and flags (each yes/no).
+#define arm_multiply(add_op, flags) \
+{ \
+ arm_decode_multiply(); \
+ mips_emit_multu(arm_to_mips_reg[rm], arm_to_mips_reg[rs]); \
+ arm_multiply_add_##add_op(); \
+ arm_multiply_flags_##flags(arm_to_mips_reg[rd]); \
+} \
+
+// 64-bit multiply (long multiply / multiply-accumulate) helpers.
+
+// Flags for a 64-bit result: Z = (rdlo == 0) & (rdhi == 0), N = bit 31 of
+// rdhi. Uses reg_a0 as scratch.
+#define arm_multiply_long_flags_yes(_rdlo, _rdhi) \
+ mips_emit_sltiu(reg_z_cache, _rdlo, 1); \
+ mips_emit_sltiu(reg_a0, _rdhi, 1); \
+ mips_emit_and(reg_z_cache, reg_z_cache, reg_a0); \
+ mips_emit_srl(reg_n_cache, _rdhi, 31); \
+
+// Non-flag-setting variant: emits nothing.
+#define arm_multiply_long_flags_no(_rdlo, _rdhi) \
+
+// Accumulating form: preloads LO/HI with rdlo/rdhi before invoking
+// generate_multiply_<name> (presumably a multiply-accumulate emitter -
+// defined elsewhere).
+#define arm_multiply_long_add_yes(name) \
+ mips_emit_mtlo(arm_to_mips_reg[rdlo]); \
+ mips_emit_mthi(arm_to_mips_reg[rdhi]); \
+ generate_multiply_##name() \
+
+// Non-accumulating form.
+#define arm_multiply_long_add_no(name) \
+ generate_multiply_##name() \
+
+// Decodes the instruction, emits the multiply, copies LO/HI back to
+// rdlo/rdhi and then emits the selected flag variant.
+#define arm_multiply_long(name, add_op, flags) \
+{ \
+ arm_decode_multiply_long(); \
+ arm_multiply_long_add_##add_op(name); \
+ mips_emit_mflo(arm_to_mips_reg[rdlo]); \
+ mips_emit_mfhi(arm_to_mips_reg[rdhi]); \
+ arm_multiply_long_flags_##flags(arm_to_mips_reg[rdlo], \
+ arm_to_mips_reg[rdhi]); \
+} \
+
+// PSR read: calls execute_read_<psr_reg> and stores its return value
+// (reg_rv) into rd. op_type is unused here.
+#define arm_psr_read(op_type, psr_reg) \
+ generate_function_call(execute_read_##psr_reg); \
+ generate_store_reg(reg_rv, rd) \
+
+u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
+{
+  // The new CPSR value is always committed first.
+  reg[REG_CPSR] = _cpsr;
+
+  // Mode switching and the interrupt check only happen when the write
+  // covers the low byte of the CPSR.
+  if((store_mask & 0xFF) == 0)
+    return 0;
+
+  set_cpu_mode(cpu_modes[_cpsr & 0x1F]);
+
+  // No interrupt is taken unless one is enabled and requested (IE & IF),
+  // the master enable (IME) is set and bit 0x80 of the new CPSR is clear.
+  if(!(io_registers[REG_IE] & io_registers[REG_IF]))
+    return 0;
+  if(!io_registers[REG_IME])
+    return 0;
+  if(_cpsr & 0x80)
+    return 0;
+
+  // Enter the IRQ handler: record the return address and the old CPSR,
+  // then switch to MODE_IRQ. A nonzero return value is the address the
+  // caller should continue at.
+  reg_mode[MODE_IRQ][6] = address + 4;
+  spsr[MODE_IRQ] = _cpsr;
+  reg[REG_CPSR] = 0xD2;
+  set_cpu_mode(MODE_IRQ);
+
+  return 0x00000018;
+}
+
+// PSR write helpers. The new PSR value is placed in reg_a0, either from
+// register rm or from the immediate `imm`.
+#define arm_psr_load_new_reg() \
+ generate_load_reg(reg_a0, rm) \
+
+#define arm_psr_load_new_imm() \
+ generate_load_imm(reg_a0, imm) \
+
+// PSR write: a0 = new value, a1 = psr_masks[psr_field], a2 = pc + 4, then
+// calls execute_store_<psr_reg> through generate_function_call_swap_delay
+// (presumably moving the preceding instruction into the call's delay
+// slot - defined elsewhere).
+#define arm_psr_store(op_type, psr_reg) \
+ arm_psr_load_new_##op_type(); \
+ generate_load_imm(reg_a1, psr_masks[psr_field]); \
+ generate_load_pc(reg_a2, (pc + 4)); \
+ generate_function_call_swap_delay(execute_store_##psr_reg) \
+
+// Entry point: decodes with arm_decode_psr_<op_type> and dispatches to
+// arm_psr_read or arm_psr_store according to transfer_type.
+#define arm_psr(op_type, transfer_type, psr_reg) \
+{ \
+ arm_decode_psr_##op_type(); \
+ arm_psr_##transfer_type(op_type, psr_reg); \
+} \
+
+// Single-register load: adds 2 to cycle_count, calls
+// execute_load_<mem_type> (the address is expected in reg_a0, set up by
+// the arm_access_memory_* address macros), passes pc + 8 in reg_a1, then
+// stores the result (reg_rv) to rd and applies the PC-write check. The
+// reg_a1 load is emitted right after the jal, so presumably it occupies
+// the jal's delay slot.
+#define arm_access_memory_load(mem_type) \
+ cycle_count += 2; \
+ mips_emit_jal(mips_absolute_offset(execute_load_##mem_type)); \
+ generate_load_pc(reg_a1, (pc + 8)); \
+ generate_store_reg(reg_rv, rd); \
+ check_store_reg_pc_no_flags(rd) \
+
+// Single-register store: adds 1 to cycle_count, loads a2 = pc + 4 and
+// a1 = rd (through generate_load_reg_pc with offset 12, presumably giving
+// pc + 12 when rd is the PC), then calls execute_store_<mem_type>.
+#define arm_access_memory_store(mem_type) \
+ cycle_count++; \
+ generate_load_pc(reg_a2, (pc + 4)); \
+ generate_load_reg_pc(reg_a1, rd, 12); \
+ generate_function_call_swap_delay(execute_store_##mem_type) \
+
+// Address generation for ARM single data transfers. Every variant leaves
+// the access address in reg_a0. "pre" forms compute rn +/- offset first;
+// "pre_wb" forms also write that address back to rn; "post" forms use rn
+// as the address and then update rn by the offset.
+
+// Register offset, pre-indexed: a0 = rn + rm / rn - rm.
+#define arm_access_memory_reg_pre_up() \
+ mips_emit_addu(reg_a0, arm_to_mips_reg[rn], arm_to_mips_reg[rm]) \
+
+#define arm_access_memory_reg_pre_down() \
+ mips_emit_subu(reg_a0, arm_to_mips_reg[rn], arm_to_mips_reg[rm]) \
+
+// check_load_reg_pc is applied to rn first (presumably handling a PC
+// base with value pc + 8 - defined elsewhere).
+#define arm_access_memory_reg_pre(adjust_dir) \
+ check_load_reg_pc(arm_reg_a0, rn, 8); \
+ arm_access_memory_reg_pre_##adjust_dir() \
+
+// Pre-indexed with writeback: rn = a0.
+#define arm_access_memory_reg_pre_wb(adjust_dir) \
+ arm_access_memory_reg_pre(adjust_dir); \
+ generate_store_reg(reg_a0, rn) \
+
+// Register offset, post-indexed: rn = rn + rm / rn - rm.
+#define arm_access_memory_reg_post_up() \
+ mips_emit_addu(arm_to_mips_reg[rn], arm_to_mips_reg[rn], \
+ arm_to_mips_reg[rm]) \
+
+#define arm_access_memory_reg_post_down() \
+ mips_emit_subu(arm_to_mips_reg[rn], arm_to_mips_reg[rn], \
+ arm_to_mips_reg[rm]) \
+
+// a0 = rn (the unmodified base) before rn is adjusted.
+#define arm_access_memory_reg_post(adjust_dir) \
+ generate_load_reg(reg_a0, rn); \
+ arm_access_memory_reg_post_##adjust_dir() \
+
+// Immediate offset, pre-indexed: a0 = rn + offset / rn - offset.
+#define arm_access_memory_imm_pre_up() \
+ mips_emit_addiu(reg_a0, arm_to_mips_reg[rn], offset) \
+
+#define arm_access_memory_imm_pre_down() \
+ mips_emit_addiu(reg_a0, arm_to_mips_reg[rn], -offset) \
+
+#define arm_access_memory_imm_pre(adjust_dir) \
+ check_load_reg_pc(arm_reg_a0, rn, 8); \
+ arm_access_memory_imm_pre_##adjust_dir() \
+
+// Pre-indexed with writeback: rn = a0.
+#define arm_access_memory_imm_pre_wb(adjust_dir) \
+ arm_access_memory_imm_pre(adjust_dir); \
+ generate_store_reg(reg_a0, rn) \
+
+// Immediate offset, post-indexed: rn = rn + offset / rn - offset.
+#define arm_access_memory_imm_post_up() \
+ mips_emit_addiu(arm_to_mips_reg[rn], arm_to_mips_reg[rn], offset) \
+
+#define arm_access_memory_imm_post_down() \
+ mips_emit_addiu(arm_to_mips_reg[rn], arm_to_mips_reg[rn], -offset) \
+
+// a0 = rn (the unmodified base) before rn is adjusted.
+#define arm_access_memory_imm_post(adjust_dir) \
+ generate_load_reg(reg_a0, rn); \
+ arm_access_memory_imm_post_##adjust_dir() \
+
+// Decode + address-generation drivers for ARM single data transfers.
+// adjust_op is pre / pre_wb / post and adjust_dir is up / down.
+
+// Word/byte transfer with a (shifted) register offset; rm is replaced by
+// the register returned from generate_load_offset_sh.
+#define arm_data_trans_reg(adjust_op, adjust_dir) \
+ arm_decode_data_trans_reg(); \
+ rm = generate_load_offset_sh(rm); \
+ arm_access_memory_reg_##adjust_op(adjust_dir) \
+
+// Word/byte transfer with an immediate offset.
+#define arm_data_trans_imm(adjust_op, adjust_dir) \
+ arm_decode_data_trans_imm(); \
+ arm_access_memory_imm_##adjust_op(adjust_dir) \
+
+// Halfword-style transfer with a register offset (no shift applied).
+#define arm_data_trans_half_reg(adjust_op, adjust_dir) \
+ arm_decode_half_trans_r(); \
+ arm_access_memory_reg_##adjust_op(adjust_dir) \
+
+// Halfword-style transfer with an immediate offset.
+#define arm_data_trans_half_imm(adjust_op, adjust_dir) \
+ arm_decode_half_trans_of(); \
+ arm_access_memory_imm_##adjust_op(adjust_dir) \
+
+// Entry point: generates the address for the given offset_type, then
+// emits the load or store (access_type) for mem_type.
+#define arm_access_memory(access_type, direction, adjust_op, mem_type, \
+ offset_type) \
+{ \
+ arm_data_trans_##offset_type(adjust_op, direction); \
+ arm_access_memory_##access_type(mem_type); \
+} \
+
+// Number of set bits in `word`, computed as the sum of two bit_count[]
+// lookups: one for the bits above the low byte and one for the low byte.
+// Assumes `word` fits in 16 bits so that (word) >> 8 stays inside the
+// table - TODO confirm bit_count[] has 256 entries.
+// The argument is parenthesized so that an expression argument (for
+// example `a | b`) is shifted and masked as a whole.
+#define word_bit_count(word) \
+ (bit_count[(word) >> 8] + bit_count[(word) & 0xFF]) \
+
+// Debug trace hooks selected by the s_bit argument of the block-transfer
+// macros: sprint_no emits nothing, sprint_yes prints (at translation
+// time) the macro arguments as strings.
+#define sprint_no(access_type, pre_op, post_op, wb) \
+
+#define sprint_yes(access_type, pre_op, post_op, wb) \
+ printf("sbit on %s %s %s %s\n", #access_type, #pre_op, #post_op, #wb) \
+
+// Per-register actions for ARM block transfers (LDM/STM). `i` is the ARM
+// register currently being transferred; the generic path expects the
+// address in reg_a0, the stack fast path uses reg_a1 + offset.
+
+// Generic load: call execute_aligned_load32 and store the result to
+// register i.
+#define arm_block_memory_load() \
+ generate_function_call_swap_delay(execute_aligned_load32); \
+ generate_store_reg(reg_rv, i) \
+
+// Generic store: a1 = register i (through generate_load_reg_pc with
+// offset 8), then call execute_aligned_store32.
+#define arm_block_memory_store() \
+ generate_load_reg_pc(reg_a1, i, 8); \
+ generate_function_call_swap_delay(execute_aligned_store32) \
+
+// The last load of a list is emitted like any other load.
+#define arm_block_memory_final_load() \
+ arm_block_memory_load() \
+
+// The last store of a list goes through execute_store_u32 with
+// a2 = pc + 4; the a1 load follows the jal (presumably in its delay
+// slot).
+#define arm_block_memory_final_store() \
+ generate_load_pc(reg_a2, (pc + 4)); \
+ mips_emit_jal(mips_absolute_offset(execute_store_u32)); \
+ generate_load_reg(reg_a1, i) \
+
+// Stores never change the PC: nothing to emit.
+#define arm_block_memory_adjust_pc_store() \
+
+// If the list includes r15 (bit 0x8000), branch to the value just loaded
+// (moved from reg_rv into reg_a0).
+#define arm_block_memory_adjust_pc_load() \
+ if(reg_list & 0x8000) \
+ { \
+ generate_mov(reg_a0, reg_rv); \
+ generate_indirect_branch_arm(); \
+ } \
+
+// Stack fast path load: direct lw from reg_a1 + offset into register i.
+#define arm_block_memory_sp_load() \
+ mips_emit_lw(arm_to_mips_reg[i], reg_a1, offset); \
+
+// Stack fast path store: direct sw of register i to reg_a1 + offset.
+// check_load_reg_pc is applied to a copy of i first (presumably
+// redirecting it when the register is the PC - defined elsewhere).
+#define arm_block_memory_sp_store() \
+{ \
+ u32 store_reg = i; \
+ check_load_reg_pc(arm_reg_a0, store_reg, 8); \
+ mips_emit_sw(arm_to_mips_reg[store_reg], reg_a1, offset); \
+} \
+
+// Stores never change the PC: nothing to emit.
+#define arm_block_memory_sp_adjust_pc_store() \
+
+// If the list includes r15 (bit 0x8000), emit an indirect ARM branch.
+#define arm_block_memory_sp_adjust_pc_load() \
+ if(reg_list & 0x8000) \
+ { \
+ generate_indirect_branch_arm(); \
+ } \
+
+// Start-address and writeback helpers for arm_block_memory. base_reg is
+// the MIPS register holding rn; the start address goes to reg_a2.
+// n below stands for word_bit_count(reg_list), the number of registers
+// in the list.
+
+// a2 = base - (4n - 4)
+#define arm_block_memory_offset_down_a() \
+ mips_emit_addiu(reg_a2, base_reg, (-((word_bit_count(reg_list) * 4) - 4))) \
+
+// a2 = base - 4n
+#define arm_block_memory_offset_down_b() \
+ mips_emit_addiu(reg_a2, base_reg, (word_bit_count(reg_list) * -4)) \
+
+// a2 = base
+#define arm_block_memory_offset_no() \
+ mips_emit_addu(reg_a2, base_reg, reg_zero) \
+
+// a2 = base + 4
+#define arm_block_memory_offset_up() \
+ mips_emit_addiu(reg_a2, base_reg, 4) \
+
+// base = base - 4n
+#define arm_block_memory_writeback_down() \
+ mips_emit_addiu(base_reg, base_reg, (-(word_bit_count(reg_list) * 4))) \
+
+// base = base + 4n
+#define arm_block_memory_writeback_up() \
+ mips_emit_addiu(base_reg, base_reg, (word_bit_count(reg_list) * 4)) \
+
+// No writeback.
+#define arm_block_memory_writeback_no()
+
+// Only emit writeback if the register is not in the list
+
+#define arm_block_memory_writeback_load(writeback_type) \
+ if(!((reg_list >> rn) & 0x01)) \
+ { \
+ arm_block_memory_writeback_##writeback_type(); \
+ } \
+
+// Stores always emit the writeback, even when rn is in the list.
+#define arm_block_memory_writeback_store(writeback_type) \
+ arm_block_memory_writeback_##writeback_type() \
+
+// ARM block transfer (LDM/STM) emitter.
+//  access_type    - load / store
+//  offset_type    - down_a / down_b / no / up (start address in reg_a2)
+//  writeback_type - down / up / no
+//  s_bit          - not used by this macro
+// The start address and the base writeback are emitted before any
+// transfer. Two code paths follow:
+//  * rn == REG_SP with iwram_stack_optimize set: the address is masked
+//    with 0x7FFC and added to (iwram + 0x8000), giving a pointer in
+//    reg_a1 through which each listed register is transferred directly.
+//  * otherwise: the low two address bits are cleared in reg_a2 and each
+//    listed register goes through a memory handler call at
+//    reg_a2 + offset. reg_list & ~((2 << i) - 1) is nonzero while a
+//    higher-numbered register remains, so the last register takes the
+//    "final" variant and ends the loop.
+// cycle_count is incremented once per transferred register.
+#define arm_block_memory(access_type, offset_type, writeback_type, s_bit) \
+{ \
+ arm_decode_block_trans(); \
+ u32 i; \
+ u32 offset = 0; \
+ u32 base_reg = arm_to_mips_reg[rn]; \
+ \
+ arm_block_memory_offset_##offset_type(); \
+ arm_block_memory_writeback_##access_type(writeback_type); \
+ \
+ if((rn == REG_SP) && iwram_stack_optimize) \
+ { \
+ mips_emit_andi(reg_a1, reg_a2, 0x7FFC); \
+ generate_load_imm(reg_a0, ((u32)(iwram + 0x8000))); \
+ mips_emit_addu(reg_a1, reg_a1, reg_a0); \
+ \
+ for(i = 0; i < 16; i++) \
+ { \
+ if((reg_list >> i) & 0x01) \
+ { \
+ cycle_count++; \
+ arm_block_memory_sp_##access_type(); \
+ offset += 4; \
+ } \
+ } \
+ \
+ arm_block_memory_sp_adjust_pc_##access_type(); \
+ } \
+ else \
+ { \
+ mips_emit_ins(reg_a2, reg_zero, 0, 2); \
+ \
+ for(i = 0; i < 16; i++) \
+ { \
+ if((reg_list >> i) & 0x01) \
+ { \
+ cycle_count++; \
+ mips_emit_addiu(reg_a0, reg_a2, offset); \
+ if(reg_list & ~((2 << i) - 1)) \
+ { \
+ arm_block_memory_##access_type(); \
+ offset += 4; \
+ } \
+ else \
+ { \
+ arm_block_memory_final_##access_type(); \
+ break; \
+ } \
+ } \
+ } \
+ \
+ arm_block_memory_adjust_pc_##access_type(); \
+ } \
+} \
+
+// Address helpers used by old_arm_block_memory. n below stands for
+// word_bit_count(reg_list); the start address always goes to reg_a2.
+
+// No writeback.
+#define arm_block_writeback_no()
+
+// Writeback: rn = a2.
+#define arm_block_writeback_yes() \
+ mips_emit_addu(arm_to_mips_reg[rn], reg_a2, reg_zero) \
+
+// a2 = rn + 4n, optional writeback
+#define arm_block_address_preadjust_up_full(wb) \
+ mips_emit_addiu(reg_a2, arm_to_mips_reg[rn], \
+ ((word_bit_count(reg_list)) * 4)); \
+ arm_block_writeback_##wb() \
+
+// a2 = rn + 4, optional writeback
+#define arm_block_address_preadjust_up(wb) \
+ mips_emit_addiu(reg_a2, arm_to_mips_reg[rn], 4); \
+ arm_block_writeback_##wb() \
+
+// a2 = rn - 4n, optional writeback
+#define arm_block_address_preadjust_down_full(wb) \
+ mips_emit_addiu(reg_a2, arm_to_mips_reg[rn], \
+ -((word_bit_count(reg_list)) * 4)); \
+ arm_block_writeback_##wb() \
+
+// a2 = rn - (4n - 4), optional writeback
+#define arm_block_address_preadjust_down(wb) \
+ mips_emit_addiu(reg_a2, arm_to_mips_reg[rn], \
+ -(((word_bit_count(reg_list)) * 4) - 4)); \
+ arm_block_writeback_##wb()
+
+// a2 = rn; the wb argument is ignored.
+#define arm_block_address_preadjust_no(wb) \
+ mips_emit_addu(reg_a2, arm_to_mips_reg[rn], reg_zero) \
+
+// No post-adjustment.
+#define arm_block_address_postadjust_no() \
+
+// rn = a2 + 4n
+#define arm_block_address_postadjust_up() \
+ mips_emit_addiu(arm_to_mips_reg[rn], reg_a2, \
+ ((word_bit_count(reg_list)) * 4)) \
+
+// rn = a2 - 4n
+#define arm_block_address_postadjust_down() \
+ mips_emit_addiu(arm_to_mips_reg[rn], reg_a2, \
+ -((word_bit_count(reg_list)) * 4)) \
+
+// NOTE(review): the definitions from here down to
+// arm_block_memory_sp_adjust_pc_load repeat, apparently token for token,
+// the sprint_* and arm_block_memory_* macros defined earlier in this file
+// (before arm_block_memory). Identical redefinitions are accepted by the
+// preprocessor, but the two copies must be kept in sync - confirm whether
+// this second copy is still needed.
+#define sprint_no(access_type, pre_op, post_op, wb) \
+
+#define sprint_yes(access_type, pre_op, post_op, wb) \
+ printf("sbit on %s %s %s %s\n", #access_type, #pre_op, #post_op, #wb) \
+
+// Generic load: call execute_aligned_load32, store the result to
+// register i.
+#define arm_block_memory_load() \
+ generate_function_call_swap_delay(execute_aligned_load32); \
+ generate_store_reg(reg_rv, i) \
+
+// Generic store: a1 = register i, then call execute_aligned_store32.
+#define arm_block_memory_store() \
+ generate_load_reg_pc(reg_a1, i, 8); \
+ generate_function_call_swap_delay(execute_aligned_store32) \
+
+#define arm_block_memory_final_load() \
+ arm_block_memory_load() \
+
+// Last store of a list: execute_store_u32 with a2 = pc + 4.
+#define arm_block_memory_final_store() \
+ generate_load_pc(reg_a2, (pc + 4)); \
+ mips_emit_jal(mips_absolute_offset(execute_store_u32)); \
+ generate_load_reg(reg_a1, i) \
+
+#define arm_block_memory_adjust_pc_store() \
+
+// Branch to the loaded value when r15 (bit 0x8000) is in the list.
+#define arm_block_memory_adjust_pc_load() \
+ if(reg_list & 0x8000) \
+ { \
+ generate_mov(reg_a0, reg_rv); \
+ generate_indirect_branch_arm(); \
+ } \
+
+// Stack fast path: direct lw / sw through reg_a1 + offset.
+#define arm_block_memory_sp_load() \
+ mips_emit_lw(arm_to_mips_reg[i], reg_a1, offset); \
+
+#define arm_block_memory_sp_store() \
+{ \
+ u32 store_reg = i; \
+ check_load_reg_pc(arm_reg_a0, store_reg, 8); \
+ mips_emit_sw(arm_to_mips_reg[store_reg], reg_a1, offset); \
+} \
+
+#define arm_block_memory_sp_adjust_pc_store() \
+
+#define arm_block_memory_sp_adjust_pc_load() \
+ if(reg_list & 0x8000) \
+ { \
+ generate_indirect_branch_arm(); \
+ } \
+
+// Older form of the ARM block transfer emitter, parameterized by separate
+// pre/post address adjustments (pre_op, post_op), a writeback flag (wb)
+// and s_bit, which selects the sprint_<s_bit> trace hook. The transfer
+// loops are the same as in arm_block_memory: a direct lw/sw path through
+// reg_a1 when rn == REG_SP and iwram_stack_optimize is set, otherwise one
+// memory handler call per listed register at reg_a2 + offset, with the
+// last register using the "final" variant. base_reg is declared here but
+// not referenced by the visible helper macros.
+#define old_arm_block_memory(access_type, pre_op, post_op, wb, s_bit) \
+{ \
+ arm_decode_block_trans(); \
+ u32 i; \
+ u32 offset = 0; \
+ u32 base_reg = arm_to_mips_reg[rn]; \
+ \
+ arm_block_address_preadjust_##pre_op(wb); \
+ arm_block_address_postadjust_##post_op(); \
+ \
+ sprint_##s_bit(access_type, pre_op, post_op, wb); \
+ \
+ if((rn == REG_SP) && iwram_stack_optimize) \
+ { \
+ mips_emit_andi(reg_a1, reg_a2, 0x7FFC); \
+ generate_load_imm(reg_a0, ((u32)(iwram + 0x8000))); \
+ mips_emit_addu(reg_a1, reg_a1, reg_a0); \
+ \
+ for(i = 0; i < 16; i++) \
+ { \
+ if((reg_list >> i) & 0x01) \
+ { \
+ cycle_count++; \
+ arm_block_memory_sp_##access_type(); \
+ offset += 4; \
+ } \
+ } \
+ \
+ arm_block_memory_sp_adjust_pc_##access_type(); \
+ } \
+ else \
+ { \
+ mips_emit_ins(reg_a2, reg_zero, 0, 2); \
+ \
+ for(i = 0; i < 16; i++) \
+ { \
+ if((reg_list >> i) & 0x01) \
+ { \
+ cycle_count++; \
+ mips_emit_addiu(reg_a0, reg_a2, offset); \
+ if(reg_list & ~((2 << i) - 1)) \
+ { \
+ arm_block_memory_##access_type(); \
+ offset += 4; \
+ } \
+ else \
+ { \
+ arm_block_memory_final_##access_type(); \
+ break; \
+ } \
+ } \
+ } \
+ \
+ arm_block_memory_adjust_pc_##access_type(); \
+ } \
+}
+
+
+
+// This isn't really a correct implementation, may have to fix later.
+//
+// SWP emitter: adds 3 to cycle_count, loads from the address in rn via
+// execute_load_<type>, keeps the loaded value in reg_a2, stores rm to the
+// same address via execute_store_<type>, then writes the kept value to
+// rd. The generate_load_reg after each jal is emitted right behind it
+// (presumably in the delay slot).
+// NOTE(review): no pc argument is set up for either handler, and reg_a2
+// carries the loaded value across the store call - confirm what the
+// handlers expect in reg_a1/reg_a2 and whether they preserve reg_a2.
+
+#define arm_swap(type) \
+{ \
+ arm_decode_swap(); \
+ cycle_count += 3; \
+ mips_emit_jal(mips_absolute_offset(execute_load_##type)); \
+ generate_load_reg(reg_a0, rn); \
+ generate_mov(reg_a2, reg_rv); \
+ generate_load_reg(reg_a0, rn); \
+ mips_emit_jal(mips_absolute_offset(execute_store_##type)); \
+ generate_load_reg(reg_a1, rm); \
+ generate_store_reg(reg_a2, rd); \
+} \
+
+// Thumb operand hooks and op drivers.
+
+// Loads ARM register _rs into reg_a1.
+#define thumb_generate_op_load_yes(_rs) \
+ generate_load_reg(reg_a1, _rs) \
+
+// No operand load.
+#define thumb_generate_op_load_no(_rs) \
+
+// Register form: maps the three ARM register numbers to MIPS registers
+// and invokes generate_op_<name>_reg.
+#define thumb_generate_op_reg(name, _rd, _rs, _rn) \
+ generate_op_##name##_reg(arm_to_mips_reg[_rd], \
+ arm_to_mips_reg[_rs], arm_to_mips_reg[_rn]) \
+
+// Immediate form: invokes generate_op_<name>_imm; _rn is unused and the
+// immediate is taken from `imm` in the expansion context.
+#define thumb_generate_op_imm(name, _rd, _rs, _rn) \
+ generate_op_##name##_imm(arm_to_mips_reg[_rd], arm_to_mips_reg[_rs]) \
+
+// Types: add_sub, add_sub_imm, alu_op, imm
+// Affects N/Z/C/V flags
+//
+// Each entry point decodes with thumb_decode_<type> and then invokes
+// thumb_generate_op_<rn_type> (reg or imm) for `name`.
+
+#define thumb_data_proc(type, name, rn_type, _rd, _rs, _rn) \
+{ \
+ thumb_decode_##type(); \
+ thumb_generate_op_##rn_type(name, _rd, _rs, _rn); \
+} \
+
+// Test/compare form: 0 is passed in the destination slot; the compare
+// emitters ignore it and write their result to reg_temp.
+#define thumb_data_proc_test(type, name, rn_type, _rs, _rn) \
+{ \
+ thumb_decode_##type(); \
+ thumb_generate_op_##rn_type(name, 0, _rs, _rn); \
+} \
+
+// Unary form: 0 is passed in the first-operand slot, which the unary
+// emitters do not read.
+#define thumb_data_proc_unary(type, name, rn_type, _rd, _rn) \
+{ \
+ thumb_decode_##type(); \
+ thumb_generate_op_##rn_type(name, _rd, 0, _rn); \
+} \
+
+// After a Thumb op that may have written the PC: when _rd is REG_PC, emit
+// an indirect Thumb branch with cycle update.
+#define check_store_reg_pc_thumb(_rd) \
+ if(_rd == REG_PC) \
+ { \
+ generate_indirect_branch_cycle_update(thumb); \
+ } \
+
+// Thumb hi-register op: rd = rd <name> rs. The destination register
+// number is copied to dest_rd before check_load_reg_pc is applied to rs
+// and rd (presumably those calls can redirect rs/rd to a temporary
+// holding pc + 4 - defined elsewhere), so the result and the PC-write
+// check still use the original destination.
+#define thumb_data_proc_hi(name) \
+{ \
+ thumb_decode_hireg_op(); \
+ u32 dest_rd = rd; \
+ check_load_reg_pc(arm_reg_a0, rs, 4); \
+ check_load_reg_pc(arm_reg_a1, rd, 4); \
+ generate_op_##name##_reg(arm_to_mips_reg[dest_rd], arm_to_mips_reg[rd], \
+ arm_to_mips_reg[rs]); \
+ check_store_reg_pc_thumb(dest_rd); \
+} \
+
+/*
+
+#define thumb_data_proc_hi(name) \
+{ \
+ thumb_decode_hireg_op(); \
+ check_load_reg_pc(arm_reg_a0, rs, 4); \
+ check_load_reg_pc(arm_reg_a1, rd, 4); \
+ generate_op_##name##_reg(arm_to_mips_reg[rd], arm_to_mips_reg[rd], \
+ arm_to_mips_reg[rs]); \
+ check_store_reg_pc_thumb(rd); \
+} \
+
+*/
+
+// Thumb hi-register compare: same operand handling as thumb_data_proc_hi
+// but the result goes to reg_temp, so no register is written and no
+// PC-write check is needed.
+#define thumb_data_proc_test_hi(name) \
+{ \
+ thumb_decode_hireg_op(); \
+ check_load_reg_pc(arm_reg_a0, rs, 4); \
+ check_load_reg_pc(arm_reg_a1, rd, 4); \
+ generate_op_##name##_reg(reg_temp, arm_to_mips_reg[rd], \
+ arm_to_mips_reg[rs]); \
+} \
+
+// Thumb hi-register MOV: rd = rs without touching flags, followed by the
+// PC-write check on rd.
+#define thumb_data_proc_mov_hi() \
+{ \
+ thumb_decode_hireg_op(); \
+ check_load_reg_pc(arm_reg_a0, rs, 4); \
+ mips_emit_addu(arm_to_mips_reg[rd], arm_to_mips_reg[rs], reg_zero); \
+ check_store_reg_pc_thumb(rd); \
+} \
+
+// Thumb PC-relative address: rd = ((pc & ~2) + 4) + imm * 4, a value
+// known at translation time and loaded as a constant.
+#define thumb_load_pc(_rd) \
+{ \
+ thumb_decode_imm(); \
+ generate_load_pc(arm_to_mips_reg[_rd], (((pc & ~2) + 4) + (imm * 4))); \
+} \
+
+// Thumb SP-relative address: rd = r13 + imm * 4.
+#define thumb_load_sp(_rd) \
+{ \
+ thumb_decode_imm(); \
+ mips_emit_addiu(arm_to_mips_reg[_rd], reg_r13, (imm * 4)); \
+} \
+
+// Thumb stack pointer adjust: r13 = r13 + value.
+#define thumb_adjust_sp(value) \
+{ \
+ thumb_decode_add_sp(); \
+ mips_emit_addiu(reg_r13, reg_r13, (value)); \
+} \
+
+// Decode types: shift, alu_op
+// Operation types: lsl, lsr, asr, ror
+// Affects N/Z/C flags
+
+// Immediate shift: uses the carry-producing shifter variant only when the
+// C flag is wanted. Afterwards, if rs and rd differ, the MIPS register
+// for rs is copied into rd.
+// NOTE(review): presumably the generate_shift_imm_* macros leave the
+// shifted value in the register that `rs` refers to afterwards - confirm
+// against their definitions.
+#define thumb_generate_shift_imm(name) \
+ if(check_generate_c_flag) \
+ { \
+ generate_shift_imm_##name##_flags(rd, rs, imm); \
+ } \
+ else \
+ { \
+ generate_shift_imm_##name##_no_flags(rd, rs, imm); \
+ } \
+ if(rs != rd) \
+ { \
+ mips_emit_addu(arm_to_mips_reg[rd], arm_to_mips_reg[rs], reg_zero); \
+ } \
+
+// Register-specified shift: the result is copied from reg_a0 into the
+// original destination, whose number is saved up front in original_rd.
+#define thumb_generate_shift_reg(name) \
+{ \
+ u32 original_rd = rd; \
+ if(check_generate_c_flag) \
+ { \
+ generate_shift_reg_##name##_flags(rd, rs); \
+ } \
+ else \
+ { \
+ generate_shift_reg_##name##_no_flags(rd, rs); \
+ } \
+ mips_emit_addu(arm_to_mips_reg[original_rd], reg_a0, reg_zero); \
+} \
+
+// Entry point: decode, emit the shift, then N/Z from rd.
+#define thumb_shift(decode_type, op_type, value_type) \
+{ \
+ thumb_decode_##decode_type(); \
+ thumb_generate_shift_##value_type(op_type); \
+ generate_op_logic_flags(arm_to_mips_reg[rd]); \
+} \
+
+// Operation types: imm, mem_reg, mem_imm
+
+// Thumb load: adds 2 to cycle_count, calls execute_load_<mem_type> with
+// the address in reg_a0 and pc + 4 in reg_a1 (loaded right after the jal,
+// presumably in its delay slot), then stores the result to reg_rd.
+#define thumb_access_memory_load(mem_type, reg_rd) \
+ cycle_count += 2; \
+ mips_emit_jal(mips_absolute_offset(execute_load_##mem_type)); \
+ generate_load_pc(reg_a1, (pc + 4)); \
+ generate_store_reg(reg_rv, reg_rd) \
+
+// Thumb store: adds 1 to cycle_count, a2 = pc + 2, a1 = reg_rd (loaded
+// right after the jal), calls execute_store_<mem_type>.
+#define thumb_access_memory_store(mem_type, reg_rd) \
+ cycle_count++; \
+ generate_load_pc(reg_a2, (pc + 2)); \
+ mips_emit_jal(mips_absolute_offset(execute_store_##mem_type)); \
+ generate_load_reg(reg_a1, reg_rd) \
+
+// Address forms; each leaves the address in reg_a0.
+// PC-relative: a0 = offset (a translation-time constant).
+#define thumb_access_memory_generate_address_pc_relative(offset, reg_rb, \
+ reg_ro) \
+ generate_load_pc(reg_a0, (offset)) \
+
+// Base + immediate: a0 = rb + offset.
+#define thumb_access_memory_generate_address_reg_imm(offset, reg_rb, reg_ro) \
+ mips_emit_addiu(reg_a0, arm_to_mips_reg[reg_rb], (offset)) \
+
+// Base + register: a0 = rb + ro.
+#define thumb_access_memory_generate_address_reg_reg(offset, reg_rb, reg_ro) \
+ mips_emit_addu(reg_a0, arm_to_mips_reg[reg_rb], arm_to_mips_reg[reg_ro]) \
+
+// Entry point: decode, generate the address for address_type, then emit
+// the load or store.
+#define thumb_access_memory(access_type, op_type, reg_rd, reg_rb, reg_ro, \
+ address_type, offset, mem_type) \
+{ \
+ thumb_decode_##op_type(); \
+ thumb_access_memory_generate_address_##address_type(offset, reg_rb, \
+ reg_ro); \
+ thumb_access_memory_##access_type(mem_type, reg_rd); \
+} \
+
+
+// Address helpers for Thumb block transfers. The start address goes to
+// reg_a2; n below stands for bit_count[reg_list].
+
+// a2 = base
+#define thumb_block_address_preadjust_no(base_reg) \
+ mips_emit_addu(reg_a2, arm_to_mips_reg[base_reg], reg_zero) \
+
+// a2 = base + 4n, base = a2
+#define thumb_block_address_preadjust_up(base_reg) \
+ mips_emit_addiu(reg_a2, arm_to_mips_reg[base_reg], \
+ (bit_count[reg_list] * 4)); \
+ mips_emit_addu(arm_to_mips_reg[base_reg], reg_a2, reg_zero) \
+
+// a2 = base - 4n, base = a2
+#define thumb_block_address_preadjust_down(base_reg) \
+ mips_emit_addiu(reg_a2, arm_to_mips_reg[base_reg], \
+ -(bit_count[reg_list] * 4)); \
+ mips_emit_addu(arm_to_mips_reg[base_reg], reg_a2, reg_zero) \
+
+// a2 = base - 4(n + 1), base = a2 (one extra slot for LR)
+#define thumb_block_address_preadjust_push_lr(base_reg) \
+ mips_emit_addiu(reg_a2, arm_to_mips_reg[base_reg], \
+ -((bit_count[reg_list] + 1) * 4)); \
+ mips_emit_addu(arm_to_mips_reg[base_reg], reg_a2, reg_zero) \
+
+// No post-adjustment.
+#define thumb_block_address_postadjust_no(base_reg) \
+
+// base = a2 + 4n
+#define thumb_block_address_postadjust_up(base_reg) \
+ mips_emit_addiu(arm_to_mips_reg[base_reg], reg_a2, \
+ (bit_count[reg_list] * 4)) \
+
+// base = a2 - 4n
+#define thumb_block_address_postadjust_down(base_reg) \
+ mips_emit_addiu(arm_to_mips_reg[base_reg], reg_a2, \
+ -(bit_count[reg_list] * 4)) \
+
+// base = a2 + 4n + 4 (one extra slot for the popped PC)
+#define thumb_block_address_postadjust_pop_pc(base_reg) \
+ mips_emit_addiu(arm_to_mips_reg[base_reg], reg_a2, \
+ ((bit_count[reg_list] * 4) + 4)) \
+
+// No post-adjustment (push_lr adjusts the base up front).
+#define thumb_block_address_postadjust_push_lr(base_reg) \
+
+/* Per-register transfer for the generic (non stack-optimized) path. */
+/* Both read the loop variable i of thumb_block_memory; the address is */
+/* expected in reg_a0 (set by the caller just before). */
+/* load: calls execute_aligned_load32, then stores reg_rv into ARM reg i. */
+#define thumb_block_memory_load() \
+ generate_function_call_swap_delay(execute_aligned_load32); \
+ generate_store_reg(reg_rv, i) \
+
+/* store: emits jal execute_aligned_store32 followed by the load of ARM */
+/* reg i into reg_a1. What is emitted right after the jal sits in its */
+/* delay slot - presumably generate_load_reg emits exactly one */
+/* instruction; confirm against its definition. */
+#define thumb_block_memory_store() \
+ mips_emit_jal(mips_absolute_offset(execute_aligned_store32)); \
+ generate_load_reg(reg_a1, i) \
+
+/* Variants used for the last register of a block transfer. */
+/* final_load: identical to the regular per-register load. */
+#define thumb_block_memory_final_load() \
+ thumb_block_memory_load() \
+
+/* final_store: loads pc + 2 into reg_a2 and goes through */
+/* execute_store_u32 instead of execute_aligned_store32. */
+/* NOTE(review): reg_a2 held the transfer start address until here, so */
+/* this variant can only be used when no later code needs that address. */
+#define thumb_block_memory_final_store() \
+ generate_load_pc(reg_a2, (pc + 2)); \
+ mips_emit_jal(mips_absolute_offset(execute_store_u32)); \
+ generate_load_reg(reg_a1, i) \
+
+/* Dispatchers selecting how the last listed register is transferred, */
+/* keyed by post_op. no/up/down use the final_<access_type> variant. */
+#define thumb_block_memory_final_no(access_type) \
+ thumb_block_memory_final_##access_type() \
+
+#define thumb_block_memory_final_up(access_type) \
+ thumb_block_memory_final_##access_type() \
+
+#define thumb_block_memory_final_down(access_type) \
+ thumb_block_memory_final_##access_type() \
+
+/* push_lr and pop_pc use the regular per-register variant, because an */
+/* extra LR/PC transfer (thumb_block_memory_extra_*) still follows. */
+#define thumb_block_memory_final_push_lr(access_type) \
+ thumb_block_memory_##access_type() \
+
+#define thumb_block_memory_final_pop_pc(access_type) \
+ thumb_block_memory_##access_type() \
+
+/* Extra-transfer hooks for the generic path. no/up/down have no extra */
+/* register to transfer, so they expand to nothing. */
+#define thumb_block_memory_extra_no() \
+
+#define thumb_block_memory_extra_up() \
+
+#define thumb_block_memory_extra_down() \
+
+/* Extra transfer for PUSH {..., lr}: stores ARM LR through */
+/* execute_aligned_store32 at reg_a2 + 4 * (registers in list), i.e. in */
+/* the word after the listed registers. The REG_LR load into reg_a1 is */
+/* emitted right after the jal (its delay slot). */
+#define thumb_block_memory_extra_push_lr() \
+ mips_emit_addiu(reg_a0, reg_a2, (bit_count[reg_list] * 4)); \
+ mips_emit_jal(mips_absolute_offset(execute_aligned_store32)); \
+ generate_load_reg(reg_a1, REG_LR) \
+
+/* Extra transfer for POP {..., pc}: loads the word at reg_a2 + 4 * */
+/* (registers in list) through execute_aligned_load32 (the address addiu */
+/* is emitted right after the jal, in its delay slot), moves the result */
+/* into reg_a0 and emits an indirect Thumb branch to it. */
+#define thumb_block_memory_extra_pop_pc() \
+ mips_emit_jal(mips_absolute_offset(execute_aligned_load32)); \
+ mips_emit_addiu(reg_a0, reg_a2, (bit_count[reg_list] * 4)); \
+ generate_mov(reg_a0, reg_rv); \
+ generate_indirect_branch_cycle_update(thumb) \
+
+/* Per-register transfer for the stack fast path of thumb_block_memory: */
+/* a direct lw/sw between ARM reg i and reg_a1 + offset. Both read the */
+/* locals i and offset of thumb_block_memory. */
+#define thumb_block_memory_sp_load() \
+ mips_emit_lw(arm_to_mips_reg[i], reg_a1, offset) \
+
+#define thumb_block_memory_sp_store() \
+ mips_emit_sw(arm_to_mips_reg[i], reg_a1, offset) \
+
+/* Extra-transfer hooks for the stack fast path. no/up/down expand to */
+/* nothing. */
+#define thumb_block_memory_sp_extra_no() \
+
+#define thumb_block_memory_sp_extra_up() \
+
+#define thumb_block_memory_sp_extra_down() \
+
+/* pop_pc: loads the word after the listed registers into reg_a0 and */
+/* emits an indirect Thumb branch to it. */
+#define thumb_block_memory_sp_extra_pop_pc() \
+ mips_emit_lw(reg_a0, reg_a1, (bit_count[reg_list] * 4)); \
+ generate_indirect_branch_cycle_update(thumb) \
+
+/* push_lr: stores reg_r14 in the word after the listed registers. */
+#define thumb_block_memory_sp_extra_push_lr() \
+ mips_emit_sw(reg_r14, reg_a1, (bit_count[reg_list] * 4)) \
+
+/* Emits a Thumb block transfer (register-list load or store). */
+/* access_type: load or store */
+/* pre_op: suffix of the preadjust hook, which leaves the transfer start */
+/* address in reg_a2 */
+/* post_op: suffix of the postadjust, final and extra hooks */
+/* base_reg: ARM base register index */
+/* The locals i (register index) and offset (byte offset from the start */
+/* address) are read by the per-register helper macros, so their names */
+/* are part of the contract. cycle_count is incremented once per listed */
+/* register. Only registers 0-7 are scanned; LR/PC are handled by the */
+/* extra hooks. */
+#define thumb_block_memory(access_type, pre_op, post_op, base_reg) \
+{ \
+ thumb_decode_rlist(); \
+ u32 i; \
+ u32 offset = 0; \
+ \
+ thumb_block_address_preadjust_##pre_op(base_reg); \
+ thumb_block_address_postadjust_##post_op(base_reg); \
+ \
+ if((base_reg == REG_SP) && iwram_stack_optimize) \
+ { \
+ /* Fast path: reg_a1 = (iwram + 0x8000) + (reg_a2 & 0x7FFC), then */ \
+ /* plain lw/sw relative to reg_a1 for every listed register. */ \
+ mips_emit_andi(reg_a1, reg_a2, 0x7FFC); \
+ generate_load_imm(reg_a0, ((u32)(iwram + 0x8000))); \
+ generate_add(reg_a1, reg_a0); \
+ \
+ for(i = 0; i < 8; i++) \
+ { \
+ if((reg_list >> i) & 0x01) \
+ { \
+ cycle_count++; \
+ thumb_block_memory_sp_##access_type(); \
+ offset += 4; \
+ } \
+ } \
+ \
+ thumb_block_memory_sp_extra_##post_op(); \
+ } \
+ else \
+ { \
+ /* Generic path: clear the low two bits of the start address and */ \
+ /* go through the memory handlers, one call per listed register. */ \
+ mips_emit_ins(reg_a2, reg_zero, 0, 2); \
+ \
+ for(i = 0; i < 8; i++) \
+ { \
+ if((reg_list >> i) & 0x01) \
+ { \
+ cycle_count++; \
+ mips_emit_addiu(reg_a0, reg_a2, offset); \
+ /* Non-zero when a higher-numbered register is still listed. */ \
+ if(reg_list & ~((2 << i) - 1)) \
+ { \
+ thumb_block_memory_##access_type(); \
+ offset += 4; \
+ } \
+ else \
+ { \
+ /* Last listed register: use the final variant and stop. */ \
+ thumb_block_memory_final_##post_op(access_type); \
+ break; \
+ } \
+ } \
+ } \
+ \
+ thumb_block_memory_extra_##post_op(); \
+ } \
+}
+
+
+
+/* Emits a Thumb conditional branch: the condition test selected by */
+/* generate_condition_<condition>, then the branch for the current block */
+/* exit, then patches the conditional at backpatch_address to land on the */
+/* current translation_ptr. Consumes one block_exits entry. */
+/* backpatch_address is not declared here - presumably it is set by the */
+/* generate_condition_* macro; confirm against its definition. */
+#define thumb_conditional_branch(condition) \
+{ \
+ condition_check_type condition_check; \
+ generate_condition_##condition(); \
+ generate_branch_no_cycle_update( \
+ block_exits[block_exit_position].branch_source, \
+ block_exits[block_exit_position].branch_target); \
+ generate_branch_patch_conditional(backpatch_address, translation_ptr); \
+ block_exit_position++; \
+} \
+
+/* Header of an ARM conditional block: expands to generate_condition(). */
+/* The expansion already ends with a semicolon. */
+#define arm_conditional_block_header() \
+ generate_condition(); \
+
+/* ARM B: a plain generate_branch(). */
+#define arm_b() \
+ generate_branch() \
+
+/* ARM BL: loads pc + 4 (address of the next ARM instruction) into */
+/* reg_r14, then branches. */
+#define arm_bl() \
+ generate_load_pc(reg_r14, (pc + 4)); \
+ generate_branch() \
+
+/* ARM BX: decodes the operand, loads ARM register rn into reg_a0 and */
+/* emits an indirect branch through the dual (ARM or Thumb) lookup. */
+#define arm_bx() \
+ arm_decode_branchx(); \
+ generate_load_reg(reg_a0, rn); \
+ /*generate_load_pc(reg_a2, pc);*/ \
+ generate_indirect_branch_dual() \
+
+/* ARM SWI. The SWI number is bits 16-23 of the opcode. If an inline HLE */
+/* handler exists, generate_swi_hle_handler emits it and executes break, */
+/* leaving the enclosing switch/loop at the expansion site so that none */
+/* of the following lines run. Otherwise: reg_a0 = pc + 4, call */
+/* execute_swi, then branch. */
+#define arm_swi() \
+ generate_swi_hle_handler((opcode >> 16) & 0xFF); \
+ generate_load_pc(reg_a0, (pc + 4)); \
+ generate_function_call_swap_delay(execute_swi); \
+ generate_branch() \
+
+/* Thumb B: branch for the current block exit, with cycle update. */
+/* Consumes one block_exits entry. */
+#define thumb_b() \
+ generate_branch_cycle_update( \
+ block_exits[block_exit_position].branch_source, \
+ block_exits[block_exit_position].branch_target); \
+ block_exit_position++ \
+
+/* Thumb BL: sets reg_r14 to (pc + 2) with bit 0 set, then branches like */
+/* thumb_b. Consumes one block_exits entry. */
+#define thumb_bl() \
+ generate_load_pc(reg_r14, ((pc + 2) | 0x01)); \
+ generate_branch_cycle_update( \
+ block_exits[block_exit_position].branch_source, \
+ block_exits[block_exit_position].branch_target); \
+ block_exit_position++ \
+
+/* Second half of a Thumb BL pair: reg_a0 = reg_r14 + offset * 2 is the */
+/* branch target, reg_r14 becomes (pc + 2) with bit 0 set, then an */
+/* indirect branch through the dual lookup is emitted. */
+/* The trailing break leaves the enclosing switch/loop at the expansion */
+/* site, so this macro must only be expanded inside one. */
+#define thumb_blh() \
+{ \
+ thumb_decode_branch(); \
+ generate_alu_imm(addiu, addu, reg_a0, reg_r14, (offset * 2)); \
+ generate_load_pc(reg_r14, ((pc + 2) | 0x01)); \
+ generate_indirect_branch_cycle_update(dual); \
+ break; \
+} \
+
+/* Thumb BX: decodes the hi-register operand, loads rs into reg_a0 */
+/* (generate_load_reg_pc is given a PC offset of 4 - presumably used */
+/* when rs is the PC; confirm) and emits an indirect dual-mode branch. */
+#define thumb_bx() \
+{ \
+ thumb_decode_hireg_op(); \
+ generate_load_reg_pc(reg_a0, rs, 4); \
+ /*generate_load_pc(reg_a2, pc);*/ \
+ generate_indirect_branch_cycle_update(dual); \
+} \
+
+/* Thumb SWI. The SWI number is the low 8 bits of the opcode. If an */
+/* inline HLE handler exists, generate_swi_hle_handler emits it and */
+/* executes break, so none of the following lines run. Otherwise: */
+/* reg_a0 = pc + 2, call execute_swi, then take the current block exit. */
+#define thumb_swi() \
+ generate_swi_hle_handler(opcode & 0xFF); \
+ generate_load_pc(reg_a0, (pc + 2)); \
+ generate_function_call_swap_delay(execute_swi); \
+ generate_branch_cycle_update( \
+ block_exits[block_exit_position].branch_source, \
+ block_exits[block_exit_position].branch_target); \
+ block_exit_position++ \
+
+/* Per-SWI flag table, indexed by SWI number. A non-zero entry makes */
+/* generate_swi_hle_handler emit an inline implementation instead of the */
+/* regular execute_swi call. Only Div (0x06) is enabled. Entries past the */
+/* listed initializers (0x2B-0xFF) are zero-initialized by the language. */
+u8 swi_hle_handle[256] =
+{
+ 0x0, // SWI 0: SoftReset
+ 0x0, // SWI 1: RegisterRAMReset
+ 0x0, // SWI 2: Halt
+ 0x0, // SWI 3: Stop/Sleep
+ 0x0, // SWI 4: IntrWait
+ 0x0, // SWI 5: VBlankIntrWait
+ 0x1, // SWI 6: Div
+ 0x0, // SWI 7: DivArm
+ 0x0, // SWI 8: Sqrt
+ 0x0, // SWI 9: ArcTan
+ 0x0, // SWI A: ArcTan2
+ 0x0, // SWI B: CpuSet
+ 0x0, // SWI C: CpuFastSet
+ 0x0, // SWI D: GetBIOSCheckSum
+ 0x0, // SWI E: BgAffineSet
+ 0x0, // SWI F: ObjAffineSet
+ 0x0, // SWI 10: BitUnpack
+ 0x0, // SWI 11: LZ77UnCompWram
+ 0x0, // SWI 12: LZ77UnCompVram
+ 0x0, // SWI 13: HuffUnComp
+ 0x0, // SWI 14: RLUnCompWram
+ 0x0, // SWI 15: RLUnCompVram
+ 0x0, // SWI 16: Diff8bitUnFilterWram
+ 0x0, // SWI 17: Diff8bitUnFilterVram
+ 0x0, // SWI 18: Diff16bitUnFilter
+ 0x0, // SWI 19: SoundBias
+ 0x0, // SWI 1A: SoundDriverInit
+ 0x0, // SWI 1B: SoundDriverMode
+ 0x0, // SWI 1C: SoundDriverMain
+ 0x0, // SWI 1D: SoundDriverVSync
+ 0x0, // SWI 1E: SoundChannelClear
+ 0x0, // SWI 1F: MidiKey2Freq
+ 0x0, // SWI 20: SoundWhatever0
+ 0x0, // SWI 21: SoundWhatever1
+ 0x0, // SWI 22: SoundWhatever2
+ 0x0, // SWI 23: SoundWhatever3
+ 0x0, // SWI 24: SoundWhatever4
+ 0x0, // SWI 25: MultiBoot
+ 0x0, // SWI 26: HardReset
+ 0x0, // SWI 27: CustomHalt
+ 0x0, // SWI 28: SoundDriverVSyncOff
+ 0x0, // SWI 29: SoundDriverVSyncOn
+ 0x0 // SWI 2A: SoundGetJumpList
+};
+
+/* Emits an inline (HLE) implementation of a BIOS SWI when */
+/* swi_hle_handle[_swi_number] is non-zero, then executes break to leave */
+/* the enclosing switch/loop at the expansion site - the regular SWI */
+/* call that follows in arm_swi / thumb_swi is thereby skipped. When the */
+/* table entry is zero nothing is emitted and execution falls through. */
+/* Div (0x06): r0 = r0 / r1, r1 = r0 % r1 (signed MIPS div, quotient */
+/* from LO, remainder from HI), r3 = absolute value of the quotient */
+/* (sign mask in reg_a0, xor then subtract). No divide-by-zero check is */
+/* emitted. */
+#define generate_swi_hle_handler(_swi_number) \
+{ \
+ u32 swi_number = _swi_number; \
+ if(swi_hle_handle[swi_number]) \
+ { \
+ /* Div */ \
+ if(swi_number == 0x06) \
+ { \
+ mips_emit_div(reg_r0, reg_r1); \
+ mips_emit_mflo(reg_r0); \
+ mips_emit_mfhi(reg_r1); \
+ mips_emit_sra(reg_a0, reg_r0, 31); \
+ mips_emit_xor(reg_r3, reg_r0, reg_a0); \
+ mips_emit_subu(reg_r3, reg_r3, reg_a0); \
+ } \
+ break; \
+ } \
+} \
+
+/* Loads the current pc into reg_a0 and emits an indirect branch of the */
+/* given type (no cycle update). */
+#define generate_translation_gate(type) \
+ generate_load_pc(reg_a0, pc); \
+ generate_indirect_branch_no_cycle_update(type) \
+
+/* Debug hook: loads pc into reg_a0 and calls step_debug_mips. Note that */
+/* it uses generate_load_imm where the neighbours use generate_load_pc. */
+#define generate_step_debug() \
+ generate_load_imm(reg_a0, pc); \
+ generate_function_call(step_debug_mips) \
+
+/* Writes the current pc to the REG_PC slot of the state block addressed */
+/* by reg_base. Uses reg_a0 as scratch. */
+#define generate_update_pc_reg() \
+ generate_load_pc(reg_a0, pc); \
+ mips_emit_sw(reg_a0, reg_base, (REG_PC * 4)) \
+
+#endif
diff --git a/psp/mips_stub.S b/psp/mips_stub.S
new file mode 100644
index 0000000..65d5e9d
--- /dev/null
+++ b/psp/mips_stub.S
@@ -0,0 +1,3427 @@
+# gameplaySP
+#
+# Copyright (C) 2006 Exophase <exophase@gmail.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Symbols made visible to the linker: stub entry points followed by the
+# shared data symbols.
+.align 4
+
+.global mips_update_gba
+.global mips_indirect_branch_arm
+.global mips_indirect_branch_thumb
+.global mips_indirect_branch_dual
+.global execute_load_u8
+.global execute_load_u16
+.global execute_load_u32
+.global execute_load_s8
+.global execute_load_s16
+.global execute_store_u8
+.global execute_store_u16
+.global execute_store_u32
+.global execute_aligned_load32
+.global execute_aligned_store32
+.global execute_read_cpsr
+.global execute_read_spsr
+.global execute_swi
+.global execute_spsr_restore
+.global execute_store_cpsr
+.global execute_store_spsr
+.global execute_lsl_flags_reg
+.global execute_lsr_flags_reg
+.global execute_asr_flags_reg
+.global execute_ror_flags_reg
+.global execute_arm_translate
+.global invalidate_icache_region
+.global invalidate_all_cache
+.global step_debug_mips
+.global reg_check
+
+.global memory_map_read
+.global memory_map_write
+.global reg
+
+# NOTE(review): reg is declared .global above and .extern here; confirm
+# which object is meant to define it.
+.extern reg
+.extern spsr
+
+# MIPS register layout:
+
+# $0 - constant zero
+# $1 - temporary
+# $2 - temporary / return value
+# $3 - ARM r0 (not saved)
+# $4 - temporary / function argument 0
+# $5 - temporary / function argument 1
+# $6 - temporary / function argument 2
+# $7 - ARM r1 (not saved)
+# $8 - ARM r2 (not saved)
+# $9 - ARM r3 (not saved)
+# $10 - ARM r4 (not saved)
+# $11 - ARM r5 (not saved)
+# $12 - ARM r6 (not saved)
+# $13 - ARM r7 (not saved)
+# $14 - ARM r8 (not saved)
+# $15 - ARM r9 (not saved)
+# $16 - ARM machine state pointer (saved)
+# $17 - cycle counter (saved)
+# $18 - ARM r10 (saved)
+# $19 - block start address (roughly r15) (saved)
+# $20 - ARM negative register (saved)
+# $21 - ARM zero register (saved)
+# $22 - ARM carry register (saved)
+# $23 - ARM overflow register (saved)
+# $24 - ARM r11 (not saved)
+# $25 - ARM r12 (not saved)
+# $26 - kernel temporary 0
+# $27 - kernel temporary 1
+# $28 - ARM r13 (saved)
+# $29 - stack pointer
+# $30 - ARM r14 (saved)
+# $31 - return address
+
+# Byte offsets (word index times 4) into the state block addressed
+# through $16. Slots 0-15 hold ARM r0-r15, 16-19 the N/Z/C/V flags and
+# slot 20 the CPSR.
+.equ REG_R0, (0 * 4)
+.equ REG_R1, (1 * 4)
+.equ REG_R2, (2 * 4)
+.equ REG_R3, (3 * 4)
+.equ REG_R4, (4 * 4)
+.equ REG_R5, (5 * 4)
+.equ REG_R6, (6 * 4)
+.equ REG_R7, (7 * 4)
+.equ REG_R8, (8 * 4)
+.equ REG_R9, (9 * 4)
+.equ REG_R10, (10 * 4)
+.equ REG_R11, (11 * 4)
+.equ REG_R12, (12 * 4)
+.equ REG_R13, (13 * 4)
+.equ REG_R14, (14 * 4)
+.equ REG_LR, (14 * 4)
+.equ REG_PC, (15 * 4)
+.equ REG_N_FLAG, (16 * 4)
+.equ REG_Z_FLAG, (17 * 4)
+.equ REG_C_FLAG, (18 * 4)
+.equ REG_V_FLAG, (19 * 4)
+.equ REG_CPSR, (20 * 4)
+# Scratch slots: the macros below park $ra, $6 and address offsets here
+# across calls into C code.
+.equ REG_SAVE, (21 * 4)
+.equ REG_SAVE2, (22 * 4)
+.equ REG_SAVE3, (23 * 4)
+.equ CPU_MODE, (29 * 4)
+.equ CPU_HALT_STATE, (30 * 4)
+.equ CHANGED_PC_STATUS, (31 * 4)
+# Value that save_registers loads into $28 before calling out.
+.equ GP_SAVE, (32 * 4)
+
+# Supervisor-mode banked LR and SPSR, addressed relative to the reg_mode
+# and spsr symbols.
+.equ SUPERVISOR_LR, (reg_mode + (3 * (7 * 4)) + (6 * 4))
+.equ SUPERVISOR_SPSR, (spsr + (3 * 4))
+
+# noat: $1 is used explicitly as a temporary throughout this file.
+# noreorder: branch delay slots are filled by hand, so the instruction
+# written after every branch or jump executes in its delay slot.
+.set noat
+.set noreorder
+
+# make sure $16 has the register base for these macros
+
+# collapse_flag: copies bit 0 of register number flag_reg into bit
+# position shift of $2.
+.macro collapse_flag flag_reg, shift
+ ins $2, $\flag_reg, \shift, 1 # insert flag into CPSR
+.endm
+
+# collapse_flags: rebuilds the stored CPSR from the flag registers. Keeps
+# the low 8 bits of the stored CPSR, clears the rest, then inserts
+# $20 (N) at bit 31, $21 (Z) at bit 30, $22 (C) at bit 29 and $23 (V) at
+# bit 28, and writes the result back. Clobbers $2.
+.macro collapse_flags
+ lw $2, REG_CPSR($16) # load CPSR
+ andi $2, $2, 0xFF # isolate lower 8bits
+ collapse_flag 20, 31 # store flags
+ collapse_flag 21, 30
+ collapse_flag 22, 29
+ collapse_flag 23, 28
+ sw $2, REG_CPSR($16) # store CPSR
+.endm
+
+# extract_flag: sets register number flag_reg to bit position shift of $1
+# (result is 0 or 1).
+.macro extract_flag shift, flag_reg
+ ext $\flag_reg, $1, \shift, 1 # extract flag from CPSR
+.endm
+
+# extract_flags_body: splits a CPSR value held in $1 into the flag
+# registers: bit 31 to $20 (N), bit 30 to $21 (Z), bit 29 to $22 (C),
+# bit 28 to $23 (V).
+.macro extract_flags_body # extract flags from $1
+ extract_flag 31, 20 # load flags
+ extract_flag 30, 21
+ extract_flag 29, 22
+ extract_flag 28, 23
+.endm
+
+# extract_flags: same, reading the CPSR from the state block first.
+# Clobbers $1.
+.macro extract_flags
+ lw $1, REG_CPSR($16) # load CPSR
+ extract_flags_body
+.endm
+
+# Spills the ARM registers r0-r14 from their MIPS homes into the state
+# block at $16 (see the register layout table above), then reloads $28
+# from GP_SAVE. $28 carries ARM r13 while translated code runs, so it
+# must be swapped before calling out - presumably GP_SAVE holds the C
+# global pointer; confirm where it is written.
+# The flag registers ($20-$23) and the cycle counter ($17) are not stored.
+.macro save_registers
+ sw $3, REG_R0($16)
+ sw $7, REG_R1($16)
+ sw $8, REG_R2($16)
+ sw $9, REG_R3($16)
+ sw $10, REG_R4($16)
+ sw $11, REG_R5($16)
+ sw $12, REG_R6($16)
+ sw $13, REG_R7($16)
+ sw $14, REG_R8($16)
+ sw $15, REG_R9($16)
+ sw $24, REG_R11($16)
+ sw $25, REG_R12($16)
+
+ sw $18, REG_R10($16)
+ sw $28, REG_R13($16)
+ sw $30, REG_R14($16)
+
+ lw $28, GP_SAVE($16)
+.endm
+
+# Inverse of save_registers: reloads ARM r0-r14 from the state block at
+# $16 into their MIPS homes. $28 is overwritten with ARM r13, so it no
+# longer holds the value loaded from GP_SAVE afterwards. $1, $2, $4-$6
+# and $ra are left untouched.
+.macro restore_registers
+ lw $3, REG_R0($16)
+ lw $7, REG_R1($16)
+ lw $8, REG_R2($16)
+ lw $9, REG_R3($16)
+ lw $10, REG_R4($16)
+ lw $11, REG_R5($16)
+ lw $12, REG_R6($16)
+ lw $13, REG_R7($16)
+ lw $14, REG_R8($16)
+ lw $15, REG_R9($16)
+ lw $24, REG_R11($16)
+ lw $25, REG_R12($16)
+
+ lw $18, REG_R10($16)
+ lw $28, REG_R13($16)
+ lw $30, REG_R14($16)
+.endm
+
+# Process a hardware event. Since an interrupt might be
+# raised we have to check if the PC has changed.
+
+# $4: next address
+# $16: register base
+# $17: cycle counter
+
+.balign 64
+
+# Flow: store $4 as the PC, fold the flags into the CPSR, spill the ARM
+# registers and call update_gba with CHANGED_PC_STATUS cleared. The value
+# returned in $2 becomes the new cycle counter ($17). When update_gba set
+# CHANGED_PC_STATUS the code branches to lookup_pc (not defined in this
+# part of the file) without reloading the ARM registers; otherwise they
+# are reloaded and control returns to the caller.
+mips_update_gba:
+ sw $4, REG_PC($16) # current PC = $4
+
+ addiu $sp, $sp, -4 # make room on the stack
+ sw $ra,($sp) # save return address
+ collapse_flags # update cpsr
+ save_registers # save registers
+ jal update_gba # process the next event
+ sw $0, CHANGED_PC_STATUS($16) # clear the changed-PC flag (delay slot)
+
+ lw $ra, ($sp) # restore return address
+ addiu $sp, $sp, 4 # fix stack
+
+ lw $1, CHANGED_PC_STATUS($16) # non-zero when the PC was changed
+ bne $1, $0, lookup_pc # PC changed: find the new block
+ addu $17, $2, $0 # $17 = new cycle count (delay slot)
+
+ restore_registers
+
+ jr $ra # if not, go back to caller
+ nop
+
+# Perform an indirect branch.
+
+# $4: GBA address to branch to
+
+# ARM-state indirect branch: spills the ARM registers, calls
+# block_lookup_address_arm with the target still in $4, reloads the ARM
+# registers and jumps to the address returned in $2. $ra is overwritten
+# by the jal and control does not return to the caller.
+mips_indirect_branch_arm:
+ save_registers
+ jal block_lookup_address_arm # $2 = MIPS address to jump to
+ nop
+ restore_registers
+ jr $2 # jump to it
+ nop
+
+# Thumb-state indirect branch: same sequence as the ARM variant, using
+# block_lookup_address_thumb. $4 = GBA target address; jumps to the
+# address returned in $2 and does not return to the caller.
+mips_indirect_branch_thumb:
+ save_registers
+ jal block_lookup_address_thumb # $2 = MIPS address to jump to
+ nop
+ restore_registers
+ jr $2 # jump to it
+ nop
+
+# Indirect branch through block_lookup_address_dual (used by the emitter
+# for BX-style branches, where the target state is not known statically).
+# $4 = GBA target address; jumps to the address returned in $2 and does
+# not return to the caller.
+mips_indirect_branch_dual:
+ save_registers
+ jal block_lookup_address_dual # $2 = MIPS address to jump to
+ nop
+ restore_registers
+ jr $2 # jump to it
+ nop
+
+
+# $4: address to write to
+# $5: current PC
+
+# Will patch the return address with a call to the correct handler as
+# listed in the given table.
+
+# Value will be set to force_open if it's open
+
+# Parameters: ftable is a table of handler addresses indexed by address
+# region (word entries); force_open is the table index used when the
+# region is 0x0F or above.
+# Builds a jal to the selected handler, writes it over the call site at
+# $ra - 8, flushes that cache line, and returns to the rewritten jal so
+# the access is re-executed through the new handler (delay slot
+# included). Clobbers $1 and $2; $ra is rewound by 8.
+# The sltu below is given an immediate operand; the assembler is expected
+# to encode it as sltiu.
+.macro patch_handler ftable, force_open
+ srl $1, $4, 24 # $1 = address region
+ sltu $2, $1, 0x0F # check if the value is open
+ bne $2, $0, 1f
+ sll $1, $1, 2 # make address word indexed (delay)
+
+ addiu $1, $0, (\force_open * 4)
+
+1:
+ lui $2, %hi(\ftable)
+ addu $2, $2, $1
+ lw $2, %lo(\ftable)($2) # new function handler is in $2
+ srl $2, $2, 2 # remove lower two bits
+
+ lui $1, %hi(3 << 26) # $1 = 3 (JAL opcode)
+ ins $1, $2, 0, 26 # insert offset into jal
+
+ addiu $ra, $ra, -8 # rewind return address to function call
+ sw $1, ($ra) # modify to call new handler
+
+ cache 0x1a, ($ra) # hit writeback dcache line
+ cache 0x08, ($ra) # hit invalidate icache line
+
+ jr $ra # return
+ nop # wary of putting cache here
+.endm
+
+
+# Like the above, but will use the table of the proper alignment,
+# The tables should be ordered by alignment
+
+# Parameters: ftable is the first of several consecutive handler tables
+# (64 bytes apart, one per alignment value); alignment is the NUMBER of
+# low address bits that select the table.
+# The table offset is region * 4 in bits 2-5 with the low address bits
+# inserted at bit 6. Regions 0x0F and above are forced to entry 1.
+# Patching and return work as in patch_handler. Clobbers $1 and $2; $ra
+# is rewound by 8.
+.macro patch_handler_align ftable, alignment
+ srl $1, $4, 24 # $1 = address region
+ sltu $2, $1, 0x0F # check if the value is open
+ bne $2, $0, 1f
+ sll $1, $1, 2 # make address word indexed (delay)
+
+ addiu $1, $0, 4 # force address to 0x1 (open)
+
+1:
+ ins $1, $4, 6, \alignment # place alignment bits into offset
+ lui $2, %hi(\ftable)
+
+ addu $2, $2, $1
+ lw $2, %lo(\ftable)($2) # new function handler is in $2
+
+ srl $2, $2, 2 # remove lower two bits
+
+ lui $1, %hi(3 << 26) # $1 = 3 (JAL opcode)
+ ins $1, $2, 0, 26 # insert offset into jal
+
+ addiu $ra, $ra, -8 # rewind return address to function call
+ sw $1, ($ra) # modify to call new handler
+
+ cache 0x1a, ($ra) # hit writeback dcache line
+ cache 0x08, ($ra) # hit invalidate icache line
+
+ jr $ra # return
+ nop # wary of putting cache here
+.endm
+
+
+# Branches to patch_handler unless the top 8 bits of the address in $4
+# equal region. Clobbers $1.
+# The macro ends on the branch itself: the first instruction after the
+# expansion is its delay slot and executes on both paths.
+.macro region_check region, patch_handler
+ srl $1, $4, 24 # check upper 8bits of address
+ xor $1, $1, \region # see if it is the given region
+ bne $1, $0, \patch_handler # if not repatch/try again
+.endm
+
+# Open-bus variant of region_check: falls through when the region (top 8
+# address bits) is 1 or is 0x0F and above; branches to patch_handler for
+# every other region below 0x0F. Clobbers $1 and $2.
+# Ends on the branch: the next instruction after the expansion is its
+# delay slot.
+.macro region_check_open patch_handler
+ srl $1, $4, 24 # check upper 8bits of address
+ sltiu $2, $1, 0x0F # true if it is a low address
+ addiu $1, $1, -1 # non-zero if it is not a low open
+ sltu $1, $0, $1 # true if lower bits != 1
+ and $1, $1, $2 # true if low address and not open
+ bne $1, $0, \patch_handler # if above is true, patch
+.endm
+
+
+# Like region_check, but additionally requires the low align_bits bits of
+# the address to equal alignment. The low address bits are inserted at
+# bit 8 above the region number so one xori tests both. Clobbers $1.
+# Ends on the branch: the next instruction after the expansion is its
+# delay slot.
+.macro region_check_align region, align_bits, alignment, patch_handler
+ srl $1, $4, 24 # check upper 8bits of address
+ ins $1, $4, 8, \align_bits # look at lower bits of address too
+ # See if it is the given region and alignment
+ xori $1, $1, (\region | (\alignment << 8))
+ bne $1, $0, \patch_handler # if not repatch/try again
+.endm
+
+# Like region_check_open, but also branches to patch_handler when the low
+# align_bits bits of the address differ from alignment.
+# Clobbers $1 and $2. Ends on the branch: the next instruction after the
+# expansion is its delay slot.
+.macro region_check_open_align align_bits, alignment, patch_handler
+ srl $1, $4, 24 # check upper 8bits of address
+ sltiu $2, $1, 0x0F # true if it is a low address
+ addiu $1, $1, -1 # non-zero if it is not a low open
+ sltu $1, $0, $1 # true if $1 != 0
+ and $1, $1, $2 # true if low address and not open
+ ext $2, $4, 0, \align_bits # $2 = low bits of 4
+ xori $2, $2, \alignment # true if alignment doesn't match
+ or $1, $1, $2 # align failure will trigger too
+ bne $1, $0, \patch_handler # if above is true, patch
+.endm
+
+
+# Handler body for a region whose accesses are ignored: branches to
+# patch_handler when the address is not in region, otherwise returns
+# immediately without touching memory. Clobbers $1.
+.macro ignore_region region, patch_handler
+ region_check \region, \patch_handler
+ nop
+ jr $ra
+ nop
+.endm
+
+# Handler body that ignores accesses to regions 0x0F and above: returns
+# immediately for those, and branches to patch_handler when the region is
+# below 0x0F. Clobbers $1.
+.macro ignore_high patch_handler
+ srl $1, $4, 24 # check upper 8bits of address
+ sltiu $1, $1, 0x0F # see if it is not high
+ bne $1, $0, \patch_handler # if not repatch/try again
+ nop
+ jr $ra
+ nop
+.endm
+
+
+# translate_region_core: $2 = %hi(base) + (address AND size). The size
+# parameter is used as a bit mask, not a byte count. The %lo(base) part
+# is added later by the load/store macro that consumes $2. Overwrites $4
+# with the masked offset.
+.macro translate_region_core base, size
+ lui $2, %hi(\base) # generate upper address
+ andi $4, $4, \size # generate offset
+ addu $2, $2, $4 # add ptr upper and offset
+.endm
+
+# translate_region: region check followed by the core above; the lui of
+# the core fills the delay slot of the check branch.
+.macro translate_region region, patch_handler, base, size
+ region_check \region, \patch_handler
+ translate_region_core \base, \size
+.endm
+
+# I refuse to have > 80 char lines, and GAS has a problem with the param
+# list spilling over (grumble)
+
+# Alignment-checked form of translate_region. a_b is the number of low
+# address bits compared against alignment, p_h the patch handler; both
+# are forwarded to region_check_align.
+.macro translate_region_align region, a_b, alignment, p_h, base, size
+ region_check_align \region, \a_b, \alignment, \p_h
+ translate_region_core \base, \size
+.endm
+
+
+# Computes $2 = %hi(ewram + 0x8000) + offset for an EWRAM address in $4.
+# The offset keeps (address AND mask) in bits 0-14 and moves address bits
+# 15-17 up to bits 16-18, leaving bit 15 clear, so consecutive 32KB pages
+# of the GBA address space land 64KB apart in the host buffer.
+# Clobbers $1 and $4. The lui is meant to sit in the delay slot of the
+# preceding region check.
+.macro translate_region_ewram_core mask
+ lui $2, %hi(ewram + 0x8000) # generate upper address (delay)
+ andi $1, $4, \mask # generate 15bit offset
+ ext $4, $4, 15, 3 # isolate top 3 bits of offset
+ ins $1, $4, 16, 3 # reinsert them at bits 16-18
+ addu $2, $2, $1
+.endm
+
+# EWRAM (region 2) translation wrappers: a region check followed by
+# translate_region_ewram_core. The mask drops address bit 0 (0x7FFE) or
+# bits 0-1 (0x7FFC) for the 16bit and 32bit forms. The load forms also
+# check the access alignment; the store forms check the region only.
+.macro translate_region_ewram patch_handler
+ region_check 2, \patch_handler
+ translate_region_ewram_core 0x7FFF
+.endm
+
+.macro translate_region_ewram_load_align align_bits, alignment, patch_handler
+ region_check_align 2, \align_bits, \alignment, \patch_handler
+ translate_region_ewram_core 0x7FFF
+.endm
+
+.macro translate_region_ewram_load_align16 align_bits, alignment, patch_handler
+ region_check_align 2, \align_bits, \alignment, \patch_handler
+ translate_region_ewram_core 0x7FFE
+.endm
+
+.macro translate_region_ewram_load_align32 align_bits, alignment, patch_handler
+ region_check_align 2, \align_bits, \alignment, \patch_handler
+ translate_region_ewram_core 0x7FFC
+.endm
+
+.macro translate_region_ewram_store_align16 patch_handler
+ region_check 2, \patch_handler
+ translate_region_ewram_core 0x7FFE
+.endm
+
+.macro translate_region_ewram_store_align32 patch_handler
+ region_check 2, \patch_handler
+ translate_region_ewram_core 0x7FFC
+.endm
+
+
+# Computes $2 = %hi(vram) + offset for a VRAM address in $4.
+# Entry: $2 = address bits 15-16 (set by the wrapper in the delay slot of
+# its region check). The offset is the low 17 address bits; when bits
+# 15-16 are both set, 0x8000 is subtracted so the top 32KB of the 128KB
+# window maps onto the 32KB below it. Clobbers $1 and $4.
+.macro translate_region_vram_core
+ addiu $2, $2, -3 # see if it's 3
+ ext $4, $4, 0, 17 # generate 17bit offset
+ bne $2, $0, 1f
+ lui $1, %hi(vram) # start loading vram address (delay)
+
+ addiu $4, $4, -0x8000 # move address into VRAM region
+
+1:
+ addu $2, $1, $4 # $2 = (hi)vram + address
+.endm
+
+# VRAM (region 6) translation wrappers: a region check, the ext that
+# hands address bits 15-16 to the core in $2 (placed in the delay slot of
+# the check), an optional clearing of the low one or two address bits for
+# the 16bit and 32bit forms, then translate_region_vram_core. The load
+# forms also check the access alignment; the store forms do not.
+.macro translate_region_vram patch_handler
+ region_check 6, \patch_handler
+ ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
+ translate_region_vram_core
+.endm
+
+.macro translate_region_vram_load_align align_bits, alignment, patch_handler
+ region_check_align 6, \align_bits, \alignment, \patch_handler
+ ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
+ translate_region_vram_core
+.endm
+
+.macro translate_region_vram_load_align16 align_bits, alignment, patch_handler
+ region_check_align 6, \align_bits, \alignment, \patch_handler
+ ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
+ ins $4, $0, 0, 1 # mask out lower bit of address
+ translate_region_vram_core
+.endm
+
+.macro translate_region_vram_load_align32 align_bits, alignment, patch_handler
+ region_check_align 6, \align_bits, \alignment, \patch_handler
+ ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
+ ins $4, $0, 0, 2 # mask out lower two bits of address
+ translate_region_vram_core
+.endm
+
+.macro translate_region_vram_store_align16 patch_handler
+ region_check 6, \patch_handler
+ ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
+ ins $4, $0, 0, 1 # mask out lower bit of address
+ translate_region_vram_core
+.endm
+
+.macro translate_region_vram_store_align32 patch_handler
+ region_check 6, \patch_handler
+ ext $2, $4, 15, 2 # $2 = bits 15 and 16 of address (delay)
+ ins $4, $0, 0, 2 # mask out lower two bits of address
+ translate_region_vram_core
+.endm
+
+
+
+# Computes $2 = host pointer for a gamepak address in $4.
+# The page pointer is read from the table located 32768 bytes below the
+# register base ($16), indexed by address >> 15. A NULL entry means the
+# page is not resident: the ARM registers are spilled and
+# load_gamepak_page is called with (address >> 15) AND 0x3FF, with $ra
+# kept in REG_SAVE2 and the in-page offset in REG_SAVE across the call.
+# Result: $2 = page pointer + (address AND mask). Clobbers $1 and $4.
+# $6 is NOT preserved across the call (see translate_region_gamepak_a).
+.macro translate_region_gamepak_core mask
+ srl $2, $4, 15 # $2 = page number of address (delay)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16 # $2 = memory_map_read[address >> 15]
+ lw $2, -32768($2)
+ bne $2, $0, 1f # if it's non-NULL continue
+ andi $1, $4, \mask # $1 = low 15bits of address (delay slot)
+
+ sw $ra, REG_SAVE2($16) # save return address
+
+ save_registers # save the registers
+ ext $4, $4, 15, 10 # $4 = (address >> 15) & 0x3FF
+
+ jal load_gamepak_page # get page in $2
+ sw $1, REG_SAVE($16) # save offset (delay)
+ lw $1, REG_SAVE($16) # restore offset (runs after the call returns)
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE2($16) # restore return address
+
+1:
+ addu $2, $2, $1 # add the memory map offset
+.endm
+
+# Gamepak translation wrappers: a region check (optionally with an
+# alignment check on the low a_b address bits) followed by
+# translate_region_gamepak_core. The mask drops address bit 0 (0x7FFE) or
+# bits 0-1 (0x7FFC) for the 16bit and 32bit forms.
+.macro translate_region_gamepak region, patch_handler
+ region_check \region, \patch_handler
+ translate_region_gamepak_core 0x7FFF
+.endm
+
+.macro translate_region_gamepak_align region, a_b, alignment, patch_handler
+ region_check_align \region, \a_b, \alignment, \patch_handler
+ translate_region_gamepak_core 0x7FFF
+.endm
+
+.macro translate_region_gamepak_align16 region, a_b, alignment, patch_handler
+ region_check_align \region, \a_b, \alignment, \patch_handler
+ translate_region_gamepak_core 0x7FFE
+.endm
+
+.macro translate_region_gamepak_align32 region, a_b, alignment, patch_handler
+ region_check_align \region, \a_b, \alignment, \patch_handler
+ translate_region_gamepak_core 0x7FFC
+.endm
+
+
+# Gamepak translation that preserves $6 (a2). Same sequence as
+# translate_region_gamepak with mask 0x7FFF, except that $6 is parked in
+# REG_SAVE3 around the load_gamepak_page call. Result: $2 = page pointer
+# + low 15 address bits. Clobbers $1 and $4.
+.macro translate_region_gamepak_a region, patch_handler
+ region_check \region, \patch_handler
+ srl $2, $4, 15 # $2 = page number of address (delay)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16 # $2 = memory_map_read[address >> 15]
+ lw $2, -32768($2)
+ bne $2, $0, 1f # if it's non-NULL continue
+ andi $1, $4, 0x7FFF # $1 = low 15bits of address (delay slot)
+
+ sw $ra, REG_SAVE2($16) # save return address
+ sw $6, REG_SAVE3($16) # save a2
+
+ save_registers # save the registers
+ ext $4, $4, 15, 10 # $4 = (address >> 15) & 0x3FF
+
+ jal load_gamepak_page # get page in $2
+ sw $1, REG_SAVE($16) # save offset (delay)
+ lw $1, REG_SAVE($16) # restore offset (runs after the call returns)
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE2($16) # restore return address
+ lw $6, REG_SAVE3($16) # restore a2
+
+1:
+ addu $2, $2, $1 # add the memory map offset
+.endm
+
+
+# Complete handler body for an EEPROM read (region 0x0D) that preserves
+# $6 (a2). Branches to patch_handler for other regions; otherwise calls
+# read_eeprom with the ARM registers spilled and returns its result in
+# $2. $ra is kept in REG_SAVE and $6 in REG_SAVE2 across the call; $6 is
+# reloaded in the delay slot of the return.
+.macro eeprom_load_a patch_handler
+ region_check 0xD, \patch_handler
+
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+ sw $6, REG_SAVE2($16) # save a2
+
+ save_registers # save the registers
+
+ jal read_eeprom # get eeprom value in $2
+ nop
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+ lw $6, REG_SAVE2($16) # restore a2
+.endm
+
+
+# EEPROM read body shared by the eeprom_load wrappers: calls read_eeprom
+# with the ARM registers spilled and returns its result in $2. $ra is
+# kept in REG_SAVE across the call. The leading sw is meant to sit in the
+# delay slot of the region check placed before the expansion. $6 is not
+# preserved.
+.macro eeprom_load_core
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+
+ save_registers # save the registers
+
+ jal read_eeprom # get eeprom value in $2
+ nop
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+ nop
+.endm
+
+# EEPROM (region 0x0D) read handlers: a region check, optionally with an
+# alignment check, followed by eeprom_load_core. The align16 and align32
+# names are plain aliases of eeprom_load_align.
+.macro eeprom_load patch_handler
+ region_check 0xD, \patch_handler
+ eeprom_load_core
+.endm
+
+.macro eeprom_load_align align_bits, alignment, patch_handler
+ region_check_align 0xD, \align_bits, \alignment, \patch_handler
+ eeprom_load_core
+.endm
+
+.macro eeprom_load_align16 align_bits, alignment, patch_handler
+ eeprom_load_align \align_bits, \alignment, \patch_handler
+.endm
+
+.macro eeprom_load_align32 align_bits, alignment, patch_handler
+ eeprom_load_align \align_bits, \alignment, \patch_handler
+.endm
+
+
+# Backup-memory read body: calls read_backup with the address reduced to
+# its low 16 bits (done in the jal delay slot) and returns the result in
+# $2.
+# Contract with the expansion site:
+# the caller must already have stored $ra in REG_SAVE (this macro only
+# reloads it), and the macro ends on jr $ra with its delay slot unfilled,
+# so the instruction that follows the expansion executes there.
+.macro backup_load_core
+ save_registers # save the registers
+
+ jal read_backup # get backup value in $2
+ ext $4, $4, 0, 16 # address &= 0xFFFF
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+.endm
+
+# Complete handler body for a backup-memory read (region 0x0E) that
+# preserves $6 (a2). Branches to patch_handler for other regions;
+# otherwise calls read_backup with the low 16 address bits and returns
+# its result in $2. $ra is kept in REG_SAVE and $6 in REG_SAVE2 across
+# the call; $6 is reloaded in the delay slot of the return.
+.macro backup_load_a patch_handler
+ region_check 0xE, \patch_handler
+ sw $ra, REG_SAVE($16) # save return address (delay)
+ sw $6, REG_SAVE2($16) # save a2
+
+ save_registers # save the registers
+
+ jal read_backup # get backup value in $2
+ ext $4, $4, 0, 16 # address &= 0xFFFF
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+ lw $6, REG_SAVE2($16) # restore a2
+.endm
+
+
+# Backup-memory (region 0x0E) read handlers: a region check (optionally
+# with an alignment check), the save of $ra into REG_SAVE in the delay
+# slot of that check, an optional clearing of the low one or two address
+# bits, then backup_load_core. Like the core, each of these ends on an
+# unfilled jr $ra delay slot that the expansion site has to supply.
+.macro backup_load patch_handler
+ region_check 0xE, \patch_handler
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+ backup_load_core
+.endm
+
+.macro backup_load_align align_bits, alignment, patch_handler
+ region_check_align 0xE, \align_bits, \alignment, \patch_handler
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+ backup_load_core
+.endm
+
+.macro backup_load_align16 align_bits, alignment, patch_handler
+ region_check_align 0xE, \align_bits, \alignment, \patch_handler
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+ ins $4, $0, 0, 1 # mask out lower bit
+ backup_load_core
+.endm
+
+.macro backup_load_align32 align_bits, alignment, patch_handler
+ region_check_align 0xE, \align_bits, \alignment, \patch_handler
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+ ins $4, $0, 0, 2 # mask out lower two bits
+ backup_load_core
+.endm
+
+
+# Open-bus 8bit read: returns in $2 a byte of the instruction stream
+# around the current PC. Entry: $4 = address, $5 = current PC.
+# ARM state (CPSR bit 5 clear) reads at PC + (address AND 3); Thumb state
+# reads at PC + (address AND 1). $ra is kept in REG_SAVE across the call
+# to read_memory8.
+# The macro ends on jr $ra with its delay slot unfilled: the instruction
+# that follows the expansion executes there.
+.macro open_load8_core
+ lw $2, REG_CPSR($16) # $2 = CPSR (delay)
+ andi $2, $2, 0x20 # test T bit
+ beq $2, $0, 1f # branch if ARM mode
+ andi $4, $4, 0x03 # isolate lower 2 bits from address (delay)
+
+ andi $4, $4, 0x01 # in Thumb mode, isolate one more bit
+
+1:
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+ save_registers # save the registers
+
+ jal read_memory8 # get instruction at PC
+ addu $4, $5, $4 # a0 = PC + low bits of address
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+.endm
+
+# Open-bus 8bit read handler: branches to patch_handler when the address
+# is not in an open region, otherwise performs open_load8_core. Ends on
+# the unfilled jr $ra delay slot of the core.
+.macro open_load8 patch_handler
+ region_check_open \patch_handler
+ open_load8_core
+.endm
+
+
+
+# Open-bus 16bit read: returns in $2 a halfword of the instruction stream
+# at the current PC. Entry: $4 = address, $5 = current PC.
+# ARM state (CPSR bit 5 clear) reads at PC + (address AND 2); Thumb state
+# reads at PC. $ra is kept in REG_SAVE across the call to read_memory16.
+# The macro ends on jr $ra with its delay slot unfilled: the instruction
+# that follows the expansion executes there.
+.macro open_load16_core
+ lw $2, REG_CPSR($16) # $2 = CPSR (delay)
+ andi $2, $2, 0x20 # test T bit
+ beq $2, $0, 1f # branch if ARM mode
+ andi $4, $4, 0x02 # isolate bit 1 from address (delay)
+
+ addu $4, $0, $0 # zero out address bit
+
+1:
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+ save_registers # save the registers
+
+ jal read_memory16 # get instruction at PC
+ addu $4, $5, $4 # a0 = PC + low bits of address
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+.endm
+
+# Open-bus 16bit read handlers: an open-region and alignment check
+# followed by open_load16_core. open_load16_align16 is a plain alias.
+# Both end on the unfilled jr $ra delay slot of the core.
+.macro open_load16_align align_bits, alignment, patch_handler
+ region_check_open_align \align_bits, \alignment, \patch_handler
+ open_load16_core
+.endm
+
+.macro open_load16_align16 align_bits, alignment, patch_handler
+ open_load16_align \align_bits, \alignment, \patch_handler
+.endm
+
+
+
+# Open-bus 32bit read: returns in $2 the instruction found at the current
+# PC. Entry: $5 = current PC, $16 = register base. $4 (the faulting
+# address) does not take part in the result.
+# Thumb state (CPSR bit 5 set): reads the halfword at PC and duplicates
+# it into both halves of the result.
+# ARM state: reads the word at PC. The original code passed PC + address
+# to read_memory32 here, contradicting its own comment and the Thumb
+# path; the address operand is now PC alone.
+# $ra is kept in REG_SAVE across the call. $6 is not preserved.
+# The macro ends on jr $ra with its delay slot unfilled: the instruction
+# that follows the expansion executes there.
+.macro open_load32_core
+ lw $2, REG_CPSR($16) # $2 = CPSR (delay)
+ andi $2, $2, 0x20 # test T bit
+
+ save_registers # save the registers
+
+ beq $2, $0, 1f # branch if ARM mode
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+
+ jal read_memory16 # get instruction at PC
+ addu $4, $5, $0 # a0 = PC
+
+ j 2f
+ ins $2, $2, 16, 16 # result = (result << 16) | result (delay)
+
+1:
+ jal read_memory32 # get instruction at PC
+ addu $4, $5, $0 # a0 = PC
+
+2: # join point
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+.endm
+
+# Open-bus 32bit read handler that preserves $6 (a2). Branches to
+# patch_handler when the address in $4 is not in an open region;
+# otherwise returns in $2 the instruction found at the current PC ($5).
+# Thumb state (CPSR bit 5 set): reads the halfword at PC and duplicates
+# it into both halves of the result.
+# ARM state: reads the word at PC. The original code passed PC + address
+# to read_memory32 here, contradicting its own comment and the Thumb
+# path; the address operand is now PC alone.
+# $ra is kept in REG_SAVE and $6 in REG_SAVE2 across the call; $6 is
+# reloaded in the delay slot of the return, so this macro is complete and
+# needs no trailing instruction.
+.macro open_load32_a patch_handler
+ region_check_open \patch_handler
+
+ lw $2, REG_CPSR($16) # $2 = CPSR (delay)
+ andi $2, $2, 0x20 # test T bit
+
+ save_registers # save the registers
+ sw $6, REG_SAVE2($16) # save a2
+
+ beq $2, $0, 1f # branch if ARM mode
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+
+ jal read_memory16 # get instruction at PC
+ addu $4, $5, $0 # a0 = PC
+
+ j 2f
+ ins $2, $2, 16, 16 # result = (result << 16) | result (delay)
+
+1:
+ jal read_memory32 # get instruction at PC
+ addu $4, $5, $0 # a0 = PC
+
+2:
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+ lw $6, REG_SAVE2($16) # restore a2 (delay)
+.endm
+
+# Open-bus 32bit read handlers: an open-region and alignment check
+# followed by open_load32_core. open_load32_align32 is a plain alias.
+# Both end on the unfilled jr $ra delay slot of the core.
+.macro open_load32_align align_bits, alignment, patch_handler
+ region_check_open_align \align_bits, \alignment, \patch_handler
+ open_load32_core
+.endm
+
+.macro open_load32_align32 align_bits, alignment, patch_handler
+ open_load32_align \align_bits, \alignment, \patch_handler
+.endm
+
+
+# Store handler that forwards to a function: branches to patch_handler
+# when the address in $4 is not in region; otherwise calls function with
+# the ARM registers spilled and the address masked with mask (done in the
+# jal delay slot). $5 is passed through untouched. $ra is kept in
+# REG_SAVE across the call. $6 is not preserved.
+.macro store_function function, region, patch_handler, mask
+ region_check \region, \patch_handler
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+
+ save_registers # save the registers
+
+ jal \function # store value out
+ andi $4, $4, \mask # mask address
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+ nop
+.endm
+
+
+# Store handler that forwards to a function; the body is currently
+# identical to store_function. Branches to patch_handler when the address
+# in $4 is not in region; otherwise calls function with the address
+# masked with mask. $ra is kept in REG_SAVE across the call.
+# NOTE(review): the other macros with the _a suffix in this file
+# (eeprom_load_a, backup_load_a, open_load32_a,
+# translate_region_gamepak_a) preserve $6 around their call, while this
+# one does not. Confirm whether callers rely on $6 surviving here.
+.macro store_function_a function, region, patch_handler, mask
+ region_check \region, \patch_handler
+ sw $ra, REG_SAVE($16) # save the return address (delay)
+
+ save_registers # save the registers
+
+ jal \function # store value out
+ andi $4, $4, \mask # mask address
+
+ restore_registers # restore the other registers
+
+ lw $ra, REG_SAVE($16) # restore return address
+ jr $ra # return
+ nop
+.endm
+
+
+
+# Handler tails for aligned loads. Entry: $2 = %hi(base) + offset, as
+# produced by the translate_region macros. Each one returns with the load
+# placed in the delay slot of the jr, adding %lo(base) as displacement.
+# Result in $2: zero-extended for u8/u16, sign-extended for s8/s16.
+.macro load_u8 base
+ jr $ra # return
+ lbu $2, %lo(\base)($2) # return base[offset]
+.endm
+
+.macro load_s8 base
+ jr $ra # return
+ lb $2, %lo(\base)($2) # return base[offset]
+.endm
+
+.macro load_u16 base
+ jr $ra # return
+ lhu $2, %lo(\base)($2) # return base[offset]
+.endm
+
+.macro load_s16 base
+ jr $ra # return
+ lh $2, %lo(\base)($2) # return base[offset]
+.endm
+
+.macro load_u32 base
+ jr $ra # return
+ lw $2, %lo(\base)($2) # return base[offset]
+.endm
+
+
+# 16bit unaligned load will always have a 1 in the LSB;
+# should have already been taken care of in indexing.
+
+# Handler tail for a 16bit load from an odd address. Entry: $2 =
+# %hi(base) + offset with bit 0 already cleared by the caller. Loads the
+# zero-extended halfword and rotates the 32bit result right by 8 in the
+# delay slot of the return.
+.macro load_u16_unaligned base
+ lhu $2, %lo(\base)($2) # load base[offset]
+ jr $ra # return
+ ror $2, $2, 8 # rotate value by 8bits
+.endm
+
+# This is technically the same as load_s8, but kept to
+# avoid confusion.
+
+# Handler tail for a signed 16bit load from an odd address: returns the
+# sign-extended byte at base[offset] in $2 (the lb sits in the delay slot
+# of the return). Entry: $2 = %hi(base) + offset.
+.macro load_s16_unaligned base
+ jr $ra # return
+ lb $2, %lo(\base)($2) # return base[offset]
+.endm
+
+# Unalignment must be known statically (use the tables to
+# patch correctly)
+
+# Handler tail for a 32bit load from a misaligned address. Entry: $2 =
+# %hi(base) + offset with the low address bits already cleared by the
+# caller; alignment is the statically known misalignment in bytes. Loads
+# the aligned word and rotates it right by alignment * 8 bits in the
+# delay slot of the return.
+.macro load_u32_unaligned base, alignment
+ lw $2, %lo(\base)($2) # load base[offset]
+ jr $ra # return
+ ror $2, $2, (\alignment * 8) # rotate value by (alignment * 8) bits
+.endm
+
+
+# Handler tails for plain stores. Entry: $2 = %hi(base) + offset, $5 =
+# value. Each one returns with the store placed in the delay slot of the
+# jr, adding %lo(base) as displacement.
+.macro store_u8 base
+ jr $ra # return
+ sb $5, %lo(\base)($2) # store value at base[offset]
+.endm
+
+.macro store_u16 base
+ jr $ra # return
+ sh $5, %lo(\base)($2) # store value at base[offset]
+.endm
+
+.macro store_u32 base
+ jr $ra # return
+ sw $5, %lo(\base)($2) # store value at base[offset]
+.endm
+
+
+# Store the byte value mirrored into both halves of a 16bit store
+
+.macro store_u8_double base
+ ins $5, $5, 8, 8 # copy bits 0-7 of $5 into bits 8-15: value = (value << 8) | value
+ jr $ra # return
+ sh $5, %lo(\base)($2) # store the halfword at lo(base) + $2 (jr delay slot)
+.endm
+
+
+# Store the value and check whether it overwrote code (SMC status is
+# read 32768 bytes below the store address; nonzero goes to smc_write)
+
+.macro store_u8_smc base
+ addiu $2, $2, %lo(\base) # offset the address
+ lb $1, -32768($2) # load the SMC status
+ bne $1, $0, smc_write # is there code there?
+ sb $5, ($2) # store value at base[offset] (delay slot, runs on both paths)
+ jr $ra # return
+ nop # jr delay slot
+.endm
+
+.macro store_u16_smc base
+ addiu $2, $2, %lo(\base) # offset the address
+ lh $1, -32768($2) # load the SMC status
+ bne $1, $0, smc_write # is there code there?
+ sh $5, ($2) # store value at base[offset] (delay slot, runs on both paths)
+ jr $ra # return
+ nop # jr delay slot
+.endm
+
+.macro store_u32_smc base
+ addiu $2, $2, %lo(\base) # offset the address
+ lw $1, -32768($2) # load the SMC status
+ bne $1, $0, smc_write # is there code there?
+ sw $5, ($2) # store value at base[offset] (delay slot, runs on both paths)
+ jr $ra # return
+ nop # jr delay slot
+.endm
+
+
+
+# Unsigned 8bit load handlers
+
+execute_load_bios_u8: # u8 load for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check 0, patch_load_u8
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFF # generate offset
+ addu $2, $2, $4 # $2 = upper address + offset
+ load_u8 bios_rom
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ ins $2, $4, 0, 2 # lower 2 bits of the address select the byte
+ load_u8 bios_read_protect
+
+2:
+ open_load8_core
+ nop # NOTE(review): presumably fills a delay slot left open by open_load8_core
+
+
+execute_load_ewram_u8: # 0x02 EWRAM
+ translate_region_ewram patch_load_u8
+ load_u8 (ewram + 0x8000)
+
+# Put the generic address over the handler you want to be default
+# IWRAM is typically the most frequently read and written to.
+
+execute_load_u8: # generic entry point, aliased to the IWRAM handler
+execute_load_iwram_u8: # 0x03 IWRAM
+ translate_region 3, patch_load_u8, (iwram + 0x8000), 0x7FFF
+ load_u8 (iwram + 0x8000)
+
+execute_load_io_u8: # 0x04 I/O registers
+ translate_region 4, patch_load_u8, io_registers, 0x3FF
+ load_u8 io_registers
+
+execute_load_palette_u8: # 0x05 Palette RAM
+ translate_region 5, patch_load_u8, palette_ram, 0x3FF
+ load_u8 palette_ram
+
+execute_load_vram_u8: # 0x06 VRAM
+ translate_region_vram patch_load_u8
+ load_u8 vram
+
+execute_load_oam_u8: # 0x07 OAM RAM
+ translate_region 7, patch_load_u8, oam_ram, 0x3FF
+ load_u8 oam_ram
+
+execute_load_gamepak8_u8: # 0x08 gamepak
+ translate_region_gamepak 8, patch_load_u8
+ load_u8 0 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_u8: # 0x09 gamepak
+ translate_region_gamepak 9, patch_load_u8
+ load_u8 0
+
+execute_load_gamepakA_u8: # 0x0A gamepak
+ translate_region_gamepak 10, patch_load_u8
+ load_u8 0
+
+execute_load_gamepakB_u8: # 0x0B gamepak
+ translate_region_gamepak 11, patch_load_u8
+ load_u8 0
+
+execute_load_gamepakC_u8: # 0x0C gamepak
+ translate_region_gamepak 12, patch_load_u8
+ load_u8 0
+
+execute_load_eeprom_u8: # 0x0D gamepak/eeprom
+ eeprom_load patch_load_u8
+
+execute_load_backup_u8: # 0x0E Flash ROM/SRAM
+ backup_load patch_load_u8
+ nop # NOTE(review): presumably a delay slot filler for the macro above
+
+execute_load_open_u8: # 0x01 and 0x0F open address
+ open_load8 patch_load_u8
+ nop # NOTE(review): presumably a delay slot filler for the macro above
+
+load_u8_ftable: # u8 load handlers, listed in region order 0x00-0x0F
+ .long execute_load_bios_u8 # 0x00 BIOS
+ .long execute_load_open_u8 # 0x01 open address
+ .long execute_load_ewram_u8 # 0x02 EWRAM
+ .long execute_load_iwram_u8 # 0x03 IWRAM
+ .long execute_load_io_u8 # 0x04 I/O registers
+ .long execute_load_palette_u8 # 0x05 Palette RAM
+ .long execute_load_vram_u8 # 0x06 VRAM
+ .long execute_load_oam_u8 # 0x07 OAM RAM
+ .long execute_load_gamepak8_u8 # 0x08 gamepak
+ .long execute_load_gamepak9_u8 # 0x09 gamepak
+ .long execute_load_gamepakA_u8 # 0x0A gamepak
+ .long execute_load_gamepakB_u8 # 0x0B gamepak
+ .long execute_load_gamepakC_u8 # 0x0C gamepak
+ .long execute_load_eeprom_u8 # 0x0D gamepak/eeprom
+ .long execute_load_backup_u8 # 0x0E Flash ROM/SRAM
+ .long execute_load_open_u8 # 0x0F open address
+
+patch_load_u8: # patch handler named by every u8 load handler above
+ patch_handler load_u8_ftable, 0x01
+
+
+
+# Signed 8bit load handlers
+
+execute_load_bios_s8: # s8 load for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check 0, patch_load_s8
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFF # generate offset
+ addu $2, $2, $4 # $2 = upper address + offset
+ load_s8 bios_rom
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ ins $2, $4, 0, 2 # lower 2 bits contribute
+ load_s8 bios_read_protect
+
+2:
+ open_load8_core
+ seb $2, $2 # sign extend result from 8bits
+
+
+execute_load_ewram_s8: # 0x02 EWRAM
+ translate_region_ewram patch_load_s8
+ load_s8 (ewram + 0x8000)
+
+execute_load_s8: # generic entry point, aliased to the IWRAM handler
+execute_load_iwram_s8: # 0x03 IWRAM
+ translate_region 3, patch_load_s8, (iwram + 0x8000), 0x7FFF
+ load_s8 (iwram + 0x8000)
+
+execute_load_io_s8: # 0x04 I/O registers
+ translate_region 4, patch_load_s8, io_registers, 0x3FF
+ load_s8 io_registers
+
+execute_load_palette_s8: # 0x05 Palette RAM
+ translate_region 5, patch_load_s8, palette_ram, 0x3FF
+ load_s8 palette_ram
+
+execute_load_vram_s8: # 0x06 VRAM
+ translate_region_vram patch_load_s8
+ load_s8 vram
+
+execute_load_oam_s8: # 0x07 OAM RAM
+ translate_region 7, patch_load_s8, oam_ram, 0x3FF
+ load_s8 oam_ram
+
+execute_load_gamepak8_s8: # 0x08 gamepak
+ translate_region_gamepak 8, patch_load_s8
+ load_s8 0 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_s8: # 0x09 gamepak
+ translate_region_gamepak 9, patch_load_s8
+ load_s8 0
+
+execute_load_gamepakA_s8: # 0x0A gamepak
+ translate_region_gamepak 10, patch_load_s8
+ load_s8 0
+
+execute_load_gamepakB_s8: # 0x0B gamepak
+ translate_region_gamepak 11, patch_load_s8
+ load_s8 0
+
+execute_load_gamepakC_s8: # 0x0C gamepak
+ translate_region_gamepak 12, patch_load_s8
+ load_s8 0
+
+execute_load_eeprom_s8: # 0x0D gamepak/eeprom
+ eeprom_load patch_load_s8
+
+execute_load_backup_s8: # 0x0E Flash ROM/SRAM
+ backup_load patch_load_s8
+ seb $2, $2 # sign extend result (delay)
+
+execute_load_open_s8: # 0x01 and 0x0F open address
+ open_load8 patch_load_s8
+ seb $2, $2 # sign extend result (delay)
+
+load_s8_ftable: # s8 load handlers, listed in region order 0x00-0x0F
+ .long execute_load_bios_s8 # 0x00 BIOS
+ .long execute_load_open_s8 # 0x01 open address
+ .long execute_load_ewram_s8 # 0x02 EWRAM
+ .long execute_load_iwram_s8 # 0x03 IWRAM
+ .long execute_load_io_s8 # 0x04 I/O registers
+ .long execute_load_palette_s8 # 0x05 Palette RAM
+ .long execute_load_vram_s8 # 0x06 VRAM
+ .long execute_load_oam_s8 # 0x07 OAM RAM
+ .long execute_load_gamepak8_s8 # 0x08 gamepak
+ .long execute_load_gamepak9_s8 # 0x09 gamepak
+ .long execute_load_gamepakA_s8 # 0x0A gamepak
+ .long execute_load_gamepakB_s8 # 0x0B gamepak
+ .long execute_load_gamepakC_s8 # 0x0C gamepak
+ .long execute_load_eeprom_s8 # 0x0D gamepak/eeprom
+ .long execute_load_backup_s8 # 0x0E Flash ROM/SRAM
+ .long execute_load_open_s8 # 0x0F open address
+
+patch_load_s8: # patch handler named by every s8 load handler above
+ patch_handler load_s8_ftable, 1
+
+
+
+# Unsigned aligned 16bit load handlers
+
+execute_load_bios_u16: # aligned u16 load for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check_align 0, 1, 0, patch_load_u16
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFF # generate offset
+ addu $2, $2, $4 # $2 = upper address + offset
+ load_u16 bios_rom
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ ins $2, $4, 0, 2 # insert address bits 0-1 (bit 1 contributes)
+ load_u16 bios_read_protect
+
+2:
+ open_load16_core
+ nop # NOTE(review): presumably fills a delay slot left open by open_load16_core
+
+execute_load_ewram_u16: # 0x02 EWRAM
+ translate_region_ewram_load_align 1, 0, patch_load_u16
+ load_u16 (ewram + 0x8000)
+
+execute_load_u16: # generic entry point, aliased to the IWRAM handler
+execute_load_iwram_u16: # 0x03 IWRAM
+ translate_region_align 3, 1, 0, patch_load_u16, (iwram + 0x8000), 0x7FFF
+ load_u16 (iwram + 0x8000)
+
+execute_load_io_u16: # 0x04 I/O registers
+ translate_region_align 4, 1, 0, patch_load_u16, io_registers, 0x3FF
+ load_u16 io_registers
+
+execute_load_palette_u16: # 0x05 Palette RAM
+ translate_region_align 5, 1, 0, patch_load_u16, palette_ram, 0x3FF
+ load_u16 palette_ram
+
+execute_load_vram_u16: # 0x06 VRAM
+ translate_region_vram_load_align 1, 0, patch_load_u16
+ load_u16 vram
+
+execute_load_oam_u16: # 0x07 OAM RAM
+ translate_region_align 7, 1, 0, patch_load_u16, oam_ram, 0x3FF
+ load_u16 oam_ram
+
+execute_load_gamepak8_u16: # 0x08 gamepak
+ translate_region_gamepak_align 8, 1, 0, patch_load_u16
+ load_u16 0 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_u16: # 0x09 gamepak
+ translate_region_gamepak_align 9, 1, 0, patch_load_u16
+ load_u16 0
+
+execute_load_gamepakA_u16: # 0x0A gamepak
+ translate_region_gamepak_align 10, 1, 0, patch_load_u16
+ load_u16 0
+
+execute_load_gamepakB_u16: # 0x0B gamepak
+ translate_region_gamepak_align 11, 1, 0, patch_load_u16
+ load_u16 0
+
+execute_load_gamepakC_u16: # 0x0C gamepak
+ translate_region_gamepak_align 12, 1, 0, patch_load_u16
+ load_u16 0
+
+execute_load_eeprom_u16: # 0x0D gamepak/eeprom
+ eeprom_load_align 1, 0, patch_load_u16
+
+execute_load_backup_u16: # 0x0E Flash ROM/SRAM
+ backup_load_align 1, 0, patch_load_u16
+ nop # NOTE(review): presumably a delay slot filler for the macro above
+
+execute_load_open_u16: # 0x01 and 0x0F open address
+ open_load16_align 1, 0, patch_load_u16
+ nop # NOTE(review): presumably a delay slot filler for the macro above
+
+
+# Unsigned unaligned 16bit load handlers
+
+execute_load_bios_u16u: # unaligned u16 load for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check_align 0, 1, 1, patch_load_u16
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFE # generate offset with bit 0 cleared
+ addu $2, $2, $4 # $2 = upper address + offset
+ load_u16_unaligned bios_rom
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ ext $1, $4, 1, 1 # $1 = bit 1 of the address
+ ins $2, $1, 1, 1 # bit 1 contributes
+ load_u16_unaligned bios_read_protect
+
+2:
+ open_load16_core
+ ror $2, $2, 8 # rotate value by 8bits
+
+
+execute_load_ewram_u16u: # 0x02 EWRAM unaligned
+ translate_region_ewram_load_align16 1, 1, patch_load_u16
+ load_u16_unaligned (ewram + 0x8000)
+
+execute_load_iwram_u16u: # 0x03 IWRAM unaligned
+ translate_region_align 3, 1, 1, patch_load_u16, (iwram + 0x8000), 0x7FFE
+ load_u16_unaligned (iwram + 0x8000)
+
+execute_load_io_u16u: # 0x04 I/O registers unaligned
+ translate_region_align 4, 1, 1, patch_load_u16, io_registers, 0x3FE
+ load_u16_unaligned io_registers
+
+execute_load_palette_u16u: # 0x05 Palette RAM unaligned
+ translate_region_align 5, 1, 1, patch_load_u16, palette_ram, 0x3FE
+ load_u16_unaligned palette_ram
+
+execute_load_vram_u16u: # 0x06 VRAM unaligned
+ translate_region_vram_load_align16 1, 1, patch_load_u16
+ load_u16_unaligned vram
+
+execute_load_oam_u16u: # 0x07 OAM RAM unaligned
+ translate_region_align 7, 1, 1, patch_load_u16, oam_ram, 0x3FE
+ load_u16_unaligned oam_ram
+
+execute_load_gamepak8_u16u: # 0x08 gamepak unaligned
+ translate_region_gamepak_align16 8, 1, 1, patch_load_u16
+ load_u16_unaligned 0 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_u16u: # 0x09 gamepak unaligned
+ translate_region_gamepak_align16 9, 1, 1, patch_load_u16
+ load_u16_unaligned 0
+
+execute_load_gamepakA_u16u: # 0x0A gamepak unaligned
+ translate_region_gamepak_align16 10, 1, 1, patch_load_u16
+ load_u16_unaligned 0
+
+execute_load_gamepakB_u16u: # 0x0B gamepak unaligned
+ translate_region_gamepak_align16 11, 1, 1, patch_load_u16
+ load_u16_unaligned 0
+
+execute_load_gamepakC_u16u: # 0x0C gamepak unaligned
+ translate_region_gamepak_align16 12, 1, 1, patch_load_u16
+ load_u16_unaligned 0
+
+execute_load_eeprom_u16u: # 0x0D gamepak/eeprom unaligned
+ eeprom_load_align16 1, 1, patch_load_u16
+
+execute_load_backup_u16u: # 0x0E Flash ROM/SRAM unaligned
+ backup_load_align16 1, 1, patch_load_u16
+ ror $2, $2, 8 # rotate value by 8bits
+
+execute_load_open_u16u: # 0x01 and 0x0F open address unaligned
+ open_load16_align16 1, 1, patch_load_u16
+ ror $2, $2, 8 # rotate value by 8bits
+
+load_u16_ftable: # u16 load handlers: 16 aligned entries, then 16 unaligned, then filler entries
+# .long execute_load_full_u16
+ .long execute_load_bios_u16 # 0x00 BIOS
+ .long execute_load_open_u16 # 0x01 open address
+ .long execute_load_ewram_u16 # 0x02 EWRAM
+ .long execute_load_iwram_u16 # 0x03 IWRAM
+ .long execute_load_io_u16 # 0x04 I/O registers
+ .long execute_load_palette_u16 # 0x05 Palette RAM
+ .long execute_load_vram_u16 # 0x06 VRAM
+ .long execute_load_oam_u16 # 0x07 OAM RAM
+ .long execute_load_gamepak8_u16 # 0x08 gamepak
+ .long execute_load_gamepak9_u16 # 0x09 gamepak
+ .long execute_load_gamepakA_u16 # 0x0A gamepak
+ .long execute_load_gamepakB_u16 # 0x0B gamepak
+ .long execute_load_gamepakC_u16 # 0x0C gamepak
+
+ .long execute_load_eeprom_u16 # 0x0D gamepak/eeprom
+ .long execute_load_backup_u16 # 0x0E Flash ROM/SRAM
+ .long execute_load_open_u16 # 0x0F open
+
+ .long execute_load_bios_u16u # 0x00 BIOS unaligned
+ .long execute_load_open_u16u # 0x01 open address unaligned
+ .long execute_load_ewram_u16u # 0x02 EWRAM unaligned
+ .long execute_load_iwram_u16u # 0x03 IWRAM unaligned
+ .long execute_load_io_u16u # 0x04 I/O registers unaligned
+ .long execute_load_palette_u16u # 0x05 Palette RAM unaligned
+ .long execute_load_vram_u16u # 0x06 VRAM unaligned
+ .long execute_load_oam_u16u # 0x07 OAM RAM unaligned
+ .long execute_load_gamepak8_u16u# 0x08 gamepak unaligned
+ .long execute_load_gamepak9_u16u# 0x09 gamepak unaligned
+ .long execute_load_gamepakA_u16u# 0x0A gamepak unaligned
+ .long execute_load_gamepakB_u16u# 0x0B gamepak unaligned
+ .long execute_load_gamepakC_u16u# 0x0C gamepak unaligned
+ .long execute_load_eeprom_u16u # 0x0D gamepak/eeprom unaligned
+ .long execute_load_backup_u16u # 0x0E Flash ROM/SRAM unaligned
+ .long execute_load_open_u16u # 0x0F open unaligned
+
+
+ .long execute_load_full_u16 # filler group of 15 entries; NOTE(review): the next group has 16, confirm 15 is intended
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+
+ .long execute_load_full_u16 # filler group of 16 entries; execute_load_full_u16 is defined outside this chunk
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+ .long execute_load_full_u16
+
+
+
+patch_load_u16: # patch handler named by every u16 load handler above
+ patch_handler_align load_u16_ftable, 1
+
+# Signed aligned 16bit load handlers
+
+execute_load_bios_s16: # aligned s16 load for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check_align 0, 1, 0, patch_load_s16
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFF # generate offset
+ addu $2, $2, $4 # $2 = upper address + offset
+ load_s16 bios_rom
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ ins $2, $4, 0, 2 # insert address bits 0-1 (bit 1 contributes)
+ load_s16 bios_read_protect
+
+2:
+ open_load16_core
+ seh $2, $2 # sign extend result from 16bits
+
+
+execute_load_ewram_s16: # 0x02 EWRAM
+ translate_region_ewram_load_align 1, 0, patch_load_s16
+ load_s16 (ewram + 0x8000)
+
+execute_load_s16: # generic entry point, aliased to the IWRAM handler
+execute_load_iwram_s16: # 0x03 IWRAM
+ translate_region_align 3, 1, 0, patch_load_s16, (iwram + 0x8000), 0x7FFF
+ load_s16 (iwram + 0x8000)
+
+execute_load_io_s16: # 0x04 I/O registers
+ translate_region_align 4, 1, 0, patch_load_s16, io_registers, 0x3FF
+ load_s16 io_registers
+
+execute_load_palette_s16: # 0x05 Palette RAM
+ translate_region_align 5, 1, 0, patch_load_s16, palette_ram, 0x3FF
+ load_s16 palette_ram
+
+execute_load_vram_s16: # 0x06 VRAM
+ translate_region_vram_load_align 1, 0, patch_load_s16
+ load_s16 vram
+
+execute_load_oam_s16: # 0x07 OAM RAM
+ translate_region_align 7, 1, 0, patch_load_s16, oam_ram, 0x3FF
+ load_s16 oam_ram
+
+execute_load_gamepak8_s16: # 0x08 gamepak
+ translate_region_gamepak_align 8, 1, 0, patch_load_s16
+ load_s16 0 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_s16: # 0x09 gamepak
+ translate_region_gamepak_align 9, 1, 0, patch_load_s16
+ load_s16 0
+
+execute_load_gamepakA_s16: # 0x0A gamepak
+ translate_region_gamepak_align 10, 1, 0, patch_load_s16
+ load_s16 0
+
+execute_load_gamepakB_s16: # 0x0B gamepak
+ translate_region_gamepak_align 11, 1, 0, patch_load_s16
+ load_s16 0
+
+execute_load_gamepakC_s16: # 0x0C gamepak
+ translate_region_gamepak_align 12, 1, 0, patch_load_s16
+ load_s16 0
+
+execute_load_eeprom_s16: # 0x0D gamepak/eeprom
+ eeprom_load_align 1, 0, patch_load_s16
+
+execute_load_backup_s16: # 0x0E Flash ROM/SRAM
+ backup_load_align 1, 0, patch_load_s16
+ nop # no sign extension here; NOTE(review): confirm the macro result needs none
+
+execute_load_open_s16: # 0x01 and 0x0F open address
+ open_load16_align 1, 0, patch_load_s16
+ nop # NOTE(review): the BIOS handler open path applies seh after open_load16_core; confirm nop is intended here
+
+
+# Signed unaligned 16bit load handlers
+
+execute_load_bios_s16u: # unaligned s16 load for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check_align 0, 1, 1, patch_load_s16
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFE # generate offset with bit 0 cleared
+ addu $2, $2, $4 # add offset to the upper address in $2 ($1 is zero on this path and must not be the base)
+ load_s16_unaligned bios_rom
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ ext $1, $4, 1, 1 # $1 = bit 1 of the address
+ ins $2, $1, 1, 1 # bit 1 contributes
+ load_s16_unaligned bios_read_protect
+
+2:
+ open_load16_core
+ seb $2, $2 # sign extend result from 8bits
+
+execute_load_ewram_s16u: # 0x02 EWRAM unaligned
+ translate_region_ewram_load_align16 1, 1, patch_load_s16
+ load_s16_unaligned (ewram + 0x8000)
+
+execute_load_iwram_s16u: # 0x03 IWRAM unaligned
+ translate_region_align 3, 1, 1, patch_load_s16, (iwram + 0x8000), 0x7FFE
+ load_s16_unaligned (iwram + 0x8000)
+
+execute_load_io_s16u: # 0x04 I/O registers unaligned
+ translate_region_align 4, 1, 1, patch_load_s16, io_registers, 0x3FE
+ load_s16_unaligned io_registers
+
+execute_load_palette_s16u: # 0x05 Palette RAM unaligned
+ translate_region_align 5, 1, 1, patch_load_s16, palette_ram, 0x3FE
+ load_s16_unaligned palette_ram
+
+execute_load_vram_s16u: # 0x06 VRAM unaligned
+ translate_region_vram_load_align16 1, 1, patch_load_s16
+ load_s16_unaligned vram
+
+execute_load_oam_s16u: # 0x07 OAM RAM unaligned
+ translate_region_align 7, 1, 1, patch_load_s16, oam_ram, 0x3FE
+ load_s16_unaligned oam_ram
+
+execute_load_gamepak8_s16u: # 0x08 gamepak unaligned
+ translate_region_gamepak_align16 8, 1, 1, patch_load_s16
+ load_s16_unaligned 0 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_s16u: # 0x09 gamepak unaligned
+ translate_region_gamepak_align16 9, 1, 1, patch_load_s16
+ load_s16_unaligned 0
+
+execute_load_gamepakA_s16u: # 0x0A gamepak unaligned
+ translate_region_gamepak_align16 10, 1, 1, patch_load_s16
+ load_s16_unaligned 0
+
+execute_load_gamepakB_s16u: # 0x0B gamepak unaligned
+ translate_region_gamepak_align16 11, 1, 1, patch_load_s16
+ load_s16_unaligned 0
+
+execute_load_gamepakC_s16u: # 0x0C gamepak unaligned
+ translate_region_gamepak_align16 12, 1, 1, patch_load_s16
+ load_s16_unaligned 0
+
+execute_load_eeprom_s16u: # 0x0D gamepak/eeprom unaligned
+ eeprom_load_align 1, 1, patch_load_s16 # NOTE(review): the u16u counterpart uses eeprom_load_align16; confirm the plain variant is intended
+
+execute_load_backup_s16u: # 0x0E Flash ROM/SRAM unaligned
+ backup_load_align 1, 1, patch_load_s16 # NOTE(review): the u16u counterpart uses backup_load_align16
+ seb $2, $2 # sign extend result from 8bits
+
+execute_load_open_s16u: # 0x01 and 0x0F open address unaligned
+ open_load16_align 1, 1, patch_load_s16 # NOTE(review): the u16u counterpart uses open_load16_align16
+ seb $2, $2 # sign extend result from 8bits
+
+load_s16_ftable: # s16 load handlers: 16 aligned entries, then 16 unaligned
+ .long execute_load_bios_s16 # 0x00 BIOS
+ .long execute_load_open_s16 # 0x01 open address
+ .long execute_load_ewram_s16 # 0x02 EWRAM
+ .long execute_load_iwram_s16 # 0x03 IWRAM
+ .long execute_load_io_s16 # 0x04 I/O registers
+ .long execute_load_palette_s16 # 0x05 Palette RAM
+ .long execute_load_vram_s16 # 0x06 VRAM
+ .long execute_load_oam_s16 # 0x07 OAM RAM
+ .long execute_load_gamepak8_s16 # 0x08 gamepak
+ .long execute_load_gamepak9_s16 # 0x09 gamepak
+ .long execute_load_gamepakA_s16 # 0x0A gamepak
+ .long execute_load_gamepakB_s16 # 0x0B gamepak
+ .long execute_load_gamepakC_s16 # 0x0C gamepak
+ .long execute_load_eeprom_s16 # 0x0D gamepak/eeprom
+ .long execute_load_backup_s16 # 0x0E Flash ROM/SRAM
+ .long execute_load_open_s16 # 0x0F open
+
+ .long execute_load_bios_s16u # 0x00 BIOS unaligned
+ .long execute_load_open_s16u # 0x01 open address unaligned
+ .long execute_load_ewram_s16u # 0x02 EWRAM unaligned
+ .long execute_load_iwram_s16u # 0x03 IWRAM unaligned
+ .long execute_load_io_s16u # 0x04 I/O registers unaligned
+ .long execute_load_palette_s16u # 0x05 Palette RAM unaligned
+ .long execute_load_vram_s16u # 0x06 VRAM unaligned
+ .long execute_load_oam_s16u # 0x07 OAM RAM unaligned
+ .long execute_load_gamepak8_s16u# 0x08 gamepak unaligned
+ .long execute_load_gamepak9_s16u# 0x09 gamepak unaligned
+ .long execute_load_gamepakA_s16u# 0x0A gamepak unaligned
+ .long execute_load_gamepakB_s16u# 0x0B gamepak unaligned
+ .long execute_load_gamepakC_s16u# 0x0C gamepak unaligned
+ .long execute_load_eeprom_s16u # 0x0D gamepak/eeprom unaligned
+ .long execute_load_backup_s16u # 0x0E Flash ROM/SRAM unaligned
+ .long execute_load_open_s16u # 0x0F open unaligned
+
+patch_load_s16: # patch handler named by every s16 load handler above
+ patch_handler_align load_s16_ftable, 1
+
+
+
+# Unsigned aligned 32bit load handlers
+
+execute_load_bios_u32: # aligned u32 load for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check_align 0, 2, 0, patch_load_u32
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFF # generate offset
+ addu $2, $2, $4 # $2 = upper address + offset
+ load_u32 bios_rom
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address; no address bits are inserted for a word read
+ load_u32 bios_read_protect
+
+2:
+ open_load32_core
+ nop # NOTE(review): presumably fills a delay slot left open by open_load32_core
+
+
+execute_load_ewram_u32: # 0x02 EWRAM
+ translate_region_ewram_load_align 2, 0, patch_load_u32
+ load_u32 (ewram + 0x8000)
+
+execute_load_u32: # generic entry point, aliased to the IWRAM handler
+execute_load_iwram_u32: # 0x03 IWRAM
+ translate_region_align 3, 2, 0, patch_load_u32, (iwram + 0x8000), 0x7FFF
+ load_u32 (iwram + 0x8000)
+
+execute_load_io_u32: # 0x04 I/O registers
+ translate_region_align 4, 2, 0, patch_load_u32, io_registers, 0x3FF
+ load_u32 io_registers
+
+execute_load_palette_u32: # 0x05 Palette RAM
+ translate_region_align 5, 2, 0, patch_load_u32, palette_ram, 0x3FF
+ load_u32 palette_ram
+
+execute_load_vram_u32: # 0x06 VRAM
+ translate_region_vram_load_align 2, 0, patch_load_u32
+ load_u32 vram
+
+execute_load_oam_u32: # 0x07 OAM RAM
+ translate_region_align 7, 2, 0, patch_load_u32, oam_ram, 0x3FF
+ load_u32 oam_ram
+
+execute_load_gamepak8_u32: # 0x08 gamepak
+ translate_region_gamepak_align 8, 2, 0, patch_load_u32
+ load_u32 0 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_u32: # 0x09 gamepak
+ translate_region_gamepak_align 9, 2, 0, patch_load_u32
+ load_u32 0
+
+execute_load_gamepakA_u32: # 0x0A gamepak
+ translate_region_gamepak_align 10, 2, 0, patch_load_u32
+ load_u32 0
+
+execute_load_gamepakB_u32: # 0x0B gamepak
+ translate_region_gamepak_align 11, 2, 0, patch_load_u32
+ load_u32 0
+
+execute_load_gamepakC_u32: # 0x0C gamepak
+ translate_region_gamepak_align 12, 2, 0, patch_load_u32
+ load_u32 0
+
+execute_load_eeprom_u32: # 0x0D gamepak/eeprom
+ eeprom_load_align 2, 0, patch_load_u32
+
+execute_load_backup_u32: # 0x0E Flash ROM/SRAM
+ backup_load_align 2, 0, patch_load_u32
+ nop # NOTE(review): presumably a delay slot filler for the macro above
+
+execute_load_open_u32: # 0x01 and 0x0F open address
+ open_load32_align 2, 0, patch_load_u32
+ nop # NOTE(review): presumably a delay slot filler for the macro above
+
+
+# Unsigned unaligned (by 1) 32bit load handlers
+
+execute_load_bios_u32u1: # u32 load unaligned by 1 for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check_align 0, 2, 1, patch_load_u32
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFC # generate offset with bits 0-1 cleared
+ addu $2, $2, $4 # $2 = upper address + offset
+ load_u32_unaligned bios_rom, 1
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ load_u32_unaligned bios_read_protect, 1
+
+2:
+ open_load32_core
+ ror $2, $2, 8 # rotate value by 8bits
+
+execute_load_ewram_u32u1: # 0x02 EWRAM unaligned (1b)
+ translate_region_ewram_load_align32 2, 1, patch_load_u32
+ load_u32_unaligned (ewram + 0x8000), 1
+
+execute_load_iwram_u32u1: # 0x03 IWRAM unaligned (1b)
+ translate_region_align 3, 2, 1, patch_load_u32, (iwram + 0x8000), 0x7FFC
+ load_u32_unaligned (iwram + 0x8000), 1
+
+execute_load_io_u32u1: # 0x04 I/O registers unaligned (1b)
+ translate_region_align 4, 2, 1, patch_load_u32, io_registers, 0x3FC
+ load_u32_unaligned io_registers, 1
+
+execute_load_palette_u32u1: # 0x05 Palette RAM unaligned (1b)
+ translate_region_align 5, 2, 1, patch_load_u32, palette_ram, 0x3FC
+ load_u32_unaligned palette_ram, 1
+
+execute_load_vram_u32u1: # 0x06 VRAM unaligned (1b)
+ translate_region_vram_load_align32 2, 1, patch_load_u32
+ load_u32_unaligned vram, 1
+
+execute_load_oam_u32u1: # 0x07 OAM RAM unaligned (1b)
+ translate_region_align 7, 2, 1, patch_load_u32, oam_ram, 0x3FC
+ load_u32_unaligned oam_ram, 1
+
+execute_load_gamepak8_u32u1: # 0x08 gamepak unaligned (1b)
+ translate_region_gamepak_align32 8, 2, 1, patch_load_u32
+ load_u32_unaligned 0, 1 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_u32u1: # 0x09 gamepak unaligned (1b)
+ translate_region_gamepak_align32 9, 2, 1, patch_load_u32
+ load_u32_unaligned 0, 1
+
+execute_load_gamepakA_u32u1: # 0x0A gamepak unaligned (1b)
+ translate_region_gamepak_align32 10, 2, 1, patch_load_u32
+ load_u32_unaligned 0, 1
+
+execute_load_gamepakB_u32u1: # 0x0B gamepak unaligned (1b)
+ translate_region_gamepak_align32 11, 2, 1, patch_load_u32
+ load_u32_unaligned 0, 1
+
+execute_load_gamepakC_u32u1: # 0x0C gamepak unaligned (1b)
+ translate_region_gamepak_align32 12, 2, 1, patch_load_u32
+ load_u32_unaligned 0, 1
+
+execute_load_eeprom_u32u1: # 0x0D gamepak/eeprom unaligned (1b)
+ eeprom_load_align32 2, 1, patch_load_u32
+
+execute_load_backup_u32u1: # 0x0E Flash ROM/SRAM unaligned (1b)
+ backup_load_align32 2, 1, patch_load_u32
+ ror $2, $2, 8 # rotate value by 8bits
+
+execute_load_open_u32u1: # 0x01 and 0x0F open address unaligned (1b)
+ open_load32_align32 2, 1, patch_load_u32
+ ror $2, $2, 8 # rotate value by 8bits
+
+
+# Unsigned unaligned (by 2) 32bit load handlers
+
+execute_load_bios_u32u2: # u32 load unaligned by 2 for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check_align 0, 2, 2, patch_load_u32
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFC # generate offset with bits 0-1 cleared
+ addu $2, $2, $4 # $2 = upper address + offset
+ load_u32_unaligned bios_rom, 2
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ load_u32_unaligned bios_read_protect, 2
+
+2:
+ open_load32_core
+ ror $2, $2, 16 # rotate value by 16bits
+
+execute_load_ewram_u32u2: # 0x02 EWRAM unaligned (2b)
+ translate_region_ewram_load_align32 2, 2, patch_load_u32
+ load_u32_unaligned (ewram + 0x8000), 2
+
+execute_load_iwram_u32u2: # 0x03 IWRAM unaligned (2b)
+ translate_region_align 3, 2, 2, patch_load_u32, (iwram + 0x8000), 0x7FFC
+ load_u32_unaligned (iwram + 0x8000), 2
+
+execute_load_io_u32u2: # 0x04 I/O registers unaligned (2b)
+ translate_region_align 4, 2, 2, patch_load_u32, io_registers, 0x3FC
+ load_u32_unaligned io_registers, 2
+
+execute_load_palette_u32u2: # 0x05 Palette RAM unaligned (2b)
+ translate_region_align 5, 2, 2, patch_load_u32, palette_ram, 0x3FC
+ load_u32_unaligned palette_ram, 2
+
+execute_load_vram_u32u2: # 0x06 VRAM unaligned (2b)
+ translate_region_vram_load_align32 2, 2, patch_load_u32
+ load_u32_unaligned vram, 2
+
+execute_load_oam_u32u2: # 0x07 OAM RAM unaligned (2b)
+ translate_region_align 7, 2, 2, patch_load_u32, oam_ram, 0x3FC
+ load_u32_unaligned oam_ram, 2
+
+execute_load_gamepak8_u32u2: # 0x08 gamepak unaligned (2b)
+ translate_region_gamepak_align32 8, 2, 2, patch_load_u32
+ load_u32_unaligned 0, 2 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_u32u2: # 0x09 gamepak unaligned (2b)
+ translate_region_gamepak_align32 9, 2, 2, patch_load_u32
+ load_u32_unaligned 0, 2
+
+execute_load_gamepakA_u32u2: # 0x0A gamepak unaligned (2b)
+ translate_region_gamepak_align32 10, 2, 2, patch_load_u32
+ load_u32_unaligned 0, 2
+
+execute_load_gamepakB_u32u2: # 0x0B gamepak unaligned (2b)
+ translate_region_gamepak_align32 11, 2, 2, patch_load_u32
+ load_u32_unaligned 0, 2
+
+execute_load_gamepakC_u32u2: # 0x0C gamepak unaligned (2b)
+ translate_region_gamepak_align32 12, 2, 2, patch_load_u32
+ load_u32_unaligned 0, 2
+
+execute_load_eeprom_u32u2: # 0x0D gamepak/eeprom unaligned (2b)
+ eeprom_load_align32 2, 2, patch_load_u32
+
+execute_load_backup_u32u2: # 0x0E Flash ROM/SRAM unaligned (2b)
+ backup_load_align32 2, 2, patch_load_u32
+ ror $2, $2, 16 # rotate value by 16bits
+
+execute_load_open_u32u2: # 0x01 and 0x0F open address unaligned (2b)
+ open_load32_align32 2, 2, patch_load_u32
+ ror $2, $2, 16 # rotate value by 16bits
+
+# Unsigned unaligned (by 3) 32bit load handlers
+
+execute_load_bios_u32u3: # u32 load unaligned by 3 for slot 0x00 (BIOS); address in $4, $5 is tested as the PC
+ region_check_align 0, 2, 3, patch_load_u32
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region (delay slot of the branch above)
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFC # generate offset with bits 0-1 cleared
+ addu $2, $2, $4 # $2 = upper address + offset
+ load_u32_unaligned bios_rom, 3
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ load_u32_unaligned bios_read_protect, 3
+
+2:
+ open_load32_core
+ ror $2, $2, 24 # rotate value by 24bits
+
+execute_load_ewram_u32u3: # 0x02 EWRAM unaligned (3b)
+ translate_region_ewram_load_align32 2, 3, patch_load_u32
+ load_u32_unaligned (ewram + 0x8000), 3
+
+execute_load_iwram_u32u3: # 0x03 IWRAM unaligned (3b)
+ translate_region_align 3, 2, 3, patch_load_u32, (iwram + 0x8000), 0x7FFC
+ load_u32_unaligned (iwram + 0x8000), 3
+
+execute_load_io_u32u3: # 0x04 I/O registers unaligned (3b)
+ translate_region_align 4, 2, 3, patch_load_u32, io_registers, 0x3FC
+ load_u32_unaligned io_registers, 3
+
+execute_load_palette_u32u3: # 0x05 Palette RAM unaligned (3b)
+ translate_region_align 5, 2, 3, patch_load_u32, palette_ram, 0x3FC
+ load_u32_unaligned palette_ram, 3
+
+execute_load_vram_u32u3: # 0x06 VRAM unaligned (3b)
+ translate_region_vram_load_align32 2, 3, patch_load_u32
+ load_u32_unaligned vram, 3
+
+execute_load_oam_u32u3: # 0x07 OAM RAM unaligned (3b)
+ translate_region_align 7, 2, 3, patch_load_u32, oam_ram, 0x3FC
+ load_u32_unaligned oam_ram, 3
+
+execute_load_gamepak8_u32u3: # 0x08 gamepak unaligned (3b)
+ translate_region_gamepak_align32 8, 2, 3, patch_load_u32
+ load_u32_unaligned 0, 3 # base 0; NOTE(review): presumably the translate macro leaves a full pointer in $2
+
+execute_load_gamepak9_u32u3: # 0x09 gamepak unaligned (3b)
+ translate_region_gamepak_align32 9, 2, 3, patch_load_u32
+ load_u32_unaligned 0, 3
+
+execute_load_gamepakA_u32u3: # 0x0A gamepak unaligned (3b)
+ translate_region_gamepak_align32 10, 2, 3, patch_load_u32
+ load_u32_unaligned 0, 3
+
+execute_load_gamepakB_u32u3: # 0x0B gamepak unaligned (3b)
+ translate_region_gamepak_align32 11, 2, 3, patch_load_u32
+ load_u32_unaligned 0, 3
+
+execute_load_gamepakC_u32u3: # 0x0C gamepak unaligned (3b)
+ translate_region_gamepak_align32 12, 2, 3, patch_load_u32
+ load_u32_unaligned 0, 3
+
+execute_load_eeprom_u32u3: # 0x0D gamepak/eeprom unaligned (3b)
+ eeprom_load_align32 2, 3, patch_load_u32
+
+execute_load_backup_u32u3: # 0x0E Flash ROM/SRAM unaligned (3b)
+ backup_load_align32 2, 3, patch_load_u32
+ ror $2, $2, 24 # rotate value by 24bits
+
+execute_load_open_u32u3: # 0x01 and 0x0F open address unaligned (3b)
+ open_load32_align32 2, 3, patch_load_u32
+ ror $2, $2, 24 # rotate value by 24bits
+
+
+load_u32_ftable: # u32 load handlers: four groups of 16 entries for misalignment 0, 1, 2 and 3
+ .long execute_load_bios_u32 # 0x00 BIOS
+ .long execute_load_open_u32 # 0x01 open address
+ .long execute_load_ewram_u32 # 0x02 EWRAM
+ .long execute_load_iwram_u32 # 0x03 IWRAM
+ .long execute_load_io_u32 # 0x04 I/O registers
+ .long execute_load_palette_u32 # 0x05 Palette RAM
+ .long execute_load_vram_u32 # 0x06 VRAM
+ .long execute_load_oam_u32 # 0x07 OAM RAM
+ .long execute_load_gamepak8_u32 # 0x08 gamepak
+ .long execute_load_gamepak9_u32 # 0x09 gamepak
+ .long execute_load_gamepakA_u32 # 0x0A gamepak
+ .long execute_load_gamepakB_u32 # 0x0B gamepak
+ .long execute_load_gamepakC_u32 # 0x0C gamepak
+
+ .long execute_load_eeprom_u32 # 0x0D gamepak/eeprom
+ .long execute_load_backup_u32 # 0x0E Flash ROM/SRAM
+ .long execute_load_open_u32 # 0x0F open
+
+ .long execute_load_bios_u32u1 # 0x00 BIOS unaligned (1b)
+ .long execute_load_open_u32u1 # 0x01 open address unaligned (1b)
+ .long execute_load_ewram_u32u1 # 0x02 EWRAM unaligned (1b)
+ .long execute_load_iwram_u32u1 # 0x03 IWRAM unaligned (1b)
+ .long execute_load_io_u32u1 # 0x04 I/O registers unaligned (1b)
+ .long execute_load_palette_u32u1 # 0x05 Palette RAM unaligned (1b)
+ .long execute_load_vram_u32u1 # 0x06 VRAM unaligned (1b)
+ .long execute_load_oam_u32u1 # 0x07 OAM RAM unaligned (1b)
+ .long execute_load_gamepak8_u32u1 # 0x08 gamepak unaligned (1b)
+ .long execute_load_gamepak9_u32u1 # 0x09 gamepak unaligned (1b)
+ .long execute_load_gamepakA_u32u1 # 0x0A gamepak unaligned (1b)
+ .long execute_load_gamepakB_u32u1 # 0x0B gamepak unaligned (1b)
+ .long execute_load_gamepakC_u32u1 # 0x0C gamepak unaligned (1b)
+ .long execute_load_eeprom_u32u1 # 0x0D gamepak/eeprom unaligned (1b)
+ .long execute_load_backup_u32u1 # 0x0E Flash ROM/SRAM unaligned (1b)
+ .long execute_load_open_u32u1 # 0x0F open unaligned (1b)
+
+ .long execute_load_bios_u32u2 # 0x00 BIOS unaligned (2b)
+ .long execute_load_open_u32u2 # 0x01 open address unaligned (2b)
+ .long execute_load_ewram_u32u2 # 0x02 EWRAM unaligned (2b)
+ .long execute_load_iwram_u32u2 # 0x03 IWRAM unaligned (2b)
+ .long execute_load_io_u32u2 # 0x04 I/O registers unaligned (2b)
+ .long execute_load_palette_u32u2 # 0x05 Palette RAM unaligned (2b)
+ .long execute_load_vram_u32u2 # 0x06 VRAM unaligned (2b)
+ .long execute_load_oam_u32u2 # 0x07 OAM RAM unaligned (2b)
+ .long execute_load_gamepak8_u32u2 # 0x08 gamepak unaligned (2b)
+ .long execute_load_gamepak9_u32u2 # 0x09 gamepak unaligned (2b)
+ .long execute_load_gamepakA_u32u2 # 0x0A gamepak unaligned (2b)
+ .long execute_load_gamepakB_u32u2 # 0x0B gamepak unaligned (2b)
+ .long execute_load_gamepakC_u32u2 # 0x0C gamepak unaligned (2b)
+ .long execute_load_eeprom_u32u2 # 0x0D gamepak/eeprom unaligned (2b)
+ .long execute_load_backup_u32u2 # 0x0E Flash ROM/SRAM unaligned (2b)
+ .long execute_load_open_u32u2 # 0x0F open unaligned (2b)
+
+ .long execute_load_bios_u32u3 # 0x00 BIOS unaligned (3b)
+ .long execute_load_open_u32u3 # 0x01 open address unaligned (3b)
+ .long execute_load_ewram_u32u3 # 0x02 EWRAM unaligned (3b)
+ .long execute_load_iwram_u32u3 # 0x03 IWRAM unaligned (3b)
+ .long execute_load_io_u32u3 # 0x04 I/O registers unaligned (3b)
+ .long execute_load_palette_u32u3 # 0x05 Palette RAM unaligned (3b)
+ .long execute_load_vram_u32u3 # 0x06 VRAM unaligned (3b)
+ .long execute_load_oam_u32u3 # 0x07 OAM RAM unaligned (3b)
+ .long execute_load_gamepak8_u32u3 # 0x08 gamepak unaligned (3b)
+ .long execute_load_gamepak9_u32u3 # 0x09 gamepak unaligned (3b)
+ .long execute_load_gamepakA_u32u3 # 0x0A gamepak unaligned (3b)
+ .long execute_load_gamepakB_u32u3 # 0x0B gamepak unaligned (3b)
+ .long execute_load_gamepakC_u32u3 # 0x0C gamepak unaligned (3b)
+ .long execute_load_eeprom_u32u3 # 0x0D gamepak/eeprom unaligned (3b)
+ .long execute_load_backup_u32u3 # 0x0E Flash ROM/SRAM unaligned (3b)
+ .long execute_load_open_u32u3 # 0x0F open unaligned (3b)
+
+patch_load_u32: # patch handler named by every u32 load handler above
+ patch_handler_align load_u32_ftable, 2
+
+
+
+# Unsigned always aligned 32bit load handlers
+
+execute_load_bios_u32a:
+ region_check 0, patch_load_u32a
+ srl $2, $4, 14 # check if address is in BIOS region
+ bne $2, $0, 2f # if not, perform open read
+ srl $1, $5, 14 # check if PC is in BIOS region
+ bne $1, $0, 1f # if not, perform BIOS protected read
+ lui $2, %hi(bios_rom) # generate upper address (delay)
+
+ andi $4, $4, 0x3FFF # generate offset
+ addu $2, $2, $4
+ load_u32 bios_rom
+
+1:
+ lui $2, %hi(bios_read_protect) # generate upper address
+ load_u32 bios_read_protect
+
+2:
+ open_load32_a
+ nop
+
+execute_load_ewram_u32a:
+ translate_region_ewram patch_load_u32a
+ load_u32 (ewram + 0x8000)
+
+execute_aligned_load32:
+execute_load_iwram_u32a:
+ translate_region 3, patch_load_u32a, (iwram + 0x8000), 0x7FFF
+ load_u32 (iwram + 0x8000)
+
+execute_load_io_u32a:
+ translate_region 4, patch_load_u32a, io_registers, 0x3FF
+ load_u32 io_registers
+
+execute_load_palette_u32a:
+ translate_region 5, patch_load_u32a, palette_ram, 0x3FF
+ load_u32 palette_ram
+
+execute_load_vram_u32a:
+ translate_region_vram patch_load_u32a
+ load_u32 vram
+
+execute_load_oam_u32a:
+ translate_region 7, patch_load_u32a, oam_ram, 0x3FF
+ load_u32 oam_ram
+
+execute_load_gamepak8_u32a:
+ translate_region_gamepak_a 8, patch_load_u32a
+ load_u32 0
+
+execute_load_gamepak9_u32a:
+ translate_region_gamepak_a 9, patch_load_u32a
+ load_u32 0
+
+execute_load_gamepakA_u32a:
+ translate_region_gamepak_a 10, patch_load_u32a
+ load_u32 0
+
+execute_load_gamepakB_u32a:
+ translate_region_gamepak_a 11, patch_load_u32a
+ load_u32 0
+
+execute_load_gamepakC_u32a:
+ translate_region_gamepak_a 12, patch_load_u32a
+ load_u32 0
+
+execute_load_eeprom_u32a:
+ eeprom_load_a patch_load_u32a
+
+execute_load_backup_u32a:
+ backup_load_a patch_load_u32a
+ nop
+
+execute_load_open_u32a:
+ open_load32_a patch_load_u32a
+
+# Handler table for the always-aligned 32bit loads, one entry per region
+# number 0x00-0x0F; patch_load_u32a hands this table to patch_handler.
+load_u32a_ftable:
+ .long execute_load_bios_u32a # 0x00 BIOS (aligned)
+ .long execute_load_open_u32a # 0x01 open address (aligned)
+ .long execute_load_ewram_u32a # 0x02 EWRAM (aligned)
+ .long execute_load_iwram_u32a # 0x03 IWRAM (aligned)
+ .long execute_load_io_u32a # 0x04 I/O registers (aligned)
+ .long execute_load_palette_u32a # 0x05 Palette RAM (aligned)
+ .long execute_load_vram_u32a # 0x06 VRAM (aligned)
+ .long execute_load_oam_u32a # 0x07 OAM RAM (aligned)
+ .long execute_load_gamepak8_u32a # 0x08 gamepak (aligned)
+ .long execute_load_gamepak9_u32a # 0x09 gamepak (aligned)
+ .long execute_load_gamepakA_u32a # 0x0A gamepak (aligned)
+ .long execute_load_gamepakB_u32a # 0x0B gamepak (aligned)
+ .long execute_load_gamepakC_u32a # 0x0C gamepak (aligned)
+ .long execute_load_eeprom_u32a # 0x0D gamepak/eeprom (aligned)
+ .long execute_load_backup_u32a # 0x0E Flash ROM/SRAM (aligned)
+ .long execute_load_open_u32a # 0x0F open (aligned)
+
+patch_load_u32a:
+ patch_handler load_u32a_ftable, 1
+
+
+# Unsigned 8bit store handlers
+
+execute_store_ignore0_u8:
+ ignore_region 0, patch_store_u8
+
+execute_store_ignore1_u8:
+ ignore_region 1, patch_store_u8
+
+execute_store_ewram_u8:
+ translate_region_ewram patch_store_u8
+ store_u8_smc (ewram + 0x8000)
+
+execute_store_u8:
+execute_store_iwram_u8:
+ translate_region 3, patch_store_u8, (iwram + 0x8000), 0x7FFF
+ store_u8_smc (iwram + 0x8000)
+
+execute_store_io_u8:
+ region_check 4, patch_store_u8
+ andi $5, $5, 0xFF # make value 8bit
+ andi $4, $4, 0x3FF # wrap around address
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp)
+
+ save_registers
+ jal write_io_register8 # write the value out
+ sw $6, REG_PC($16) # save the PC (delay slot)
+ j write_io_epilogue # handle any state changes
+ nop
+
+execute_store_palette_u8:
+ region_check 5, patch_store_u8
+ lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
+ ins $5, $5, 8, 8 # double value
+ andi $4, $4, 0x3FE # align palette address
+ addu $2, $2, $4
+ sh $5, %lo(palette_ram)($2) # palette_ram[address] = value
+ sll $1, $5, 1 # make green 6bits
+ ins $1, $0, 0, 6 # make bottom bit 0
+ ins $1, $5, 0, 5 # insert red channel into $1
+ lui $2, %hi(palette_ram_converted)
+ addu $2, $2, $4
+ jr $ra # return
+ sh $1, %lo(palette_ram_converted)($2)
+
+execute_store_vram_u8:
+ translate_region_vram_store_align16 patch_store_u8
+ store_u8_double vram
+
+execute_store_oam_u8:
+ translate_region 7, patch_store_u8, oam_ram, 0x3FE
+ lui $1, %hi(oam_update) # write non-zero to oam_update
+ sw $1, %lo(oam_update)($1) # cheap, but this is non-zero
+ store_u8_double oam_ram
+
+execute_store_ignore8_u8:
+ ignore_region 8, patch_store_u8
+
+execute_store_ignore9_u8:
+ ignore_region 9, patch_store_u8
+
+execute_store_ignoreA_u8:
+ ignore_region 10, patch_store_u8
+
+execute_store_ignoreB_u8:
+ ignore_region 11, patch_store_u8
+
+execute_store_ignoreC_u8:
+ ignore_region 12, patch_store_u8
+
+execute_store_eeprom_u8:
+ store_function write_eeprom, 13, patch_store_u8, 0x3FF
+
+execute_store_backup_u8:
+ store_function write_backup, 14, patch_store_u8, 0xFFFF
+
+execute_store_ignoreF_u8:
+ ignore_high patch_store_u8
+
+store_u8_ftable:
+ .long execute_store_ignore0_u8 # 0x00 BIOS
+ .long execute_store_ignore1_u8 # 0x01 open address
+ .long execute_store_ewram_u8 # 0x02 EWRAM
+ .long execute_store_iwram_u8 # 0x03 IWRAM
+ .long execute_store_io_u8 # 0x04 I/O registers
+ .long execute_store_palette_u8 # 0x05 Palette RAM
+ .long execute_store_vram_u8 # 0x06 VRAM
+ .long execute_store_oam_u8 # 0x07 OAM RAM
+ .long execute_store_ignore8_u8 # 0x08 gamepak
+ .long execute_store_ignore9_u8 # 0x09 gamepak
+ .long execute_store_ignoreA_u8 # 0x0A gamepak
+ .long execute_store_ignoreB_u8 # 0x0B gamepak
+ .long execute_store_ignoreC_u8 # 0x0C gamepak
+ .long execute_store_eeprom_u8 # 0x0D gamepak/eeprom
+ .long execute_store_backup_u8 # 0x0E Flash ROM/SRAM
+ .long execute_store_ignoreF_u8 # 0x0F open address
+
+patch_store_u8:
+ patch_handler store_u8_ftable, 0x0F
+
+
+# Unsigned 16bit store handlers
+
+execute_store_ignore0_u16:
+ ignore_region 0, patch_store_u16
+
+execute_store_ignore1_u16:
+ ignore_region 1, patch_store_u16
+
+execute_store_ewram_u16:
+ translate_region_ewram_store_align16 patch_store_u16
+ store_u16_smc (ewram + 0x8000)
+
+execute_store_u16:
+execute_store_iwram_u16:
+ translate_region 3, patch_store_u16, (iwram + 0x8000), 0x7FFE
+ store_u16_smc (iwram + 0x8000)
+
+execute_store_io_u16:
+ region_check 4, patch_store_u16
+ andi $5, $5, 0xFFFF # make value 16bit
+ andi $4, $4, 0x3FE # wrap around/align address
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp)
+
+ save_registers
+ jal write_io_register16 # write the value out
+ sw $6, REG_PC($16) # save the PC (delay slot)
+ j write_io_epilogue # handle any state changes
+ nop
+
+execute_store_palette_u16:
+ region_check 5, patch_store_u16
+ lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
+ andi $4, $4, 0x3FE # wrap/align palette address
+ addu $2, $2, $4
+ sh $5, %lo(palette_ram)($2) # palette_ram[address] = value
+ sll $1, $5, 1 # make green 6bits
+ ins $1, $0, 0, 6 # make bottom bit 0
+ ins $1, $5, 0, 5 # insert red channel into $1
+ lui $2, %hi(palette_ram_converted)
+ addu $2, $2, $4
+ jr $ra # return
+ sh $1, %lo(palette_ram_converted)($2)
+
+execute_store_vram_u16:
+ translate_region_vram_store_align16 patch_store_u16
+ store_u16 vram
+
+execute_store_oam_u16:
+ translate_region 7, patch_store_u16, oam_ram, 0x3FE
+ lui $1, %hi(oam_update) # write non-zero to oam_update
+ sw $1, %lo(oam_update)($1) # cheap, but this is non-zero
+ store_u16 oam_ram
+
+execute_store_rtc_u16:
+ store_function write_rtc, 8, patch_store_u16, 0xFE
+
+execute_store_ignore9_u16:
+ ignore_region 9, patch_store_u16
+
+execute_store_ignoreA_u16:
+ ignore_region 10, patch_store_u16
+
+execute_store_ignoreB_u16:
+ ignore_region 11, patch_store_u16
+
+execute_store_ignoreC_u16:
+ ignore_region 12, patch_store_u16
+
+execute_store_eeprom_u16:
+ store_function write_eeprom, 13, patch_store_u16, 0x3FE
+
+execute_store_ignoreE_u16:
+ ignore_region 14, patch_store_u16
+
+execute_store_ignoreF_u16:
+ ignore_high patch_store_u16
+
+store_u16_ftable:
+ .long execute_store_ignore0_u16 # 0x00 BIOS
+ .long execute_store_ignore1_u16 # 0x01 open address
+ .long execute_store_ewram_u16 # 0x02 EWRAM
+ .long execute_store_iwram_u16 # 0x03 IWRAM
+ .long execute_store_io_u16 # 0x04 I/O registers
+ .long execute_store_palette_u16 # 0x05 Palette RAM
+ .long execute_store_vram_u16 # 0x06 VRAM
+ .long execute_store_oam_u16 # 0x07 OAM RAM
+ .long execute_store_rtc_u16 # 0x08 gamepak
+ .long execute_store_ignore9_u16 # 0x09 gamepak
+ .long execute_store_ignoreA_u16 # 0x0A gamepak
+ .long execute_store_ignoreB_u16 # 0x0B gamepak
+ .long execute_store_ignoreC_u16 # 0x0C gamepak
+ .long execute_store_eeprom_u16 # 0x0D gamepak/eeprom
+ .long execute_store_ignoreE_u16 # 0x0E Flash ROM/SRAM
+ .long execute_store_ignoreF_u16 # 0x0F open address
+
+
+patch_store_u16:
+ patch_handler store_u16_ftable, 0x0F
+
+
+
+
+# Unsigned 32bit store handlers
+
+execute_store_ignore0_u32:
+ ignore_region 0, patch_store_u32
+
+execute_store_ignore1_u32:
+ ignore_region 1, patch_store_u32
+
+execute_store_ewram_u32:
+ translate_region_ewram_store_align32 patch_store_u32
+ store_u32_smc (ewram + 0x8000)
+
+execute_store_u32:
+execute_store_iwram_u32:
+ translate_region 3, patch_store_u32, (iwram + 0x8000), 0x7FFC
+ store_u32_smc (iwram + 0x8000)
+
+execute_store_io_u32:
+ region_check 4, patch_store_u32
+ nop
+ andi $4, $4, 0x3FC # wrap around/align address
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp)
+
+ save_registers
+ jal write_io_register32 # write the value out
+ sw $6, REG_PC($16) # save the PC (delay slot)
+ j write_io_epilogue # handle any state changes
+ nop
+
+execute_store_palette_u32:
+ region_check 5, patch_store_u32
+ lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
+ andi $4, $4, 0x3FC # wrap/align palette address
+ addu $2, $2, $4
+ sw $5, %lo(palette_ram)($2) # palette_ram[address] = value
+
+ sll $1, $5, 1 # make green 6bits
+ ins $1, $0, 0, 6 # make bottom bit 0
+ ins $1, $5, 0, 5 # insert red channel into $1
+ lui $2, %hi(palette_ram_converted)
+ addu $2, $2, $4
+ addiu $2, $2, %lo(palette_ram_converted)
+ sh $1, ($2)
+
+ srl $5, $5, 16 # shift down to next palette value
+ sll $1, $5, 1 # make green 6bits
+ ins $1, $0, 0, 6 # make bottom bit 0
+ ins $1, $5, 0, 5 # insert red channel into $1
+
+ jr $ra # return
+ sh $1, 2($2)
+
+execute_store_vram_u32:
+ translate_region_vram_store_align32 patch_store_u32
+ store_u32 vram
+
+execute_store_oam_u32:
+ translate_region 7, patch_store_u32, oam_ram, 0x3FC
+ lui $1, %hi(oam_update) # write non-zero to oam_update
+ sw $1, %lo(oam_update)($1) # cheap, but this is non-zero
+ store_u32 oam_ram
+
+execute_store_ignore8_u32:
+ ignore_region 8, patch_store_u32
+
+execute_store_ignore9_u32:
+ ignore_region 9, patch_store_u32
+
+execute_store_ignoreA_u32:
+ ignore_region 10, patch_store_u32
+
+execute_store_ignoreB_u32:
+ ignore_region 11, patch_store_u32
+
+execute_store_ignoreC_u32:
+ ignore_region 12, patch_store_u32
+
+execute_store_eeprom_u32:
+ store_function write_eeprom, 13, patch_store_u32, 0x3FC
+
+execute_store_ignoreE_u32:
+ ignore_region 14, patch_store_u32
+
+execute_store_ignoreF_u32:
+ ignore_high patch_store_u32
+
+store_u32_ftable:
+ .long execute_store_ignore0_u32 # 0x00 BIOS
+ .long execute_store_ignore1_u32 # 0x01 open address
+ .long execute_store_ewram_u32 # 0x02 EWRAM
+ .long execute_store_iwram_u32 # 0x03 IWRAM
+ .long execute_store_io_u32 # 0x04 I/O registers
+ .long execute_store_palette_u32 # 0x05 Palette RAM
+ .long execute_store_vram_u32 # 0x06 VRAM
+ .long execute_store_oam_u32 # 0x07 OAM RAM
+ .long execute_store_ignore8_u32 # 0x08 gamepak
+ .long execute_store_ignore9_u32 # 0x09 gamepak
+ .long execute_store_ignoreA_u32 # 0x0A gamepak
+ .long execute_store_ignoreB_u32 # 0x0B gamepak
+ .long execute_store_ignoreC_u32 # 0x0C gamepak
+ .long execute_store_eeprom_u32 # 0x0D gamepak/eeprom
+ .long execute_store_ignoreE_u32 # 0x0E Flash ROM/SRAM
+ .long execute_store_ignoreF_u32 # 0x0F open address
+
+
+patch_store_u32:
+ patch_handler store_u32_ftable, 0x0F
+
+
+
+# Unsigned always aligned, a2 safe 32bit store handlers
+
+execute_store_ignore0_u32a:
+ ignore_region 0, patch_store_u32a
+
+execute_store_ignore1_u32a:
+ ignore_region 1, patch_store_u32a
+
+execute_store_ewram_u32a:
+ translate_region_ewram_store_align32 patch_store_u32a
+ store_u32 (ewram + 0x8000)
+
+execute_aligned_store32:
+execute_store_iwram_u32a:
+ translate_region 3, patch_store_u32a, (iwram + 0x8000), 0x7FFC
+ store_u32 (iwram + 0x8000)
+
+execute_store_io_u32a:
+ region_check 4, patch_store_u32a
+ nop
+ sw $6, REG_SAVE($16) # save a2
+ sw $ra, REG_SAVE2($16) # save ra
+
+ andi $4, $4, 0x3FC # wrap around/align address
+
+ save_registers
+ jal write_io_register32 # write the value out
+ nop
+
+ restore_registers
+
+ lw $ra, REG_SAVE2($16) # restore ra
+ jr $ra
+ lw $6, REG_SAVE($16) # restore a2
+
+execute_store_palette_u32a:
+ region_check 5, patch_store_u32a
+ lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
+ andi $4, $4, 0x3FC # wrap/align palette address
+ addu $2, $2, $4
+ sw $5, %lo(palette_ram)($2) # palette_ram[address] = value
+
+ sll $1, $5, 1 # make green 6bits
+ ins $1, $0, 0, 6 # make bottom bit 0
+ ins $1, $5, 0, 5 # insert red channel into $1
+ lui $2, %hi(palette_ram_converted)
+ addu $2, $2, $4
+ addiu $2, $2, %lo(palette_ram_converted)
+ sh $1, ($2)
+
+ srl $5, $5, 16 # shift down to next palette value
+ sll $1, $5, 1 # make green 6bits
+ ins $1, $0, 0, 6 # make bottom bit 0
+ ins $1, $5, 0, 5 # insert red channel into $1
+
+ jr $ra # return
+ sh $1, 2($2)
+
+execute_store_vram_u32a:
+ translate_region_vram_store_align32 patch_store_u32a
+ store_u32 vram
+
+execute_store_oam_u32a:
+ translate_region 7, patch_store_u32a, oam_ram, 0x3FC
+ lui $1, %hi(oam_update) # write non-zero to oam_update
+ sw $1, %lo(oam_update)($1) # cheap, but this is non-zero
+ store_u32 oam_ram
+
+execute_store_ignore8_u32a:
+ ignore_region 8, patch_store_u32a
+
+execute_store_ignore9_u32a:
+ ignore_region 9, patch_store_u32a
+
+execute_store_ignoreA_u32a:
+ ignore_region 10, patch_store_u32a
+
+execute_store_ignoreB_u32a:
+ ignore_region 11, patch_store_u32a
+
+execute_store_ignoreC_u32a:
+ ignore_region 12, patch_store_u32a
+
+execute_store_eeprom_u32a:
+ store_function_a write_eeprom, 13, patch_store_u32a, 0x3FC
+
+execute_store_ignoreE_u32a:
+ ignore_region 14, patch_store_u32a
+
+execute_store_ignoreF_u32a:
+ ignore_high patch_store_u32a
+
+store_u32a_ftable:
+ .long execute_store_ignore0_u32a# 0x00 BIOS
+ .long execute_store_ignore1_u32a# 0x01 open address
+ .long execute_store_ewram_u32a # 0x02 EWRAM
+ .long execute_store_iwram_u32a # 0x03 IWRAM
+ .long execute_store_io_u32a # 0x04 I/O registers
+ .long execute_store_palette_u32a# 0x05 Palette RAM
+ .long execute_store_vram_u32a # 0x06 VRAM
+ .long execute_store_oam_u32a # 0x07 OAM RAM
+ .long execute_store_ignore8_u32a# 0x08 gamepak
+ .long execute_store_ignore9_u32a# 0x09 gamepak
+ .long execute_store_ignoreA_u32a# 0x0A gamepak
+ .long execute_store_ignoreB_u32a# 0x0B gamepak
+ .long execute_store_ignoreC_u32a# 0x0C gamepak
+ .long execute_store_eeprom_u32a # 0x0D gamepak/eeprom
+ .long execute_store_ignoreE_u32a# 0x0E Flash ROM/SRAM
+ .long execute_store_ignoreF_u32a# 0x0F open address
+
+patch_store_u32a:
+ patch_handler store_u32a_ftable, 0x0F
+
+
+
+#execute_load_u8:
+# Generic unsigned 8bit load through the read memory map.
+# In: $4 = address. Out: $2 = byte read, zero extended (fast path uses lbu).
+# Clobbers $1. Addresses with any of the top four bits set, or whose map
+# entry is NULL, take the slow path in ext_load_u8.
+execute_load_full_u8:
+ srl $1, $4, 28 # check if the address is out of range
+ bne $1, $0, ext_load_u8 # if it is, perform an extended read
+ srl $2, $4, 15 # $2 = page number of address (delay slot)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16 # $2 = $16 + 4 * page number
+ lw $1, -32768($2) # $1 = memory_map_read[address >> 15]
+ beq $1, $0, ext_load_u8 # if it's NULL perform an extended read
+ andi $2, $4, 0x7FFF # $2 = low 15bits of address (delay slot)
+ addu $1, $1, $2 # add the memory map offset
+ jr $ra # return
+ lbu $2, ($1) # read the value (delay slot)
+
+# Slow path: keeps $ra on the stack and calls read_memory8 between
+# save_registers / restore_registers (macros defined outside this section).
+# $4 is passed through untouched; the result is expected in $2 - presumably
+# the C return value, TODO confirm against read_memory8.
+ext_load_u8:
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp) # store return address
+ save_registers
+ jal read_memory8 # read the value
+ nop
+ restore_registers
+ lw $ra, ($sp) # restore return address
+ jr $ra # return
+ addiu $sp, $sp, 4 # fix stack (delay slot)
+
+#execute_load_s8:
+# Generic signed 8bit load through the read memory map.
+# In: $4 = address. Out: $2 = byte read, sign extended (fast path uses lb).
+# Clobbers $1. Addresses with any of the top four bits set, or whose map
+# entry is NULL, take the slow path in ext_load_s8.
+execute_load_full_s8:
+ srl $1, $4, 28 # check if the address is out of range
+ bne $1, $0, ext_load_s8 # if it is, perform an extended read
+ srl $2, $4, 15 # $2 = page number of address (delay slot)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16 # $2 = $16 + 4 * page number
+ lw $1, -32768($2) # $1 = memory_map_read[address >> 15]
+ beq $1, $0, ext_load_s8 # if it's NULL perform an extended read
+ andi $2, $4, 0x7FFF # $2 = low 15bits of address (delay slot)
+ addu $1, $1, $2 # add the memory map offset
+ jr $ra # return
+ lb $2, ($1) # read the value (delay slot)
+
+# Slow path: same call sequence as the unsigned 8bit version (read_memory8),
+# followed by seb so the low byte of $2 is sign extended before returning.
+ext_load_s8:
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp) # store return address
+ save_registers
+ jal read_memory8 # read the value
+ nop
+ restore_registers
+ seb $2, $2 # sign extend the read value
+ lw $ra, ($sp) # restore return address
+ jr $ra # return
+ addiu $sp, $sp, 4 # fix stack (delay slot)
+
+#execute_load_u16:
+# Generic unsigned 16bit load through the read memory map.
+# In: $4 = address. Out: $2 = halfword read, zero extended (lhu).
+# Clobbers $1. The slow path (ext_load_u16) is taken when any of the top four
+# address bits is set, when the address is odd, or when the map entry is NULL.
+execute_load_full_u16:
+ srl $1, $4, 28 # check if the address is out of range
+ ins $1, $4, 4, 1 # or unaligned (address bit 0 -> bit 4 of $1)
+ bne $1, $0, ext_load_u16 # if it is, perform an extended read
+ srl $2, $4, 15 # $2 = page number of address (delay slot)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16 # $2 = $16 + 4 * page number
+ lw $1, -32768($2) # $1 = memory_map_read[address >> 15]
+ beq $1, $0, ext_load_u16 # if it's NULL perform an extended read
+ andi $2, $4, 0x7FFF # $2 = low 15bits of address (delay slot)
+ addu $1, $1, $2 # add the memory map offset
+ jr $ra # return
+ lhu $2, ($1) # read the value (delay slot)
+
+# Slow path: keeps $ra on the stack and calls read_memory16 between
+# save_registers / restore_registers (macros defined outside this section).
+# The result is expected in $2 - presumably the C return value, TODO confirm.
+ext_load_u16:
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp) # store return address
+ save_registers
+ jal read_memory16 # read the value
+ nop
+ restore_registers
+ lw $ra, ($sp) # restore return address
+ jr $ra # return
+ addiu $sp, $sp, 4 # fix stack (delay slot)
+
+#execute_load_s16:
+# Generic signed 16bit load through the read memory map.
+# In: $4 = address. Out: $2 = halfword read, sign extended (lh).
+# Clobbers $1. The slow path (ext_load_s16) is taken when any of the top four
+# address bits is set, when the address is odd, or when the map entry is NULL.
+execute_load_full_s16:
+ srl $1, $4, 28 # check if the address is out of range
+ ins $1, $4, 4, 1 # or unaligned (address bit 0 -> bit 4 of $1)
+ bne $1, $0, ext_load_s16 # if it is, perform an extended read
+ srl $2, $4, 15 # $2 = page number of address (delay slot)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16 # $2 = $16 + 4 * page number
+ lw $1, -32768($2) # $1 = memory_map_read[address >> 15]
+ beq $1, $0, ext_load_s16 # if it's NULL perform an extended read
+ andi $2, $4, 0x7FFF # $2 = low 15bits of address (delay slot)
+ addu $1, $1, $2 # add the memory map offset
+ jr $ra # return
+ lh $2, ($1) # read the value (delay slot)
+
+# Slow path: calls read_memory16_signed between save_registers and
+# restore_registers, then applies seh to the low halfword of $2.
+# NOTE(review): how read_memory16_signed treats odd addresses is not visible
+# here - confirm that the extra seh is harmless for that case.
+ext_load_s16:
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp) # store return address
+ save_registers
+ jal read_memory16_signed # read the value
+ nop
+ restore_registers
+ seh $2, $2 # sign extend the return value
+ lw $ra, ($sp) # restore return address
+ jr $ra # return
+ addiu $sp, $sp, 4 # fix stack (delay slot)
+
+#execute_load_u32:
+# Generic 32bit load through the read memory map.
+# In: $4 = address. Out: $2 = word read (lw).
+# Clobbers $1. The slow path (ext_load_u32) is taken when any of the top four
+# address bits is set, when either of the two low address bits is set, or
+# when the map entry is NULL.
+execute_load_full_u32:
+ srl $1, $4, 28 # check if the address is out of range
+ ins $1, $4, 4, 2 # or unaligned (address bits 1:0 -> bits 5:4)
+ bne $1, $0, ext_load_u32 # if it is, perform an extended read
+ srl $2, $4, 15 # $2 = page number of address (delay slot)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16 # $2 = $16 + 4 * page number
+ lw $1, -32768($2) # $1 = memory_map_read[address >> 15]
+ beq $1, $0, ext_load_u32 # if it's NULL perform an extended read
+ andi $2, $4, 0x7FFF # $2 = low 15bits of address (delay slot)
+ addu $1, $1, $2 # add the memory map offset
+ jr $ra # return
+ lw $2, ($1) # read the value (delay slot)
+
+# Slow path: keeps $ra on the stack and calls read_memory32 between
+# save_registers / restore_registers (macros defined outside this section).
+# The result is expected in $2 - presumably the C return value, TODO confirm.
+ext_load_u32:
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp) # store return address
+ save_registers
+ jal read_memory32 # read the value
+ nop
+ restore_registers
+ lw $ra, ($sp) # restore return address
+ jr $ra # return
+ addiu $sp, $sp, 4 # fix stack (delay slot)
+
+#execute_aligned_load32: (disabled; the live label sits on execute_load_iwram_u32a)
+ srl $2, $4, 28 # check if the address is out of range
+ bne $2, $0, ext_aligned_load32 # if it is, perform an extended load
+ srl $1, $4, 15 # $1 = page number of address
+ sll $1, $1, 2 # adjust to word index
+ addu $1, $1, $16 # $1 = memory_map_read[address >> 15]
+ lw $1, -32768($1)
+ beq $1, $0, ext_aligned_load32 # if it's NULL perform an extended read
+ andi $2, $4, 0x7FFF # $2 = low 15bits of address (delay slot)
+ addu $1, $1, $2 # add the memory map offset
+ jr $ra # return
+ lw $2, ($1) # read the value
+
+# Slow path for the aligned 32bit load that must not destroy $6.
+# In: $4 = address. Out: $2 as left by read_memory32 (presumably its return
+# value - TODO confirm). Uses 8 bytes of stack: $ra at 0($sp), $6 at 4($sp);
+# both are reloaded after the call, so $6 and $ra survive it.
+ext_aligned_load32:
+ addiu $sp, $sp, -8 # make room on the stack for $ra and $6
+ sw $6, 4($sp) # keep $6 across the call
+ sw $ra, ($sp) # store return address
+ save_registers
+ jal read_memory32 # read the value
+ nop
+ restore_registers
+ lw $6, 4($sp) # bring $6 back
+ lw $ra, ($sp) # restore return address
+ jr $ra # return
+ addiu $sp, $sp, 8 # fix stack (delay slot)
+
+# General ext memory routines
+
+ext_store_ignore:
+ jr $ra # ignore these writes
+ nop
+
+# Common tail of the I/O register store paths. Those paths push $ra (4 bytes),
+# run save_registers, call write_io_register8/16/32 and then jump here.
+# In: $2 = value left by the write_io_register call; saved $ra at 0($sp).
+#   0     -> no_alert: restore registers and return through the saved $ra
+#   2     -> smc_dma: flush the RAM translation cache, then go to lookup_pc
+#   3     -> irq_alert: restore registers, then go to lookup_pc
+#   other -> collapse_flags, then alert_loop
+# Every path releases the 4 bytes of stack holding the saved $ra.
+write_io_epilogue:
+ beq $2, $0, no_alert # 0 means nothing happened
+ addiu $4, $2, -2 # see if return value is 2 (delay slot)
+ beq $4, $0, smc_dma # is it an SMC alert? (return value = 2)
+ nop
+ addiu $4, $2, -3 # see if return value is 3
+ beq $4, $0, irq_alert # is it an IRQ alert? (return value = 3)
+ nop
+ collapse_flags # make sure flags are good for update_gba
+
+# Calls update_gba repeatedly while CPU_HALT_STATE($16) is non-zero, then
+# resumes at lookup_pc with $17 = $2 (as left after the last update_gba call)
+# and $4 = REG_PC($16).
+alert_loop:
+ jal update_gba # process the next event
+ nop
+ lw $1, CPU_HALT_STATE($16) # check if CPU is sleeping
+ bne $1, $0, alert_loop # still halted, process another event
+ nop
+
+ addu $17, $2, $0 # $17 = new cycle counter
+ lw $4, REG_PC($16) # $4 = new PC
+
+ j lookup_pc
+ addiu $sp, $sp, 4 # fix the stack (delay slot)
+
+# The saved $ra is dropped, not reloaded: control does not return to the
+# code that issued the store.
+irq_alert:
+ restore_registers
+ j lookup_pc # PC has changed, get a new one
+ addiu $sp, $sp, 4 # fix the stack (delay slot)
+
+no_alert:
+ restore_registers
+ lw $ra, ($sp) # restore return address
+ jr $ra # we can return
+ addiu $sp, $sp, 4 # fix the stack (delay slot)
+
+# NOTE(review): this path goes to lookup_pc without restore_registers,
+# unlike irq_alert - confirm that is intended after the cache flush.
+smc_dma:
+ addiu $sp, $sp, 4 # fix the stack
+ jal flush_translation_cache_ram # flush translation cache
+ nop
+ j lookup_pc
+ nop
+
+
+ext_store_eeprom:
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp)
+ save_registers
+ jal write_eeprom # write the value out
+ sw $6, REG_PC($16) # save the PC (delay slot)
+ restore_registers
+ lw $ra, ($sp) # restore return address
+ jr $ra # we can return
+ addiu $sp, $sp, 4 # fix the stack
+
+
+# 8bit ext memory routines
+
+ext_store_io8:
+ andi $5, $5, 0xFF # make value 8bit
+ andi $4, $4, 0x3FF # wrap around address
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp)
+ save_registers
+ jal write_io_register8 # write the value out
+ sw $6, REG_PC($16) # save the PC (delay slot)
+ j write_io_epilogue # handle any state changes
+ nop
+
+# 8bit palette store (extended path).
+# In: $4 = address, $5 = value. Tail-jumps to ext_store_palette16b, which
+# returns through $ra.
+# The byte is copied into bits 15:8 first, matching execute_store_palette_u8,
+# ext_store_vram8 and ext_store_oam8; without it the halfword written would
+# carry whatever the caller left above bit 7 of $5.
+ext_store_palette8:
+ ins $5, $5, 8, 8 # value = (value << 8) | value
+ j ext_store_palette16b # perform 16bit palette write
+ andi $4, $4, 0x3FE # wrap + align (delay)
+
+# 8bit VRAM store (extended path).
+# In: $4 = address, $5 = value. Clobbers $1, $2, $4, $5.
+# The byte is duplicated into both halves of a halfword and stored at the
+# 16bit aligned offset; offsets 0x18000-0x1FFFF are moved down by 0x8000.
+# The 0x18000 limit is built with lui + ori: a lone lui can only produce a
+# multiple of 0x10000, so the old "lui $1, %hi(0x18000)" never held 0x18000
+# and the mirror adjustment below was never applied.
+ext_store_vram8:
+ ins $5, $5, 8, 8 # value = (value << 8) | value
+ ext $4, $4, 0, 17 # address = address & 0x1FFFF
+ ins $4, $0, 0, 1 # align out bottom bit
+ lui $1, 0x1 # $1 = 0x10000
+ ori $1, $1, 0x8000 # $1 = 0x18000
+ sltu $1, $4, $1 # see if address < 0x18000
+ bne $1, $0, ext_store_vram8b
+ lui $2, %hi(vram) # start loading vram address (delay)
+
+ addiu $4, $4, -0x8000 # move address into VRAM region
+
+ext_store_vram8b:
+ addu $2, $2, $4 # $2 = (hi)vram + address
+ jr $ra # return
+ sh $5, %lo(vram)($2) # vram[address] = value (delay)
+
+ext_store_oam8:
+ lui $1, %hi(oam_update) # $1 = oam_update
+ addiu $1, %lo(oam_update)
+ li $2, 1 # $2 = 1
+ sw $2, ($1) # *oam_update = 1
+ andi $4, $4, 0x3FE # wrap around address and align to 16bits
+ ins $5, $5, 8, 8 # value = (value << 8) | value
+ lui $1, %hi(oam_ram) # $1 = (hi)oam_ram
+ addu $1, $1, $4 # $1 = (hi)oam_ram + address
+ jr $ra # return
+ sh $5, %lo(oam_ram)($1) # oam_ram[address] = value (delay)
+
+ext_store_backup:
+ andi $5, $5, 0xFF # make value 8bit
+ andi $4, $4, 0xFFFF # mask value
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp)
+ save_registers
+ jal write_backup # write the value out
+ sw $6, REG_PC($16) # save the PC (delay slot)
+ restore_registers
+ lw $ra, ($sp) # restore return address
+ jr $ra # we can return
+ addiu $sp, $sp, 4 # fix the stack
+
+ext_store_u8_jtable:
+ .long ext_store_ignore # 0x00 BIOS
+ .long ext_store_ignore # 0x01 invalid
+ .long ext_store_ignore # 0x02 EWRAM
+ .long ext_store_ignore # 0x03 IWRAM
+ .long ext_store_io8 # 0x04 I/O registers
+ .long ext_store_palette8 # 0x05 Palette RAM
+ .long ext_store_vram8 # 0x06 VRAM
+ .long ext_store_oam8 # 0x07 OAM RAM
+ .long ext_store_ignore # 0x08 gamepak (no RTC accepted in 8bit)
+ .long ext_store_ignore # 0x09 gamepak, ignore
+ .long ext_store_ignore # 0x0A gamepak, ignore
+ .long ext_store_ignore # 0x0B gamepak, ignore
+ .long ext_store_ignore # 0x0C gamepak, ignore
+ .long ext_store_eeprom # 0x0D EEPROM (possibly)
+ .long ext_store_backup # 0x0E Flash ROM/SRAM
+ .long ext_store_ignore # 0x0F invalid
+
+
+
+ext_store_u8:
+ srl $1, $4, 24 # $1 = address >> 24
+ sltu $2, $1, 16 # check if the value is out of range
+ beq $2, $0, ext_store_ignore
+ sll $1, $1, 2 # make address word indexed (delay)
+ lui $2, %hi(ext_store_u8_jtable)
+ addu $2, $2, $1
+ # $2 = ext_store_u8_jtable[address >> 24]
+ lw $2, %lo(ext_store_u8_jtable)($2)
+ jr $2 # jump to table location
+ nop
+
+# $4: address to write to
+# $5: value to write
+# $6: current PC
+
+#execute_store_u8: (disabled; the live label sits on execute_store_iwram_u8)
+ srl $1, $4, 28 # check if the address is out of range
+ bne $1, $0, ext_store_u8 # if it is, perform an extended write
+ srl $2, $4, 15 # $1 = page number of address (delay slot)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16
+ lw $1, 256($2) # $1 = memory_map_write[address >> 15]
+ beq $1, $0, ext_store_u8 # if it's NULL perform an extended write
+ andi $2, $4, 0x7FFF # $2 = low 15bits of address (delay slot)
+ addu $1, $1, $2 # add the memory map offset
+ lb $2, -32768($1) # load the SMC status
+ bne $2, $0, smc_write # is there code there?
+ sb $5, ($1) # store the value (delay slot)
+ jr $ra # return
+ nop
+
+# 16bit ext memory routines
+
+# 16bit I/O register store (extended path).
+# In: $4 = address, $5 = value, $6 = current PC (written to REG_PC).
+# Pushes $ra and finishes in write_io_epilogue, which pops it again.
+# The offset is wrapped and halfword aligned with 0x3FE, the same mask
+# execute_store_io_u16 applies before calling write_io_register16.
+ext_store_io16:
+ andi $4, $4, 0x3FE # wrap around/align address
+ andi $5, $5, 0xFFFF # make value 16bit
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp)
+ save_registers
+ jal write_io_register16 # write the value out
+ sw $6, REG_PC($16) # save the PC (delay slot)
+ j write_io_epilogue # handle any state changes
+ nop
+
+# 16bit palette store (extended path).
+# In: $4 = address, $5 = value. Clobbers $1, $2, $4.
+# The offset is wrapped and halfword aligned (0x3FE, as in
+# execute_store_palette_u16) so the sh instructions below never see an odd
+# address.
+ext_store_palette16:
+ andi $4, $4, 0x3FE # wrap + align address
+
+# Shared tail: expects $4 to be an already wrapped, even palette offset.
+# Stores $5 to palette_ram and a converted copy to palette_ram_converted:
+# ($5 << 1) with bits 5:0 cleared and bits 4:0 taken from $5.
+ext_store_palette16b:
+ lui $2, %hi(palette_ram)
+ addu $2, $2, $4
+ sh $5, %lo(palette_ram)($2) # palette_ram[address] = value
+ sll $1, $5, 1 # make green 6bits
+ ins $1, $0, 0, 6 # make bottom bit 0
+ ins $1, $5, 0, 5 # insert red channel into $1
+ lui $2, %hi(palette_ram_converted)
+ addu $2, $2, $4
+ jr $ra # return
+ sh $1, %lo(palette_ram_converted)($2)
+
+# 16bit VRAM store (extended path).
+# In: $4 = address, $5 = value. Clobbers $1, $2, $4.
+# The offset is wrapped to 17 bits and halfword aligned (as ext_store_vram8
+# does) so sh never sees an odd address; offsets 0x18000-0x1FFFF are moved
+# down by 0x8000.
+# The 0x18000 limit is built with lui + ori: a lone lui can only produce a
+# multiple of 0x10000, so the old "lui $1, %hi(0x18000)" never held 0x18000
+# and the mirror adjustment below was never applied.
+ext_store_vram16:
+ ext $4, $4, 0, 17 # address = address & 0x1FFFF
+ ins $4, $0, 0, 1 # align out bottom bit
+ lui $1, 0x1 # $1 = 0x10000
+ ori $1, $1, 0x8000 # $1 = 0x18000
+ sltu $1, $4, $1 # see if address < 0x18000
+ bne $1, $0, ext_store_vram16b
+ lui $2, %hi(vram) # start loading vram address (delay)
+
+ addiu $4, $4, -0x8000 # move address into VRAM region
+
+ext_store_vram16b:
+ addu $2, $2, $4 # $2 = (hi)vram + address
+ jr $ra # return
+ sh $5, %lo(vram)($2) # vram[address] = value (delay)
+
+# 16bit OAM store (extended path).
+# In: $4 = address, $5 = value. Clobbers $1, $2, $4.
+# Sets oam_update to 1, then stores the halfword. The offset is wrapped and
+# halfword aligned with 0x3FE (as in execute_store_oam_u16 and
+# ext_store_oam8) so sh never sees an odd address.
+ext_store_oam16:
+ lui $1, %hi(oam_update) # $1 = oam_update
+ addiu $1, %lo(oam_update)
+ li $2, 1 # $2 = 1
+ sw $2, ($1) # *oam_update = 1
+ andi $4, $4, 0x3FE # wrap around address and align to 16bits
+ lui $1, %hi(oam_ram) # $1 = (hi)oam_ram
+ addu $1, $1, $4 # $1 = (hi)oam_ram + address
+ jr $ra # return
+ sh $5, %lo(oam_ram)($1) # oam_ram[address] = value (delay)
+
+ext_store_rtc:
+ andi $5, $5, 0xFFFF # make value 16bit
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp)
+ save_registers
+ jal write_rtc # write the value out
+ sw $6, REG_PC($16) # save the PC (delay slot)
+ restore_registers
+ lw $ra, ($sp) # restore return address
+ jr $ra # we can return
+ addiu $sp, $sp, 4 # fix the stack
+
+# Jump table for ext_store_u16, indexed by (address >> 24).
+# ext_store_u16 accepts every index below 16, so the table needs 16 entries;
+# the 0x0F slot was missing, which made that region load its jump target from
+# whatever follows the table.
+ext_store_u16_jtable:
+ .long ext_store_ignore # 0x00 BIOS, ignore
+ .long ext_store_ignore # 0x01 invalid, ignore
+ .long ext_store_ignore # 0x02 EWRAM, should have been hit already
+ .long ext_store_ignore # 0x03 IWRAM, should have been hit already
+ .long ext_store_io16 # 0x04 I/O registers
+ .long ext_store_palette16 # 0x05 Palette RAM
+ .long ext_store_vram16 # 0x06 VRAM
+ .long ext_store_oam16 # 0x07 OAM RAM
+ .long ext_store_rtc # 0x08 gamepak, RTC
+ .long ext_store_ignore # 0x09 gamepak, ignore
+ .long ext_store_ignore # 0x0A gamepak, ignore
+ .long ext_store_ignore # 0x0B gamepak, ignore
+ .long ext_store_ignore # 0x0C gamepak, ignore
+ .long ext_store_eeprom # 0x0D EEPROM (possibly)
+ .long ext_store_ignore # 0x0E Flash ROM/SRAM
+ .long ext_store_ignore # 0x0F invalid, ignore
+
+ext_store_u16:
+ srl $1, $4, 24 # $1 = address >> 24
+ sltu $2, $1, 16 # check if the value is out of range
+ beq $2, $0, ext_store_ignore
+ sll $1, $1, 2 # make address word indexed (delay)
+ lui $2, %hi(ext_store_u16_jtable)
+ addu $2, $2, $1
+ # $2 = ext_store_u16_jtable[address >> 24]
+ lw $2, %lo(ext_store_u16_jtable)($2)
+ jr $2 # jump to table location
+ nop
+
+
+#execute_store_u16: (disabled; the live label sits on execute_store_iwram_u16)
+ srl $1, $4, 28 # check if the address is out of range
+ bne $1, $0, ext_store_u16 # if it is, perform an extended write
+ srl $2, $4, 15 # $1 = page number of address (delay slot)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16
+ lw $1, 256($2) # $1 = memory_map_write[address >> 15]
+ beq $1, $0, ext_store_u16 # if it's NULL perform an extended write
+ andi $2, $4, 0x7FFE # $2 = low 15bits of address (delay slot)
+ addu $1, $1, $2 # add the memory map offset
+ lh $2, -32768($1) # load the SMC status
+ bne $2, $0, smc_write # is there code there?
+ sh $5, ($1) # store the value (delay slot)
+ jr $ra # return
+ nop
+
+
+
+
+
+
+
+
+# 32bit ext memory routines
+
+# 32bit I/O register store (extended path).
+# In: $4 = address, $5 = value, $6 = current PC (written to REG_PC).
+# Pushes $ra and finishes in write_io_epilogue, which pops it again.
+# The offset is wrapped and word aligned with 0x3FC, the same mask
+# execute_store_io_u32 applies before calling write_io_register32.
+ext_store_io32:
+ andi $4, $4, 0x3FC # wrap around/align address
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp)
+ save_registers
+ jal write_io_register32 # write the value out
+ sw $6, REG_PC($16) # save the PC (delay slot)
+ j write_io_epilogue # handle any state changes
+ nop
+
+# 32bit palette store (extended path): two 16bit palette writes through
+# ext_store_palette16b, low halfword first.
+# In: $4 = address, $5 = value. Clobbers $1, $2, $4, $5 and $6 ($6 holds the
+# return address across the first call).
+# The offset is wrapped with 0x3FC so it is word aligned and the second
+# halfword (offset + 2) stays at or below 0x3FE; with the old 0x3FF mask an
+# offset of 0x3FE put the second write at 0x400.
+ext_store_palette32:
+ addu $6, $ra, $0 # save return address in $6
+ jal ext_store_palette16b # write out palette entry
+ andi $4, $4, 0x3FC # wrap + align address (delay)
+ addiu $4, $4, 2 # go to next location
+ srl $5, $5, 16 # shift to next 16bit value
+ j ext_store_palette16b # write out next palette entry
+ addu $ra, $6, $0 # restore return address (delay)
+
+# 32bit VRAM store (extended path).
+# In: $4 = address, $5 = value. Clobbers $1, $2, $4.
+# The offset is wrapped to 17 bits and word aligned so sw never sees an
+# unaligned address; offsets 0x18000-0x1FFFF are moved down by 0x8000.
+# The 0x18000 limit is built with lui + ori: a lone lui can only produce a
+# multiple of 0x10000, so the old "lui $1, %hi(0x18000)" never held 0x18000
+# and the mirror adjustment below was never applied.
+ext_store_vram32:
+ ext $4, $4, 0, 17 # address = address & 0x1FFFF
+ ins $4, $0, 0, 2 # align out bottom two bits
+ lui $1, 0x1 # $1 = 0x10000
+ ori $1, $1, 0x8000 # $1 = 0x18000
+ sltu $1, $4, $1 # see if address < 0x18000
+ bne $1, $0, ext_store_vram32b
+ lui $2, %hi(vram) # start loading vram address (delay)
+
+ addiu $4, $4, -0x8000 # move address into VRAM region
+
+ext_store_vram32b:
+ addu $2, $2, $4 # $2 = (hi)vram + address
+ jr $ra # return
+ sw $5, %lo(vram)($2) # vram[address] = value (delay)
+
+# 32bit OAM store (extended path).
+# In: $4 = address, $5 = value. Clobbers $1, $2, $4.
+# Sets oam_update to 1, then stores the word. The offset is wrapped and word
+# aligned with 0x3FC (as in execute_store_oam_u32) so sw never sees an
+# unaligned address.
+ext_store_oam32:
+ lui $1, %hi(oam_update) # $1 = oam_update
+ addiu $1, %lo(oam_update)
+ li $2, 1 # $2 = 1
+ sw $2, ($1) # *oam_update = 1
+ andi $4, $4, 0x3FC # wrap around address and align to 32bits
+ lui $1, %hi(oam_ram) # $1 = (hi)oam_ram
+ addu $1, $1, $4 # $1 = (hi)oam_ram + address
+ jr $ra # return
+ sw $5, %lo(oam_ram)($1) # oam_ram[address] = value (delay)
+
+# Jump table for ext_store_u32, indexed by (address >> 24).
+# ext_store_u32 accepts every index below 16, so the table needs 16 entries;
+# the 0x0F slot was missing, which made that region load its jump target from
+# whatever follows the table.
+ext_store_u32_jtable:
+ .long ext_store_ignore # 0x00 BIOS, ignore
+ .long ext_store_ignore # 0x01 invalid, ignore
+ .long ext_store_ignore # 0x02 EWRAM, should have been hit already
+ .long ext_store_ignore # 0x03 IWRAM, should have been hit already
+ .long ext_store_io32 # 0x04 I/O registers
+ .long ext_store_palette32 # 0x05 Palette RAM
+ .long ext_store_vram32 # 0x06 VRAM
+ .long ext_store_oam32 # 0x07 OAM RAM
+ .long ext_store_ignore # 0x08 gamepak, ignore
+ .long ext_store_ignore # 0x09 gamepak, ignore
+ .long ext_store_ignore # 0x0A gamepak, ignore
+ .long ext_store_ignore # 0x0B gamepak, ignore
+ .long ext_store_ignore # 0x0C gamepak, ignore
+ .long ext_store_eeprom # 0x0D EEPROM (possibly)
+ .long ext_store_ignore # 0x0E Flash ROM/SRAM
+ .long ext_store_ignore # 0x0F invalid, ignore
+
+# Dispatch a 32bit store that the memory map could not take directly.
+# $4: address
+# $5: value
+# Clobbers $1 and $2. The selected handler returns to the original caller.
+
+ext_store_u32:
+ srl $1, $4, 24 # $1 = address >> 24
+ sltiu $2, $1, 15 # ext_store_u32_jtable has 15 slots (0x00 to 0x0E), index 0x0F would read past it
+ beq $2, $0, ext_store_ignore # out of range, drop the store
+ sll $1, $1, 2 # make address word indexed (delay)
+ lui $2, %hi(ext_store_u32_jtable)
+ addu $2, $2, $1
+ # $2 = ext_store_u32_jtable[address >> 24]
+ lw $2, %lo(ext_store_u32_jtable)($2)
+ jr $2 # jump to table location
+ nop # branch delay slot
+
+#execute_store_u32:
+execute_store_full_u32: # 32bit store with self modifying code check, $4 = address, $5 = value, $6 = PC (stored by smc_write)
+ srl $1, $4, 28 # check if the address is out of range
+ bne $1, $0, ext_store_u32 # if it is, perform an extended write
+ srl $2, $4, 15 # $2 = page number of address (delay slot)
+ sll $2, $2, 2 # adjust to word index
+ addu $2, $2, $16 # offset from the reg base, memory_map_write sits 256 bytes above it
+ lw $1, 256($2) # $1 = memory_map_write[address >> 15]
+ beq $1, $0, ext_store_u32 # if it's NULL perform an extended write
+ andi $2, $4, 0x7FFC # $2 = low 15bits of address, word aligned (delay slot)
+ addu $1, $1, $2 # add the memory map offset
+ lw $2, -32768($1) # load the SMC status, kept 32KB below the target word
+ bne $2, $0, smc_write # is there code there?
+ sw $5, ($1) # store the value (delay slot, done on both paths)
+ jr $ra # return
+ nop # branch delay slot
+
+
+# 32bit ext aligned routines that do not destroy a2 ($6)
+
+ext_store_io32a: # 32bit I/O store that keeps $6 (a2) intact (ext_store_u32a_jtable slot 0x04), $4 = address, $5 = value
+ andi $4, $4, 0x3FF # wrap around address
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp) # push $ra, the jal below overwrites it
+ save_registers # macro defined outside this chunk
+ jal write_io_register32 # write the value out
+ sw $6, REG_SAVE($16) # save a2 (delay slot, runs before the call)
+ lw $6, REG_SAVE($16) # restore a2
+ j write_io_epilogue # handle any state changes, NOTE(review): unlike ext_store_io32 the PC is not stored first
+ nop # branch delay slot
+
+ext_store_palette32a: # 32bit palette store as two 16bit stores, keeps $6 by parking $ra in REG_SAVE (u32a jtable slot 0x05)
+ sw $ra, REG_SAVE($16) # save return address
+ jal ext_store_palette16b # write out palette entry, NOTE(review): assumes it keeps $4 and $5 intact
+ andi $4, 0x3FF # wrap address (delay)
+ addiu $4, $4, 2 # go to next location
+ srl $5, $5, 16 # shift to next 16bit value
+ j ext_store_palette16b # write out next palette entry, its return goes straight to our caller
+ lw $ra, REG_SAVE($16) # restore return address (delay)
+
+ext_store_u32a_jtable: # aligned 32bit store handlers indexed by address >> 24, 15 entries so only 0x00 to 0x0E are valid
+ .long ext_store_ignore # 0x00 BIOS, ignore
+ .long ext_store_ignore # 0x01 invalid, ignore
+ .long ext_store_ignore # 0x02 EWRAM, should have been hit already
+ .long ext_store_ignore # 0x03 IWRAM, should have been hit already
+ .long ext_store_io32a # 0x04 I/O registers
+ .long ext_store_palette32a # 0x05 Palette RAM
+ .long ext_store_vram32 # 0x06 VRAM
+ .long ext_store_oam32 # 0x07 OAM RAM
+ .long ext_store_ignore # 0x08 gamepak, ignore
+ .long ext_store_ignore # 0x09 gamepak, ignore
+ .long ext_store_ignore # 0x0A gamepak, ignore
+ .long ext_store_ignore # 0x0B gamepak, ignore
+ .long ext_store_ignore # 0x0C gamepak, ignore
+ .long ext_store_ignore # 0x0D EEPROM (nothing will write this)
+ .long ext_store_ignore # 0x0E Flash ROM/SRAM, last entry
+
+# Dispatch an aligned 32bit store through the table of a2 preserving handlers.
+# $4: address
+# $5: value
+# Clobbers $1 and $2. The selected handler returns to the original caller.
+
+ext_aligned_store32:
+ srl $1, $4, 24 # $1 = address >> 24
+ sltiu $2, $1, 15 # ext_store_u32a_jtable has 15 slots (0x00 to 0x0E), index 0x0F would read past it
+ beq $2, $0, ext_store_ignore # out of range, drop the store
+ sll $1, $1, 2 # make address word indexed (delay)
+ lui $2, %hi(ext_store_u32a_jtable)
+ addu $2, $2, $1
+ # $2 = ext_store_u32a_jtable[address >> 24]
+ lw $2, %lo(ext_store_u32a_jtable)($2)
+ jr $2 # jump to table location
+ nop # branch delay slot
+
+#execute_aligned_store32: (label disabled, so nothing can branch to the code below and the jr before it never falls through)
+ srl $2, $4, 28 # check if the address is out of range
+ bne $2, $0, ext_aligned_store32 # if it is, perform an extended write
+ srl $1, $4, 15 # $1 = page number of address (delay slot)
+ sll $1, $1, 2 # adjust to word index
+ addu $1, $1, $16 # offset from the reg base
+ lw $1, 256($1) # $1 = memory_map_write[address >> 15]
+ beq $1, $0, ext_aligned_store32 # if it's NULL perform an extended write
+ andi $2, $4, 0x7FFF # $2 = low 15bits of address (delay slot), NOTE(review): not masked to a word boundary, presumably the caller guarantees alignment
+ addu $1, $1, $2 # add the memory map offset
+ jr $ra # return
+ sw $5, ($1) # write the value (delay slot), no SMC check on this path
+
+smc_write: # reached when a store found a nonzero SMC status, $6 = PC, falls through into lookup_pc and never uses $ra
+ save_registers # macro defined outside this chunk
+ jal flush_translation_cache_ram # flush translation cache
+ sw $6, REG_PC($16) # save PC (delay slot), lookup_pc reloads it from here
+
+lookup_pc: # look up the block for the stored PC and jump into it, ARM or Thumb chosen by cpsr bit 0x20
+ lw $2, REG_CPSR($16) # $2 = cpsr
+ andi $2, $2, 0x20 # isolate the T (Thumb) bit
+ beq $2, $0, lookup_pc_arm # if T bit is zero use arm handler
+ nop # branch delay slot
+
+lookup_pc_thumb:
+ jal block_lookup_address_thumb # get Thumb address
+ lw $4, REG_PC($16) # load PC as arg 0 (delay slot)
+ restore_registers # macro defined outside this chunk
+ jr $2 # jump to result
+ nop # branch delay slot
+
+lookup_pc_arm:
+ jal block_lookup_address_arm # get ARM address
+ lw $4, REG_PC($16) # load PC as arg 0 (delay slot)
+ restore_registers # macro defined outside this chunk
+ jr $2 # jump to result
+ nop # branch delay slot
+
+# Return the current cpsr
+
+execute_read_cpsr: # returns the current cpsr in $2
+ collapse_flags # fold flags into cpsr, put cpsr into $2
+ jr $ra # return
+ nop # branch delay slot
+
+# Return the current spsr
+
+execute_read_spsr: # returns spsr[cpu_mode] in $2, clobbers $1
+ lw $1, CPU_MODE($16) # $1 = cpu_mode
+ lui $2, %hi(spsr) # $2 = (hi)spsr
+ sll $1, $1, 2 # adjust to word offset size
+ addu $2, $2, $1 # $2 = (hi)spsr + cpu_mode * 4
+ jr $ra # return
+ lw $2, %lo(spsr)($2) # $2 = spsr[cpu_mode] (delay slot)
+
+# Switch into SWI, has to collapse flags
+# Stores the next PC in SUPERVISOR_LR and the cpsr in SUPERVISOR_SPSR,
+# rewrites the cpsr low bits to 0x13 and calls set_cpu_mode(3).
+# $4: Current pc
+# Clobbers $1, $2, $4 and $5. Uses 4 bytes of stack for $ra.
+
+execute_swi:
+ addiu $sp, $sp, -4 # push $ra (addiu like the other routines, add would assemble to the trapping addi)
+ sw $ra, ($sp)
+ lui $1, %hi(SUPERVISOR_LR)
+ sw $4, %lo(SUPERVISOR_LR)($1) # store next PC in the supervisor LR
+ collapse_flags # get cpsr in $2
+ lui $5, %hi(SUPERVISOR_SPSR)
+ sw $2, %lo(SUPERVISOR_SPSR)($5) # save cpsr in SUPERVISOR_SPSR
+ ins $2, $0, 0, 6 # zero out bottom 6 bits of CPSR (this includes the 0x20 Thumb bit)
+ ori $2, 0x13 # set mode to supervisor
+ sw $2, REG_CPSR($16) # write back CPSR
+ save_registers # macro defined outside this chunk
+ jal set_cpu_mode # set the CPU mode to supervisor
+ li $4, 3 # 3 is supervisor mode (delay slot)
+ restore_registers # macro defined outside this chunk
+ lw $ra, ($sp) # pop $ra
+ jr $ra # return
+ addiu $sp, $sp, 4 # fix stack (delay slot)
+
+# $4: pc to restore to
+# returns in $4
+
+execute_spsr_restore: # cpsr = spsr[cpu_mode] unless cpu_mode is 0, $4 = pc to restore to, result comes back in $4
+ lw $1, CPU_MODE($16) # $1 = cpu_mode
+
+ beq $1, $0, no_spsr_restore # only restore if the cpu isn't usermode
+ lui $2, %hi(spsr) # start loading SPSR (delay)
+
+ sll $1, $1, 2 # adjust to word offset size
+ addu $2, $2, $1 # $2 = (hi)spsr + cpu_mode * 4
+ lw $1, %lo(spsr)($2) # $1 = spsr[cpu_mode]
+ sw $1, REG_CPSR($16) # cpsr = spsr[cpu_mode]
+ extract_flags_body # extract flags from $1
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp) # push $ra, the jal below overwrites it
+ save_registers # macro defined outside this chunk
+ jal execute_spsr_restore_body # do the dirty work in this C function, $4 still holds the pc argument
+ nop # branch delay slot
+ restore_registers # macro defined outside this chunk
+ addu $4, $2, $0 # move return value to $4
+ lw $ra, ($sp) # pop $ra
+ jr $ra # return
+ addiu $sp, $sp, 4 # fix stack (delay slot)
+
+no_spsr_restore: # cpu_mode was 0, $4 goes back unchanged
+ jr $ra # return
+ nop # branch delay slot
+
+# $4: new cpsr
+# $5: store mask
+# $6: current PC
+
+execute_store_cpsr: # masked cpsr write, returns to the caller or jumps to a new block when the C body reports a PC change
+ and $1, $4, $5 # $1 = new_cpsr & store_mask
+ lw $2, REG_CPSR($16) # $2 = current cpsr
+ nor $4, $5, $0 # $4 = ~store_mask
+ and $2, $2, $4 # $2 = (cpsr & (~store_mask))
+ or $1, $1, $2 # $1 = new cpsr combined with old
+ extract_flags_body # extract flags from $1
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp) # push $ra, the jal below overwrites it
+ save_registers # macro defined outside this chunk
+ jal execute_store_cpsr_body # do the dirty work in this C function, $5 and $6 are passed through untouched
+ addu $4, $1, $0 # load the new CPSR (delay slot)
+
+ bne $2, $0, changed_pc_cpsr # this could have changed the pc, nonzero result is the new address
+ nop # branch delay slot
+
+ restore_registers # macro defined outside this chunk
+
+ lw $ra, ($sp) # pop $ra
+ jr $ra # return
+ addiu $sp, $sp, 4 # fix stack (delay slot)
+
+changed_pc_cpsr: # $2 = new GBA address, always looked up as ARM here
+ jal block_lookup_address_arm # GBA address is in $4
+ addu $4, $2, $0 # load new address in $4 (delay slot)
+ restore_registers # restore registers
+ jr $2 # jump to the new address
+ addiu $sp, $sp, 4 # get rid of the old ra (delay slot)
+
+
+# $4: new spsr
+# $5: store mask
+
+execute_store_spsr: # spsr[cpu_mode] = (new & mask) | (old & ~mask), clobbers $1, $2, $4 and $5
+ lw $1, CPU_MODE($16) # $1 = cpu_mode
+ lui $2, %hi(spsr) # $2 = (hi)spsr
+ sll $1, $1, 2 # adjust to word offset size
+ addu $1, $2, $1 # $1 = (hi)spsr + cpu_mode * 4
+ lw $2, %lo(spsr)($1) # $2 = spsr[cpu_mode]
+ and $4, $4, $5 # $4 = new_spsr & store_mask
+ nor $5, $5, $0 # $5 = ~store_mask
+ and $2, $2, $5 # $2 = (spsr & (~store_mask))
+ or $4, $4, $2 # $4 = new spsr combined with old
+ jr $ra # return
+ sw $4, %lo(spsr)($1) # spsr[cpu_mode] = $4 (delay slot)
+
+# $4: value
+# $5: shift
+
+execute_lsl_flags_reg: # logical shift left by a register amount, result in $4, carry flag in $22, clobbers $1 and $2
+ beq $5, $0, lsl_shift_zero # is the shift zero? then the c flag is left alone
+ sltiu $1, $5, 32 # $1 = (shift < 32) (delay)
+ beq $1, $0, lsl_shift_high # is the shift >= 32?
+ li $2, 32 # $2 = 32 (delay)
+
+ subu $2, $2, $5 # $2 = (32 - shift)
+ srlv $2, $4, $2 # $2 = (value >> (32 - shift))
+ andi $22, $2, 1 # c flag = (value >> (32 - shift)) & 0x01
+
+lsl_shift_zero:
+ jr $ra # return
+ sllv $4, $4, $5 # return (value << shift) (delay)
+
+lsl_shift_high:
+ sltiu $1, $5, 33 # $1 = (shift < 33), first instruction of this path and not a delay slot
+ bne $1, $0, lsl_shift_done # jump if shift == 32
+ andi $22, $4, 1 # c flag = value & 0x01 (delay)
+
+ add $22, $0, $0 # c flag = 0 otherwise
+
+lsl_shift_done:
+ jr $ra # return
+ add $4, $0, $0 # value = 0 no matter what (delay)
+
+
+execute_lsr_flags_reg: # logical shift right by a register amount, $4 = value, $5 = shift, result in $4, carry flag in $22
+ beq $5, $0, lsr_shift_zero # is the shift zero? then the c flag is left alone
+ sltiu $1, $5, 32 # $1 = (shift < 32) (delay)
+ beq $1, $0, lsr_shift_high # is the shift >= 32?
+ addiu $2, $5, -1 # $2 = shift - 1 (delay)
+
+ srlv $2, $4, $2 # $2 = (value >> (shift - 1))
+ andi $22, $2, 1 # c flag = (value >> (shift - 1)) & 0x01
+
+lsr_shift_zero:
+ jr $ra # return
+ srlv $4, $4, $5 # return (value >> shift) (delay)
+
+lsr_shift_high:
+ sltiu $1, $5, 33 # $1 = (shift < 33), first instruction of this path and not a delay slot
+ bne $1, $0, lsr_shift_done # jump if shift == 32
+ srl $22, $4, 31 # c flag = value >> 31 (delay)
+
+ add $22, $0, $0 # c flag = 0 otherwise
+
+lsr_shift_done:
+ jr $ra # return
+ add $4, $0, $0 # value = 0 no matter what (delay)
+
+
+execute_asr_flags_reg: # arithmetic shift right by a register amount, $4 = value, $5 = shift, result in $4, carry flag in $22
+ beq $5, $0, asr_shift_zero # is the shift zero? then the c flag is left alone
+ sltiu $1, $5, 32 # $1 = (shift < 32) (delay)
+ beq $1, $0, asr_shift_high # is the shift >= 32?
+ addiu $2, $5, -1 # $2 = shift - 1 (delay)
+
+ srlv $2, $4, $2 # $2 = (value >> (shift - 1)), logical shift is enough since only bit 0 is used
+ andi $22, $2, 1 # c flag = (value >> (shift - 1)) & 0x01
+
+asr_shift_zero:
+ jr $ra # return
+ srav $4, $4, $5 # return (value >> shift) (delay)
+
+asr_shift_high:
+ sra $4, $4, 31 # value >>= 31, every bit becomes a copy of the sign bit
+ jr $ra # return
+ andi $22, $4, 1 # c flag = value & 0x01 (delay), equals the original sign bit
+
+
+execute_ror_flags_reg: # rotate right by a register amount, $4 = value, $5 = shift, result in $4, carry flag in $22
+ beq $5, $0, ror_zero_shift # is the shift zero? then the c flag is left alone
+ addiu $1, $5, -1 # $1 = (shift - 1) (delay)
+
+ srav $1, $4, $1 # $1 = (value >> (shift - 1)), the hardware uses only the low 5 bits of the amount
+ andi $22, $1, 1 # c flag = $1 & 1
+
+ror_zero_shift:
+ jr $ra # return
+ rotrv $4, $4, $5 # return (value ror shift) delay
+
+# $4: cycle counter argument
+
+execute_arm_translate: # entry into translated code, $4 = cycle counter, leaves through jr $2 and not through $ra
+ addu $17, $4, $0 # load cycle counter register
+ lui $16, %hi(reg) # load base register
+ addiu $16, %lo(reg) # $16 = address of reg
+ extract_flags # load flag variables
+
+ and $1, $1, 0x20 # see if Thumb bit is set in flags, NOTE(review): assumes extract_flags leaves the cpsr in $1
+
+ bne $1, $0, 1f # Thumb bit set, use the Thumb lookup at 1
+ lw $4, REG_PC($16) # load PC into $4 (delay)
+
+ jal block_lookup_address_arm # lookup initial jump address
+ nop # branch delay slot
+ restore_registers # load initial register values
+ jr $2 # jump to the address returned by the lookup
+ nop # branch delay slot
+
+1:
+ jal block_lookup_address_thumb # lookup initial jump address
+ nop # branch delay slot
+ restore_registers # load initial register values
+ jr $2 # jump to the address returned by the lookup
+ nop # branch delay slot
+
+# sceKernelInvalidateIcacheRange gives me problems, trying this instead
+# Invalidates an n byte region starting at the start address
+# Every 64 byte line touched by the region is invalidated, also when the
+# start address is not line aligned. A zero length invalidates nothing.
+# $4: start location
+# $5: length
+# Clobbers $2 and $4.
+
+invalidate_icache_region:
+ beq $5, $0, done # exit early on 0
+ andi $2, $4, 0x3F # $2 = offset of start inside its line (delay slot)
+ addu $2, $2, $5 # $2 = bytes from the line start to the region end
+ addiu $2, $2, 63 # align up to 64 bytes
+ srl $2, $2, 6 # divide by 64, $2 = number of lines (at least 1)
+ ins $4, $0, 0, 6 # align to 64 bytes
+
+iir_loop:
+ cache 0x08, ($4) # hit invalidate icache line
+ addiu $2, $2, -1 # next loop iteration
+ bne $2, $0, iir_loop # loop
+ addiu $4, $4, 64 # go to next cache line (delay slot)
+
+done:
+ jr $ra # return
+ nop # branch delay slot
+
+# Writes back dcache and invalidates icache.
+
+invalidate_all_cache: # takes no arguments, walks indices 0 to 0x3FC0 in 0x40 steps, clobbers $4 and $5
+ addu $4, $0, $0 # $4 = 0
+ addiu $5, $0, 0x4000 # $5 = 0x4000, loop end
+
+iac_loop:
+ cache 0x14, 0($4) # index invalidate/writeback dcache index
+ addiu $4, $4, 0x40 # goto next cache line
+ bne $4, $5, iac_loop # next iteration
+ cache 0x04, -0x40($4) # index invalidate icache index.. maybe? (delay slot, -0x40 targets the index the dcache op just used)
+
+ jr $ra # return
+ nop # branch delay slot
+
+
+step_debug_mips: # calls step_debug with the flags collapsed and the registers saved, then returns to the caller
+ addiu $sp, $sp, -4 # make room on the stack for $ra
+ sw $ra, ($sp) # push $ra, the jal below overwrites it
+ collapse_flags # fold flags into cpsr
+ save_registers # macro defined outside this chunk
+ jal step_debug # $4 is passed through from our caller
+ addiu $5, $17, 0 # arg 1 = cycle counter register $17 (delay slot)
+ restore_registers # macro defined outside this chunk
+ lw $ra, ($sp) # pop $ra
+ jr $ra # return
+ addiu $sp, $sp, 4 # fix stack (delay slot)
+
+memory_map_read: # 0x8000 bytes placed directly below reg, keep this order and size
+ .space 0x8000
+
+reg: # 0x100 bytes, execute_arm_translate loads this address into $16
+ .space 0x100
+
+memory_map_write: # 0x8000 bytes at reg + 0x100, the store routines reach it as 256($16 + page * 4)
+ .space 0x8000
+