summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Guillen Fandos2021-03-16 19:02:11 +0100
committerDavid Guillen Fandos2021-03-16 22:58:58 +0100
commit6b503667ec074c55dbcd689595d8fe03aa17e4a4 (patch)
tree2465e0d464b5a221b93701a67173b69d80ac8d3b
parent80be1e3447f26376b07a1154c98258fb4a124500 (diff)
downloadpicogpsp-6b503667ec074c55dbcd689595d8fe03aa17e4a4.tar.gz
picogpsp-6b503667ec074c55dbcd689595d8fe03aa17e4a4.tar.bz2
picogpsp-6b503667ec074c55dbcd689595d8fe03aa17e4a4.zip
Add Dingux support
Uses a different cache primitive and a differend madd(u) encoding. Also added a flag for BGR vs RGB color output (since PSP is assuming to be BGR for speed). Aside from that the ABI required some special function calls for PIC.
-rw-r--r--Makefile10
-rw-r--r--psp/mips_emit.h156
-rw-r--r--psp/mips_stub.S100
3 files changed, 165 insertions, 101 deletions
diff --git a/Makefile b/Makefile
index 85465e0..eddfe36 100644
--- a/Makefile
+++ b/Makefile
@@ -193,7 +193,7 @@ else ifeq ($(platform), psp1)
TARGET := $(TARGET_NAME)_libretro_$(platform).a
CC = psp-gcc$(EXE_EXT)
AR = psp-ar$(EXE_EXT)
- CFLAGS += -DPSP -G0
+ CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT
CFLAGS += -I$(shell psp-config --pspsdk-path)/include
CFLAGS += -march=allegrex -mfp32 -mgp32 -mlong32 -mabi=eabi
CFLAGS += -fomit-frame-pointer -ffast-math
@@ -374,8 +374,10 @@ else ifeq ($(platform), gcw0)
CXX = /opt/gcw0-toolchain/usr/bin/mipsel-linux-g++
AR = /opt/gcw0-toolchain/usr/bin/mipsel-linux-ar
SHARED := -shared -nostdlib -Wl,--version-script=link.T
- fpic := -fPIC
+ fpic := -fPIC -DPIC
CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float
+ HAVE_DYNAREC := 1
+ CPU_ARCH := mips
# Windows
else
@@ -403,8 +405,8 @@ ifeq ($(DEBUG), 1)
OPTIMIZE_SAFE := -O0 -g
OPTIMIZE := -O0 -g
else
- OPTIMIZE_SAFE := -O2 -DNDEBUG -g
- OPTIMIZE := -O3 -DNDEBUG -g
+ OPTIMIZE_SAFE := -O2 -DNDEBUG
+ OPTIMIZE := -O3 -DNDEBUG
endif
diff --git a/psp/mips_emit.h b/psp/mips_emit.h
index 48ed630..506c440 100644
--- a/psp/mips_emit.h
+++ b/psp/mips_emit.h
@@ -134,6 +134,12 @@ typedef enum
typedef enum
{
+ mips_special2_madd = 0x00,
+ mips_special2_maddu = 0x01,
+} mips_function_special2;
+
+typedef enum
+{
mips_special3_ext = 0x00,
mips_special3_ins = 0x04,
mips_special3_bshfl = 0x20
@@ -203,6 +209,12 @@ typedef enum
mips_special_##function; \
translation_ptr += 4 \
+#define mips_emit_special2(function, rs, rt, rd, shift) \
+ *((u32 *)translation_ptr) = (mips_opcode_special2 << 26) | \
+ (rs << 21) | (rt << 16) | (rd << 11) | (shift << 6) | \
+ mips_special2_##function; \
+ translation_ptr += 4 \
+
#define mips_emit_special3(function, rs, rt, imm_a, imm_b) \
*((u32 *)translation_ptr) = (mips_opcode_special3 << 26) | \
(rs << 21) | (rt << 16) | (imm_a << 11) | (imm_b << 6) | \
@@ -314,11 +326,19 @@ typedef enum
#define mips_emit_divu(rs, rt) \
mips_emit_special(divu, rs, rt, 0, 0) \
-#define mips_emit_madd(rs, rt) \
- mips_emit_special(madd, rs, rt, 0, 0) \
+#ifdef PSP
+ #define mips_emit_madd(rs, rt) \
+ mips_emit_special(madd, rs, rt, 0, 0) \
-#define mips_emit_maddu(rs, rt) \
- mips_emit_special(maddu, rs, rt, 0, 0) \
+ #define mips_emit_maddu(rs, rt) \
+ mips_emit_special(maddu, rs, rt, 0, 0)
+#else
+ #define mips_emit_madd(rs, rt) \
+ mips_emit_special2(madd, rs, rt, 0, 0) \
+
+ #define mips_emit_maddu(rs, rt) \
+ mips_emit_special2(maddu, rs, rt, 0, 0)
+#endif
#define mips_emit_movn(rd, rs, rt) \
mips_emit_special(movn, rs, rt, rd, 0) \
@@ -411,6 +431,9 @@ typedef enum
#define mips_emit_jr(rs) \
mips_emit_special(jr, rs, 0, 0, 0) \
+#define mips_emit_jalr(rs) \
+ mips_emit_special(jalr, rs, 0, 31, 0) \
+
#define mips_emit_synci(rs, offset) \
mips_emit_regimm(synci, rs, offset) \
@@ -2535,8 +2558,9 @@ u8 swi_hle_handle[256] =
#define ReOff_GP_Save (32*4) // GP_SAVE
// Saves all regs to their right slot and loads gp
-#define emit_save_regs(save_a2) \
- for (unsigned i = 0; i < 15; i++) { \
+#define emit_save_regs(save_a2) { \
+ int i; \
+ for (i = 0; i < 15; i++) { \
mips_emit_sw(arm_to_mips_reg[i], reg_base, 4 * i); \
} \
if (save_a2) { \
@@ -2544,21 +2568,24 @@ u8 swi_hle_handle[256] =
} \
/* Load the gp pointer, used by C code */ \
mips_emit_lw(mips_reg_gp, reg_base, ReOff_GP_Save); \
+}
// Restores the registers from their slot
-#define emit_restore_regs(restore_a2) \
+#define emit_restore_regs(restore_a2) { \
+ int i; \
if (restore_a2) { \
mips_emit_lw(reg_a2, reg_base, ReOff_SaveR2); \
} \
- for (unsigned i = 0; i < 15; i++) { \
+ for (i = 0; i < 15; i++) { \
mips_emit_lw(arm_to_mips_reg[i], reg_base, 4 * i); \
} \
+}
// Emits a function call for a read or a write (for special stuff like flash)
#define emit_mem_call_ds(fnptr, mask) \
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); \
emit_save_regs(true); \
- mips_emit_jal(((u32)(fnptr)) >> 2); \
+ genccall(fnptr); \
mips_emit_andi(reg_a0, reg_a0, (mask)); \
emit_restore_regs(true); \
mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); \
@@ -2569,10 +2596,10 @@ u8 swi_hle_handle[256] =
mips_emit_nop();
// Pointer table to stubs, indexed by type and region
-// Caution! This is not really a ptr table, but contains pre-encoed JALs
extern u32 tmemld[11][16];
extern u32 tmemst[ 4][16];
void mips_lookup_pc();
+void smc_write();
cpu_alert_type write_io_register8 (u32 address, u32 value);
cpu_alert_type write_io_register16(u32 address, u32 value);
cpu_alert_type write_io_register32(u32 address, u32 value);
@@ -2624,6 +2651,15 @@ static void emit_mem_access_loadop(
};
}
+#ifdef PIC
+ #define genccall(fn) \
+ mips_emit_lui(mips_reg_t9, ((u32)fn) >> 16); \
+ mips_emit_ori(mips_reg_t9, mips_reg_t9, ((u32)fn)); \
+ mips_emit_jalr(mips_reg_t9);
+#else
+ #define genccall(fn) mips_emit_jal(((u32)fn) >> 2);
+#endif
+
// Stub memory map:
// 0 .. 63 First patch handler [#0]
// 448 .. 511 Last patch handler [#7]
@@ -2721,7 +2757,7 @@ static void emit_pmemld_stub(
emit_save_regs(aligned);
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3);
mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff
- mips_emit_jal(((u32)&load_gamepak_page) >> 2);
+ genccall(&load_gamepak_page);
mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1);
mips_emit_lw(reg_temp, reg_base, ReOff_SaveR1);
@@ -2891,6 +2927,21 @@ static void emit_pmemst_stub(
*tr_ptr = translation_ptr;
}
+#ifdef USE_BGR_FORMAT
+ /* 0BGR to BGR565, for PSP */
+ #define palette_convert() \
+ mips_emit_sll(reg_temp, reg_a1, 1); \
+ mips_emit_andi(reg_temp, reg_temp, 0xFFC0); \
+ mips_emit_ins(reg_temp, reg_a1, 0, 5);
+#else
+ /* 0BGR to RGB565 (clobbers a0!) */
+ #define palette_convert() \
+ mips_emit_ext(reg_temp, reg_a1, 10, 5); \
+ mips_emit_ins(reg_temp, reg_a1, 11, 5); \
+ mips_emit_ext(reg_a0, reg_a1, 5, 5); \
+ mips_emit_ins(reg_temp, reg_a0, 6, 5);
+#endif
+
// Palette is accessed differently and stored in a decoded manner
static void emit_palette_hdl(
unsigned memop_number, const t_stub_meminfo *meminfo,
@@ -2923,17 +2974,13 @@ static void emit_palette_hdl(
mips_emit_sh(reg_a1, reg_base, 0x100);
}
- mips_emit_sll(reg_temp, reg_a1, 1);
- mips_emit_andi(reg_temp, reg_temp, 0xFFC0);
- mips_emit_ins(reg_temp, reg_a1, 0, 5);
+ palette_convert();
mips_emit_sh(reg_temp, reg_rv, 0x500);
if (size == 2) {
// Convert the second half-word also
mips_emit_srl(reg_a1, reg_a1, 16);
- mips_emit_sll(reg_temp, reg_a1, 1);
- mips_emit_andi(reg_temp, reg_temp, 0xFFC0);
- mips_emit_ins(reg_temp, reg_a1, 0, 5);
+ palette_convert();
mips_emit_sh(reg_temp, reg_rv, 0x502);
}
generate_function_return_swap_delay();
@@ -2980,6 +3027,7 @@ static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) {
// Stubs for regions with EEPROM or flash/SRAM
static void emit_saveaccess_stub(u8 **tr_ptr) {
+ unsigned opt, i, strop;
u8 *translation_ptr = *tr_ptr;
const u8 opmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 10}, {10, 11} };
@@ -2995,9 +3043,9 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
emit_mem_call(&write_eeprom, 0x3FF);
// Map loads to the read handler.
- for (unsigned opt = 0; opt < 6; opt++) {
+ for (opt = 0; opt < 6; opt++) {
// Unalignment is not relevant here, so map them all to the same handler.
- for (unsigned i = opmap[opt][0]; i < opmap[opt][1]; i++)
+ for (i = opmap[opt][0]; i < opmap[opt][1]; i++)
tmemld[i][13] = (u32)translation_ptr;
// Emit just a check + patch jump
mips_emit_srl(reg_temp, reg_a0, 24);
@@ -3007,7 +3055,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
mips_emit_b(beq, reg_zero, reg_zero, branch_offset(read_hndlr));
}
// This is for stores
- for (unsigned strop = 0; strop <= 3; strop++) {
+ for (strop = 0; strop <= 3; strop++) {
tmemst[strop][13] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_rv, reg_temp, 0x0D);
@@ -3017,7 +3065,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
}
// Flash/SRAM/Backup writes are only 8 byte supported
- for (unsigned strop = 0; strop <= 3; strop++) {
+ for (strop = 0; strop <= 3; strop++) {
tmemst[strop][14] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_rv, reg_temp, 0x0E);
@@ -3038,7 +3086,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
(u32)&write_io_register8, (u32)&write_io_register16,
(u32)&write_io_register32, (u32)&write_io_register32 };
const u32 amsk[] = {0x3FF, 0x3FE, 0x3FC, 0x3FC};
- for (unsigned strop = 0; strop <= 3; strop++) {
+ for (strop = 0; strop <= 3; strop++) {
tmemst[strop][4] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_temp, reg_temp, 0x04);
@@ -3047,7 +3095,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); // Store the return addr
emit_save_regs(strop == 3);
mips_emit_andi(reg_a0, reg_a0, amsk[strop]);
- mips_emit_jal(iowrtbl[strop] >> 2);
+ genccall(iowrtbl[strop]);
if (strop < 3) {
mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Save PC (delay)
@@ -3069,6 +3117,7 @@ static void emit_openload_stub(
unsigned memopn, bool signext, unsigned size,
unsigned alignment, bool aligned, u8 **tr_ptr
) {
+ u8 *jmp1, *jmp2;
u8 *translation_ptr = *tr_ptr;
// This affects regions 1 and 15
@@ -3105,30 +3154,31 @@ static void emit_openload_stub(
switch (size) {
case 0:
- mips_emit_b(beq, reg_zero, reg_rv, 2); // Depends on CPU mode
mips_emit_andi(reg_a0, reg_a0, 0x3); // ARM: Isolate two LSB
- mips_emit_andi(reg_a0, reg_a0, 0x1); // Thb: Isolate one LSB
- mips_emit_jal(((u32)&read_memory8) >> 2);
+ mips_emit_andi(reg_temp, reg_a0, 0x1); // Thb: Isolate one LSB
+ mips_emit_movn(reg_a0, reg_temp, reg_rv); // Pick thumb or ARM
+ genccall(&read_memory8);
mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay)
break;
case 1:
- mips_emit_b(beq, reg_zero, reg_rv, 2);
mips_emit_andi(reg_a0, reg_a0, 0x2); // ARM: Isolate bit 1
- mips_emit_andi(reg_a0, reg_a0, 0x0); // Thb: Ignore low bits at all
- mips_emit_jal(((u32)&read_memory16) >> 2);
+ mips_emit_movn(reg_a0, reg_zero, reg_rv); // Thumb: ignore all low bits
+ genccall(&read_memory16);
mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay)
break;
default:
- mips_emit_b(beq, reg_zero, reg_rv, 5);
+ mips_emit_b_filler(beq, reg_zero, reg_rv, jmp1);
mips_emit_addu(reg_a0, reg_zero, reg_a1); // Move PC to arg0
- mips_emit_jal(((u32)&read_memory16) >> 2);
+ genccall(&read_memory16);
mips_emit_nop();
- mips_emit_b(beq, reg_zero, reg_zero, 3);
+ mips_emit_b_filler(beq, reg_zero, reg_zero, jmp2);
mips_emit_ins(reg_rv, reg_rv, 16, 16); // res = res | (res << 16) [delay]
-
- mips_emit_jal(((u32)&read_memory32) >> 2);
+
+ generate_branch_patch_conditional(jmp1, translation_ptr);
+ genccall(&read_memory32);
mips_emit_nop();
+ generate_branch_patch_conditional(jmp2, translation_ptr);
break;
};
@@ -3194,9 +3244,14 @@ static void emit_phand(
mips_emit_rotr(reg_temp, reg_temp, 6); // Swap opcode and immediate
mips_emit_sw(reg_temp, mips_reg_ra, -8); // Patch instruction!
+ #ifdef PSP
mips_emit_cache(0x1A, mips_reg_ra, -8);
mips_emit_jr(reg_rv); // Jump directly to target for speed
mips_emit_cache(0x08, mips_reg_ra, -8);
+ #else
+ mips_emit_jr(reg_rv);
+ mips_emit_synci(mips_reg_ra, -8);
+ #endif
// Round up handlers to 16 instructions for easy addressing :)
while (translation_ptr - *tr_ptr < 64) {
@@ -3212,6 +3267,7 @@ static void emit_phand(
// - mem stubs: There's stubs for load & store, and every memory region
// and possible operand size and misaligment (+sign extensions)
void init_emitter() {
+ int i;
// Initialize memory to a debuggable state
memset(stub_arena, 0, sizeof(stub_arena)); // nop
@@ -3234,25 +3290,8 @@ void init_emitter() {
emit_phand(&translation_ptr, 2, 13 * 16, false); // st u32
emit_phand(&translation_ptr, 2, 14 * 16, false); // st aligned 32
- // Generate SMC write handler, with the lookup machinery
- // Call out the flushing routine (save PC)
- emit_save_regs(false);
- mips_emit_jal(((u32)&flush_translation_cache_ram) >> 2);
- mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Delay slot
-
- mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR
- mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit
- mips_emit_b(beq, reg_rv, reg_zero, 3); // Skip to ARM mode
- mips_emit_lw(reg_a0, reg_base, ReOff_RegPC); // arg0=pc
- // Lookup thumb PC and execute
- mips_emit_jal(((u32)&block_lookup_address_thumb) >> 2);
- mips_emit_addiu(mips_reg_ra, mips_reg_ra, 8); // Skip 2 insts on return!
- // Lookup arm PC and execute
- mips_emit_jal(((u32)&block_lookup_address_arm) >> 2);
- mips_emit_nop();
- // Epiloge (restore and jump)
- emit_restore_regs(false);
- mips_emit_jr(reg_rv); // Go execute the code
+ // This is just a trampoline (for the SMC branches)
+ mips_emit_j(((u32)&smc_write) >> 2);
mips_emit_nop();
// Generate the openload handlers (for accesses to unmapped mem)
@@ -3269,7 +3308,7 @@ void init_emitter() {
emit_openload_stub(10,false, 2, 0, true, &translation_ptr); // ld aligned 32
// Here we emit the ignore store area, just checks and does nothing
- for (unsigned i = 0; i < 4; i++)
+ for (i = 0; i < 4; i++)
emit_ignorestore_stub(i, &translation_ptr);
// Here go the save game handlers
@@ -3295,7 +3334,7 @@ void init_emitter() {
// 15 Open load / Ignore store
};
- for (unsigned i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) {
+ for (i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) {
ldhldr_t handler = (ldhldr_t)ldinfo[i].emitter;
/* region info signext sz al isaligned */
handler(0, &ldinfo[i], false, 0, 0, false, &translation_ptr); // ld u8
@@ -3325,7 +3364,7 @@ void init_emitter() {
// Store only for "regular"-ish mem regions
//
- for (unsigned i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) {
+ for (i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) {
sthldr_t handler = (sthldr_t)stinfo[i].emitter;
handler(0, &stinfo[i], 0, false, &translation_ptr); // st u8
handler(1, &stinfo[i], 1, false, &translation_ptr); // st u16
@@ -3334,6 +3373,11 @@ void init_emitter() {
}
}
+u32 execute_arm_translate_internal(u32 cycles, void *regptr);
+u32 function_cc execute_arm_translate(u32 cycles) {
+ return execute_arm_translate_internal(cycles, &reg[0]);
+}
+
#endif
diff --git a/psp/mips_stub.S b/psp/mips_stub.S
index a427e89..2d40bf8 100644
--- a/psp/mips_stub.S
+++ b/psp/mips_stub.S
@@ -33,13 +33,14 @@
.global execute_lsr_flags_reg
.global execute_asr_flags_reg
.global execute_ror_flags_reg
-.global execute_arm_translate
+.global execute_arm_translate_internal
.global icache_region_sync
.global reg_check
.global palette_ram
.global palette_ram_converted
.global init_emitter
.global mips_lookup_pc
+.global smc_write
.global write_io_epilogue
.global memory_map_read
@@ -120,6 +121,7 @@
.equ REGMODE_BASE, (0x900 + 24)
.equ SUPERVISOR_SPSR, (3 * 4 + SPSR_BASE)
.equ SUPERVISOR_LR, ((3 * (7 * 4)) + (6 * 4) + REGMODE_BASE)
+.equ FNPTRS_BASE, (0x900 + 220 + 960)
.set noat
.set noreorder
@@ -196,6 +198,22 @@
lw $30, REG_R14($16)
.endm
+# PIC ABI mandates to jump to target via $t9
+
+#ifdef PIC
+.macro cfncall target, targetid
+ lw $t9, (FNPTRS_BASE + \targetid * 4)($16)
+ jalr $t9
+ nop
+.endm
+#else
+.macro cfncall target, targetid
+ jal \target
+ nop
+.endm
+#endif
+
+
# Process a hardware event. Since an interrupt might be
# raised we have to check if the PC has changed.
@@ -213,8 +231,8 @@ mips_update_gba:
sw $ra, REG_SAVE2($16) # save return addr
collapse_flags # update cpsr
save_registers # save registers
- jal update_gba # process the next event
sw $0, CHANGED_PC_STATUS($16)
+ cfncall update_gba, 0 # process the next event
lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame
bne $1, $0, return_to_main # Return to main thread now
@@ -257,26 +275,24 @@ return_to_main:
mips_indirect_branch_arm:
save_registers
- jal block_lookup_address_arm # $2 = MIPS address to jump to
- nop
+ cfncall block_lookup_address_arm, 1
restore_registers
- jr $2 # jump to it
+ jr $2 # $2 = value returned
nop
mips_indirect_branch_thumb:
save_registers
- jal block_lookup_address_thumb # $2 = MIPS address to jump to
- nop
+ cfncall block_lookup_address_thumb, 2
restore_registers
- jr $2 # jump to it
+ jr $2 # $2 = value returned
nop
mips_indirect_branch_dual:
save_registers
- jal block_lookup_address_dual # $2 = MIPS address to jump to
+ cfncall block_lookup_address_dual, 3
nop
restore_registers
- jr $2 # jump to it
+ jr $2 # $2 = value returned
nop
@@ -293,8 +309,7 @@ write_io_epilogue:
alert_loop:
- jal update_gba # process the next event
- nop
+ cfncall update_gba, 0 # process the next event
lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame
bne $1, $0, return_to_main # Return to main thread now
@@ -321,15 +336,14 @@ no_alert:
nop
smc_dma:
- jal flush_translation_cache_ram # flush translation cache
- nop
+ cfncall flush_translation_cache_ram, 4
j lookup_pc
nop
smc_write:
save_registers
- jal flush_translation_cache_ram # flush translation cache
- sw $6, REG_PC($16) # save PC (delay slot)
+ sw $6, REG_PC($16) # save PC
+ cfncall flush_translation_cache_ram, 4
mips_lookup_pc:
lookup_pc:
@@ -339,17 +353,17 @@ lookup_pc:
nop
lookup_pc_thumb:
- jal block_lookup_address_thumb # get Thumb address
- lw $4, REG_PC($16) # load PC as arg 0 (delay slot)
+ lw $4, REG_PC($16) # load PC as arg 0
+ cfncall block_lookup_address_thumb, 2 # get Thumb address
restore_registers
- jr $2 # jump to result
+ jr $2 # jump to result
nop
lookup_pc_arm:
- jal block_lookup_address_arm # get ARM address
- lw $4, REG_PC($16) # load PC as arg 0 (delay slot)
+ lw $4, REG_PC($16) # load PC as arg 0
+ cfncall block_lookup_address_arm, 1 # get ARM address
restore_registers
- jr $2 # jump to result
+ jr $2 # jump to result
nop
# Return the current cpsr
@@ -381,8 +395,8 @@ execute_swi:
ori $2, 0x13 # set mode to supervisor
sw $2, REG_CPSR($16) # write back CPSR
save_registers
- jal set_cpu_mode # set the CPU mode to supervisor
- li $4, 3 # 3 is supervisor mode (delay slot)
+ li $4, 3 # 3 is supervisor mode
+ cfncall set_cpu_mode, 5 # set the CPU mode to supervisor
restore_registers
lw $ra, ($sp) # pop $ra
jr $ra # return
@@ -404,8 +418,7 @@ execute_spsr_restore:
addiu $sp, $sp, -4
sw $ra, ($sp)
save_registers
- jal execute_spsr_restore_body # do the dirty work in this C function
- nop
+ cfncall execute_spsr_restore_body, 6 # do the dirty work in this C function
restore_registers
addu $4, $2, $0 # move return value to $4
lw $ra, ($sp)
@@ -429,8 +442,8 @@ execute_store_cpsr:
extract_flags_body # extract flags from $1
sw $ra, REG_SAVE3($16)
save_registers
- jal execute_store_cpsr_body # do the dirty work in this C function
- addu $4, $1, $0 # load the new CPSR (delay slot)
+ addu $4, $1, $0 # load the new CPSR
+ cfncall execute_store_cpsr_body, 7 # do the dirty work in this C function
bne $2, $0, changed_pc_cpsr # this could have changed the pc
nop
@@ -442,10 +455,10 @@ execute_store_cpsr:
nop
changed_pc_cpsr:
- jal block_lookup_address_arm # GBA address is in $4
- addu $4, $2, $0 # load new address in $4 (delay slot)
- restore_registers # restore registers
- jr $2 # jump to the new address
+ addu $4, $2, $0 # load new address in $4
+ cfncall block_lookup_address_arm, 1 # GBA address is in $4
+ restore_registers # restore registers
+ jr $2 # jump to the new address
nop
@@ -549,8 +562,9 @@ ror_zero_shift:
rotrv $4, $4, $5 # return (value ror shift) delay
# $4: cycle counter argument
+# $5: pointer to reg
-execute_arm_translate:
+execute_arm_translate_internal:
add $sp, $sp, -48 # Store the main thread context
sw $s0, 0($sp)
sw $s1, 4($sp)
@@ -563,9 +577,7 @@ execute_arm_translate:
sw $fp, 32($sp)
sw $ra, 36($sp)
- lui $16, %hi(reg) # load reg address into base reg
- addiu $16, %lo(reg)
-
+ move $16, $5
sw $28, GP_SAVE($16)
addu $17, $4, $0 # load cycle counter register
@@ -582,15 +594,13 @@ execute_arm_translate:
bne $1, $0, 1f
lw $4, REG_PC($16) # load PC into $4 (delay)
- jal block_lookup_address_arm # lookup initial jump address
- nop
+ cfncall block_lookup_address_arm, 1
restore_registers # load initial register values
jr $2 # jump to return
nop
1:
- jal block_lookup_address_thumb # lookup initial jump address
- nop
+ cfncall block_lookup_address_thumb, 2
restore_registers # load initial register values
jr $2 # jump to return
nop
@@ -629,5 +639,13 @@ tmemld:
.space 704
tmemst:
.space 256
-
+fnptrs:
+ .long update_gba # 0
+ .long block_lookup_address_arm # 1
+ .long block_lookup_address_thumb # 2
+ .long block_lookup_address_dual # 3
+ .long flush_translation_cache_ram # 4
+ .long set_cpu_mode # 5
+ .long execute_spsr_restore_body # 6
+ .long execute_store_cpsr_body # 7