summary refs log tree commit diff
diff options
context:
space:
mode:
author David Guillen Fandos 2021-06-18 18:03:47 +0200
committer David Guillen Fandos 2021-06-18 18:03:47 +0200
commit e0a31952dbffd15cd2878ed20142ec41cbd937bb (patch)
tree 193b602acde7e4a44297058293a2ac2c3b62af75
parent 34b90277bcba369807a434bde3f770dd401007ac (diff)
download picogpsp-e0a31952dbffd15cd2878ed20142ec41cbd937bb.tar.gz
picogpsp-e0a31952dbffd15cd2878ed20142ec41cbd937bb.tar.bz2
picogpsp-e0a31952dbffd15cd2878ed20142ec41cbd937bb.zip
Add preliminary support for non-MIPS32R2 devices
This is required for the PS2, and could also let older Dingux devices run gpsp on RetroArch.
-rw-r--r-- Makefile 7
-rw-r--r-- psp/mips_emit.h 179
-rw-r--r-- psp/mips_stub.S 29
3 files changed, 142 insertions, 73 deletions
diff --git a/Makefile b/Makefile
index 6edf65c..4d75fdd 100644
--- a/Makefile
+++ b/Makefile
@@ -200,7 +200,7 @@ else ifeq ($(platform), psp1)
TARGET := $(TARGET_NAME)_libretro_$(platform).a
CC = psp-gcc$(EXE_EXT)
AR = psp-ar$(EXE_EXT)
- CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT
+ CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT -DMIPS_HAS_R2_INSTS
CFLAGS += -I$(shell psp-config --pspsdk-path)/include
CFLAGS += -march=allegrex -mfp32 -mgp32 -mlong32 -mabi=eabi
CFLAGS += -fomit-frame-pointer -ffast-math
@@ -375,7 +375,7 @@ else ifeq ($(platform), mips32)
SHARED := -shared -nostdlib -Wl,--version-script=link.T
fpic := -fPIC -DPIC
CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float
- CFLAGS += -fno-caller-saves
+ CFLAGS += -fno-caller-saves -DMIPS_HAS_R2_INSTS
HAVE_DYNAREC := 1
CPU_ARCH := mips
@@ -393,6 +393,7 @@ else ifeq ($(platform), gcw0)
SHARED := -shared -nostdlib -Wl,--version-script=link.T
fpic := -fPIC -DPIC
CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float
+ CFLAGS += -DMIPS_HAS_R2_INSTS
HAVE_DYNAREC := 1
CPU_ARCH := mips
@@ -408,7 +409,7 @@ else ifeq ($(platform), gcw0-odbeta)
# The ASM code and/or MIPS dynarec of GPSP does not respect
# MIPS calling conventions, so we must use '-fno-caller-saves'
# for the OpenDingux Beta build
- CFLAGS += -fno-caller-saves
+ CFLAGS += -fno-caller-saves -DMIPS_HAS_R2_INSTS
HAVE_DYNAREC := 1
CPU_ARCH := mips
diff --git a/psp/mips_emit.h b/psp/mips_emit.h
index 53a09a6..679c9e0 100644
--- a/psp/mips_emit.h
+++ b/psp/mips_emit.h
@@ -791,12 +791,13 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
- mips_emit_rotr(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ rotate_right(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], \
+ reg_temp, _shift); \
} \
else \
- { \
+ { /* Special case: RRX (no carry update) */ \
mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 1); \
- mips_emit_ins(arm_to_mips_reg[arm_reg], reg_c_cache, 31, 1); \
+ insert_bits(arm_to_mips_reg[arm_reg], reg_c_cache, reg_temp, 31, 1); \
} \
_rm = arm_reg \
@@ -804,7 +805,7 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
- mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (32 - _shift), 1); \
+ extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (32 - _shift), 1); \
mips_emit_sll(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
_rm = arm_reg; \
} \
@@ -813,7 +814,7 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
- mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
+ extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
} \
else \
@@ -827,7 +828,7 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
- mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
+ extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
mips_emit_sra(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
} \
else \
@@ -841,15 +842,16 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
- mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
- mips_emit_rotr(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
+ extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
+ rotate_right(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], \
+ reg_temp, _shift); \
} \
else \
- { \
- mips_emit_andi(reg_temp, arm_to_mips_reg[_rm], 1); \
+ { /* Special case: RRX (carry update) */ \
+ mips_emit_sll(reg_temp, reg_c_cache, 31); \
+ mips_emit_andi(reg_c_cache, arm_to_mips_reg[_rm], 1); \
mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 1); \
- mips_emit_ins(arm_to_mips_reg[arm_reg], reg_c_cache, 31, 1); \
- mips_emit_addu(reg_c_cache, reg_temp, reg_zero); \
+ mips_emit_or(arm_to_mips_reg[arm_reg], arm_to_mips_reg[arm_reg],reg_temp);\
} \
_rm = arm_reg \
@@ -870,7 +872,8 @@ u32 arm_to_mips_reg[] =
mips_emit_sra(reg_a0, reg_a0, 31) \
#define generate_shift_reg_ror_no_flags(_rm, _rs) \
- mips_emit_rotrv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]) \
+ rotate_right_var(reg_a0, arm_to_mips_reg[_rm], \
+ reg_temp, arm_to_mips_reg[_rs]) \
#define generate_shift_reg_lsl_flags(_rm, _rs) \
generate_load_reg_pc(reg_a0, _rm, 12); \
@@ -892,7 +895,8 @@ u32 arm_to_mips_reg[] =
mips_emit_addiu(reg_temp, arm_to_mips_reg[_rs], -1); \
mips_emit_srlv(reg_temp, arm_to_mips_reg[_rm], reg_temp); \
mips_emit_andi(reg_c_cache, reg_temp, 1); \
- mips_emit_rotrv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]) \
+ rotate_right_var(reg_a0, arm_to_mips_reg[_rm], \
+ reg_temp, arm_to_mips_reg[_rs]) \
#define generate_shift_imm(arm_reg, name, flags_op) \
u32 shift = (opcode >> 7) & 0x1F; \
@@ -1894,7 +1898,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
} \
else \
{ \
- mips_emit_ins(reg_a2, reg_zero, 0, 2); \
+ emit_align_reg(reg_a2, 2); \
\
for(i = 0; i < 16; i++) \
{ \
@@ -2070,20 +2074,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
check_store_reg_pc_thumb(dest_rd); \
} \
-/*
-
-#define thumb_data_proc_hi(name) \
-{ \
- thumb_decode_hireg_op(); \
- check_load_reg_pc(arm_reg_a0, rs, 4); \
- check_load_reg_pc(arm_reg_a1, rd, 4); \
- generate_op_##name##_reg(arm_to_mips_reg[rd], arm_to_mips_reg[rd], \
- arm_to_mips_reg[rs]); \
- check_store_reg_pc_thumb(rd); \
-} \
-
-*/
-
#define thumb_data_proc_test_hi(name) \
{ \
thumb_decode_hireg_op(); \
@@ -2331,7 +2321,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
} \
else \
{ \
- mips_emit_ins(reg_a2, reg_zero, 0, 2); \
+ emit_align_reg(reg_a2, 2); \
\
for(i = 0; i < 8; i++) \
{ \
@@ -2528,6 +2518,71 @@ u8 swi_hle_handle[256] =
generate_load_pc(reg_a0, pc); \
mips_emit_sw(reg_a0, reg_base, (REG_PC * 4)) \
+// Wrappers for bit-field, sign-extend and rotate operations: each emits one R2 instruction when MIPS_HAS_R2_INSTS is defined, or an equivalent shift/or sequence otherwise
+
+/* MIPS32R2 and PSP support ins, ext, seb, rotr; builds for those targets define MIPS_HAS_R2_INSTS */
+#ifdef MIPS_HAS_R2_INSTS
+ // Inserts the low "size" bits of rsrc into rdest at bit "pos" (rtemp is unused in this variant)
+ #define insert_bits(rdest, rsrc, rtemp, pos, size) \
+ mips_emit_ins(rdest, rsrc, pos, size);
+ // Doubles a byte into a halfword: copies bits 0-7 of reg into bits 8-15 (rtmp is unused in this variant)
+ #define double_byte(reg, rtmp) \
+ mips_emit_ins(reg, reg, 8, 8);
+ // Clears the low "numbits" bits of reg by inserting zeros (to align an address)
+ #define emit_align_reg(reg, numbits) \
+ mips_emit_ins(reg, reg_zero, 0, numbits)
+ // Extracts the bitfield of "size" bits starting at bit "pos" of rs into rt
+ #define extract_bits(rt, rs, pos, size) \
+ mips_emit_ext(rt, rs, pos, size)
+ // Sign-extends the low byte of rs into rt
+ #define extend_byte_signed(rt, rs) \
+ mips_emit_seb(rt, rs)
+ // Rotates rsrc right by a constant amount into rdest (rtemp is unused in this variant)
+ #define rotate_right(rdest, rsrc, rtemp, amount) \
+ mips_emit_rotr(rdest, rsrc, amount);
+ // Same but the rotation amount comes from register ramount (rtemp is unused in this variant)
+ #define rotate_right_var(rdest, rsrc, rtemp, ramount) \
+ mips_emit_rotrv(rdest, rsrc, ramount);
+#else
+ // Inserts the low "size" bits of rsrc into rdest at bit "pos" by OR-ing them in; clobbers rtemp
+ // *assumes the destination bits of rdest are already cleared*!
+ #define insert_bits(rdest, rsrc, rtemp, pos, size) \
+ mips_emit_sll(rtemp, rsrc, 32 - size); \
+ mips_emit_srl(rtemp, rtemp, 32 - size - pos); \
+ mips_emit_or(rdest, rdest, rtemp);
+ // Doubles a byte into a halfword: reg = (reg & 0xff) | (reg << 8); clobbers rtmp, and bits above 15 are not preserved
+ #define double_byte(reg, rtmp) \
+ mips_emit_sll(rtmp, reg, 8); \
+ mips_emit_andi(reg, reg, 0xff); \
+ mips_emit_or(reg, reg, rtmp);
+ // Clears the low "numbits" bits of reg by shifting right then left (to align an address)
+ #define emit_align_reg(reg, numbits) \
+ mips_emit_srl(reg, reg, numbits); \
+ mips_emit_sll(reg, reg, numbits)
+ // Extracts a bitfield (pos, size) into rt: shift the field up to the top bits, then down to bit 0
+ #define extract_bits(rt, rs, pos, size) \
+ mips_emit_sll(rt, rs, 32 - ((pos) + (size))); \
+ mips_emit_srl(rt, rt, 32 - (size))
+ // Sign-extends the low byte of rs into rt (shift left by 24, then arithmetic shift right by 24)
+ #define extend_byte_signed(rt, rs) \
+ mips_emit_sll(rt, rs, 24); \
+ mips_emit_sra(rt, rt, 24)
+ // Rotates rsrc right by a constant amount; clobbers rtemp, which must differ from rsrc. NOTE(review): amount == 0 yields a shift count of 32 - confirm mips_emit_sll copes with that
+ #define rotate_right(rdest, rsrc, rtemp, amount) \
+ mips_emit_sll(rtemp, rsrc, 32 - (amount)); \
+ mips_emit_srl(rdest, rsrc, (amount)); \
+ mips_emit_or(rdest, rdest, rtemp)
+ // Rotates rsrc right by (ramount & 0x1F); clobbers rtemp and needs rdest != rsrc, because rsrc is read again after rdest is written
+ #define rotate_right_var(rdest, rsrc, rtemp, ramount) \
+ mips_emit_andi(rtemp, ramount, 0x1F); \
+ mips_emit_srlv(rdest, rsrc, rtemp); \
+ mips_emit_subu(rtemp, reg_zero, rtemp); \
+ mips_emit_addiu(rtemp, rtemp, 32); \
+ mips_emit_sllv(rtemp, rsrc, rtemp); \
+ mips_emit_or(rdest, rdest, rtemp)
+
+#endif
+
// Register save layout as follows:
#define ReOff_RegPC (15*4) // REG_PC
@@ -2698,7 +2753,7 @@ static void emit_pmemld_stub(
// Address checking: jumps to handler if bad region/alignment
mips_emit_srl(reg_temp, reg_a0, (32 - regionbits));
if (!aligned && size != 0) { // u8 or aligned u32 dont need to check alignment bits
- mips_emit_ins(reg_temp, reg_a0, regionbits, size); // Add 1 or 2 bits of alignment
+ insert_bits(reg_temp, reg_a0, reg_rv, regionbits, size); // Add 1 or 2 bits of alignment
}
if (regioncheck || alignment) { // If region and alignment are zero, can skip
mips_emit_xori(reg_temp, reg_temp, regioncheck | (alignment << regionbits));
@@ -2735,7 +2790,7 @@ static void emit_pmemld_stub(
// This code call the C routine to map the relevant ROM page
emit_save_regs(aligned);
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3);
- mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff
+ extract_bits(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff
genccall(&load_gamepak_page);
mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1);
@@ -2750,11 +2805,11 @@ static void emit_pmemld_stub(
// Read from flash, is a bit special, fn call
emit_mem_call_ds(&read_backup, 0xFFFF);
if (!size && signext) {
- mips_emit_seb(reg_rv, reg_rv);
+ extend_byte_signed(reg_rv, reg_rv);
} else if (size == 1 && alignment) {
- mips_emit_seb(reg_rv, reg_rv);
+ extend_byte_signed(reg_rv, reg_rv);
} else if (size == 2) {
- mips_emit_rotr(reg_rv, reg_rv, 8 * alignment);
+ rotate_right(reg_rv, reg_rv, reg_temp, 8 * alignment);
}
generate_function_return_swap_delay();
*tr_ptr = translation_ptr;
@@ -2770,21 +2825,22 @@ static void emit_pmemld_stub(
if (region == 2) {
// Can't do EWRAM with an `andi` instruction (18 bits mask)
- mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
+ extract_bits(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
if (!aligned && alignment != 0) {
- mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
+ emit_align_reg(reg_a0, size); // addr & ~1/2 (align to size)
}
// Need to insert a zero in the addr (due to how it's mapped)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr
} else if (region == 6) {
// VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
- mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
+ extract_bits(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block)
if (!aligned && alignment != 0) {
- mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
+ emit_align_reg(reg_a0, size); // addr & ~1/2 (align to size)
}
- mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block
- mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
+ extract_bits(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
+ mips_emit_b(bne, reg_zero, reg_temp, 1); // Skip unless last block
+ generate_swap_delay();
mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
} else {
@@ -2795,16 +2851,13 @@ static void emit_pmemld_stub(
}
}
- // Aligned accesses (or the weird s16u1 case) are just one inst
- if (alignment == 0 || (size == 1 && signext)) {
- emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); // Delay slot
- translation_ptr += 4;
- }
- else {
- // Unaligned accesses (require rotation) need two insts
- emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext);
- translation_ptr += 4;
- mips_emit_rotr(reg_rv, reg_rv, alignment * 8); // Delay slot
+ // Emit load operation
+ emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext);
+ translation_ptr += 4;
+
+ if (!(alignment == 0 || (size == 1 && signext))) {
+ // Unaligned accesses require rotation, except for size=1 & signext
+ rotate_right(reg_rv, reg_rv, reg_temp, alignment * 8);
}
generate_function_return_swap_delay(); // Return. Move prev inst to delay slot
@@ -2842,26 +2895,27 @@ static void emit_pmemst_stub(
mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
if (doubleaccess) {
- mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8)
+ double_byte(reg_a1, reg_temp); // value = value | (value << 8)
}
if (region == 2) {
// Can't do EWRAM with an `andi` instruction (18 bits mask)
- mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
+ extract_bits(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
if (!aligned && realsize != 0) {
- mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
+ emit_align_reg(reg_a0, size); // addr & ~1/2 (align to size)
}
// Need to insert a zero in the addr (due to how it's mapped)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr
} else if (region == 6) {
// VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
- mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
+ extract_bits(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block)
if (!aligned && realsize != 0) {
- mips_emit_ins(reg_a0, reg_zero, 0, realsize);// addr & ~1/2 (align to size)
+ emit_align_reg(reg_a0, realsize); // addr & ~1/2 (align to size)
}
- mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block
- mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
+ extract_bits(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
+ mips_emit_b(bne, reg_zero, reg_temp, 1); // Skip next inst unless last block
+ generate_swap_delay();
mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
} else {
@@ -2951,7 +3005,7 @@ static void emit_palette_hdl(
mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number));
mips_emit_andi(reg_rv, reg_a0, memmask); // Clear upper bits (mirroring)
if (size == 0) {
- mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8)
+ double_byte(reg_a1, reg_temp); // value = value | (value << 8)
}
mips_emit_addu(reg_rv, reg_rv, reg_base);
@@ -3187,15 +3241,16 @@ static void emit_phand(
mips_emit_min(reg_temp, reg_temp, reg_rv);// Do not overflow table
#else
mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // Check for addr 0x1XXX.. 0xFXXX
- mips_emit_b(bne, reg_zero, reg_rv, 2); // Skip two insts (well, cant skip ds)
mips_emit_sll(reg_temp, reg_temp, 2); // Table is word indexed
+ mips_emit_b(bne, reg_zero, reg_rv, 1); // Skip next inst if region is good
+ generate_swap_delay();
mips_emit_addiu(reg_temp, reg_zero, 15*4);// Simulate ld/st to 0x0FXXX (open/ignore)
#endif
// Stores or byte-accesses do not care about alignment
if (check_alignment) {
- // Move alignment bits for the table lookup
- mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7)
+ // Move alignment bits for the table lookup (1 or 2, to bits 6 and 7)
+ insert_bits(reg_temp, reg_a0, reg_rv, 6, size);
}
unsigned tbloff = 256 + 3*1024 + 220 + 4 * toff; // Skip regs and RAMs
diff --git a/psp/mips_stub.S b/psp/mips_stub.S
index 47f219a..48146b3 100644
--- a/psp/mips_stub.S
+++ b/psp/mips_stub.S
@@ -130,9 +130,25 @@
# make sure $16 has the register base for these macros
-.macro collapse_flag flag_reg, shift
- ins $2, $\flag_reg, \shift, 1 # insert flag into CPSR
-.endm
+#ifdef MIPS_HAS_R2_INSTS
+ .macro collapse_flag flag_reg, shift
+ ins $2, $\flag_reg, \shift, 1 # insert flag into CPSR
+ .endm
+
+ .macro extract_flag shift, flag_reg
+ ext $\flag_reg, $1, \shift, 1 # extract flag from CPSR
+ .endm
+#else
+ .macro collapse_flag flag_reg, shift
+ sll $1, $\flag_reg, \shift # move flag to its CPSR bit position (clobbers $1)
+ or $2, $2, $1 # merge flag into CPSR; NOTE(review): assumes that bit of $2 is already clear - confirm
+ .endm
+
+ .macro extract_flag shift, flag_reg
+ srl $\flag_reg, $1, \shift # bring the flag bit of $1 down to bit 0
+ andi $\flag_reg, $\flag_reg, 1 # keep only that bit
+ .endm
+#endif
.macro collapse_flags
lw $2, REG_CPSR($16) # load CPSR
@@ -144,10 +160,6 @@
sw $2, REG_CPSR($16) # store CPSR
.endm
-.macro extract_flag shift, flag_reg
- ext $\flag_reg, $1, \shift, 1 # extract flag from CPSR
-.endm
-
.macro extract_flags_body # extract flags from $1
extract_flag 31, 20 # load flags
extract_flag 30, 21
@@ -403,7 +415,8 @@ execute_swi:
sw $4, SUPERVISOR_LR($16) # store next PC in the supervisor's LR
collapse_flags # get cpsr in $2
sw $2, SUPERVISOR_SPSR($16) # save cpsr in SUPERVISOR_CPSR
- ins $2, $0, 0, 6 # zero out bottom 6 bits of CPSR
+ srl $2, $2, 6 # zero out bottom 6 bits of CPSR
+ sll $2, $2, 6
ori $2, 0x13 # set mode to supervisor
sw $2, REG_CPSR($16) # write back CPSR
save_registers