From e0a31952dbffd15cd2878ed20142ec41cbd937bb Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 18 Jun 2021 18:03:47 +0200 Subject: Add preliminary support for non mips32r2 devices This is required in PS2 but could also make older dingux devices run gpsp on retroarch --- Makefile | 7 ++- psp/mips_emit.h | 179 ++++++++++++++++++++++++++++++++++++-------------------- psp/mips_stub.S | 29 ++++++--- 3 files changed, 142 insertions(+), 73 deletions(-) diff --git a/Makefile b/Makefile index 6edf65c..4d75fdd 100644 --- a/Makefile +++ b/Makefile @@ -200,7 +200,7 @@ else ifeq ($(platform), psp1) TARGET := $(TARGET_NAME)_libretro_$(platform).a CC = psp-gcc$(EXE_EXT) AR = psp-ar$(EXE_EXT) - CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT + CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT -DMIPS_HAS_R2_INSTS CFLAGS += -I$(shell psp-config --pspsdk-path)/include CFLAGS += -march=allegrex -mfp32 -mgp32 -mlong32 -mabi=eabi CFLAGS += -fomit-frame-pointer -ffast-math @@ -375,7 +375,7 @@ else ifeq ($(platform), mips32) SHARED := -shared -nostdlib -Wl,--version-script=link.T fpic := -fPIC -DPIC CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float - CFLAGS += -fno-caller-saves + CFLAGS += -fno-caller-saves -DMIPS_HAS_R2_INSTS HAVE_DYNAREC := 1 CPU_ARCH := mips @@ -393,6 +393,7 @@ else ifeq ($(platform), gcw0) SHARED := -shared -nostdlib -Wl,--version-script=link.T fpic := -fPIC -DPIC CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float + CFLAGS += -DMIPS_HAS_R2_INSTS HAVE_DYNAREC := 1 CPU_ARCH := mips @@ -408,7 +409,7 @@ else ifeq ($(platform), gcw0-odbeta) # The ASM code and/or MIPS dynarec of GPSP does not respect # MIPS calling conventions, so we must use '-fno-caller-saves' # for the OpenDingux Beta build - CFLAGS += -fno-caller-saves + CFLAGS += -fno-caller-saves -DMIPS_HAS_R2_INSTS HAVE_DYNAREC := 1 CPU_ARCH := mips diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 53a09a6..679c9e0 100644 --- a/psp/mips_emit.h +++ 
b/psp/mips_emit.h @@ -791,12 +791,13 @@ u32 arm_to_mips_reg[] = check_load_reg_pc(arm_reg, _rm, 8); \ if(_shift != 0) \ { \ - mips_emit_rotr(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \ + rotate_right(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], \ + reg_temp, _shift); \ } \ else \ - { \ + { /* Special case: RRX (no carry update) */ \ mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 1); \ - mips_emit_ins(arm_to_mips_reg[arm_reg], reg_c_cache, 31, 1); \ + insert_bits(arm_to_mips_reg[arm_reg], reg_c_cache, reg_temp, 31, 1); \ } \ _rm = arm_reg \ @@ -804,7 +805,7 @@ u32 arm_to_mips_reg[] = check_load_reg_pc(arm_reg, _rm, 8); \ if(_shift != 0) \ { \ - mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (32 - _shift), 1); \ + extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (32 - _shift), 1); \ mips_emit_sll(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \ _rm = arm_reg; \ } \ @@ -813,7 +814,7 @@ u32 arm_to_mips_reg[] = check_load_reg_pc(arm_reg, _rm, 8); \ if(_shift != 0) \ { \ - mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \ + extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \ mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \ } \ else \ @@ -827,7 +828,7 @@ u32 arm_to_mips_reg[] = check_load_reg_pc(arm_reg, _rm, 8); \ if(_shift != 0) \ { \ - mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \ + extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \ mips_emit_sra(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \ } \ else \ @@ -841,15 +842,16 @@ u32 arm_to_mips_reg[] = check_load_reg_pc(arm_reg, _rm, 8); \ if(_shift != 0) \ { \ - mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \ - mips_emit_rotr(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \ + extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \ + rotate_right(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], \ + reg_temp, _shift); \ } \ else \ 
- { \ - mips_emit_andi(reg_temp, arm_to_mips_reg[_rm], 1); \ + { /* Special case: RRX (carry update) */ \ + mips_emit_sll(reg_temp, reg_c_cache, 31); \ + mips_emit_andi(reg_c_cache, arm_to_mips_reg[_rm], 1); \ mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 1); \ - mips_emit_ins(arm_to_mips_reg[arm_reg], reg_c_cache, 31, 1); \ - mips_emit_addu(reg_c_cache, reg_temp, reg_zero); \ + mips_emit_or(arm_to_mips_reg[arm_reg], arm_to_mips_reg[arm_reg],reg_temp);\ } \ _rm = arm_reg \ @@ -870,7 +872,8 @@ u32 arm_to_mips_reg[] = mips_emit_sra(reg_a0, reg_a0, 31) \ #define generate_shift_reg_ror_no_flags(_rm, _rs) \ - mips_emit_rotrv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]) \ + rotate_right_var(reg_a0, arm_to_mips_reg[_rm], \ + reg_temp, arm_to_mips_reg[_rs]) \ #define generate_shift_reg_lsl_flags(_rm, _rs) \ generate_load_reg_pc(reg_a0, _rm, 12); \ @@ -892,7 +895,8 @@ u32 arm_to_mips_reg[] = mips_emit_addiu(reg_temp, arm_to_mips_reg[_rs], -1); \ mips_emit_srlv(reg_temp, arm_to_mips_reg[_rm], reg_temp); \ mips_emit_andi(reg_c_cache, reg_temp, 1); \ - mips_emit_rotrv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]) \ + rotate_right_var(reg_a0, arm_to_mips_reg[_rm], \ + reg_temp, arm_to_mips_reg[_rs]) \ #define generate_shift_imm(arm_reg, name, flags_op) \ u32 shift = (opcode >> 7) & 0x1F; \ @@ -1894,7 +1898,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) } \ else \ { \ - mips_emit_ins(reg_a2, reg_zero, 0, 2); \ + emit_align_reg(reg_a2, 2); \ \ for(i = 0; i < 16; i++) \ { \ @@ -2070,20 +2074,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) check_store_reg_pc_thumb(dest_rd); \ } \ -/* - -#define thumb_data_proc_hi(name) \ -{ \ - thumb_decode_hireg_op(); \ - check_load_reg_pc(arm_reg_a0, rs, 4); \ - check_load_reg_pc(arm_reg_a1, rd, 4); \ - generate_op_##name##_reg(arm_to_mips_reg[rd], arm_to_mips_reg[rd], \ - arm_to_mips_reg[rs]); \ - check_store_reg_pc_thumb(rd); \ -} \ - -*/ - #define 
thumb_data_proc_test_hi(name) \ { \ thumb_decode_hireg_op(); \ @@ -2331,7 +2321,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) } \ else \ { \ - mips_emit_ins(reg_a2, reg_zero, 0, 2); \ + emit_align_reg(reg_a2, 2); \ \ for(i = 0; i < 8; i++) \ { \ @@ -2528,6 +2518,71 @@ u8 swi_hle_handle[256] = generate_load_pc(reg_a0, pc); \ mips_emit_sw(reg_a0, reg_base, (REG_PC * 4)) \ +// Some macros to wrap device-specific instructions (callers supply the trailing ';') + +/* MIPS32R2 and PSP support ins, ext, seb, rotr */ +#ifdef MIPS_HAS_R2_INSTS + // Inserts the low `size` bits of rsrc at bit `pos` of rdest (rtemp unused here) + #define insert_bits(rdest, rsrc, rtemp, pos, size) \ + mips_emit_ins(rdest, rsrc, pos, size) + // Doubles a byte into a halfword (rtmp unused here) + #define double_byte(reg, rtmp) \ + mips_emit_ins(reg, reg, 8, 8) + // Clears numbits at LSB position (to align an address) + #define emit_align_reg(reg, numbits) \ + mips_emit_ins(reg, reg_zero, 0, numbits) + // Extract a bitfield (pos, size) to a register + #define extract_bits(rt, rs, pos, size) \ + mips_emit_ext(rt, rs, pos, size) + // Sign-extends the low byte of rs to 32 bits + #define extend_byte_signed(rt, rs) \ + mips_emit_seb(rt, rs) + // Rotates a word right by a constant amount (rtemp unused here) + #define rotate_right(rdest, rsrc, rtemp, amount) \ + mips_emit_rotr(rdest, rsrc, amount) + // Same but variable amount rotation (register) + #define rotate_right_var(rdest, rsrc, rtemp, ramount) \ + mips_emit_rotrv(rdest, rsrc, ramount) +#else + // Inserts the low `size` bits of rsrc at bit `pos` of rdest (clobbers rtemp) + // *assumes dest bits are cleared*: the field is ORed in, not replaced!
+ #define insert_bits(rdest, rsrc, rtemp, pos, size) \ + mips_emit_sll(rtemp, rsrc, 32 - (size)); \ + mips_emit_srl(rtemp, rtemp, 32 - (size) - (pos)); \ + mips_emit_or(rdest, rdest, rtemp) + // Doubles a byte into a halfword (clobbers rtmp; bits 16 and up become reg << 8) + #define double_byte(reg, rtmp) \ + mips_emit_sll(rtmp, reg, 8); \ + mips_emit_andi(reg, reg, 0xff); \ + mips_emit_or(reg, reg, rtmp) + // Clears numbits at LSB position (to align an address) + #define emit_align_reg(reg, numbits) \ + mips_emit_srl(reg, reg, (numbits)); \ + mips_emit_sll(reg, reg, (numbits)) + // Extract a bitfield (pos, size) to a register + #define extract_bits(rt, rs, pos, size) \ + mips_emit_sll(rt, rs, 32 - ((pos) + (size))); \ + mips_emit_srl(rt, rt, 32 - (size)) + // Sign-extends the low byte of rs to 32 bits + #define extend_byte_signed(rt, rs) \ + mips_emit_sll(rt, rs, 24); \ + mips_emit_sra(rt, rt, 24) + // Rotates a word (clobbers rtemp); left shift is masked so amount == 0 never encodes 32 + #define rotate_right(rdest, rsrc, rtemp, amount) \ + mips_emit_sll(rtemp, rsrc, (32 - (amount)) & 0x1F); \ + mips_emit_srl(rdest, rsrc, (amount)); \ + mips_emit_or(rdest, rdest, rtemp) + // Variable rotation using temp reg (rdest and rtemp must both differ from rsrc) + #define rotate_right_var(rdest, rsrc, rtemp, ramount) \ + mips_emit_andi(rtemp, ramount, 0x1F); \ + mips_emit_srlv(rdest, rsrc, rtemp); \ + mips_emit_subu(rtemp, reg_zero, rtemp); \ + mips_emit_addiu(rtemp, rtemp, 32); \ + mips_emit_sllv(rtemp, rsrc, rtemp); \ + mips_emit_or(rdest, rdest, rtemp) + +#endif + // Register save layout as follows: #define ReOff_RegPC (15*4) // REG_PC @@ -2698,7 +2753,7 @@ static void emit_pmemld_stub( // Address checking: jumps to handler if bad region/alignment mips_emit_srl(reg_temp, reg_a0, (32 - regionbits)); if (!aligned && size != 0) { // u8 or aligned u32 dont need to check alignment bits - mips_emit_ins(reg_temp, reg_a0, regionbits, size); // Add 1 or 2 bits of alignment + insert_bits(reg_temp, reg_a0, reg_rv, regionbits, size); // Add 1 or 2 bits of alignment } if (regioncheck || alignment) { // If region and alignment are zero, can skip
mips_emit_xori(reg_temp, reg_temp, regioncheck | (alignment << regionbits)); @@ -2735,7 +2790,7 @@ static void emit_pmemld_stub( // This code call the C routine to map the relevant ROM page emit_save_regs(aligned); mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); - mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff + extract_bits(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff genccall(&load_gamepak_page); mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1); @@ -2750,11 +2805,11 @@ static void emit_pmemld_stub( // Read from flash, is a bit special, fn call emit_mem_call_ds(&read_backup, 0xFFFF); if (!size && signext) { - mips_emit_seb(reg_rv, reg_rv); + extend_byte_signed(reg_rv, reg_rv); } else if (size == 1 && alignment) { - mips_emit_seb(reg_rv, reg_rv); + extend_byte_signed(reg_rv, reg_rv); } else if (size == 2) { - mips_emit_rotr(reg_rv, reg_rv, 8 * alignment); + rotate_right(reg_rv, reg_rv, reg_temp, 8 * alignment); } generate_function_return_swap_delay(); *tr_ptr = translation_ptr; @@ -2770,21 +2825,22 @@ static void emit_pmemld_stub( if (region == 2) { // Can't do EWRAM with an `andi` instruction (18 bits mask) - mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff + extract_bits(reg_a0, reg_a0, 0, 18); // &= 0x3ffff if (!aligned && alignment != 0) { - mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + emit_align_reg(reg_a0, size); // addr & ~1/2 (align to size) } // Need to insert a zero in the addr (due to how it's mapped) mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr } else if (region == 6) { // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous - mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 + extract_bits(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block) if (!aligned && alignment != 0) { - mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + 
emit_align_reg(reg_a0, size); // addr & ~1/2 (align to size) } - mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block - mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay] + extract_bits(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay] + mips_emit_b(bne, reg_zero, reg_temp, 1); // Skip unless last block + generate_swap_delay(); mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block) mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset } else { @@ -2795,16 +2851,13 @@ static void emit_pmemld_stub( } } - // Aligned accesses (or the weird s16u1 case) are just one inst - if (alignment == 0 || (size == 1 && signext)) { - emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); // Delay slot - translation_ptr += 4; - } - else { - // Unaligned accesses (require rotation) need two insts - emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); - translation_ptr += 4; - mips_emit_rotr(reg_rv, reg_rv, alignment * 8); // Delay slot + // Emit load operation + emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); + translation_ptr += 4; + + if (!(alignment == 0 || (size == 1 && signext))) { + // Unaligned accesses require rotation, except for size=1 & signext + rotate_right(reg_rv, reg_rv, reg_temp, alignment * 8); } generate_function_return_swap_delay(); // Return. 
Move prev inst to delay slot @@ -2842,26 +2895,27 @@ static void emit_pmemst_stub( mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); if (doubleaccess) { - mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8) + double_byte(reg_a1, reg_temp); // value = value | (value << 8) } if (region == 2) { // Can't do EWRAM with an `andi` instruction (18 bits mask) - mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff + extract_bits(reg_a0, reg_a0, 0, 18); // &= 0x3ffff if (!aligned && realsize != 0) { - mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size) + emit_align_reg(reg_a0, size); // addr & ~1/2 (align to size) } // Need to insert a zero in the addr (due to how it's mapped) mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr } else if (region == 6) { // VRAM is mirrored every 128KB but the last 32KB is mapped to the previous - mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 + extract_bits(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16 mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block) if (!aligned && realsize != 0) { - mips_emit_ins(reg_a0, reg_zero, 0, realsize);// addr & ~1/2 (align to size) + emit_align_reg(reg_a0, realsize); // addr & ~1/2 (align to size) } - mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block - mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay] + extract_bits(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay] + mips_emit_b(bne, reg_zero, reg_temp, 1); // Skip next inst unless last block + generate_swap_delay(); mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block) mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset } else { @@ -2951,7 +3005,7 @@ static void emit_palette_hdl( mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number)); mips_emit_andi(reg_rv, reg_a0, memmask); // Clear upper bits (mirroring) if (size == 0) { - mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | 
(value << 8) + double_byte(reg_a1, reg_temp); // value = value | (value << 8) } mips_emit_addu(reg_rv, reg_rv, reg_base); @@ -3187,15 +3241,16 @@ static void emit_phand( mips_emit_min(reg_temp, reg_temp, reg_rv);// Do not overflow table #else mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // Check for addr 0x1XXX.. 0xFXXX - mips_emit_b(bne, reg_zero, reg_rv, 2); // Skip two insts (well, cant skip ds) mips_emit_sll(reg_temp, reg_temp, 2); // Table is word indexed + mips_emit_b(bne, reg_zero, reg_rv, 1); // Skip next inst if region is good + generate_swap_delay(); mips_emit_addiu(reg_temp, reg_zero, 15*4);// Simulate ld/st to 0x0FXXX (open/ignore) #endif // Stores or byte-accesses do not care about alignment if (check_alignment) { - // Move alignment bits for the table lookup - mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7) + // Move alignment bits for the table lookup (1 or 2, to bits 6 and 7) + insert_bits(reg_temp, reg_a0, reg_rv, 6, size); } unsigned tbloff = 256 + 3*1024 + 220 + 4 * toff; // Skip regs and RAMs diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 47f219a..48146b3 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -130,9 +130,25 @@ # make sure $16 has the register base for these macros -.macro collapse_flag flag_reg, shift - ins $2, $\flag_reg, \shift, 1 # insert flag into CPSR -.endm +#ifdef MIPS_HAS_R2_INSTS + .macro collapse_flag flag_reg, shift + ins $2, $\flag_reg, \shift, 1 # insert flag into CPSR + .endm + + .macro extract_flag shift, flag_reg + ext $\flag_reg, $1, \shift, 1 # extract flag from CPSR + .endm +#else + .macro collapse_flag flag_reg, shift + sll $1, $\flag_reg, \shift + or $2, $2, $1 + .endm + + .macro extract_flag shift, flag_reg + srl $\flag_reg, $1, \shift + andi $\flag_reg, $\flag_reg, 1 + .endm +#endif .macro collapse_flags lw $2, REG_CPSR($16) # load CPSR @@ -144,10 +160,6 @@ sw $2, REG_CPSR($16) # store CPSR .endm -.macro extract_flag shift, flag_reg - ext $\flag_reg, $1, \shift, 1 # 
extract flag from CPSR -.endm - .macro extract_flags_body # extract flags from $1 extract_flag 31, 20 # load flags extract_flag 30, 21 @@ -403,7 +415,8 @@ execute_swi: sw $4, SUPERVISOR_LR($16) # store next PC in the supervisor's LR collapse_flags # get cpsr in $2 sw $2, SUPERVISOR_SPSR($16) # save cpsr in SUPERVISOR_CPSR - ins $2, $0, 0, 6 # zero out bottom 6 bits of CPSR + srl $2, $2, 6 # zero out bottom 6 bits of CPSR + sll $2, $2, 6 ori $2, 0x13 # set mode to supervisor sw $2, REG_CPSR($16) # write back CPSR save_registers -- cgit v1.2.3