From b1be1eeee94d3547c20719acfa6b0082404897f1 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 11 Oct 2011 23:03:03 +0300 Subject: inline/parametrize rootcounter reads makes rcnt1 hack impossible though, overclock PSX instead --- libpcsxcore/new_dynarec/assem_arm.c | 75 +++++++++++++++----- libpcsxcore/new_dynarec/emu_if.c | 3 + libpcsxcore/new_dynarec/emu_if.h | 6 ++ libpcsxcore/new_dynarec/linkage_arm.s | 79 ++++++++++++++++++++- libpcsxcore/new_dynarec/pcsxmem.c | 116 ++++++++++++++++++++++++------- libpcsxcore/new_dynarec/pcsxmem.h | 3 + libpcsxcore/new_dynarec/pcsxmem_inline.c | 55 +++++++++++++++ 7 files changed, 292 insertions(+), 45 deletions(-) create mode 100644 libpcsxcore/new_dynarec/pcsxmem_inline.c (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 93dbf92..c0e4116 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -2158,6 +2158,14 @@ void emit_shrcc_imm(int rs,u_int imm,int rt) output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); } +void emit_shrne_imm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); +} + void emit_negmi(int rs, int rt) { assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]); @@ -2711,6 +2719,23 @@ static void pass_args(int a0, int a1) } } +static void mov_loadtype_adj(int type,int rs,int rt) +{ + switch(type) { + case LOADB_STUB: emit_signextend8(rs,rt); break; + case LOADBU_STUB: emit_andimm(rs,0xff,rt); break; + case LOADH_STUB: emit_signextend16(rs,rt); break; + case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break; + case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break; + default: assert(0); + } +} + +#ifdef PCSX +#include "pcsxmem.h" +#include "pcsxmem_inline.c" +#endif + do_readstub(int n) { assem_debug("do_readstub %x\n",start+stubs[n][3]*4); @@ -2787,13 +2812,7 @@ 
do_readstub(int n) emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2); emit_call(handler); if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { - switch(type) { - case LOADB_STUB: emit_signextend8(0,rt); break; - case LOADBU_STUB: emit_andimm(0,0xff,rt); break; - case LOADH_STUB: emit_signextend16(0,rt); break; - case LOADHU_STUB: emit_andimm(0,0xffff,rt); break; - case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break; - } + mov_loadtype_adj(type,0,rt); } if(restore_jump) set_jump_target(restore_jump,(int)out); @@ -2918,7 +2937,10 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); #ifdef PCSX - u_int handler,host_addr=0; + u_int handler,host_addr=0,is_dynamic,far_call=0; + int cc=get_reg(regmap,CCREG); + if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) + return; handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr); if (handler==0) { if(rt<0) @@ -2935,6 +2957,15 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i } return; } + is_dynamic=pcsxmem_is_handler_dynamic(addr); + if(is_dynamic) { + if(type==LOADB_STUB||type==LOADBU_STUB) + handler=(int)jump_handler_read8; + if(type==LOADH_STUB||type==LOADHU_STUB) + handler=(int)jump_handler_read16; + if(type==LOADW_STUB) + handler=(int)jump_handler_read32; + } // call a memhandler if(rt>=0) @@ -2944,22 +2975,30 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i emit_movimm(addr,0); else if(rs!=0) emit_mov(rs,0); - int cc=get_reg(regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_readword((int)&last_count,3); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); - emit_add(2,3,3); - emit_writeword(3,(int)&Count); - int offset=(int)handler-(int)out-8; if(offset<-33554432||offset>=33554432) { // unreachable memhandler, a plugin func perhaps - emit_movimm(handler,1); - emit_callreg(1); + emit_movimm(handler,12); + far_call=1; + } + if(cc<0) + 
emit_loadreg(CCREG,2); + if(is_dynamic) { + emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); } + else { + emit_readword((int)&last_count,3); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + emit_add(2,3,2); + emit_writeword(2,(int)&Count); + } + + if(far_call) + emit_callreg(12); else emit_call(handler); + if(rt>=0) { switch(type) { case LOADB_STUB: emit_signextend8(0,rt); break; diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 66afe12..6957689 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -139,6 +139,8 @@ void new_dyna_restore(void) int i; for (i = 0; i < PSXINT_COUNT; i++) event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle; + + new_dyna_pcsx_mem_load_state(); } void *gte_handlers[64]; @@ -311,6 +313,7 @@ void invalidate_all_pages() {} void invalidate_block(unsigned int block) {} void new_dyna_pcsx_mem_init(void) {} void new_dyna_pcsx_mem_reset(void) {} +void new_dyna_pcsx_mem_load_state(void) {} #endif #ifdef DRC_DBG diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index beb517c..9078150 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -46,6 +46,12 @@ void jump_handler_write32(u32 addr, u32 data, u32 cycles, u32 *table); void jump_handler_write_h(u32 addr, u32 data, u32 cycles, void *handler); void jump_handle_swl(u32 addr, u32 data, u32 cycles); void jump_handle_swr(u32 addr, u32 data, u32 cycles); +void rcnt0_read_count_m0(u32 addr, u32, u32 cycles); +void rcnt0_read_count_m1(u32 addr, u32, u32 cycles); +void rcnt1_read_count_m0(u32 addr, u32, u32 cycles); +void rcnt1_read_count_m1(u32 addr, u32, u32 cycles); +void rcnt2_read_count_m0(u32 addr, u32, u32 cycles); +void rcnt2_read_count_m1(u32 addr, u32, u32 cycles); extern unsigned int address; extern void *psxH_ptr; diff --git a/libpcsxcore/new_dynarec/linkage_arm.s 
b/libpcsxcore/new_dynarec/linkage_arm.s index 23a69cf..19c9686 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.s +++ b/libpcsxcore/new_dynarec/linkage_arm.s @@ -51,13 +51,14 @@ rdram = 0x80000000 .global psxH_ptr .global inv_code_start .global inv_code_end + .global rcnts .bss .align 4 .type dynarec_local, %object .size dynarec_local, dynarec_local_end-dynarec_local dynarec_local: - .space dynarec_local_end-dynarec_local /*0x400630*/ + .space dynarec_local_end-dynarec_local next_interupt = dynarec_local + 64 .type next_interupt, %object .size next_interupt, 4 @@ -120,7 +121,12 @@ intCycle = interrupt + 4 .size intCycle, 256 psxRegs_end = intCycle + 256 -mem_rtab = psxRegs_end +rcnts = psxRegs_end + .type rcnts, %object + .size rcnts, 7*4*4 +rcnts_end = rcnts + 7*4*4 + +mem_rtab = rcnts_end .type mem_rtab, %object .size mem_rtab, 4 mem_wtab = mem_rtab + 4 @@ -166,6 +172,26 @@ FCR31 = align0 .endif .endm +.macro mov_16 reg imm +.if HAVE_ARMV7 + movw \reg, #\imm +.else + mov \reg, #(\imm & 0x00ff) + orr \reg, #(\imm & 0xff00) +.endif +.endm + +.macro mov_24 reg imm +.if HAVE_ARMV7 + movw \reg, #(\imm & 0xffff) + movt \reg, #(\imm >> 16) +.else + mov \reg, #(\imm & 0x0000ff) + orr \reg, #(\imm & 0x00ff00) + orr \reg, #(\imm & 0xff0000) +.endif +.endm + .macro dyna_linker_main /* r0 = virtual target address */ /* r1 = instruction to patch */ @@ -772,6 +798,12 @@ new_dyna_start: .global jump_handler_write_h .global jump_handle_swl .global jump_handle_swr +.global rcnt0_read_count_m0 +.global rcnt0_read_count_m1 +.global rcnt1_read_count_m0 +.global rcnt1_read_count_m1 +.global rcnt2_read_count_m0 +.global rcnt2_read_count_m1 .macro pcsx_read_mem readop tab_shift @@ -922,4 +954,47 @@ jump_handle_swr: bx lr @ TODO? 
+.macro rcntx_read_mode0 num + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #rcnts-dynarec_local+6*4+7*4*\num] @ cycleStart + mov r0, r2, lsl #16 + sub r0, r3, lsl #16 + lsr r0, #16 + bx lr +.endm + +rcnt0_read_count_m0: + rcntx_read_mode0 0 + +rcnt1_read_count_m0: + rcntx_read_mode0 1 + +rcnt2_read_count_m0: + rcntx_read_mode0 2 + +rcnt0_read_count_m1: + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #rcnts-dynarec_local+6*4+7*4*0] @ cycleStart + mov_16 r1, 0x3334 + sub r2, r2, r3 + mul r0, r1, r2 @ /= 5 + lsr r0, #16 + bx lr + +rcnt1_read_count_m1: + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #rcnts-dynarec_local+6*4+7*4*1] + mov_24 r1, 0x1e6cde + sub r2, r2, r3 + umull r3, r0, r1, r2 @ ~ /= hsync_cycles, max ~0x1e6cdd + bx lr + +rcnt2_read_count_m1: + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #rcnts-dynarec_local+6*4+7*4*2] + mov r0, r2, lsl #16-3 + sub r0, r3, lsl #16-3 + lsr r0, #16 @ /= 8 + bx lr + @ vim:filetype=armasm diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 70f1376..586c760 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -16,6 +16,30 @@ //#define memprintf printf #define memprintf(...) 
+static u32 *mem_readtab; +static u32 *mem_writetab; +static u32 mem_iortab[(1+2+4) * 0x1000 / 4]; +static u32 mem_iowtab[(1+2+4) * 0x1000 / 4]; +static u32 mem_ffwtab[(1+2+4) * 0x1000 / 4]; +//static u32 mem_unmrtab[(1+2+4) * 0x1000 / 4]; +static u32 mem_unmwtab[(1+2+4) * 0x1000 / 4]; + +static void map_item(u32 *out, const void *h, u32 flag) +{ + u32 hv = (u32)h; + if (hv & 1) + fprintf(stderr, "%p has LSB set\n", h); + *out = (hv >> 1) | (flag << 31); +} + +// size must be power of 2, at least 4k +#define map_l1_mem(tab, i, addr, size, base) \ + map_item(&tab[((addr)>>12) + i], (u8 *)(base) - (u32)(addr) - ((i << 12) & ~(size - 1)), 0) + +#define IOMEM32(a) (((a) & 0xfff) / 4) +#define IOMEM16(a) (0x1000/4 + (((a) & 0xfff) / 2)) +#define IOMEM8(a) (0x1000/4 + 0x1000/2 + ((a) & 0xfff)) + static u8 unmapped_mem[0x1000]; u32 read_mem_dummy() @@ -53,12 +77,60 @@ static void io_write_sio32(u32 value) sioWrite8((unsigned char)(value >> 24)); } +static void map_rcnt_rcount0(u32 mode) +{ + if (mode & 0x01) { // gate + map_item(&mem_iortab[IOMEM32(0x1100)], &psxH[0x1000], 0); + map_item(&mem_iortab[IOMEM16(0x1100)], &psxH[0x1000], 0); + } + else if (mode & 0x100) { // pixel clock + map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m1, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m1, 1); + } + else { + map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m0, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m0, 1); + } +} + +static void map_rcnt_rcount1(u32 mode) +{ + if (mode & 0x01) { // gate + map_item(&mem_iortab[IOMEM32(0x1110)], &psxH[0x1000], 0); + map_item(&mem_iortab[IOMEM16(0x1110)], &psxH[0x1000], 0); + } + else if (mode & 0x100) { // hcnt + map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m1, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m1, 1); + } + else { + map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m0, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m0, 1); + } +} + 
+static void map_rcnt_rcount2(u32 mode) +{ + if (mode & 0x01) { // gate + map_item(&mem_iortab[IOMEM32(0x1120)], &psxH[0x1000], 0); + map_item(&mem_iortab[IOMEM16(0x1120)], &psxH[0x1000], 0); + } + else if (mode & 0x200) { // clk/8 + map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m1, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m1, 1); + } + else { + map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m0, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m0, 1); + } +} + #define make_rcnt_funcs(i) \ static u32 io_rcnt_read_count##i() { return psxRcntRcount(i); } \ static u32 io_rcnt_read_mode##i() { return psxRcntRmode(i); } \ static u32 io_rcnt_read_target##i() { return psxRcntRtarget(i); } \ static void io_rcnt_write_count##i(u32 val) { psxRcntWcount(i, val & 0xffff); } \ -static void io_rcnt_write_mode##i(u32 val) { psxRcntWmode(i, val); } \ +static void io_rcnt_write_mode##i(u32 val) { psxRcntWmode(i, val); map_rcnt_rcount##i(val); } \ static void io_rcnt_write_target##i(u32 val) { psxRcntWtarget(i, val & 0xffff); } make_rcnt_funcs(0) @@ -137,30 +209,6 @@ static void io_spu_write32(u32 value) wfunc(a + 2, value >> 16); } -static u32 *mem_readtab; -static u32 *mem_writetab; -static u32 mem_iortab[(1+2+4) * 0x1000 / 4]; -static u32 mem_iowtab[(1+2+4) * 0x1000 / 4]; -static u32 mem_ffwtab[(1+2+4) * 0x1000 / 4]; -//static u32 mem_unmrtab[(1+2+4) * 0x1000 / 4]; -static u32 mem_unmwtab[(1+2+4) * 0x1000 / 4]; - -static void map_item(u32 *out, const void *h, u32 flag) -{ - u32 hv = (u32)h; - if (hv & 1) - fprintf(stderr, "%p has LSB set\n", h); - *out = (hv >> 1) | (flag << 31); -} - -// size must be power of 2, at least 4k -#define map_l1_mem(tab, i, addr, size, base) \ - map_item(&tab[((addr)>>12) + i], (u8 *)(base) - (u32)(addr) - ((i << 12) & ~(size - 1)), 0) - -#define IOMEM32(a) (((a) & 0xfff) / 4) -#define IOMEM16(a) (0x1000/4 + (((a) & 0xfff) / 2)) -#define IOMEM8(a) (0x1000/4 + 0x1000/2 + ((a) & 0xfff)) - static void 
map_ram_write(void) { int i; @@ -203,6 +251,22 @@ static void write_biu(u32 value) } } +void new_dyna_pcsx_mem_load_state(void) +{ + map_rcnt_rcount0(rcnts[0].mode); + map_rcnt_rcount1(rcnts[1].mode); + map_rcnt_rcount2(rcnts[2].mode); +} + +int pcsxmem_is_handler_dynamic(u_int addr) +{ + if ((addr & 0xfffff000) != 0x1f801000) + return 0; + + addr &= 0xffff; + return addr == 0x1100 || addr == 0x1110 || addr == 0x1120; +} + void new_dyna_pcsx_mem_init(void) { int i; @@ -367,6 +431,8 @@ void new_dyna_pcsx_mem_init(void) mem_rtab = mem_readtab; mem_wtab = mem_writetab; + + new_dyna_pcsx_mem_load_state(); } void new_dyna_pcsx_mem_reset(void) diff --git a/libpcsxcore/new_dynarec/pcsxmem.h b/libpcsxcore/new_dynarec/pcsxmem.h index 74b0560..a3b08e1 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.h +++ b/libpcsxcore/new_dynarec/pcsxmem.h @@ -1,3 +1,6 @@ void new_dyna_pcsx_mem_init(void); void new_dyna_pcsx_mem_reset(void); +void new_dyna_pcsx_mem_load_state(void); + +int pcsxmem_is_handler_dynamic(u_int addr); diff --git a/libpcsxcore/new_dynarec/pcsxmem_inline.c b/libpcsxcore/new_dynarec/pcsxmem_inline.c new file mode 100644 index 0000000..210ce0e --- /dev/null +++ b/libpcsxcore/new_dynarec/pcsxmem_inline.c @@ -0,0 +1,55 @@ +/* + * (C) Gražvydas "notaz" Ignotas, 2011 + * + * This work is licensed under the terms of GNU GPL version 2 or later. + * See the COPYING file in the top-level directory.
+ */
+// Emits inline code for a load from constant address addr; returns 1 if the read was handled here (or its result is unused), 0 so the caller falls back to the generic read path.
+static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, int rt)
+{ // type: LOAD*_STUB kind; cc_adj: cycle adjustment to add; cc: host reg holding the cycle count (<0 if none); rs: not used here; rt: destination host reg (<0 if the result is unused)
+  if ((addr & 0xfffff000) == 0x1f801000) {
+    u_int t;
+    switch (addr & 0xffff) {
+    case 0x1120: // rcnt2 count
+      if (rt < 0) goto dont_care;
+      if (cc < 0) return 0; // cycle count is not in a host register, use the generic path
+      emit_readword((int)&rcnts[2].mode, HOST_TEMPREG);
+      emit_readword((int)&rcnts[2].cycleStart, rt);
+      emit_testimm(HOST_TEMPREG, 0x200); // clk/8 mode bit; sets the condition consumed by emit_shrne_imm below
+      emit_readword((int)&last_count, HOST_TEMPREG);
+      emit_sub(HOST_TEMPREG, rt, HOST_TEMPREG); // last_count - cycleStart
+      emit_add(HOST_TEMPREG, cc, rt); // accumulate straight into rt so the result is there even when cc_adj is 0
+      if (cc_adj)
+        emit_addimm(rt, cc_adj, rt);
+      emit_shrne_imm(rt, 3, rt); // divide by 8 only in clk/8 mode
+      mov_loadtype_adj(type!=LOADW_STUB?type:LOADHU_STUB, rt, rt); // counter is 16 bits wide: zero-extend word reads, as the rcnt2_read_count_m* handlers do
+      goto hit;
+    case 0x1104:
+    case 0x1114:
+    case 0x1124: // rcnt mode
+      if (rt < 0) return 0; // the read modifies the stored mode, so it is not skipped; generic path handles it
+      t = (addr >> 4) & 3; // counter index taken from the register address
+      emit_readword((int)&rcnts[t].mode, rt);
+      emit_andimm(rt, ~0x1800, HOST_TEMPREG); // reading the mode clears bits 0x1800 in the stored value
+      emit_writeword(HOST_TEMPREG, (int)&rcnts[t].mode);
+      mov_loadtype_adj(type, rt, rt);
+      goto hit;
+    }
+  }
+  else {
+    if (rt < 0)
+      goto dont_care;
+  }
+
+  return 0;
+
+hit:
+  assem_debug("pcsx_direct_read %08x end\n", addr);
+  return 1;
+
+dont_care:
+  assem_debug("pcsx_direct_read %08x dummy\n", addr);
+  return 1;
+}
+
+// vim:shiftwidth=2:expandtab
-- 
cgit v1.2.3