From 6c0eefafd3a531d6bc6d26736926a1514c0c9e6f Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 22 Oct 2011 01:29:31 +0300 Subject: gte: parametrize remaining ops at least the ones from C source, it looks like there could be more. --- libpcsxcore/gte.c | 133 ++++++++++++++++++++++++++++++++++++ libpcsxcore/gte.h | 74 +++++++++++++------- libpcsxcore/new_dynarec/assem_arm.c | 82 ++++++++++++++++++---- libpcsxcore/psxinterpreter.c | 8 +-- 4 files changed, 253 insertions(+), 44 deletions(-) diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index 38e6308..dc56b7f 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -26,6 +26,11 @@ #include "gte.h" #include "psxmem.h" +typedef struct psxCP2Regs { + psxCP2Data CP2D; /* Cop2 data registers */ + psxCP2Ctrl CP2C; /* Cop2 control registers */ +} psxCP2Regs; + #define VX(n) (n < 3 ? regs->CP2D.p[n << 1].sw.l : regs->CP2D.p[9].sw.l) #define VY(n) (n < 3 ? regs->CP2D.p[n << 1].sw.h : regs->CP2D.p[10].sw.l) #define VZ(n) (n < 3 ? regs->CP2D.p[(n << 1) + 1].sw.l : regs->CP2D.p[11].sw.l) @@ -956,3 +961,131 @@ void gteCDP(psxCP2Regs *regs) { gteG2 = limC2(gteMAC2 >> 4); gteB2 = limC3(gteMAC3 >> 4); } + +/* decomposed/parametrized versions for the recompiler */ + +#ifndef FLAGLESS + +void gteSQR_part_noshift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = gteIR1 * gteIR1; + gteMAC2 = gteIR2 * gteIR2; + gteMAC3 = gteIR3 * gteIR3; +} + +void gteSQR_part_shift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = (gteIR1 * gteIR1) >> 12; + gteMAC2 = (gteIR2 * gteIR2) >> 12; + gteMAC3 = (gteIR3 * gteIR3) >> 12; +} + +void gteOP_part_noshift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = (gteR22 * gteIR3) - (gteR33 * gteIR2); + gteMAC2 = (gteR33 * gteIR1) - (gteR11 * gteIR3); + gteMAC3 = (gteR11 * gteIR2) - (gteR22 * gteIR1); +} + +void gteOP_part_shift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = ((gteR22 * gteIR3) - (gteR33 * gteIR2)) >> 12; + gteMAC2 = ((gteR33 * gteIR1) - (gteR11 * gteIR3)) >> 12; + gteMAC3 = ((gteR11 * gteIR2) 
- (gteR22 * gteIR1)) >> 12; +} + +void gteDCPL_part(psxCP2Regs *regs) { + s32 RIR1 = ((s32)gteR * gteIR1) >> 8; + s32 GIR2 = ((s32)gteG * gteIR2) >> 8; + s32 BIR3 = ((s32)gteB * gteIR3) >> 8; + + gteFLAG = 0; + + gteMAC1 = RIR1 + ((gteIR0 * limB1(A1U((s64)gteRFC - RIR1), 0)) >> 12); + gteMAC2 = GIR2 + ((gteIR0 * limB1(A2U((s64)gteGFC - GIR2), 0)) >> 12); + gteMAC3 = BIR3 + ((gteIR0 * limB1(A3U((s64)gteBFC - BIR3), 0)) >> 12); +} + +void gteGPF_part_noshift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = gteIR0 * gteIR1; + gteMAC2 = gteIR0 * gteIR2; + gteMAC3 = gteIR0 * gteIR3; +} + +void gteGPF_part_shift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = (gteIR0 * gteIR1) >> 12; + gteMAC2 = (gteIR0 * gteIR2) >> 12; + gteMAC3 = (gteIR0 * gteIR3) >> 12; +} + +#endif // !FLAGLESS + +void gteGPL_part_noshift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = A1((s64)gteMAC1 + (gteIR0 * gteIR1)); + gteMAC2 = A2((s64)gteMAC2 + (gteIR0 * gteIR2)); + gteMAC3 = A3((s64)gteMAC3 + (gteIR0 * gteIR3)); +} + +void gteGPL_part_shift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = A1((s64)gteMAC1 + ((gteIR0 * gteIR1) >> 12)); + gteMAC2 = A2((s64)gteMAC2 + ((gteIR0 * gteIR2) >> 12)); + gteMAC3 = A3((s64)gteMAC3 + ((gteIR0 * gteIR3) >> 12)); +} + +void gteDPCS_part_noshift(psxCP2Regs *regs) { + int shift = 0; + + gteFLAG = 0; + + gteMAC1 = ((gteR << 16) + (gteIR0 * limB1(A1U((s64)gteRFC - (gteR << 4)) << (12 - shift), 0))) >> 12; + gteMAC2 = ((gteG << 16) + (gteIR0 * limB2(A2U((s64)gteGFC - (gteG << 4)) << (12 - shift), 0))) >> 12; + gteMAC3 = ((gteB << 16) + (gteIR0 * limB3(A3U((s64)gteBFC - (gteB << 4)) << (12 - shift), 0))) >> 12; +} + +void gteDPCS_part_shift(psxCP2Regs *regs) { + int shift = 12; + + gteFLAG = 0; + + gteMAC1 = ((gteR << 16) + (gteIR0 * limB1(A1U((s64)gteRFC - (gteR << 4)) << (12 - shift), 0))) >> 12; + gteMAC2 = ((gteG << 16) + (gteIR0 * limB2(A2U((s64)gteGFC - (gteG << 4)) << (12 - shift), 0))) >> 12; + gteMAC3 = ((gteB << 16) + (gteIR0 * limB3(A3U((s64)gteBFC - 
(gteB << 4)) << (12 - shift), 0))) >> 12; +} + +void gteINTPL_part_noshift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = ((gteIR1 << 12) + (gteIR0 * limB1(A1U((s64)gteRFC - gteIR1), 0))); + gteMAC2 = ((gteIR2 << 12) + (gteIR0 * limB2(A2U((s64)gteGFC - gteIR2), 0))); + gteMAC3 = ((gteIR3 << 12) + (gteIR0 * limB3(A3U((s64)gteBFC - gteIR3), 0))); +} + +void gteINTPL_part_shift(psxCP2Regs *regs) { + gteFLAG = 0; + + gteMAC1 = ((gteIR1 << 12) + (gteIR0 * limB1(A1U((s64)gteRFC - gteIR1), 0))) >> 12; + gteMAC2 = ((gteIR2 << 12) + (gteIR0 * limB2(A2U((s64)gteGFC - gteIR2), 0))) >> 12; + gteMAC3 = ((gteIR3 << 12) + (gteIR0 * limB3(A3U((s64)gteBFC - gteIR3), 0))) >> 12; +} + +void gteMACtoRGB(psxCP2Regs *regs) { + gteRGB0 = gteRGB1; + gteRGB1 = gteRGB2; + gteCODE2 = gteCODE; + gteR2 = limC1(gteMAC1 >> 4); + gteG2 = limC2(gteMAC2 >> 4); + gteB2 = limC3(gteMAC3 >> 4); +} + diff --git a/libpcsxcore/gte.h b/libpcsxcore/gte.h index 96f8542..7646226 100644 --- a/libpcsxcore/gte.h +++ b/libpcsxcore/gte.h @@ -44,6 +44,15 @@ #define gteGPL gteGPL_nf #define gteNCCT gteNCCT_nf +#define gteGPL_part_noshift gteGPL_part_noshift_nf +#define gteGPL_part_shift gteGPL_part_shift_nf +#define gteDPCS_part_noshift gteDPCS_part_noshift_nf +#define gteDPCS_part_shift gteDPCS_part_shift_nf +#define gteINTPL_part_noshift gteINTPL_part_noshift_nf +#define gteINTPL_part_shift gteINTPL_part_shift_nf +#define gteMACtoRGB gteMACtoRGB_nf + +#undef __GTE_H__ #endif #ifndef __GTE_H__ @@ -56,10 +65,7 @@ extern "C" { #include "psxcommon.h" #include "r3000a.h" -typedef struct { - psxCP2Data CP2D; /* Cop2 data registers */ - psxCP2Ctrl CP2C; /* Cop2 control registers */ -} psxCP2Regs; +struct psxCP2Regs; void gteMFC2(); void gteCFC2(); @@ -68,28 +74,44 @@ void gteCTC2(); void gteLWC2(); void gteSWC2(); -void gteRTPS(psxCP2Regs *regs); -void gteOP(psxCP2Regs *regs); -void gteNCLIP(psxCP2Regs *regs); -void gteDPCS(psxCP2Regs *regs); -void gteINTPL(psxCP2Regs *regs); -void gteMVMVA(psxCP2Regs *regs); -void 
gteNCDS(psxCP2Regs *regs); -void gteNCDT(psxCP2Regs *regs); -void gteCDP(psxCP2Regs *regs); -void gteNCCS(psxCP2Regs *regs); -void gteCC(psxCP2Regs *regs); -void gteNCS(psxCP2Regs *regs); -void gteNCT(psxCP2Regs *regs); -void gteSQR(psxCP2Regs *regs); -void gteDCPL(psxCP2Regs *regs); -void gteDPCT(psxCP2Regs *regs); -void gteAVSZ3(psxCP2Regs *regs); -void gteAVSZ4(psxCP2Regs *regs); -void gteRTPT(psxCP2Regs *regs); -void gteGPF(psxCP2Regs *regs); -void gteGPL(psxCP2Regs *regs); -void gteNCCT(psxCP2Regs *regs); +void gteRTPS(struct psxCP2Regs *regs); +void gteOP(struct psxCP2Regs *regs); +void gteNCLIP(struct psxCP2Regs *regs); +void gteDPCS(struct psxCP2Regs *regs); +void gteINTPL(struct psxCP2Regs *regs); +void gteMVMVA(struct psxCP2Regs *regs); +void gteNCDS(struct psxCP2Regs *regs); +void gteNCDT(struct psxCP2Regs *regs); +void gteCDP(struct psxCP2Regs *regs); +void gteNCCS(struct psxCP2Regs *regs); +void gteCC(struct psxCP2Regs *regs); +void gteNCS(struct psxCP2Regs *regs); +void gteNCT(struct psxCP2Regs *regs); +void gteSQR(struct psxCP2Regs *regs); +void gteDCPL(struct psxCP2Regs *regs); +void gteDPCT(struct psxCP2Regs *regs); +void gteAVSZ3(struct psxCP2Regs *regs); +void gteAVSZ4(struct psxCP2Regs *regs); +void gteRTPT(struct psxCP2Regs *regs); +void gteGPF(struct psxCP2Regs *regs); +void gteGPL(struct psxCP2Regs *regs); +void gteNCCT(struct psxCP2Regs *regs); + +void gteSQR_part_noshift(struct psxCP2Regs *regs); +void gteSQR_part_shift(struct psxCP2Regs *regs); +void gteOP_part_noshift(struct psxCP2Regs *regs); +void gteOP_part_shift(struct psxCP2Regs *regs); +void gteDCPL_part(struct psxCP2Regs *regs); +void gteGPF_part_noshift(struct psxCP2Regs *regs); +void gteGPF_part_shift(struct psxCP2Regs *regs); + +void gteGPL_part_noshift(struct psxCP2Regs *regs); +void gteGPL_part_shift(struct psxCP2Regs *regs); +void gteDPCS_part_noshift(struct psxCP2Regs *regs); +void gteDPCS_part_shift(struct psxCP2Regs *regs); +void gteINTPL_part_noshift(struct psxCP2Regs 
*regs); +void gteINTPL_part_shift(struct psxCP2Regs *regs); +void gteMACtoRGB(struct psxCP2Regs *regs); #ifdef __cplusplus } diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 0c16fc2..6b663e5 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -20,6 +20,10 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #ifdef PCSX +#include "../gte.h" +#define FLAGLESS +#include "../gte.h" +#undef FLAGLESS #include "../gte_arm.h" #include "../gte_neon.h" #include "pcnt.h" @@ -4456,15 +4460,34 @@ static void c2op_epilogue(u_int op,u_int reglist) restore_regs_all(reglist); } +static void c2op_call_MACtoIR(int lm,int need_flags) +{ + if(need_flags) + emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); + else + emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); +} + +static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) +{ + emit_call((int)func); + // func is C code and trashes r0 + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); + emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); +} + static void c2op_assemble(int i,struct regstat *i_regs) { signed char temp=get_reg(i_regs->regmap,-1); u_int c2op=source[i]&0x3f; - u_int hr,reglist=0; + u_int hr,reglist_full=0,reglist; int need_flags,need_ir; for(hr=0;hr<HOST_REGS;hr++) { - if(i_regs->regmap[hr]>=0) reglist|=1<<hr; + if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr; } + reglist=reglist_full&0x100f; if (gte_handlers[c2op]!=NULL) { need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works @@ -4475,14 +4498,14 @@ static void c2op_assemble(int i,struct regstat *i_regs) // let's take more risk here need_flags=need_flags&&gte_reads_flags; #endif + int shift = (source[i] >> 19) & 1; + int lm = (source[i] >> 10) & 1; switch(c2op) { case GTE_MVMVA: { - int shift = (source[i] >> 19) & 1; int v = (source[i] >> 15) & 3; int cv = (source[i] >> 13) & 3; int mx = (source[i] >> 17) & 3; - int lm = (source[i] >> 10) & 1; - reglist&=0x10ff; // +{r4-r7} + 
reglist=reglist_full&0x10ff; // +{r4-r7} c2op_prologue(c2op,reglist); /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ if(v<3) @@ -4515,21 +4538,52 @@ static void c2op_assemble(int i,struct regstat *i_regs) emit_movimm(shift,1); emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); } - if(need_flags||need_ir) { - if(need_flags) - emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); - else - emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); // lm0 borked - } + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); #endif break; } + case GTE_OP: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DPCS: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); + break; + case GTE_INTPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); + break; + case GTE_SQR: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DCPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); + break; + case GTE_GPF: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); + break; + case GTE_GPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); + break; default: - reglist&=0x100f; c2op_prologue(c2op,reglist); - emit_movimm(source[i],1); // opcode - emit_writeword(1,(int)&psxRegs.code); + //emit_movimm(source[i],1); // opcode + //emit_writeword(1,(int)&psxRegs.code); 
emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); break; } diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index f9833a4..66730d9 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -46,7 +46,7 @@ void (*psxBSC[64])(); void (*psxSPC[64])(); void (*psxREG[32])(); void (*psxCP0[32])(); -void (*psxCP2[64])(psxCP2Regs *regs); +void (*psxCP2[64])(struct psxCP2Regs *regs); void (*psxCP2BSC[32])(); static void delayRead(int reg, u32 bpc) { @@ -833,10 +833,10 @@ void psxCOP0() { } void psxCOP2() { - psxCP2[_Funct_]((psxCP2Regs *)&psxRegs.CP2D); + psxCP2[_Funct_]((struct psxCP2Regs *)&psxRegs.CP2D); } -void psxBASIC(psxCP2Regs *regs) { +void psxBASIC(struct psxCP2Regs *regs) { psxCP2BSC[_Rs_](); } @@ -882,7 +882,7 @@ void (*psxCP0[32])() = { psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL, psxNULL }; -void (*psxCP2[64])(psxCP2Regs *regs) = { +void (*psxCP2[64])(struct psxCP2Regs *regs) = { psxBASIC, gteRTPS , psxNULL , psxNULL, psxNULL, psxNULL , gteNCLIP, psxNULL, // 00 psxNULL , psxNULL , psxNULL , psxNULL, gteOP , psxNULL , psxNULL , psxNULL, // 08 gteDPCS , gteINTPL, gteMVMVA, gteNCDS, gteCDP , psxNULL , gteNCDT , psxNULL, // 10 -- cgit v1.2.3