about | summary | refs | log | tree | commit | diff
path: root/libpcsxcore
diff options
context:
space:
mode:
Diffstat (limited to 'libpcsxcore')
-rw-r--r-- libpcsxcore/gte.c 6
-rw-r--r-- libpcsxcore/new_dynarec/assem_arm.c 9
-rw-r--r-- libpcsxcore/new_dynarec/emu_if.c 11
-rw-r--r-- libpcsxcore/new_dynarec/emu_if.h 1
-rw-r--r-- libpcsxcore/new_dynarec/new_dynarec.c 81
5 files changed, 100 insertions, 8 deletions
diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c
index 0acca65..16e0a89 100644
--- a/libpcsxcore/gte.c
+++ b/libpcsxcore/gte.c
@@ -212,7 +212,11 @@ static inline s32 LIM(s32 value, s32 max, s32 min, u32 flag_unused) {
return ret;
}
-#define limE(a) ((a) & 0x1ffff)
+static inline u32 limE(u32 result) { /* Clamp result to the 17-bit ceiling 0x1ffff; in-range values are returned unchanged. */
+ if (result > 0x1ffff) /* too large: saturate at the ceiling instead of discarding the high bits as a mask would */
+ return 0x1ffff;
+ return result;
+}
#endif
diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c
index 2a0a214..adbde59 100644
--- a/libpcsxcore/new_dynarec/assem_arm.c
+++ b/libpcsxcore/new_dynarec/assem_arm.c
@@ -3796,6 +3796,7 @@ void c2op_assemble(int i,struct regstat *i_regs)
signed char temp=get_reg(i_regs->regmap,-1);
u_int c2op=source[i]&0x3f;
u_int hr,reglist=0;
+ int need_flags;
for(hr=0;hr<HOST_REGS;hr++) {
if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
}
@@ -3809,7 +3810,13 @@ void c2op_assemble(int i,struct regstat *i_regs)
emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
emit_writeword(1,(int)&psxRegs.code);
- emit_call((int)gte_handlers[c2op]);
+ need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
+ assem_debug("gte unneeded %016llx, need_flags %d\n",gte_unneeded[i+1],need_flags);
+#ifdef ARMv5_ONLY
+ // let's take more risk here
+ need_flags=need_flags&&gte_reads_flags;
+#endif
+ emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
}
if(i>=slen-1||itype[i+1]!=C2OP)
diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c
index fbd4f96..f65e5bd 100644
--- a/libpcsxcore/new_dynarec/emu_if.c
+++ b/libpcsxcore/new_dynarec/emu_if.c
@@ -154,6 +154,17 @@ void *gte_handlers_nf[64] = {
NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38
};
+const char *gte_regnames[64] = { /* Mnemonics for GTE operations, indexed by the low 6 bits of the instruction word (source[i]&0x3f). Despite the identifier these label operations, not registers. A NULL entry means "no name"; the recompiler then prints "COP2 %x" instead. */
+ NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00
+ NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08
+ "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10
+ NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18
+ "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20
+ "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28
+ "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30
+ NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38
+};
+
/* from gte.txt.. not sure if this is any good. */
const char gte_cycletab[64] = {
/* 1 2 3 4 5 6 7 8 9 a b c d e f */
diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h
index e16cca5..88749be 100644
--- a/libpcsxcore/new_dynarec/emu_if.h
+++ b/libpcsxcore/new_dynarec/emu_if.h
@@ -31,6 +31,7 @@ extern int reg_cop0[];
extern int reg_cop2d[], reg_cop2c[];
extern void *gte_handlers[64];
extern void *gte_handlers_nf[64];
+extern const char *gte_regnames[64];
extern const char gte_cycletab[64];
/* dummy */
diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c
index 666b4d4..716b1d4 100644
--- a/libpcsxcore/new_dynarec/new_dynarec.c
+++ b/libpcsxcore/new_dynarec/new_dynarec.c
@@ -80,6 +80,10 @@ struct ll_entry
u_char dep1[MAXBLOCK];
u_char dep2[MAXBLOCK];
u_char lt1[MAXBLOCK];
+ static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs; per-insn mask of GTE regs read (data reg n -> bit n, ctl reg n -> bit n+32)
+ static uint64_t gte_rt[MAXBLOCK]; // per-insn mask of GTE regs written, same bit layout; every C2OP sets bit 63 ("every op changes flags")
+ static uint64_t gte_unneeded[MAXBLOCK]; // per-insn mask filled in by unneeded_registers(); c2op_assemble tests bit 63 of entry i+1 to decide whether the flag-computing handler is needed
+ static int gte_reads_flags; // gte flag read encountered (set when a CFC2 of ctl reg 31 is decoded; reset in new_dynarec_clear_full)
int imm[MAXBLOCK];
u_int ba[MAXBLOCK];
char likely[MAXBLOCK];
@@ -6710,8 +6714,8 @@ static void pagespan_ds()
void unneeded_registers(int istart,int iend,int r)
{
int i;
- uint64_t u,uu,b,bu;
- uint64_t temp_u,temp_uu;
+ uint64_t u,uu,gte_u,b,bu,gte_bu;
+ uint64_t temp_u,temp_uu,temp_gte_u;
uint64_t tdep;
if(iend==slen-1) {
u=1;uu=1;
@@ -6720,6 +6724,8 @@ void unneeded_registers(int istart,int iend,int r)
uu=unneeded_reg_upper[iend+1];
u=1;uu=1;
}
+ gte_u=temp_gte_u=0;
+
for (i=iend;i>=istart;i--)
{
//printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
@@ -6733,6 +6739,7 @@ void unneeded_registers(int istart,int iend,int r)
// Branch out of this block, flush all regs
u=1;
uu=1;
+ gte_u=0;
/* Hexagon hack
if(itype[i]==UJUMP&&rt1[i]==31)
{
@@ -6764,17 +6771,21 @@ void unneeded_registers(int istart,int iend,int r)
uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
u|=1;uu|=1;
+ gte_u|=gte_rt[i+1];
+ gte_u&=~gte_rs[i+1];
// If branch is "likely" (and conditional)
// then we skip the delay slot on the fall-thru path
if(likely[i]) {
if(i<slen-1) {
u&=unneeded_reg[i+2];
uu&=unneeded_reg_upper[i+2];
+ gte_u&=gte_unneeded[i+2];
}
else
{
u=1;
uu=1;
+ gte_u=0;
}
}
}
@@ -6788,10 +6799,12 @@ void unneeded_registers(int istart,int iend,int r)
{
// Unconditional branch
temp_u=1;temp_uu=1;
+ temp_gte_u=0;
} else {
// Conditional branch (not taken case)
temp_u=unneeded_reg[i+2];
temp_uu=unneeded_reg_upper[i+2];
+ temp_gte_u&=gte_unneeded[i+2];
}
// Merge in delay slot
tdep=(~temp_uu>>rt1[i+1])&1;
@@ -6801,17 +6814,21 @@ void unneeded_registers(int istart,int iend,int r)
temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
temp_u|=1;temp_uu|=1;
+ temp_gte_u|=gte_rt[i+1];
+ temp_gte_u&=~gte_rs[i+1];
// If branch is "likely" (and conditional)
// then we skip the delay slot on the fall-thru path
if(likely[i]) {
if(i<slen-1) {
temp_u&=unneeded_reg[i+2];
temp_uu&=unneeded_reg_upper[i+2];
+ temp_gte_u&=gte_unneeded[i+2];
}
else
{
temp_u=1;
temp_uu=1;
+ temp_gte_u=0;
}
}
tdep=(~temp_uu>>rt1[i])&1;
@@ -6821,8 +6838,11 @@ void unneeded_registers(int istart,int iend,int r)
temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
temp_u|=1;temp_uu|=1;
+ temp_gte_u|=gte_rt[i];
+ temp_gte_u&=~gte_rs[i];
unneeded_reg[i]=temp_u;
unneeded_reg_upper[i]=temp_uu;
+ gte_unneeded[i]=temp_gte_u;
// Only go three levels deep. This recursion can take an
// excessive amount of time if there are a lot of nested loops.
if(r<2) {
@@ -6830,6 +6850,7 @@ void unneeded_registers(int istart,int iend,int r)
}else{
unneeded_reg[(ba[i]-start)>>2]=1;
unneeded_reg_upper[(ba[i]-start)>>2]=1;
+ gte_unneeded[(ba[i]-start)>>2]=0;
}
} /*else*/ if(1) {
if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
@@ -6837,6 +6858,7 @@ void unneeded_registers(int istart,int iend,int r)
// Unconditional branch
u=unneeded_reg[(ba[i]-start)>>2];
uu=unneeded_reg_upper[(ba[i]-start)>>2];
+ gte_u=gte_unneeded[(ba[i]-start)>>2];
branch_unneeded_reg[i]=u;
branch_unneeded_reg_upper[i]=uu;
//u=1;
@@ -6851,10 +6873,13 @@ void unneeded_registers(int istart,int iend,int r)
uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
u|=1;uu|=1;
+ gte_u|=gte_rt[i+1];
+ gte_u&=~gte_rs[i+1];
} else {
// Conditional branch
b=unneeded_reg[(ba[i]-start)>>2];
bu=unneeded_reg_upper[(ba[i]-start)>>2];
+ gte_bu=gte_unneeded[(ba[i]-start)>>2];
branch_unneeded_reg[i]=b;
branch_unneeded_reg_upper[i]=bu;
//b=1;
@@ -6869,20 +6894,25 @@ void unneeded_registers(int istart,int iend,int r)
bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
b|=1;bu|=1;
+ gte_bu|=gte_rt[i+1];
+ gte_bu&=~gte_rs[i+1];
// If branch is "likely" then we skip the
// delay slot on the fall-thru path
if(likely[i]) {
u=b;
uu=bu;
+ gte_u=gte_bu;
if(i<slen-1) {
u&=unneeded_reg[i+2];
uu&=unneeded_reg_upper[i+2];
+ gte_u&=gte_unneeded[i+2];
//u=1;
//uu=1;
}
} else {
u&=b;
uu&=bu;
+ gte_u&=gte_bu;
//u=1;
//uu=1;
}
@@ -6918,11 +6948,13 @@ void unneeded_registers(int istart,int iend,int r)
u|=1LL<<rt2[i];
uu|=1LL<<rt1[i];
uu|=1LL<<rt2[i];
+ gte_u|=gte_rt[i];
// Accessed registers are needed
u&=~(1LL<<rs1[i]);
u&=~(1LL<<rs2[i]);
uu&=~(1LL<<us1[i]);
uu&=~(1LL<<us2[i]);
+ gte_u&=~gte_rs[i];
// Source-target dependencies
uu&=~(tdep<<dep1[i]);
uu&=~(tdep<<dep2[i]);
@@ -6931,6 +6963,7 @@ void unneeded_registers(int istart,int iend,int r)
// Save it
unneeded_reg[i]=u;
unneeded_reg_upper[i]=uu;
+ gte_unneeded[i]=gte_u;
/*
printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
printf("U:");
@@ -7852,6 +7885,7 @@ void new_dynarec_clear_full()
literalcount=0;
stop_after_jal=0;
inv_code_start=inv_code_end=~0;
+ gte_reads_flags=0;
// TLB
#ifndef DISABLE_TLB
using_tlb=0;
@@ -8344,11 +8378,14 @@ int new_recompile_block(int addr)
#endif
#ifdef PCSX
case 0x12: strcpy(insn[i],"COP2"); type=NI;
- // note: COP MIPS-1 encoding differs from MIPS32
op2=(source[i]>>21)&0x1f;
- if (source[i]&0x3f) {
+ //if (op2 & 0x10) {
+ if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns
if (gte_handlers[source[i]&0x3f]!=NULL) {
- snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
+ if (gte_regnames[source[i]&0x3f]!=NULL)
+ strcpy(insn[i],gte_regnames[source[i]&0x3f]);
+ else
+ snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
type=C2OP;
}
}
@@ -8376,6 +8413,7 @@ int new_recompile_block(int addr)
us2[i]=0;
dep1[i]=0;
dep2[i]=0;
+ gte_rs[i]=gte_rt[i]=0;
switch(type) {
case LOAD:
rs1[i]=(source[i]>>21)&0x1f;
@@ -8539,7 +8577,6 @@ int new_recompile_block(int addr)
if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET
break;
case COP1:
- case COP2:
rs1[i]=0;
rs2[i]=0;
rt1[i]=0;
@@ -8549,6 +8586,28 @@ int new_recompile_block(int addr)
if(op2==5) us1[i]=rs1[i]; // DMTC1
rs2[i]=CSREG;
break;
+ case COP2:
+ rs1[i]=0;
+ rs2[i]=0;
+ rt1[i]=0;
+ rt2[i]=0;
+ if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2
+ if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2
+ rs2[i]=CSREG;
+ int gr=(source[i]>>11)&0x1F;
+ switch(op2)
+ {
+ case 0x00: gte_rs[i]=1ll<<gr; break; // MFC2
+ case 0x04: gte_rt[i]=1ll<<gr; break; // MTC2
+ case 0x02: gte_rs[i]=1ll<<(gr+32); // CFC2
+ if(gr==31&&!gte_reads_flags) {
+ printf("gte flag read encountered @%08x\n",addr + i*4);
+ gte_reads_flags=1;
+ }
+ break;
+ case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2
+ }
+ break;
case C1LS:
rs1[i]=(source[i]>>21)&0x1F;
rs2[i]=CSREG;
@@ -8562,6 +8621,16 @@ int new_recompile_block(int addr)
rt1[i]=0;
rt2[i]=0;
imm[i]=(short)source[i];
+ if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2
+ else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2
+ break;
+ case C2OP:
+ rs1[i]=0;
+ rs2[i]=0;
+ rt1[i]=0;
+ rt2[i]=0;
+ gte_rt[i]=1ll<<63; // every op changes flags
+ // TODO: other regs?
break;
case FLOAT:
case FCONV: