author    notaz    2011-06-22 18:47:10 +0300
committer notaz    2011-07-08 00:15:07 +0300
commit    d61de97e8764980c0a98d777653396fad745fff4 (patch)
tree      b7688730c37fd6660c0d4991e0a68e0792ce6915 /libpcsxcore/new_dynarec/new_dynarec.c
parent    e3234ecf9665738e35a749fbb9d4120f25a0c7cf (diff)
drc: merge part of old Ari64's patch: 09_tlb_offset
This one is from the previous batch, applied here so that the next patch applies cleanly.
Diffstat (limited to 'libpcsxcore/new_dynarec/new_dynarec.c')
-rw-r--r--  libpcsxcore/new_dynarec/new_dynarec.c | 168
1 file changed, 168 insertions, 0 deletions
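
For context on the hunk below: the added block scans forward from each load/store, scores every free host register by how many later memory accesses (and loop-invariant branch targets) could reuse a cached RAM offset (ROREG) or TLB map pointer (MMREG), and only allocates a register for the pointer when the score exceeds 1. The following is a minimal, hypothetical C sketch of that scoring idea only; the names (optype, score_window, OP_LOAD, ...) are inventions for illustration and do not appear in new_dynarec.c.

#include <stdio.h>

/* Simplified stand-in for the recompiler's per-instruction type tags. */
enum optype { OP_LOAD, OP_STORE, OP_ALU, OP_BRANCH };

/* Count how many memory ops after 'start' could share one cached base
 * pointer, stopping at control flow (the real code stops at unconditional
 * branches and at points where no host register remains free). */
static int score_window(const enum optype *ops, int len, int start)
{
    int score = 0;
    for (int j = start; j < len; j++) {
        if (ops[j] == OP_BRANCH)
            break;
        if (ops[j] == OP_LOAD || ops[j] == OP_STORE)
            score++;
    }
    return score;
}

int main(void)
{
    enum optype block[] = { OP_LOAD, OP_ALU, OP_STORE, OP_LOAD, OP_BRANCH, OP_LOAD };
    int len = (int)(sizeof(block) / sizeof(block[0]));
    int s = score_window(block, len, 0);

    /* Mirrors the patch's "score[maxscore]>1" threshold: a single use does
     * not pay for tying up a host register across the window. */
    if (s > 1)
        printf("score %d: cache the offset/map pointer for this window\n", s);
    else
        printf("score %d: not worth a register\n", s);
    return 0;
}

The actual patch additionally rewards loop-invariant hoisting (branch targets that land back inside the scanned window) and commits the winning register into regmap/regmap_entry and branch_regs, which this sketch leaves out.
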
diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c
index c6d83c2..74b64b7 100644
--- a/libpcsxcore/new_dynarec/new_dynarec.c
+++ b/libpcsxcore/new_dynarec/new_dynarec.c
@@ -10228,6 +10228,174 @@ int new_recompile_block(int addr)
}
}
+ // Cache memory offset or tlb map pointer if a register is available
+ #ifndef HOST_IMM_ADDR32
+ #ifndef RAM_OFFSET
+ if(using_tlb)
+ #endif
+ {
+ int earliest_available[HOST_REGS];
+ int loop_start[HOST_REGS];
+ int score[HOST_REGS];
+ int end[HOST_REGS];
+ int reg=using_tlb?MMREG:ROREG;
+
+ // Init
+ for(hr=0;hr<HOST_REGS;hr++) {
+ score[hr]=0;earliest_available[hr]=0;
+ loop_start[hr]=MAXBLOCK;
+ }
+ for(i=0;i<slen-1;i++)
+ {
+ // Can't do anything if no registers are available
+ if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i]) {
+ for(hr=0;hr<HOST_REGS;hr++) {
+ score[hr]=0;earliest_available[hr]=i+1;
+ loop_start[hr]=MAXBLOCK;
+ }
+ }
+ if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
+ if(!ooo[i]) {
+ if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) {
+ for(hr=0;hr<HOST_REGS;hr++) {
+ score[hr]=0;earliest_available[hr]=i+1;
+ loop_start[hr]=MAXBLOCK;
+ }
+ }
+ }
+ }
+ // Mark unavailable registers
+ for(hr=0;hr<HOST_REGS;hr++) {
+ if(regs[i].regmap[hr]>=0) {
+ score[hr]=0;earliest_available[hr]=i+1;
+ loop_start[hr]=MAXBLOCK;
+ }
+ if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
+ if(branch_regs[i].regmap[hr]>=0) {
+ score[hr]=0;earliest_available[hr]=i+2;
+ loop_start[hr]=MAXBLOCK;
+ }
+ }
+ }
+ // No register allocations after unconditional jumps
+ if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000)
+ {
+ for(hr=0;hr<HOST_REGS;hr++) {
+ score[hr]=0;earliest_available[hr]=i+2;
+ loop_start[hr]=MAXBLOCK;
+ }
+ i++; // Skip delay slot too
+ //printf("skip delay slot: %x\n",start+i*4);
+ }
+ else
+ // Possible match
+ if(itype[i]==LOAD||itype[i]==LOADLR||
+ itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS) {
+ for(hr=0;hr<HOST_REGS;hr++) {
+ if(hr!=EXCLUDE_REG) {
+ end[hr]=i-1;
+ for(j=i;j<slen-1;j++) {
+ if(regs[j].regmap[hr]>=0) break;
+ if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) {
+ if(branch_regs[j].regmap[hr]>=0) break;
+ if(ooo[j]) {
+ if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break;
+ }else{
+ if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break;
+ }
+ }
+ else if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) break;
+ if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) {
+ int t=(ba[j]-start)>>2;
+ if(t<j&&t>=earliest_available[hr]) {
+ // Score a point for hoisting loop invariant
+ if(t<loop_start[hr]) loop_start[hr]=t;
+ //printf("set loop_start: i=%x j=%x (%x)\n",start+i*4,start+j*4,start+t*4);
+ score[hr]++;
+ end[hr]=j;
+ }
+ else if(t<j) {
+ if(regs[t].regmap[hr]==reg) {
+ // Score a point if the branch target matches this register
+ score[hr]++;
+ end[hr]=j;
+ }
+ }
+ if(itype[j+1]==LOAD||itype[j+1]==LOADLR||
+ itype[j+1]==STORE||itype[j+1]==STORELR||itype[j+1]==C1LS) {
+ score[hr]++;
+ end[hr]=j;
+ }
+ }
+ if(itype[j]==UJUMP||itype[j]==RJUMP||(source[j]>>16)==0x1000)
+ {
+ // Stop on unconditional branch
+ break;
+ }
+ else
+ if(itype[j]==LOAD||itype[j]==LOADLR||
+ itype[j]==STORE||itype[j]==STORELR||itype[j]==C1LS) {
+ score[hr]++;
+ end[hr]=j;
+ }
+ }
+ }
+ }
+ // Find highest score and allocate that register
+ int maxscore=0;
+ for(hr=0;hr<HOST_REGS;hr++) {
+ if(hr!=EXCLUDE_REG) {
+ if(score[hr]>score[maxscore]) {
+ maxscore=hr;
+ //printf("highest score: %d %d (%x->%x)\n",score[hr],hr,start+i*4,start+end[hr]*4);
+ }
+ }
+ }
+ if(score[maxscore]>1)
+ {
+ if(i<loop_start[maxscore]) loop_start[maxscore]=i;
+ for(j=loop_start[maxscore];j<slen&&j<=end[maxscore];j++) {
+ //if(regs[j].regmap[maxscore]>=0) {printf("oops: %x %x was %d=%d\n",loop_start[maxscore]*4+start,j*4+start,maxscore,regs[j].regmap[maxscore]);}
+ assert(regs[j].regmap[maxscore]<0);
+ if(j>loop_start[maxscore]) regs[j].regmap_entry[maxscore]=reg;
+ regs[j].regmap[maxscore]=reg;
+ regs[j].dirty&=~(1<<maxscore);
+ regs[j].wasconst&=~(1<<maxscore);
+ regs[j].isconst&=~(1<<maxscore);
+ if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) {
+ branch_regs[j].regmap[maxscore]=reg;
+ branch_regs[j].wasdirty&=~(1<<maxscore);
+ branch_regs[j].dirty&=~(1<<maxscore);
+ branch_regs[j].wasconst&=~(1<<maxscore);
+ branch_regs[j].isconst&=~(1<<maxscore);
+ if(itype[j]!=RJUMP&&itype[j]!=UJUMP&&(source[j]>>16)!=0x1000) {
+ regmap_pre[j+2][maxscore]=reg;
+ regs[j+2].wasdirty&=~(1<<maxscore);
+ }
+ // loop optimization (loop_preload)
+ int t=(ba[j]-start)>>2;
+ if(t==loop_start[maxscore]) regs[t].regmap_entry[maxscore]=reg;
+ }
+ else
+ {
+ if(j<1||(itype[j-1]!=RJUMP&&itype[j-1]!=UJUMP&&itype[j-1]!=CJUMP&&itype[j-1]!=SJUMP&&itype[j-1]!=FJUMP)) {
+ regmap_pre[j+1][maxscore]=reg;
+ regs[j+1].wasdirty&=~(1<<maxscore);
+ }
+ }
+ }
+ i=j-1;
+ if(itype[j-1]==RJUMP||itype[j-1]==UJUMP||itype[j-1]==CJUMP||itype[j-1]==SJUMP||itype[j-1]==FJUMP) i++; // skip delay slot
+ for(hr=0;hr<HOST_REGS;hr++) {
+ score[hr]=0;earliest_available[hr]=i+i;
+ loop_start[hr]=MAXBLOCK;
+ }
+ }
+ }
+ }
+ }
+ #endif
+
// This allocates registers (if possible) one instruction prior
// to use, which can avoid a load-use penalty on certain CPUs.
for(i=0;i<slen-1;i++)