From 1a5fd79401ac52789fad34c6b852b947200a6334 Mon Sep 17 00:00:00 2001 From: frangarcj Date: Tue, 6 Sep 2016 13:49:55 +0200 Subject: (VITA) Some dynarec --- libpcsxcore/new_dynarec/new_dynarec.c | 214 ++++++++++++++++++---------------- 1 file changed, 116 insertions(+), 98 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 65c8f03..1c0ab56 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -56,6 +56,13 @@ static void __clear_cache(void *start, void *end) { #elif defined(_3DS) #include "3ds_utils.h" #define __clear_cache(start,end) svcFlushProcessDataCache(0xFFFF8001, start, (u32)(end)-(u32)(start)) +#elif defined(VITA) +#define __clear_cache vita_clear_cache +static void __clear_cache(void *start, void *end) { + size_t len = (char *)end - (char *)start; + int block = sceKernelFindMemBlockByAddr(start,len); + sceKernelSyncVMDomain(block, start, len); +} #endif #define MAXBLOCK 4096 @@ -196,7 +203,7 @@ struct ll_entry #define STORE 2 // Store #define LOADLR 3 // Unaligned load #define STORELR 4 // Unaligned store -#define MOV 5 // Move +#define MOV 5 // Move #define ALU 6 // Arithmetic/logic #define MULTDIV 7 // Multiply/divide #define SHIFT 8 // Shift by register @@ -314,18 +321,18 @@ static void tlb_hacks() { u_int addr; int n; - switch (ROM_HEADER->Country_code&0xFF) + switch (ROM_HEADER->Country_code&0xFF) { case 0x45: // U addr=0x34b30; - break; - case 0x4A: // J - addr=0x34b70; - break; - case 0x50: // E + break; + case 0x4A: // J + addr=0x34b70; + break; + case 0x50: // E addr=0x329f0; - break; - default: + break; + default: // Unknown country code addr=0; break; @@ -526,7 +533,7 @@ static void flush_dirty_uppers(struct regstat *cur) for (hr=0;hrdirty>>hr)&1) { reg=cur->regmap[hr]; - if(reg>=64) + if(reg>=64) if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1; } } @@ -683,7 +690,7 @@ int needed_again(int r, int i) int j; int b=-1; int rn=10; - + if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(ba[i-1]start+slen*4-4) @@ -786,7 +793,7 @@ int loop_reg(int i, int r, int hr) void alloc_all(struct regstat *cur,int i) { int hr; - + for(hr=0;hrregmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&& @@ -827,7 +834,7 @@ void mult64(uint64_t m1,uint64_t m2) unsigned long long int result1, result2, result3, result4; unsigned long long int temp1, temp2, temp3, temp4; int sign = 0; - + if (m1 < 0) { op2 = -m1; @@ -840,22 +847,22 @@ void mult64(uint64_t m1,uint64_t m2) sign = 1 - sign; } else op4 = m2; - + op1 = op2 & 0xFFFFFFFF; op2 = (op2 >> 32) & 0xFFFFFFFF; op3 = op4 & 0xFFFFFFFF; op4 = (op4 >> 32) & 0xFFFFFFFF; - + temp1 = op1 * op3; temp2 = (temp1 >> 32) + op1 * op4; temp3 = op2 * op3; temp4 = (temp3 >> 32) + op2 * op4; - + result1 = temp1 & 0xFFFFFFFF; result2 = temp2 + (temp3 & 0xFFFFFFFF); result3 = (result2 >> 32) + temp4; result4 = (result3 >> 32); - + lo = result1 | (result2 << 32); hi = (result3 & 0xFFFFFFFF) | (result4 << 32); if (sign) @@ -871,25 +878,25 @@ void multu64(uint64_t m1,uint64_t m2) unsigned long long int op1, op2, op3, op4; unsigned long long int result1, result2, result3, result4; unsigned long long int temp1, temp2, temp3, temp4; - + op1 = m1 & 0xFFFFFFFF; op2 = (m1 >> 32) & 0xFFFFFFFF; op3 = m2 & 0xFFFFFFFF; op4 = (m2 >> 32) & 0xFFFFFFFF; - + temp1 = op1 * op3; temp2 = (temp1 >> 32) + op1 * op4; temp3 = op2 * op3; temp4 = (temp3 >> 32) + op2 * op4; - + result1 = temp1 & 0xFFFFFFFF; result2 = temp2 + (temp3 & 0xFFFFFFFF); result3 = (result2 >> 32) + temp4; result4 = (result3 >> 32); - + lo = result1 | (result2 << 32); hi = (result3 & 0xFFFFFFFF) | (result4 << 32); - + //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32) // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); } @@ -1011,7 +1018,7 @@ void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift) { struct ll_entry *next; while(*head) { - if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || + if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) { inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr); @@ -1107,7 +1114,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) #ifdef __arm__ do_clear_cache(); #endif - + // Don't trap writes invalid_code[block]=1; #ifndef DISABLE_TLB @@ -3386,7 +3393,7 @@ void storelr_assemble(int i,struct regstat *i_regs) if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); gen_tlb_addr_w(temp,map); #else - if((u_int)rdram!=0x80000000) + if((u_int)rdram!=0x80000000) emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); #endif }else{ // using tlb @@ -4159,7 +4166,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) int mgr=MGEN1+(i&1); if(itype[i]==LOAD) { ra=get_reg(i_regs->regmap,rt1[i]); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg(i_regs->regmap,-1); assert(ra>=0); } if(itype[i]==LOADLR) { @@ -4757,7 +4764,7 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) { return 0; } - else + else if((i_dirty>>hr)&1) { if(i_regmap[hr]>s1l)&(branch_regs[i].is32>>rs1[i])&1) emit_loadreg(rs1[i],s1l); - } + } else { if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1) emit_loadreg(rs2[i],s1l); @@ -5194,7 +5201,7 @@ void do_ccstub(int n) load_all_regs(branch_regs[i].regmap); } emit_jmp(stubs[n][2]); // return address - + /* This works but uses a lot of memory... emit_readword((int)&last_count,ECX); emit_add(HOST_CCREG,ECX,EAX); @@ -5228,7 +5235,7 @@ add_to_linker(int addr,int target,int ext) { link_addr[linkcount][0]=addr; link_addr[linkcount][1]=target; - link_addr[linkcount][2]=ext; + link_addr[linkcount][2]=ext; linkcount++; } @@ -5254,7 +5261,7 @@ static void ujump_assemble_write_ra(int i) #endif { #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -5275,10 +5282,10 @@ void ujump_assemble(int i,struct regstat *i_regs) address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); - if(rt1[i]==31&&temp>=0) + if(rt1[i]==31&&temp>=0) { int return_address=start+i*4+8; - if(get_reg(branch_regs[i].regmap,31)>0) + if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } #endif @@ -5329,7 +5336,7 @@ static void rjump_assemble_write_ra(int i) assert(rt>=0); return_address=start+i*4+8; #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -5358,7 +5365,7 @@ void rjump_assemble(int i,struct regstat *i_regs) } address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH - if(rt1[i]==31) + if(rt1[i]==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { int return_address=start+i*4+8; @@ -5492,7 +5499,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); @@ -5547,7 +5554,7 @@ void cjump_assemble(int i,struct regstat *i_regs) load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); //assem_debug("cycle count (adj)\n"); @@ -5619,7 +5626,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(0); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if(opcode[i]==4) // BEQ @@ -5749,7 +5756,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(1); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if((opcode[i]&0x2f)==4) // BEQ @@ -5932,7 +5939,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); assem_debug("cycle count (adj)\n"); @@ -6019,7 +6026,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } } // if(!only32) - + if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { @@ -6267,7 +6274,7 @@ void fjump_assemble(int i,struct regstat *i_regs) { } } // if(!only32) - + if(invert) { if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -6775,14 +6782,14 @@ void unneeded_registers(int istart,int iend,int r) { // If subroutine call, flag return address as a possible branch target if(rt1[i]==31 && i=(start+slen*4)) { // Branch out of this block, flush all regs u=1; uu=1; gte_u=gte_u_unknown; - /* Hexagon hack + /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { uu=u=0x300C00F; // Discard at, v0-v1, t6-t9 @@ -7045,7 +7052,7 @@ static void provisional_32bit() int i,j; uint64_t is32=1; uint64_t lastbranch=1; - + for(i=0;i0) { @@ -7082,13 +7089,13 @@ static void provisional_32bit() uint64_t temp_is32=is32; for(j=i-1;j>=0;j--) { - if(ba[j]==start+i*4) + if(ba[j]==start+i*4) //temp_is32&=branch_regs[j].is32; temp_is32&=p32[j]; } for(j=i;j=0;i--) { int hr; @@ -7373,7 +7380,7 @@ static void provisional_r32() } //requires_32bit[i]=r32; pr32[i]=r32; - + // Dirty registers which are 32-bit, require 32-bit input // as they will be written as 32-bit values for(hr=0;hristart) { - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) + if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) { // Don't store a register immediately after writing it, // may prevent dual-issue. @@ -7974,17 +7981,28 @@ void new_dynarec_init() { SysPrintf("Init new dynarec\n"); out=(u_char *)BASE_ADDR; -#if BASE_ADDR_FIXED +#if defined(VITA) + if (mmap (out, 1< %x\n", (int)addr, (int)out); //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); - //if(debug) + //if(debug) //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); /*if(Count>=312978186) { @@ -8246,7 +8264,7 @@ int new_recompile_block(int addr) unsigned int type,op,op2; //printf("addr = %x source = %x %x\n", addr,source,source[0]); - + /* Pass 1 disassembly */ for(i=0;!done;i++) { @@ -8906,7 +8924,7 @@ int new_recompile_block(int addr) /* Pass 2 - Register dependencies and branch targets */ unneeded_registers(0,slen-1,0); - + /* Pass 3 - Register allocation */ struct regstat current; // Current register allocations/status @@ -8936,7 +8954,7 @@ int new_recompile_block(int addr) unneeded_reg_upper[0]=1; current.regmap[HOST_BTREG]=BTREG; } - + for(i=0;i=0;j--) { - if(ba[j]==start+i*4) + if(ba[j]==start+i*4) temp_is32&=branch_regs[j].is32; } for(j=i;j=0;j--) { - if(ba[j]==start+i*4+4) + if(ba[j]==start+i*4+4) temp_is32&=branch_regs[j].is32; } for(j=i;j=0) + if(get_reg(current.regmap,r|64)>=0) current.regmap[get_reg(current.regmap,r|64)]=-1; } } @@ -9059,12 +9077,12 @@ int new_recompile_block(int addr) uint64_t temp_is32=current.is32; for(j=i-1;j>=0;j--) { - if(ba[j]==start+i*4+8) + if(ba[j]==start+i*4+8) temp_is32&=branch_regs[j].is32; } for(j=i;j=0) + if(get_reg(current.regmap,r|64)>=0) current.regmap[get_reg(current.regmap,r|64)]=-1; } } @@ -9172,7 +9190,7 @@ int new_recompile_block(int addr) } } else { // First instruction expects CCREG to be allocated - if(i==0&&hr==HOST_CCREG) + if(i==0&&hr==HOST_CCREG) regs[i].regmap_entry[hr]=CCREG; else regs[i].regmap_entry[hr]=-1; @@ -9507,7 +9525,7 @@ int new_recompile_block(int addr) pagespan_alloc(¤t,i); break; } - + // Drop the upper half of registers that have become 32-bit current.uu|=current.is32&((1LL<=0;i--) { int hr; @@ -10065,7 +10083,7 @@ int new_recompile_block(int addr) } // Save it needed_reg[i]=nr; - + // Deallocate unneeded registers for(hr=0;hr=start && ba[i]<(start+i*4)) + if(ba[i]>=start && ba[i]<(start+i*4)) if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS @@ -10255,10 +10273,10 @@ int new_recompile_block(int addr) } } if(ooo[i]) { - if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; }else{ - if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; } // Avoid dirty->clean transition @@ -10428,10 +10446,10 @@ int new_recompile_block(int addr) if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { if(ooo[j]) { - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; }else{ - if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; } if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { @@ -10504,7 +10522,7 @@ int new_recompile_block(int addr) regs[k].isconst&=~(1<i&&f_regmap[HOST_CCREG]==CCREG) @@ -10546,7 +10564,7 @@ int new_recompile_block(int addr) } } } - + // Cache memory offset or tlb map pointer if a register is available #ifndef HOST_IMM_ADDR32 #ifndef RAM_OFFSET @@ -10726,7 +10744,7 @@ int new_recompile_block(int addr) } } #endif - + // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs. for(i=0;i=0) { @@ -10901,7 +10919,7 @@ int new_recompile_block(int addr) } } if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) { - if(itype[i+1]==LOAD) + if(itype[i+1]==LOAD) hr=get_reg(regs[i+1].regmap,rt1[i+1]); if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); @@ -10925,16 +10943,16 @@ int new_recompile_block(int addr) } } } - + /* Pass 6 - Optimize clean/dirty state */ clean_registers(0,slen-1,1); - + /* Pass 7 - Identify 32-bit registers */ #ifndef FORCE32 provisional_r32(); u_int r32=0; - + for (i=slen-1;i>=0;i--) { int hr; @@ -11030,7 +11048,7 @@ int new_recompile_block(int addr) if((regs[i].was32>>dep2[i])&1) r32|=1LL<(u_int)BASE_ADDR+(1<>12;i<=(start+slen*4)>>12;i++) { invalid_code[i]=0; @@ -11618,9 +11636,9 @@ int new_recompile_block(int addr) invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; #endif - + /* Pass 10 - Free memory by expiring oldest blocks */ - + int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { @@ -11662,7 +11680,7 @@ int new_recompile_block(int addr) case 3: // Clear jump_out #ifdef __arm__ - if((expirep&2047)==0) + if((expirep&2047)==0) do_clear_cache(); #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); -- cgit v1.2.3 From d9e6b42f224f10d8b6f2454da1f5d80735f8e0d6 Mon Sep 17 00:00:00 2001 From: frangarcj Date: Tue, 6 Sep 2016 13:54:38 +0200 Subject: (VITA) Another dynarec fix --- libpcsxcore/new_dynarec/new_dynarec.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 1c0ab56..56c8a0b 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -7980,17 +7980,18 @@ void new_dynarec_clear_full() void new_dynarec_init() { SysPrintf("Init new dynarec\n"); - out=(u_char *)BASE_ADDR; -#if defined(VITA) - if (mmap (out, 1<>3)+256]|=1<<(page&7); } #ifdef __arm__ + #if defined(VITA) + sceKernelCloseVMDomain(); + #endif __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<>6)+8; // get target of bl - if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) { - unsigned int page=source>>12; - unsigned int map_value=memory_map[page]; - if(map_value>=0x80000000) return 0; - while(page<((source+len-1)>>12)) { - if((memory_map[++page]<<2)!=(map_value<<2)) return 0; - } - source = source+(map_value<<2); - } -#endif //printf("verify_dirty: %x %x %x\n",source,copy,len); return !memcmp((void *)source,(void *)copy,len); } @@ -322,13 +303,6 @@ void get_bounds(int addr,u_int *start,u_int *end) #endif if((*ptr&0xFF000000)!=0xeb000000) ptr++; assert((*ptr&0xFF000000)==0xeb000000); // bl instruction -#ifndef DISABLE_TLB - u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl - if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) { - if(memory_map[source>>12]>=0x80000000) source = 0; - else source = source+(memory_map[source>>12]<<2); - } -#endif *start=source; *end=source+len; } @@ -1037,13 +1011,11 @@ void emit_pcreladdr(u_int rt) void emit_loadreg(int r, int hr) { -#ifdef FORCE32 if(r&64) { SysPrintf("64bit load in 32bit mode!\n"); assert(0); return; } -#endif if((r&63)==0) emit_zeroreg(hr); else { @@ -1062,13 +1034,11 @@ void emit_loadreg(int r, int hr) } void emit_storereg(int r, int hr) { -#ifdef FORCE32 if(r&64) { SysPrintf("64bit store in 32bit mode!\n"); assert(0); return; } -#endif int addr=((int)reg)+((r&63)<>4); if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); @@ -2671,12 +2641,6 @@ void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i) emit_movimm(value,HOST_TEMPREG); } emit_storereg(i_regmap[hr],HOST_TEMPREG); -#ifndef FORCE32 - if((i_is32>>i_regmap[hr])&1) { - if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG); - emit_storereg(i_regmap[hr]|64,HOST_TEMPREG); - } -#endif } } } @@ -2832,10 +2796,8 @@ static void mov_loadtype_adj(int type,int rs,int rt) } } -#ifdef PCSX #include "pcsxmem.h" #include "pcsxmem_inline.c" -#endif do_readstub(int n) { @@ -2859,7 +2821,6 @@ do_readstub(int n) rt=get_reg(i_regmap,rt1[i]); } assert(rs>=0); -#ifdef PCSX int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0; reglist|=(1<=0); - int ftable=0; - if(type==LOADB_STUB||type==LOADBU_STUB) - ftable=(int)readmemb; - if(type==LOADH_STUB||type==LOADHU_STUB) - ftable=(int)readmemh; - if(type==LOADW_STUB) - ftable=(int)readmem; -#ifndef FORCE32 - if(type==LOADD_STUB) - ftable=(int)readmemd; -#endif - assert(ftable!=0); - emit_writeword(rs,(int)&address); - //emit_pusha(); - save_regs(reglist); -#ifndef PCSX - ds=i_regs!=®s[i]; - int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]); - u_int cmask=ds?-1:(0x100f|~i_regs->wasconst); - if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<>rs1[i])&1)<<1)+ds,3); -#endif - //emit_readword((int)&last_count,temp); - //emit_add(cc,temp,cc); - //emit_writeword(cc,(int)&Count); - //emit_mov(15,14); - emit_call((int)&indirect_jump_indexed); - //emit_callreg(rs); - //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15); -#ifndef PCSX - // We really shouldn't need to update the count here, - // but not doing so causes random crashes... - emit_readword((int)&Count,HOST_TEMPREG); - emit_readword((int)&next_interupt,2); - emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG); - emit_writeword(2,(int)&last_count); - emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); - if(cc<0) { - emit_storereg(CCREG,HOST_TEMPREG); - } -#endif - //emit_popa(); - restore_regs(reglist); - //if((cc=get_reg(regmap,CCREG))>=0) { - // emit_loadreg(CCREG,cc); - //} - if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { - assert(rt>=0); - if(type==LOADB_STUB) - emit_movsbl((int)&readmem_dword,rt); - if(type==LOADBU_STUB) - emit_movzbl((int)&readmem_dword,rt); - if(type==LOADH_STUB) - emit_movswl((int)&readmem_dword,rt); - if(type==LOADHU_STUB) - emit_movzwl((int)&readmem_dword,rt); - if(type==LOADW_STUB) - emit_readword((int)&readmem_dword,rt); - if(type==LOADD_STUB) { - emit_readword((int)&readmem_dword,rt); - if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth); - } - } - emit_jmp(stubs[n][2]); // return address -#endif // !PCSX } -#ifdef PCSX // return memhandler, or get directly accessable address and return 0 u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) { @@ -3028,7 +2908,6 @@ u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) return l2<<1; } } -#endif inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { @@ -3037,7 +2916,6 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i int rt=get_reg(regmap,target); if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); -#ifdef PCSX u_int handler,host_addr=0,is_dynamic,far_call=0; int cc=get_reg(regmap,CCREG); if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) @@ -3111,87 +2989,6 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i } } restore_regs(reglist); -#else // if !PCSX - int ftable=0; - if(type==LOADB_STUB||type==LOADBU_STUB) - ftable=(int)readmemb; - if(type==LOADH_STUB||type==LOADHU_STUB) - ftable=(int)readmemh; - if(type==LOADW_STUB) - ftable=(int)readmem; -#ifndef FORCE32 - if(type==LOADD_STUB) - ftable=(int)readmemd; -#endif - assert(ftable!=0); - if(target==0) - emit_movimm(addr,rs); - emit_writeword(rs,(int)&address); - //emit_pusha(); - save_regs(reglist); -#ifndef PCSX - if((signed int)addr>=(signed int)0xC0000000) { - // Theoretically we can have a pagefault here, if the TLB has never - // been enabled and the address is outside the range 80000000..BFFFFFFF - // Write out the registers so the pagefault can be handled. This is - // a very rare case and likely represents a bug. - int ds=regmap!=regs[i].regmap; - if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); - if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); - else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty); - } -#endif - //emit_shrimm(rs,16,1); - int cc=get_reg(regmap,CCREG); - if(cc<0) { - emit_loadreg(CCREG,2); - } - //emit_movimm(ftable,0); - emit_movimm(((u_int *)ftable)[addr>>16],0); - //emit_readword((int)&last_count,12); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); -#ifndef PCSX - if((signed int)addr>=(signed int)0xC0000000) { - // Pagefault address - int ds=regmap!=regs[i].regmap; - emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); - } -#endif - //emit_add(12,2,2); - //emit_writeword(2,(int)&Count); - //emit_call(((u_int *)ftable)[addr>>16]); - emit_call((int)&indirect_jump); -#ifndef PCSX - // We really shouldn't need to update the count here, - // but not doing so causes random crashes... - emit_readword((int)&Count,HOST_TEMPREG); - emit_readword((int)&next_interupt,2); - emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG); - emit_writeword(2,(int)&last_count); - emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); - if(cc<0) { - emit_storereg(CCREG,HOST_TEMPREG); - } -#endif - //emit_popa(); - restore_regs(reglist); - if(rt>=0) { - if(type==LOADB_STUB) - emit_movsbl((int)&readmem_dword,rt); - if(type==LOADBU_STUB) - emit_movzbl((int)&readmem_dword,rt); - if(type==LOADH_STUB) - emit_movswl((int)&readmem_dword,rt); - if(type==LOADHU_STUB) - emit_movzwl((int)&readmem_dword,rt); - if(type==LOADW_STUB) - emit_readword((int)&readmem_dword,rt); - if(type==LOADD_STUB) { - emit_readword((int)&readmem_dword,rt); - if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth); - } - } -#endif // !PCSX } do_writestub(int n) @@ -3217,7 +3014,6 @@ do_writestub(int n) } assert(rs>=0); assert(rt>=0); -#ifdef PCSX int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra; int reglist2=reglist|(1<=0); - int ftable=0; - if(type==STOREB_STUB) - ftable=(int)writememb; - if(type==STOREH_STUB) - ftable=(int)writememh; - if(type==STOREW_STUB) - ftable=(int)writemem; -#ifndef FORCE32 - if(type==STORED_STUB) - ftable=(int)writememd; -#endif - assert(ftable!=0); - emit_writeword(rs,(int)&address); - //emit_shrimm(rs,16,rs); - //emit_movmem_indexedx4(ftable,rs,rs); - if(type==STOREB_STUB) - emit_writebyte(rt,(int)&byte); - if(type==STOREH_STUB) - emit_writehword(rt,(int)&hword); - if(type==STOREW_STUB) - emit_writeword(rt,(int)&word); - if(type==STORED_STUB) { -#ifndef FORCE32 - emit_writeword(rt,(int)&dword); - emit_writeword(r?rth:rt,(int)&dword+4); -#else - SysPrintf("STORED_STUB\n"); -#endif - } - //emit_pusha(); - save_regs(reglist); -#ifndef PCSX - ds=i_regs!=®s[i]; - int real_rs=get_reg(i_regmap,rs1[i]); - u_int cmask=ds?-1:(0x100f|~i_regs->wasconst); - if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<>rs1[i])&1)<<1)+ds,3); -#endif - //emit_readword((int)&last_count,temp); - //emit_addimm(cc,2*stubs[n][5]+2,cc); - //emit_add(cc,temp,cc); - //emit_writeword(cc,(int)&Count); - emit_call((int)&indirect_jump_indexed); - //emit_callreg(rs); - emit_readword((int)&Count,HOST_TEMPREG); - emit_readword((int)&next_interupt,2); - emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG); - emit_writeword(2,(int)&last_count); - emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); - if(cc<0) { - emit_storereg(CCREG,HOST_TEMPREG); - } - //emit_popa(); - restore_regs(reglist); - //if((cc=get_reg(regmap,CCREG))>=0) { - // emit_loadreg(CCREG,cc); - //} - emit_jmp(stubs[n][2]); // return address -#endif // !PCSX } inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) @@ -3359,7 +3082,6 @@ inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int rt=get_reg(regmap,target); assert(rs>=0); assert(rt>=0); -#ifdef PCSX u_int handler,host_addr=0; handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr); if (handler==0) { @@ -3388,82 +3110,6 @@ inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, if(cc<0) emit_storereg(CCREG,2); restore_regs(reglist); -#else // if !pcsx - int ftable=0; - if(type==STOREB_STUB) - ftable=(int)writememb; - if(type==STOREH_STUB) - ftable=(int)writememh; - if(type==STOREW_STUB) - ftable=(int)writemem; -#ifndef FORCE32 - if(type==STORED_STUB) - ftable=(int)writememd; -#endif - assert(ftable!=0); - emit_writeword(rs,(int)&address); - //emit_shrimm(rs,16,rs); - //emit_movmem_indexedx4(ftable,rs,rs); - if(type==STOREB_STUB) - emit_writebyte(rt,(int)&byte); - if(type==STOREH_STUB) - emit_writehword(rt,(int)&hword); - if(type==STOREW_STUB) - emit_writeword(rt,(int)&word); - if(type==STORED_STUB) { -#ifndef FORCE32 - emit_writeword(rt,(int)&dword); - emit_writeword(target?rth:rt,(int)&dword+4); -#else - SysPrintf("STORED_STUB\n"); -#endif - } - //emit_pusha(); - save_regs(reglist); -#ifndef PCSX - // rearmed note: load_all_consts prevents BIOS boot, some bug? - if((signed int)addr>=(signed int)0xC0000000) { - // Theoretically we can have a pagefault here, if the TLB has never - // been enabled and the address is outside the range 80000000..BFFFFFFF - // Write out the registers so the pagefault can be handled. This is - // a very rare case and likely represents a bug. - int ds=regmap!=regs[i].regmap; - if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); - if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); - else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty); - } -#endif - //emit_shrimm(rs,16,1); - int cc=get_reg(regmap,CCREG); - if(cc<0) { - emit_loadreg(CCREG,2); - } - //emit_movimm(ftable,0); - emit_movimm(((u_int *)ftable)[addr>>16],0); - //emit_readword((int)&last_count,12); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); -#ifndef PCSX - if((signed int)addr>=(signed int)0xC0000000) { - // Pagefault address - int ds=regmap!=regs[i].regmap; - emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); - } -#endif - //emit_add(12,2,2); - //emit_writeword(2,(int)&Count); - //emit_call(((u_int *)ftable)[addr>>16]); - emit_call((int)&indirect_jump); - emit_readword((int)&Count,HOST_TEMPREG); - emit_readword((int)&next_interupt,2); - emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG); - emit_writeword(2,(int)&last_count); - emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); - if(cc<0) { - emit_storereg(CCREG,HOST_TEMPREG); - } - //emit_popa(); - restore_regs(reglist); -#endif } do_unalignedwritestub(int n) @@ -3506,14 +3152,6 @@ do_unalignedwritestub(int n) emit_writeword(temp2,(int)&address); save_regs(reglist); -#ifndef PCSX - ds=i_regs!=®s[i]; - real_rs=get_reg(i_regmap,rs1[i]); - u_int cmask=ds?-1:(0x100f|~i_regs->wasconst); - if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<>rs1[i])&1)<<1)+ds,3); -#endif emit_call((int)&indirect_jump_indexed); restore_regs(reglist); @@ -3587,10 +3221,7 @@ do_invstub(int n) int do_dirty_stub(int i) { assem_debug("do_dirty_stub %x\n",start+i*4); - u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start; - #ifdef PCSX - addr=(u_int)source; - #endif + u_int addr=(u_int)source; // Careful about the code output here, verify_dirty needs to parse it. #ifndef HAVE_ARMV7 emit_loadlp(addr,1); @@ -3651,93 +3282,6 @@ do_cop1stub(int n) emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception); } -#ifndef DISABLE_TLB - -/* TLB */ - -int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr) -{ - if(c) { - if((signed int)addr>=(signed int)0xC0000000) { - // address_generation already loaded the const - emit_readword_dualindexedx4(FP,map,map); - } - else - return -1; // No mapping - } - else { - assert(s!=map); - emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map); - emit_addsr12(map,s,map); - // Schedule this while we wait on the load - //if(x) emit_xorimm(s,x,ar); - if(shift>=0) emit_shlimm(s,3,shift); - if(~a) emit_andimm(s,a,ar); - emit_readword_dualindexedx4(FP,map,map); - } - return map; -} -int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr) -{ - if(!c||(signed int)addr>=(signed int)0xC0000000) { - emit_test(map,map); - *jaddr=(int)out; - emit_js(0); - } - return map; -} - -int gen_tlb_addr_r(int ar, int map) { - if(map>=0) { - assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]); - output_w32(0xe0800100|rd_rn_rm(ar,ar,map)); - } -} - -int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr) -{ - if(c) { - if(addr<0x80800000||addr>=0xC0000000) { - // address_generation already loaded the const - emit_readword_dualindexedx4(FP,map,map); - } - else - return -1; // No mapping - } - else { - assert(s!=map); - emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map); - emit_addsr12(map,s,map); - // Schedule this while we wait on the load - //if(x) emit_xorimm(s,x,ar); - emit_readword_dualindexedx4(FP,map,map); - } - return map; -} -int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr) -{ - if(!c||addr<0x80800000||addr>=0xC0000000) { - emit_testimm(map,0x40000000); - *jaddr=(int)out; - emit_jne(0); - } -} - -int gen_tlb_addr_w(int ar, int map) { - if(map>=0) { - assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]); - output_w32(0xe0800100|rd_rn_rm(ar,ar,map)); - } -} - -// Generate the address of the memory_map entry, relative to dynarec_local -generate_map_const(u_int addr,int reg) { - //printf("generate_map_const(%x,%s)\n",addr,regname[reg]); - emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg); -} - -#else - static int do_tlb_r(int a, ...) { return 0; } static int do_tlb_r_branch(int a, ...) { return 0; } static int gen_tlb_addr_r(int a, ...) { return 0; } @@ -3745,8 +3289,6 @@ static int do_tlb_w(int a, ...) { return 0; } static int do_tlb_w_branch(int a, ...) { return 0; } static int gen_tlb_addr_w(int a, ...) { return 0; } -#endif // DISABLE_TLB - /* Special assem */ void shift_assemble_arm(int i,struct regstat *i_regs) @@ -3859,7 +3401,6 @@ void shift_assemble_arm(int i,struct regstat *i_regs) } } -#ifdef PCSX static void speculate_mov(int rs,int rt) { if(rt!=0) { @@ -3977,13 +3518,10 @@ static int get_ptr_mem_type(u_int a) return MTYPE_A000; return MTYPE_8000; } -#endif static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) { int jaddr,type=0; - -#ifdef PCSX int mr=rs1[i]; if(((smrv_strong|smrv_weak)>>mr)&1) { type=get_ptr_mem_type(smrv[mr]); @@ -4022,7 +3560,6 @@ static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) type=0; } } -#endif if(type==0) { @@ -4112,7 +3649,6 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) map=get_reg(i_regs->regmap,TLREG); assert(map>=0); reglist&=~(1<>11)&0x1f; //assert(t>=0); // Why does this happen? OOT is weird if(t>=0&&rt1[i]!=0) { -#ifdef MUPEN64 - emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0); - emit_movimm((source[i]>>11)&0x1f,1); - emit_writeword(0,(int)&PC); - emit_writebyte(1,(int)&(fake_pc.f.r.nrd)); - if(copr==9) { - emit_readword((int)&last_count,ECX); - emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - } - emit_call((int)MFC0); - emit_readword((int)&readmem_dword,t); -#else emit_readword((int)®_cop0+copr*4,t); -#endif } } else if(opcode2[i]==4) // MTC0 @@ -4231,16 +3750,7 @@ void cop0_assemble(int i,struct regstat *i_regs) signed char s=get_reg(i_regs->regmap,rs1[i]); char copr=(source[i]>>11)&0x1f; assert(s>=0); -#ifdef MUPEN64 - emit_writeword(s,(int)&readmem_dword); - wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); - emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0); - emit_movimm((source[i]>>11)&0x1f,1); - emit_writeword(0,(int)&PC); - emit_writebyte(1,(int)&(fake_pc.f.r.nrd)); -#else wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); -#endif if(copr==9||copr==11||copr==12||copr==13) { emit_readword((int)&last_count,HOST_TEMPREG); emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc @@ -4253,7 +3763,6 @@ void cop0_assemble(int i,struct regstat *i_regs) // The interrupt must be taken immediately, because a subsequent // instruction might disable interrupts again. if(copr==12||copr==13) { -#ifdef PCSX if (is_delayslot) { // burn cycles to cause cc_interrupt, which will // reschedule next_interupt. Relies on CCREG from above. @@ -4267,7 +3776,6 @@ void cop0_assemble(int i,struct regstat *i_regs) emit_loadreg(rs1[i],s); return; } -#endif emit_movimm(start+i*4+4,HOST_TEMPREG); emit_writeword(HOST_TEMPREG,(int)&pcaddr); emit_movimm(0,HOST_TEMPREG); @@ -4275,16 +3783,12 @@ void cop0_assemble(int i,struct regstat *i_regs) } //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); //else -#ifdef PCSX if(s==HOST_CCREG) emit_loadreg(rs1[i],1); else if(s!=1) emit_mov(s,1); emit_movimm(copr,0); emit_call((int)pcsx_mtc0); -#else - emit_call((int)MTC0); -#endif if(copr==9||copr==11||copr==12||copr==13) { emit_readword((int)&Count,HOST_CCREG); emit_readword((int)&next_interupt,HOST_TEMPREG); @@ -4307,25 +3811,6 @@ void cop0_assemble(int i,struct regstat *i_regs) else { assert(opcode2[i]==0x10); -#ifndef DISABLE_TLB - if((source[i]&0x3f)==0x01) // TLBR - emit_call((int)TLBR); - if((source[i]&0x3f)==0x02) // TLBWI - emit_call((int)TLBWI_new); - if((source[i]&0x3f)==0x06) { // TLBWR - // The TLB entry written by TLBWR is dependent on the count, - // so update the cycle count - emit_readword((int)&last_count,ECX); - if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - emit_call((int)TLBWR_new); - } - if((source[i]&0x3f)==0x08) // TLBP - emit_call((int)TLBP); -#endif -#ifdef PCSX if((source[i]&0x3f)==0x10) // RFE { emit_readword((int)&Status,0); @@ -4334,15 +3819,6 @@ void cop0_assemble(int i,struct regstat *i_regs) emit_orrshr_imm(1,2,0); emit_writeword(0,(int)&Status); } -#else - if((source[i]&0x3f)==0x18) // ERET - { - int count=ccadj[i]; - if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here? - emit_jmp((int)jump_eret); - } -#endif } } @@ -4667,624 +4143,23 @@ void cop1_unusable(int i,struct regstat *i_regs) void cop1_assemble(int i,struct regstat *i_regs) { -#ifndef DISABLE_COP1 - // Check cop1 unusable - if(!cop1_usable) { - signed char rs=get_reg(i_regs->regmap,CSREG); - assert(rs>=0); - emit_testimm(rs,0x20000000); - int jaddr=(int)out; - emit_jeq(0); - add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - if (opcode2[i]==0) { // MFC1 - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],tl); - emit_readword_indexed(0,tl,tl); - } - } - else if (opcode2[i]==1) { // DMFC1 - signed char tl=get_reg(i_regs->regmap,rt1[i]); - signed char th=get_reg(i_regs->regmap,rt1[i]|64); - if(tl>=0) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],tl); - if(th>=0) emit_readword_indexed(4,tl,th); - emit_readword_indexed(0,tl,tl); - } - } - else if (opcode2[i]==4) { // MTC1 - signed char sl=get_reg(i_regs->regmap,rs1[i]); - signed char temp=get_reg(i_regs->regmap,-1); - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_writeword_indexed(sl,0,temp); - } - else if (opcode2[i]==5) { // DMTC1 - signed char sl=get_reg(i_regs->regmap,rs1[i]); - signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl; - signed char temp=get_reg(i_regs->regmap,-1); - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_writeword_indexed(sh,4,temp); - emit_writeword_indexed(sl,0,temp); - } - else if (opcode2[i]==2) // CFC1 - { - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0) { - u_int copr=(source[i]>>11)&0x1f; - if(copr==0) emit_readword((int)&FCR0,tl); - if(copr==31) emit_readword((int)&FCR31,tl); - } - } - else if (opcode2[i]==6) // CTC1 - { - signed char sl=get_reg(i_regs->regmap,rs1[i]); - u_int copr=(source[i]>>11)&0x1f; - assert(sl>=0); - if(copr==31) - { - emit_writeword(sl,(int)&FCR31); - // Set the rounding mode - //FIXME - //char temp=get_reg(i_regs->regmap,-1); - //emit_andimm(sl,3,temp); - //emit_fldcw_indexed((int)&rounding_modes,temp); - } - } -#else cop1_unusable(i, i_regs); -#endif } void fconv_assemble_arm(int i,struct regstat *i_regs) { -#ifndef DISABLE_COP1 - signed char temp=get_reg(i_regs->regmap,-1); - assert(temp>=0); - // Check cop1 unusable - if(!cop1_usable) { - signed char rs=get_reg(i_regs->regmap,CSREG); - assert(rs>=0); - emit_testimm(rs,0x20000000); - int jaddr=(int)out; - emit_jeq(0); - add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - - #if(defined(__VFP_FP__) && !defined(__SOFTFP__)) - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,15); - emit_ftosizs(15,15); // float->int, truncate - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - emit_fsts(15,temp); - return; - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_vldr(temp,7); - emit_ftosizd(7,13); // double->int, truncate - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - emit_fsts(13,temp); - return; - } - - if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,13); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - emit_fsitos(13,15); - emit_fsts(15,temp); - return; - } - if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,13); - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - emit_fsitod(13,7); - emit_vstr(7,temp); - return; - } - - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,13); - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - emit_fcvtds(13,7); - emit_vstr(7,temp); - return; - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_vldr(temp,7); - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - emit_fcvtsd(7,13); - emit_fsts(13,temp); - return; - } - #endif - - // C emulation code - - u_int hr,reglist=0; - for(hr=0;hrregmap[hr]>=0) reglist|=1<>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_s_w); - } - if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_d_w); - } - if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_s_l); - } - if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_d_l); - } - - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_d_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_w_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_l_s); - } - - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_s_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_w_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_l_d); - } - - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)round_l_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)trunc_l_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)ceil_l_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)floor_l_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)round_w_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)trunc_w_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)ceil_w_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)floor_w_s); - } - - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)round_l_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)trunc_l_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)ceil_l_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)floor_l_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)round_w_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)trunc_w_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)ceil_w_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)floor_w_d); - } - - restore_regs(reglist); -#else cop1_unusable(i, i_regs); -#endif } #define fconv_assemble fconv_assemble_arm void fcomp_assemble(int i,struct regstat *i_regs) { -#ifndef DISABLE_COP1 - signed char fs=get_reg(i_regs->regmap,FSREG); - signed char temp=get_reg(i_regs->regmap,-1); - assert(temp>=0); - // Check cop1 unusable - if(!cop1_usable) { - signed char cs=get_reg(i_regs->regmap,CSREG); - assert(cs>=0); - emit_testimm(cs,0x20000000); - int jaddr=(int)out; - emit_jeq(0); - add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - - if((source[i]&0x3f)==0x30) { - emit_andimm(fs,~0x800000,fs); - return; - } - - if((source[i]&0x3e)==0x38) { - // sf/ngle - these should throw exceptions for NaNs - emit_andimm(fs,~0x800000,fs); - return; - } - - #if(defined(__VFP_FP__) && !defined(__SOFTFP__)) - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG); - emit_orimm(fs,0x800000,fs); - emit_flds(temp,14); - emit_flds(HOST_TEMPREG,15); - emit_fcmps(14,15); - emit_fmstat(); - if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s - if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s - if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s - if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s - if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s - if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s - if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s - if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s - if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s - if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s - if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s - if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s - if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s - return; - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG); - emit_orimm(fs,0x800000,fs); - emit_vldr(temp,6); - emit_vldr(HOST_TEMPREG,7); - emit_fcmpd(6,7); - emit_fmstat(); - if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d - if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d - if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d - if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d - if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d - if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d - if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d - if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d - if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d - if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d - if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d - if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d - if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d - return; - } - #endif - - // C only - - u_int hr,reglist=0; - for(hr=0;hrregmap[hr]>=0) reglist|=1<>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG); - if((source[i]&0x3f)==0x30) emit_call((int)c_f_s); - if((source[i]&0x3f)==0x31) emit_call((int)c_un_s); - if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s); - if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s); - if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s); - if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s); - if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s); - if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s); - if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s); - if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s); - if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s); - if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s); - if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s); - if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s); - if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s); - if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s); - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],ARG2_REG); - if((source[i]&0x3f)==0x30) emit_call((int)c_f_d); - if((source[i]&0x3f)==0x31) emit_call((int)c_un_d); - if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d); - if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d); - if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d); - if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d); - if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d); - if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d); - if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d); - if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d); - if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d); - if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d); - if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d); - if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d); - if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d); - if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d); - } - restore_regs(reglist); - emit_loadreg(FSREG,fs); -#else cop1_unusable(i, i_regs); -#endif } void float_assemble(int i,struct regstat *i_regs) { -#ifndef DISABLE_COP1 - signed char temp=get_reg(i_regs->regmap,-1); - assert(temp>=0); - // Check cop1 unusable - if(!cop1_usable) { - signed char cs=get_reg(i_regs->regmap,CSREG); - assert(cs>=0); - emit_testimm(cs,0x20000000); - int jaddr=(int)out; - emit_jeq(0); - add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - - #if(defined(__VFP_FP__) && !defined(__SOFTFP__)) - if((source[i]&0x3f)==6) // mov - { - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG); - emit_readword_indexed(0,temp,temp); - emit_writeword_indexed(temp,0,HOST_TEMPREG); - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG); - emit_vldr(temp,7); - emit_vstr(7,HOST_TEMPREG); - } - } - return; - } - - if((source[i]&0x3f)>3) - { - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,15); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - } - if((source[i]&0x3f)==4) // sqrt - emit_fsqrts(15,15); - if((source[i]&0x3f)==5) // abs - emit_fabss(15,15); - if((source[i]&0x3f)==7) // neg - emit_fnegs(15,15); - emit_fsts(15,temp); - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_vldr(temp,7); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - } - if((source[i]&0x3f)==4) // sqrt - emit_fsqrtd(7,7); - if((source[i]&0x3f)==5) // abs - emit_fabsd(7,7); - if((source[i]&0x3f)==7) // neg - emit_fnegd(7,7); - emit_vstr(7,temp); - } - return; - } - if((source[i]&0x3f)<4) - { - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - } - if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) { - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG); - emit_flds(temp,15); - emit_flds(HOST_TEMPREG,13); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - } - } - if((source[i]&0x3f)==0) emit_fadds(15,13,15); - if((source[i]&0x3f)==1) emit_fsubs(15,13,15); - if((source[i]&0x3f)==2) emit_fmuls(15,13,15); - if((source[i]&0x3f)==3) emit_fdivs(15,13,15); - if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) { - emit_fsts(15,HOST_TEMPREG); - }else{ - emit_fsts(15,temp); - } - } - else if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG); - emit_vldr(temp,7); - emit_vldr(HOST_TEMPREG,6); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - } - } - if((source[i]&0x3f)==0) emit_faddd(7,6,7); - if((source[i]&0x3f)==1) emit_fsubd(7,6,7); - if((source[i]&0x3f)==2) emit_fmuld(7,6,7); - if((source[i]&0x3f)==3) emit_fdivd(7,6,7); - if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) { - emit_vstr(7,HOST_TEMPREG); - }else{ - emit_vstr(7,temp); - } - } - } - else { - if(opcode2[i]==0x10) { - emit_flds(temp,15); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - } - if((source[i]&0x3f)==0) emit_fadds(15,15,15); - if((source[i]&0x3f)==1) emit_fsubs(15,15,15); - if((source[i]&0x3f)==2) emit_fmuls(15,15,15); - if((source[i]&0x3f)==3) emit_fdivs(15,15,15); - emit_fsts(15,temp); - } - else if(opcode2[i]==0x11) { - emit_vldr(temp,7); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - } - if((source[i]&0x3f)==0) emit_faddd(7,7,7); - if((source[i]&0x3f)==1) emit_fsubd(7,7,7); - if((source[i]&0x3f)==2) emit_fmuld(7,7,7); - if((source[i]&0x3f)==3) emit_fdivd(7,7,7); - emit_vstr(7,temp); - } - } - return; - } - #endif - - u_int hr,reglist=0; - for(hr=0;hrregmap[hr]>=0) reglist|=1<>11)&0x1f],ARG1_REG); - if((source[i]&0x3f)<4) { - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG); - }else{ - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - } - switch(source[i]&0x3f) - { - case 0x00: emit_call((int)add_s);break; - case 0x01: emit_call((int)sub_s);break; - case 0x02: emit_call((int)mul_s);break; - case 0x03: emit_call((int)div_s);break; - case 0x04: emit_call((int)sqrt_s);break; - case 0x05: emit_call((int)abs_s);break; - case 0x06: emit_call((int)mov_s);break; - case 0x07: emit_call((int)neg_s);break; - } - restore_regs(reglist); - } - if(opcode2[i]==0x11) { // Double precision - save_regs(reglist); - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - if((source[i]&0x3f)<4) { - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],ARG2_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG); - }else{ - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - } - switch(source[i]&0x3f) - { - case 0x00: emit_call((int)add_d);break; - case 0x01: emit_call((int)sub_d);break; - case 0x02: emit_call((int)mul_d);break; - case 0x03: emit_call((int)div_d);break; - case 0x04: emit_call((int)sqrt_d);break; - case 0x05: emit_call((int)abs_d);break; - case 0x06: emit_call((int)mov_d);break; - case 0x07: emit_call((int)neg_d);break; - } - restore_regs(reglist); - } -#else cop1_unusable(i, i_regs); -#endif } void multdiv_assemble_arm(int i,struct regstat *i_regs) @@ -5397,183 +4272,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) } } else // 64-bit -#ifndef FORCE32 - { - if(opcode2[i]==0x1C) // DMULT - { - assert(opcode2[i]!=0x1C); - signed char m1h=get_reg(i_regs->regmap,rs1[i]|64); - signed char m1l=get_reg(i_regs->regmap,rs1[i]); - signed char m2h=get_reg(i_regs->regmap,rs2[i]|64); - signed char m2l=get_reg(i_regs->regmap,rs2[i]); - assert(m1h>=0); - assert(m2h>=0); - assert(m1l>=0); - assert(m2l>=0); - emit_pushreg(m2h); - emit_pushreg(m2l); - emit_pushreg(m1h); - emit_pushreg(m1l); - emit_call((int)&mult64); - emit_popreg(m1l); - emit_popreg(m1h); - emit_popreg(m2l); - emit_popreg(m2h); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - if(hih>=0) emit_loadreg(HIREG|64,hih); - if(hil>=0) emit_loadreg(HIREG,hil); - signed char loh=get_reg(i_regs->regmap,LOREG|64); - signed char lol=get_reg(i_regs->regmap,LOREG); - if(loh>=0) emit_loadreg(LOREG|64,loh); - if(lol>=0) emit_loadreg(LOREG,lol); - } - if(opcode2[i]==0x1D) // DMULTU - { - signed char m1h=get_reg(i_regs->regmap,rs1[i]|64); - signed char m1l=get_reg(i_regs->regmap,rs1[i]); - signed char m2h=get_reg(i_regs->regmap,rs2[i]|64); - signed char m2l=get_reg(i_regs->regmap,rs2[i]); - assert(m1h>=0); - assert(m2h>=0); - assert(m1l>=0); - assert(m2l>=0); - save_regs(CALLER_SAVE_REGS); - if(m1l!=0) emit_mov(m1l,0); - if(m1h==0) emit_readword((int)&dynarec_local,1); - else if(m1h>1) emit_mov(m1h,1); - if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2); - else if(m2l>2) emit_mov(m2l,2); - if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3); - else if(m2h>3) emit_mov(m2h,3); - emit_call((int)&multu64); - restore_regs(CALLER_SAVE_REGS); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - signed char loh=get_reg(i_regs->regmap,LOREG|64); - signed char lol=get_reg(i_regs->regmap,LOREG); - /*signed char temp=get_reg(i_regs->regmap,-1); - signed char rh=get_reg(i_regs->regmap,HIREG|64); - signed char rl=get_reg(i_regs->regmap,HIREG); - assert(m1h>=0); - assert(m2h>=0); - assert(m1l>=0); - assert(m2l>=0); - assert(temp>=0); - //emit_mov(m1l,EAX); - //emit_mul(m2l); - emit_umull(rl,rh,m1l,m2l); - emit_storereg(LOREG,rl); - emit_mov(rh,temp); - //emit_mov(m1h,EAX); - //emit_mul(m2l); - emit_umull(rl,rh,m1h,m2l); - emit_adds(rl,temp,temp); - emit_adcimm(rh,0,rh); - emit_storereg(HIREG,rh); - //emit_mov(m2h,EAX); - //emit_mul(m1l); - emit_umull(rl,rh,m1l,m2h); - emit_adds(rl,temp,temp); - emit_adcimm(rh,0,rh); - emit_storereg(LOREG|64,temp); - emit_mov(rh,temp); - //emit_mov(m2h,EAX); - //emit_mul(m1h); - emit_umull(rl,rh,m1h,m2h); - emit_adds(rl,temp,rl); - emit_loadreg(HIREG,temp); - emit_adcimm(rh,0,rh); - emit_adds(rl,temp,rl); - emit_adcimm(rh,0,rh); - // DEBUG - /* - emit_pushreg(m2h); - emit_pushreg(m2l); - emit_pushreg(m1h); - emit_pushreg(m1l); - emit_call((int)&multu64); - emit_popreg(m1l); - emit_popreg(m1h); - emit_popreg(m2l); - emit_popreg(m2h); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG - if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG - */ - // Shouldn't be necessary - //char loh=get_reg(i_regs->regmap,LOREG|64); - //char lol=get_reg(i_regs->regmap,LOREG); - //if(loh>=0) emit_loadreg(LOREG|64,loh); - //if(lol>=0) emit_loadreg(LOREG,lol); - } - if(opcode2[i]==0x1E) // DDIV - { - signed char d1h=get_reg(i_regs->regmap,rs1[i]|64); - signed char d1l=get_reg(i_regs->regmap,rs1[i]); - signed char d2h=get_reg(i_regs->regmap,rs2[i]|64); - signed char d2l=get_reg(i_regs->regmap,rs2[i]); - assert(d1h>=0); - assert(d2h>=0); - assert(d1l>=0); - assert(d2l>=0); - save_regs(CALLER_SAVE_REGS); - if(d1l!=0) emit_mov(d1l,0); - if(d1h==0) emit_readword((int)&dynarec_local,1); - else if(d1h>1) emit_mov(d1h,1); - if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2); - else if(d2l>2) emit_mov(d2l,2); - if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3); - else if(d2h>3) emit_mov(d2h,3); - emit_call((int)&div64); - restore_regs(CALLER_SAVE_REGS); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - signed char loh=get_reg(i_regs->regmap,LOREG|64); - signed char lol=get_reg(i_regs->regmap,LOREG); - if(hih>=0) emit_loadreg(HIREG|64,hih); - if(hil>=0) emit_loadreg(HIREG,hil); - if(loh>=0) emit_loadreg(LOREG|64,loh); - if(lol>=0) emit_loadreg(LOREG,lol); - } - if(opcode2[i]==0x1F) // DDIVU - { - //u_int hr,reglist=0; - //for(hr=0;hrregmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<regmap,rs1[i]|64); - signed char d1l=get_reg(i_regs->regmap,rs1[i]); - signed char d2h=get_reg(i_regs->regmap,rs2[i]|64); - signed char d2l=get_reg(i_regs->regmap,rs2[i]); - assert(d1h>=0); - assert(d2h>=0); - assert(d1l>=0); - assert(d2l>=0); - save_regs(CALLER_SAVE_REGS); - if(d1l!=0) emit_mov(d1l,0); - if(d1h==0) emit_readword((int)&dynarec_local,1); - else if(d1h>1) emit_mov(d1h,1); - if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2); - else if(d2l>2) emit_mov(d2l,2); - if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3); - else if(d2h>3) emit_mov(d2h,3); - emit_call((int)&divu64); - restore_regs(CALLER_SAVE_REGS); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - signed char loh=get_reg(i_regs->regmap,LOREG|64); - signed char lol=get_reg(i_regs->regmap,LOREG); - if(hih>=0) emit_loadreg(HIREG|64,hih); - if(hil>=0) emit_loadreg(HIREG,hil); - if(loh>=0) emit_loadreg(LOREG|64,loh); - if(lol>=0) emit_loadreg(LOREG,lol); - } - } -#else - assert(0); -#endif + assert(0); } else { @@ -5634,32 +4333,6 @@ void do_miniht_insert(u_int return_address,int rt,int temp) { #endif } -// Sign-extend to 64 bits and write out upper half of a register -// This is useful where we have a 32-bit value in a register, and want to -// keep it in a 32-bit register, but can't guarantee that it won't be read -// as a 64-bit value later. -void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu) -{ -#ifndef FORCE32 - if(is32_pre==is32) return; - int hr,reg; - for(hr=0;hr=0) { - if((dirty>>hr)&1) { - if( ((is32_pre&~is32&~uu)>>reg)&1 ) { - emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(reg|64,HOST_TEMPREG); - } - } - } - //} - } - } -#endif -} - void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) { //if(dirty_pre==dirty) return; @@ -5777,13 +4450,7 @@ void do_clear_cache() } // CPU-architecture-specific initialization -void arch_init() { -#ifndef DISABLE_COP1 - rounding_modes[0]=0x0<<22; // round - rounding_modes[1]=0x3<<22; // trunc - rounding_modes[2]=0x1<<22; // ceil - rounding_modes[3]=0x2<<22; // floor -#endif +static void arch_init() { } // vim:shiftwidth=2:expandtab diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index 2254638..2d10ac7 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -9,11 +9,6 @@ #define USE_MINI_HT 1 //#define REG_PREFETCH 1 #define HAVE_CONDITIONAL_CALL 1 -#define DISABLE_TLB 1 -//#define MUPEN64 -#define FORCE32 1 -#define DISABLE_COP1 1 -#define PCSX 1 #define RAM_SIZE 0x200000 #ifndef __ARM_ARCH_7A__ @@ -25,11 +20,7 @@ #define BASE_ADDR_FIXED 0 #endif -#ifdef FORCE32 #define REG_SHIFT 2 -#else -#define REG_SHIFT 3 -#endif /* ARM calling convention: r0-r3, r12: caller-save diff --git a/libpcsxcore/new_dynarec/fpu.c b/libpcsxcore/new_dynarec/fpu.c deleted file mode 100644 index a189a53..0000000 --- a/libpcsxcore/new_dynarec/fpu.c +++ /dev/null @@ -1,394 +0,0 @@ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Mupen64plus - fpu.c * - * Copyright (C) 2010 Ari64 * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#include - -extern int FCR0, FCR31; - -void cvt_s_w(int *source,float *dest) -{ - *dest = *source; -} -void cvt_d_w(int *source,double *dest) -{ - *dest = *source; -} -void cvt_s_l(long long *source,float *dest) -{ - *dest = *source; -} -void cvt_d_l(long long *source,double *dest) -{ - *dest = *source; -} -void cvt_d_s(float *source,double *dest) -{ - *dest = *source; -} -void cvt_s_d(double *source,float *dest) -{ - *dest = *source; -} - -void round_l_s(float *source,long long *dest) -{ - *dest = roundf(*source); -} -void round_w_s(float *source,int *dest) -{ - *dest = roundf(*source); -} -void trunc_l_s(float *source,long long *dest) -{ - *dest = truncf(*source); -} -void trunc_w_s(float *source,int *dest) -{ - *dest = truncf(*source); -} -void ceil_l_s(float *source,long long *dest) -{ - *dest = ceilf(*source); -} -void ceil_w_s(float *source,int *dest) -{ - *dest = ceilf(*source); -} -void floor_l_s(float *source,long long *dest) -{ - *dest = floorf(*source); -} -void floor_w_s(float *source,int *dest) -{ - *dest = floorf(*source); -} - -void round_l_d(double *source,long long *dest) -{ - *dest = round(*source); -} -void round_w_d(double *source,int *dest) -{ - *dest = round(*source); -} -void trunc_l_d(double *source,long long *dest) -{ - *dest = trunc(*source); -} -void trunc_w_d(double *source,int *dest) -{ - *dest = trunc(*source); -} -void ceil_l_d(double *source,long long *dest) -{ - *dest = ceil(*source); -} -void ceil_w_d(double *source,int *dest) -{ - *dest = ceil(*source); -} -void floor_l_d(double *source,long long *dest) -{ - *dest = floor(*source); -} -void floor_w_d(double *source,int *dest) -{ - *dest = floor(*source); -} - -void cvt_w_s(float *source,int *dest) -{ - switch(FCR31&3) - { - case 0: round_w_s(source,dest);return; - case 1: trunc_w_s(source,dest);return; - case 2: ceil_w_s(source,dest);return; - case 3: floor_w_s(source,dest);return; - } -} -void cvt_w_d(double *source,int *dest) -{ - switch(FCR31&3) - { - case 0: round_w_d(source,dest);return; - case 1: trunc_w_d(source,dest);return; - case 2: ceil_w_d(source,dest);return; - case 3: floor_w_d(source,dest);return; - } -} -void cvt_l_s(float *source,long long *dest) -{ - switch(FCR31&3) - { - case 0: round_l_s(source,dest);return; - case 1: trunc_l_s(source,dest);return; - case 2: ceil_l_s(source,dest);return; - case 3: floor_l_s(source,dest);return; - } -} -void cvt_l_d(double *source,long long *dest) -{ - switch(FCR31&3) - { - case 0: round_l_d(source,dest);return; - case 1: trunc_l_d(source,dest);return; - case 2: ceil_l_d(source,dest);return; - case 3: floor_l_d(source,dest);return; - } -} - -void c_f_s() -{ - FCR31 &= ~0x800000; -} -void c_un_s(float *source,float *target) -{ - FCR31=(isnan(*source) || isnan(*target)) ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_eq_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ueq_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_olt_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ult_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_ole_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ule_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_sf_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31&=~0x800000; -} -void c_ngle_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31&=~0x800000; -} - -void c_seq_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ngl_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_lt_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_nge_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_le_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ngt_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_f_d() -{ - FCR31 &= ~0x800000; -} -void c_un_d(double *source,double *target) -{ - FCR31=(isnan(*source) || isnan(*target)) ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_eq_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ueq_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_olt_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ult_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_ole_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ule_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_sf_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31&=~0x800000; -} -void c_ngle_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31&=~0x800000; -} - -void c_seq_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ngl_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_lt_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_nge_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_le_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ngt_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} - - -void add_s(float *source1,float *source2,float *target) -{ - *target=(*source1)+(*source2); -} -void sub_s(float *source1,float *source2,float *target) -{ - *target=(*source1)-(*source2); -} -void mul_s(float *source1,float *source2,float *target) -{ - *target=(*source1)*(*source2); -} -void div_s(float *source1,float *source2,float *target) -{ - *target=(*source1)/(*source2); -} -void sqrt_s(float *source,float *target) -{ - *target=sqrtf(*source); -} -void abs_s(float *source,float *target) -{ - *target=fabsf(*source); -} -void mov_s(float *source,float *target) -{ - *target=*source; -} -void neg_s(float *source,float *target) -{ - *target=-(*source); -} -void add_d(double *source1,double *source2,double *target) -{ - *target=(*source1)+(*source2); -} -void sub_d(double *source1,double *source2,double *target) -{ - *target=(*source1)-(*source2); -} -void mul_d(double *source1,double *source2,double *target) -{ - *target=(*source1)*(*source2); -} -void div_d(double *source1,double *source2,double *target) -{ - *target=(*source1)/(*source2); -} -void sqrt_d(double *source,double *target) -{ - *target=sqrt(*source); -} -void abs_d(double *source,double *target) -{ - *target=fabs(*source); -} -void mov_d(double *source,double *target) -{ - *target=*source; -} -void neg_d(double *source,double *target) -{ - *target=-(*source); -} - diff --git a/libpcsxcore/new_dynarec/fpu.h b/libpcsxcore/new_dynarec/fpu.h deleted file mode 100644 index 881ddbe..0000000 --- a/libpcsxcore/new_dynarec/fpu.h +++ /dev/null @@ -1,74 +0,0 @@ -void cvt_s_w(int *source,float *dest); -void cvt_d_w(int *source,double *dest); -void cvt_s_l(long long *source,float *dest); -void cvt_d_l(long long *source,double *dest); -void cvt_w_s(float *source,int *dest); -void cvt_w_d(double *source,int *dest); -void cvt_l_s(float *source,long long *dest); -void cvt_l_d(double *source,long long *dest); -void cvt_d_s(float *source,double *dest); -void cvt_s_d(double *source,float *dest); -void round_l_s(float *source,long long *dest); -void round_w_s(float *source,int *dest); -void trunc_l_s(float *source,long long *dest); -void trunc_w_s(float *source,int *dest); -void ceil_l_s(float *source,long long *dest); -void ceil_w_s(float *source,int *dest); -void floor_l_s(float *source,long long *dest); -void floor_w_s(float *source,int *dest); -void round_l_d(double *source,long long *dest); -void round_w_d(double *source,int *dest); -void trunc_l_d(double *source,long long *dest); -void trunc_w_d(double *source,int *dest); -void ceil_l_d(double *source,long long *dest); -void ceil_w_d(double *source,int *dest); -void floor_l_d(double *source,long long *dest); -void floor_w_d(double *source,int *dest); -void c_f_s(); -void c_un_s(float *source,float *target); -void c_eq_s(float *source,float *target); -void c_ueq_s(float *source,float *target); -void c_olt_s(float *source,float *target); -void c_ult_s(float *source,float *target); -void c_ole_s(float *source,float *target); -void c_ule_s(float *source,float *target); -void c_sf_s(float *source,float *target); -void c_ngle_s(float *source,float *target); -void c_seq_s(float *source,float *target); -void c_ngl_s(float *source,float *target); -void c_lt_s(float *source,float *target); -void c_nge_s(float *source,float *target); -void c_le_s(float *source,float *target); -void c_ngt_s(float *source,float *target); -void c_f_d(); -void c_un_d(double *source,double *target); -void c_eq_d(double *source,double *target); -void c_ueq_d(double *source,double *target); -void c_olt_d(double *source,double *target); -void c_ult_d(double *source,double *target); -void c_ole_d(double *source,double *target); -void c_ule_d(double *source,double *target); -void c_sf_d(double *source,double *target); -void c_ngle_d(double *source,double *target); -void c_seq_d(double *source,double *target); -void c_ngl_d(double *source,double *target); -void c_lt_d(double *source,double *target); -void c_nge_d(double *source,double *target); -void c_le_d(double *source,double *target); -void c_ngt_d(double *source,double *target); -void add_s(float *source1,float *source2,float *target); -void sub_s(float *source1,float *source2,float *target); -void mul_s(float *source1,float *source2,float *target); -void div_s(float *source1,float *source2,float *target); -void sqrt_s(float *source,float *target); -void abs_s(float *source,float *target); -void mov_s(float *source,float *target); -void neg_s(float *source,float *target); -void add_d(double *source1,double *source2,double *target); -void sub_d(double *source1,double *source2,double *target); -void mul_d(double *source1,double *source2,double *target); -void div_d(double *source1,double *source2,double *target); -void sqrt_d(double *source,double *target); -void abs_d(double *source,double *target); -void mov_d(double *source,double *target); -void neg_d(double *source,double *target); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 5120df0..f1034e6 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -147,11 +147,7 @@ struct ll_entry char shadow[1048576] __attribute__((aligned(16))); void *copy; int expirep; -#ifndef PCSX - u_int using_tlb; -#else static const u_int using_tlb=0; -#endif int new_dynarec_did_compile; int new_dynarec_hacks; u_int stop_after_jal; @@ -303,87 +299,21 @@ static int CLOCK_ADJUST(int x) return (x * cycle_multiplier + s * 50) / 100; } -static void tlb_hacks() -{ -#ifndef DISABLE_TLB - // Goldeneye hack - if (strncmp((char *) ROM_HEADER->nom, "GOLDENEYE",9) == 0) - { - u_int addr; - int n; - switch (ROM_HEADER->Country_code&0xFF) - { - case 0x45: // U - addr=0x34b30; - break; - case 0x4A: // J - addr=0x34b70; - break; - case 0x50: // E - addr=0x329f0; - break; - default: - // Unknown country code - addr=0; - break; - } - u_int rom_addr=(u_int)rom; - #ifdef ROM_COPY - // Since memory_map is 32-bit, on 64-bit systems the rom needs to be - // in the lower 4G of memory to use this hack. Copy it if necessary. - if((void *)rom>(void *)0xffffffff) { - munmap(ROM_COPY, 67108864); - if(mmap(ROM_COPY, 12582912, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0) <= 0) {printf("mmap() failed\n");} - memcpy(ROM_COPY,rom,12582912); - rom_addr=(u_int)ROM_COPY; - } - #endif - if(addr) { - for(n=0x7F000;n<0x80000;n++) { - memory_map[n]=(((u_int)(rom_addr+addr-0x7F000000))>>2)|0x40000000; - } - } - } -#endif -} - static u_int get_page(u_int vaddr) { -#ifndef PCSX - u_int page=(vaddr^0x80000000)>>12; -#else u_int page=vaddr&~0xe0000000; if (page < 0x1000000) page &= ~0x0e00000; // RAM mirrors page>>=12; -#endif -#ifndef DISABLE_TLB - if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12; -#endif if(page>2048) page=2048+(page&2047); return page; } -#ifndef PCSX -static u_int get_vpage(u_int vaddr) -{ - u_int vpage=(vaddr^0x80000000)>>12; -#ifndef DISABLE_TLB - if(vpage>262143&&tlb_LUT_r[vaddr>>12]) vpage&=2047; // jump_dirty uses a hash of the virtual address instead -#endif - if(vpage>2048) vpage=2048+(vpage&2047); - return vpage; -} -#else // no virtual mem in PCSX static u_int get_vpage(u_int vaddr) { return get_page(vaddr); } -#endif // Get address from virtual address // This is called from the recompiled JR/JALR instructions @@ -416,16 +346,7 @@ void *get_addr(u_int vaddr) //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; inv_code_start=inv_code_end=~0; -#ifndef DISABLE_TLB - memory_map[vaddr>>12]|=0x40000000; -#endif if(vpage<2048) { -#ifndef DISABLE_TLB - if(tlb_LUT_r[vaddr>>12]) { - invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0; - memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000; - } -#endif restore_candidate[vpage>>3]|=1<<(vpage&7); } else restore_candidate[page>>3]|=1<<(page&7); @@ -802,119 +723,6 @@ void alloc_all(struct regstat *cur,int i) } } -#ifndef FORCE32 -void div64(int64_t dividend,int64_t divisor) -{ - lo=dividend/divisor; - hi=dividend%divisor; - //printf("TRACE: ddiv %8x%8x %8x%8x\n" ,(int)reg[HIREG],(int)(reg[HIREG]>>32) - // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); -} -void divu64(uint64_t dividend,uint64_t divisor) -{ - lo=dividend/divisor; - hi=dividend%divisor; - //printf("TRACE: ddivu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32) - // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); -} - -void mult64(uint64_t m1,uint64_t m2) -{ - unsigned long long int op1, op2, op3, op4; - unsigned long long int result1, result2, result3, result4; - unsigned long long int temp1, temp2, temp3, temp4; - int sign = 0; - - if (m1 < 0) - { - op2 = -m1; - sign = 1 - sign; - } - else op2 = m1; - if (m2 < 0) - { - op4 = -m2; - sign = 1 - sign; - } - else op4 = m2; - - op1 = op2 & 0xFFFFFFFF; - op2 = (op2 >> 32) & 0xFFFFFFFF; - op3 = op4 & 0xFFFFFFFF; - op4 = (op4 >> 32) & 0xFFFFFFFF; - - temp1 = op1 * op3; - temp2 = (temp1 >> 32) + op1 * op4; - temp3 = op2 * op3; - temp4 = (temp3 >> 32) + op2 * op4; - - result1 = temp1 & 0xFFFFFFFF; - result2 = temp2 + (temp3 & 0xFFFFFFFF); - result3 = (result2 >> 32) + temp4; - result4 = (result3 >> 32); - - lo = result1 | (result2 << 32); - hi = (result3 & 0xFFFFFFFF) | (result4 << 32); - if (sign) - { - hi = ~hi; - if (!lo) hi++; - else lo = ~lo + 1; - } -} - -void multu64(uint64_t m1,uint64_t m2) -{ - unsigned long long int op1, op2, op3, op4; - unsigned long long int result1, result2, result3, result4; - unsigned long long int temp1, temp2, temp3, temp4; - - op1 = m1 & 0xFFFFFFFF; - op2 = (m1 >> 32) & 0xFFFFFFFF; - op3 = m2 & 0xFFFFFFFF; - op4 = (m2 >> 32) & 0xFFFFFFFF; - - temp1 = op1 * op3; - temp2 = (temp1 >> 32) + op1 * op4; - temp3 = op2 * op3; - temp4 = (temp3 >> 32) + op2 * op4; - - result1 = temp1 & 0xFFFFFFFF; - result2 = temp2 + (temp3 & 0xFFFFFFFF); - result3 = (result2 >> 32) + temp4; - result4 = (result3 >> 32); - - lo = result1 | (result2 << 32); - hi = (result3 & 0xFFFFFFFF) | (result4 << 32); - - //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32) - // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); -} - -uint64_t ldl_merge(uint64_t original,uint64_t loaded,u_int bits) -{ - if(bits) { - original<<=64-bits; - original>>=64-bits; - loaded<<=bits; - original|=loaded; - } - else original=loaded; - return original; -} -uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits) -{ - if(bits^56) { - original>>=64-(bits^56); - original<<=64-(bits^56); - loaded>>=bits^56; - original|=loaded; - } - else original=loaded; - return original; -} -#endif - #ifdef __i386__ #include "assem_x86.c" #endif @@ -1107,18 +915,6 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) // Don't trap writes invalid_code[block]=1; -#ifndef DISABLE_TLB - // If there is a valid TLB entry for this page, remove write protect - if(tlb_LUT_w[block]) { - assert(tlb_LUT_r[block]==tlb_LUT_w[block]); - // CHECK: Is this right? - memory_map[block]=((tlb_LUT_w[block]&0xFFFFF000)-(block<<12)+(unsigned int)rdram-0x80000000)>>2; - u_int real_block=tlb_LUT_w[block]>>12; - invalid_code[real_block]=1; - if(real_block>=0x80000&&real_block<0x80800) memory_map[real_block]=((u_int)rdram-0x80000000)>>2; - } - else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2; -#endif #ifdef USE_MINI_HT memset(mini_ht,-1,sizeof(mini_ht)); @@ -1147,14 +943,6 @@ void invalidate_block(u_int block) if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; } } -#ifndef DISABLE_TLB - if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { - if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { - if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; - if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; - } - } -#endif } head=head->next; } @@ -1163,7 +951,6 @@ void invalidate_block(u_int block) void invalidate_addr(u_int addr) { -#ifdef PCSX //static int rhits; // this check is done by the caller //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } @@ -1217,7 +1004,6 @@ void invalidate_addr(u_int addr) return; } } -#endif invalidate_block(addr>>12); } @@ -1239,19 +1025,6 @@ void invalidate_all_pages() #ifdef USE_MINI_HT memset(mini_ht,-1,sizeof(mini_ht)); #endif - #ifndef DISABLE_TLB - // TLB - for(page=0;page<0x100000;page++) { - if(tlb_LUT_r[page]) { - memory_map[page]=((tlb_LUT_r[page]&0xFFFFF000)-(page<<12)+(unsigned int)rdram-0x80000000)>>2; - if(!tlb_LUT_w[page]||!invalid_code[page]) - memory_map[page]|=0x40000000; // Write protect - } - else memory_map[page]=-1; - if(page==0x80000) page=0xC0000; - } - tlb_hacks(); - #endif } // Add an entry to jump_out after making a link @@ -1290,13 +1063,6 @@ void clean_blocks(u_int page) inv|=invalid_code[i]; } } -#ifndef DISABLE_TLB - if((signed int)head->vaddr>=(signed int)0xC0000000) { - u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2)); - //printf("addr=%x start=%x end=%x\n",addr,start,end); - if(addr=end) inv=1; - } -#endif else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) { inv=1; } @@ -1304,9 +1070,6 @@ void clean_blocks(u_int page) void * clean_addr=(void *)get_clean_addr((int)head->addr); if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { u_int ppage=page; -#ifndef DISABLE_TLB - if(page<2048&&tlb_LUT_r[head->vaddr>>12]) ppage=(tlb_LUT_r[head->vaddr>>12]^0x80000000)>>12; -#endif inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr); //printf("page=%x, addr=%x\n",page,head->vaddr); //assert(head->vaddr>>12==(page|0x80000)); @@ -2044,12 +1807,6 @@ void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32 if((dirty>>hr)&1) { if(regmap[hr]<64) { emit_storereg(r,hr); -#ifndef FORCE32 - if((is32>>regmap[hr])&1) { - emit_sarimm(hr,31,hr); - emit_storereg(r|64,hr); - } -#endif }else{ emit_storereg(r|64,hr); } @@ -2087,12 +1844,6 @@ void rlist() for(i=0;i<32;i++) printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]); printf("\n"); -#ifndef DISABLE_COP1 - printf("TRACE: "); - for(i=0;i<32;i++) - printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i])); - printf("\n"); -#endif } void enabletrace() @@ -2818,7 +2569,6 @@ void load_assemble(int i,struct regstat *i_regs) //printf("load_assemble: c=%d\n",c); //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset); // FIXME: Even if the load is a NOP, we should check for pagefaults... -#ifdef PCSX if(tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80) ||rt1[i]==0) { // could be FIFO, must perform the read @@ -2827,7 +2577,6 @@ void load_assemble(int i,struct regstat *i_regs) tl=get_reg(i_regs->regmap,-1); assert(tl>=0); } -#endif if(offset||s<0||c) addr=tl; else addr=s; //if(tl<0) tl=get_reg(i_regs->regmap,-1); @@ -3138,32 +2887,7 @@ void store_assemble(int i,struct regstat *i_regs) else addr=s; if(!using_tlb) { if(!c) { - #ifndef PCSX - #ifdef R29_HACK - // Strmnnrmn's speed hack - if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) - #endif - emit_cmpimm(addr,RAM_SIZE); - #ifdef DESTRUCTIVE_SHIFT - if(s==addr) emit_mov(s,temp); - #endif - #ifdef R29_HACK - memtarget=1; - if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) - #endif - { - jaddr=(int)out; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Hint to branch predictor that the branch is unlikely to be taken - if(rs1[i]>=28) - emit_jno_unlikely(0); - else - #endif - emit_jno(0); - } - #else - jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); - #endif + jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); } else if(ram_offset&&memtarget) { emit_addimm(addr,ram_offset,HOST_TEMPREG); @@ -3245,14 +2969,12 @@ void store_assemble(int i,struct regstat *i_regs) } type=STORED_STUB; } -#ifdef PCSX if(jaddr) { // PCSX store handlers don't check invcode again reglist|=1<waswritten&(1<regmap,FTEMP|64); - tl=get_reg(i_regs->regmap,FTEMP); - s=get_reg(i_regs->regmap,rs1[i]); - temp=get_reg(i_regs->regmap,agr); - if(temp<0) temp=get_reg(i_regs->regmap,-1); - offset=imm[i]; - assert(tl>=0); - assert(rs1[i]>0); - assert(temp>=0); - for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap[HOST_CCREG]==CCREG) reglist&=~(1<wasconst>>s)&1; - if(s>=0) c=(i_regs->wasconst>>s)&1; - // Check cop1 unusable - if(!cop1_usable) { - signed char rs=get_reg(i_regs->regmap,CSREG); - assert(rs>=0); - emit_testimm(rs,0x20000000); - jaddr=(int)out; - emit_jeq(0); - add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - if (opcode[i]==0x39) { // SWC1 (get float address) - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],tl); - } - if (opcode[i]==0x3D) { // SDC1 (get double address) - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],tl); - } - // Generate address + offset - if(!using_tlb) { - if(!c) - emit_cmpimm(offset||c||s<0?ar:s,RAM_SIZE); - } - else - { - map=get_reg(i_regs->regmap,TLREG); - assert(map>=0); - reglist&=~(1<>16)&0x1f],temp); - } - if (opcode[i]==0x35) { // LDC1 (get target address) - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],temp); - } - if(!using_tlb) { - if(!c) { - jaddr2=(int)out; - emit_jno(0); - } - else if(((signed int)(constmap[i][s]+offset))>=(signed int)0x80000000+RAM_SIZE) { - jaddr2=(int)out; - emit_jmp(0); // inline_readstub/inline_writestub? Very rare case - } - #ifdef DESTRUCTIVE_SHIFT - if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1 - if(!offset&&!c&&s>=0) emit_mov(s,ar); - } - #endif - }else{ - if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1 - do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr2); - } - if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1 - do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr2); - } - } - if (opcode[i]==0x31) { // LWC1 - //if(s>=0&&!c&&!offset) emit_mov(s,tl); - //gen_tlb_addr_r(ar,map); - //emit_readword_indexed((int)rdram-0x80000000,tl,tl); - #ifdef HOST_IMM_ADDR32 - if(c) emit_readword_tlb(constmap[i][s]+offset,map,tl); - else - #endif - emit_readword_indexed_tlb(0,offset||c||s<0?tl:s,map,tl); - type=LOADW_STUB; - } - if (opcode[i]==0x35) { // LDC1 - assert(th>=0); - //if(s>=0&&!c&&!offset) emit_mov(s,tl); - //gen_tlb_addr_r(ar,map); - //emit_readword_indexed((int)rdram-0x80000000,tl,th); - //emit_readword_indexed((int)rdram-0x7FFFFFFC,tl,tl); - #ifdef HOST_IMM_ADDR32 - if(c) emit_readdword_tlb(constmap[i][s]+offset,map,th,tl); - else - #endif - emit_readdword_indexed_tlb(0,offset||c||s<0?tl:s,map,th,tl); - type=LOADD_STUB; - } - if (opcode[i]==0x39) { // SWC1 - //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp); - emit_writeword_indexed_tlb(tl,0,offset||c||s<0?temp:s,map,temp); - type=STOREW_STUB; - } - if (opcode[i]==0x3D) { // SDC1 - assert(th>=0); - //emit_writeword_indexed(th,(int)rdram-0x80000000,temp); - //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp); - emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp); - type=STORED_STUB; - } - if(!using_tlb&&!(i_regs->waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,temp,1); - #else - emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1); - #endif - #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) - emit_callne(invalidate_addr_reg[temp]); - #else - jaddr3=(int)out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr3,(int)out,reglist|(1<regmap,CCREG)<0) - emit_loadreg(CCREG,HOST_CCREG); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - emit_call((int)memdebug); - emit_popa(); - }/**/ -#else cop1_unusable(i, i_regs); -#endif } void c2ls_assemble(int i,struct regstat *i_regs) @@ -4004,7 +3544,7 @@ int internal_branch(uint64_t i_is32,int addr) if(addr&1) return 0; // Indirect (register) jump if(addr>=start && addr>2; + //int t=(addr-start)>>2; // Delay slots are not valid branch targets //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0; // 64 -> 32 bit transition requires a recompile @@ -4014,11 +3554,7 @@ int internal_branch(uint64_t i_is32,int addr) else printf("optimizable: yes\n"); }*/ //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0; -#ifndef FORCE32 - if(requires_32bit[t]&~i_is32) return 0; - else -#endif - return 1; + return 1; } return 0; } @@ -4203,22 +3739,6 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) // printf("poor load scheduling!\n"); } else if(c) { -#ifndef DISABLE_TLB - if(rm>=0) { - if(!entry||entry[rm]!=mgr) { - if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a) { - // Stores to memory go thru the mapper to detect self-modifying - // code, loads don't. - if((unsigned int)(constmap[i][rs]+offset)>=0xC0000000 || - (unsigned int)(constmap[i][rs]+offset)<0x80000000+RAM_SIZE ) - generate_map_const(constmap[i][rs]+offset,rm); - }else{ - if((signed int)(constmap[i][rs]+offset)>=(signed int)0xC0000000) - generate_map_const(constmap[i][rs]+offset,rm); - } - } - } -#endif if(rs1[i]!=rt1[i]||itype[i]!=LOAD) { if(!entry||entry[ra]!=agr) { if (opcode[i]==0x22||opcode[i]==0x26) { @@ -4248,32 +3768,6 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) // Preload constants for next instruction if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) { int agr,ra; - #if !defined(HOST_IMM_ADDR32) && !defined(DISABLE_TLB) - // Mapper entry - agr=MGEN1+((i+1)&1); - ra=get_reg(i_regs->regmap,agr); - if(ra>=0) { - int rs=get_reg(regs[i+1].regmap,rs1[i+1]); - int offset=imm[i+1]; - int c=(regs[i+1].wasconst>>rs)&1; - if(c) { - if(itype[i+1]==STORE||itype[i+1]==STORELR - ||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1, SWC2/SDC2 - // Stores to memory go thru the mapper to detect self-modifying - // code, loads don't. - if((unsigned int)(constmap[i+1][rs]+offset)>=0xC0000000 || - (unsigned int)(constmap[i+1][rs]+offset)<0x80000000+RAM_SIZE ) - generate_map_const(constmap[i+1][rs]+offset,ra); - }else{ - if((signed int)(constmap[i+1][rs]+offset)>=(signed int)0xC0000000) - generate_map_const(constmap[i+1][rs]+offset,ra); - } - } - /*else if(rs1[i]==0) { - generate_map_const(offset,ra); - }*/ - } - #endif // Actual address agr=AGEN1+((i+1)&1); ra=get_reg(i_regs->regmap,agr); @@ -4490,17 +3984,6 @@ void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty) if((i_dirty>>hr)&1) { if(i_regmap[hr]<64) { emit_storereg(i_regmap[hr],hr); -#ifndef FORCE32 - if( ((i_is32>>i_regmap[hr])&1) ) { - #ifdef DESTRUCTIVE_WRITEBACK - emit_sarimm(hr,31,hr); - emit_storereg(i_regmap[hr]|64,hr); - #else - emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(i_regmap[hr]|64,HOST_TEMPREG); - #endif - } -#endif }else{ if( !((i_is32>>(i_regmap[hr]&63))&1) ) { emit_storereg(i_regmap[hr],hr); @@ -4526,17 +4009,6 @@ void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,in if((i_dirty>>hr)&1) { if(i_regmap[hr]<64) { emit_storereg(i_regmap[hr],hr); -#ifndef FORCE32 - if( ((i_is32>>i_regmap[hr])&1) ) { - #ifdef DESTRUCTIVE_WRITEBACK - emit_sarimm(hr,31,hr); - emit_storereg(i_regmap[hr]|64,hr); - #else - emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(i_regmap[hr]|64,HOST_TEMPREG); - #endif - } -#endif }else{ if( !((i_is32>>(i_regmap[hr]&63))&1) ) { emit_storereg(i_regmap[hr],hr); @@ -4792,9 +4264,6 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) } } //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0; -#ifndef FORCE32 - if(requires_32bit[t]&~i_is32) return 0; -#endif // Delay slots are not valid branch targets //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0; // Delay slots require additional processing, so do not match @@ -5416,13 +4885,11 @@ void rjump_assemble(int i,struct regstat *i_regs) //assert(adj==0); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0); -#ifdef PCSX if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10) // special case for RFE emit_jmp(0); else -#endif - emit_jns(0); + emit_jns(0); //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT if(rs1[i]==31) { @@ -7026,389 +6493,35 @@ void unneeded_registers(int istart,int iend,int r) } printf("\n");*/ } -#ifdef FORCE32 for (i=iend;i>=istart;i--) { unneeded_reg_upper[i]=branch_unneeded_reg_upper[i]=-1LL; } -#endif } -// Identify registers which are likely to contain 32-bit values -// This is used to predict whether any branches will jump to a -// location with 64-bit values in registers. -static void provisional_32bit() +// Write back dirty registers as soon as we will no longer modify them, +// so that we don't end up with lots of writes at the branches. +void clean_registers(int istart,int iend,int wr) { - int i,j; - uint64_t is32=1; - uint64_t lastbranch=1; - - for(i=0;i=istart;i--) { - if(i>0) { - if(itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP) { - if(i>1) is32=lastbranch; - else is32=1; - } - } - if(i>1) + if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) { - if(itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP) { - if(likely[i-2]) { - if(i>2) is32=lastbranch; - else is32=1; - } - } - if((opcode[i-2]&0x2f)==0x05) // BNE/BNEL + if(ba[i]=(start+slen*4)) { - if(rs1[i-2]==0||rs2[i-2]==0) - { - if(rs1[i-2]) { - is32|=1LL<=0;j--) - { - if(ba[j]==start+i*4) - //temp_is32&=branch_regs[j].is32; - temp_is32&=p32[j]; - } - for(j=i;j>s1)&1LL)<>s1)&1LL); - is32&=~(1LL<=0x20&&op2<=0x23) { // ADD/ADDU/SUB/SUBU - is32|=1LL<=0x24&&op2<=0x27) { // AND/OR/XOR/NOR - uint64_t sr=((is32>>s1)&(is32>>s2)&1LL); - is32&=~(1LL<=0x2c&&op2<=0x2d) { // DADD/DADDU - if(s1==0&&s2==0) { - is32|=1LL<>s1)&1LL); - is32&=~(1LL<>s2)&1LL); - is32&=~(1LL<=0x2e&&op2<=0x2f) { // DSUB/DSUBU - if(s1==0&&s2==0) { - is32|=1LL<>s1)&1LL); - is32&=~(1LL<=0x1c&&op2<=0x1f) { // DMULT/DMULTU/DDIV/DDIVU - is32&=~((1LL<>s1)&1LL); - is32&=~(1LL<=0x14&&op2<=0x17) is32&=~(1LL<=0x38&&op2<0x3f) is32&=~(1LL<0) - { - if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000) - { - if(rt1[i-1]==31) // JAL/JALR - { - // Subroutine call will return here, don't alloc any registers - is32=1; - } - else if(i+1=0;i--) - { - int hr; - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) - { - if(ba[i]=(start+slen*4)) - { - // Branch out of this block, don't need anything - r32=0; - } - else - { - // Internal branch - // Need whatever matches the target - // (and doesn't get overwritten by the delay slot instruction) - r32=0; - int t=(ba[i]-start)>>2; - if(ba[i]>start+i*4) { - // Forward branch - //if(!(requires_32bit[t]&~regs[i].was32)) - // r32|=requires_32bit[t]&(~(1LL<>16)!=0x1000) - { - if(i0) - { - if((regs[i].was32>>us1[i+1])&1) r32|=1LL<0) - { - if((regs[i].was32>>us2[i+1])&1) r32|=1LL<>dep1[i+1])&1)) - { - if((regs[i].was32>>dep1[i+1])&1) r32|=1LL<>dep2[i+1])&1)) - { - if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<0) - { - if((regs[i].was32>>us1[i])&1) r32|=1LL<0) - { - if((regs[i].was32>>us2[i])&1) r32|=1LL<>dep1[i])&1)) - { - if((regs[i].was32>>dep1[i])&1) r32|=1LL<>dep2[i])&1)) - { - if((regs[i].was32>>dep2[i])&1) r32|=1LL<0&®s[i].regmap_entry[hr]<64) { - if((regs[i].was32>>regs[i].regmap_entry[hr])&(regs[i].wasdirty>>hr)&1) { - if(!((unneeded_reg_upper[i]>>regs[i].regmap_entry[hr])&1)) - pr32[i]|=1LL<=istart;i--) - { - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) - { - if(ba[i]=(start+slen*4)) - { - // Branch out of this block, flush all regs - if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) + // Branch out of this block, flush all regs + if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) { // Unconditional branch will_dirty_i=0; @@ -7953,15 +7066,6 @@ void new_dynarec_clear_full() stop_after_jal=0; inv_code_start=inv_code_end=~0; // TLB -#ifndef DISABLE_TLB - using_tlb=0; - for(n=0;n<524288;n++) // 0 .. 0x7FFFFFFF - memory_map[n]=-1; - for(n=524288;n<526336;n++) // 0x80000000 .. 0x807FFFFF - memory_map[n]=((u_int)rdram-0x80000000)>>2; - for(n=526336;n<1048576;n++) // 0x80800000 .. 0xFFFFFFFF - memory_map[n]=-1; -#endif for(n=0;n<4096;n++) ll_clear(jump_in+n); for(n=0;n<4096;n++) ll_clear(jump_out+n); for(n=0;n<4096;n++) ll_clear(jump_dirty+n); @@ -7982,12 +7086,6 @@ void new_dynarec_init() // not all systems allow execute in data segment by default if (mprotect(out, 1<>21)&0x1f; //if (op2 & 0x10) { @@ -8550,7 +7605,6 @@ int new_recompile_block(int addr) case 0x32: strcpy(insn[i],"LWC2"); type=C2LS; break; case 0x3A: strcpy(insn[i],"SWC2"); type=C2LS; break; case 0x3B: strcpy(insn[i],"HLECALL"); type=HLECALL; break; -#endif default: strcpy(insn[i],"???"); type=NI; SysPrintf("NI %08x @%08x (%08x)\n", source[i], addr + i*4, addr); break; @@ -8821,7 +7875,6 @@ int new_recompile_block(int addr) else if(type==CJUMP||type==SJUMP||type==FJUMP) ba[i]=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14); else ba[i]=-1; -#ifdef PCSX if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) { int do_in_intrp=0; // branch in delay slot? @@ -8855,7 +7908,6 @@ int new_recompile_block(int addr) i--; // don't compile the DS } } -#endif /* Is this the end of the block? */ if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(rt1[i-1]==0) { // Continue past subroutine call (JAL) @@ -8921,9 +7973,6 @@ int new_recompile_block(int addr) int cc=0; int hr=-1; -#ifndef FORCE32 - provisional_32bit(); -#endif if((u_int)addr&1) { // First instruction is delay slot cc=-1; @@ -8966,126 +8015,13 @@ int new_recompile_block(int addr) } } } -#ifndef FORCE32 - // If something jumps here with 64-bit values - // then promote those registers to 64 bits - if(bt[i]) - { - uint64_t temp_is32=current.is32; - for(j=i-1;j>=0;j--) - { - if(ba[j]==start+i*4) - temp_is32&=branch_regs[j].is32; - } - for(j=i;j0&&r<64) - { - if((current.dirty>>hr)&((current.is32&~temp_is32)>>r)&1) { - temp_is32|=1LL<=0;j--) - { - if(ba[j]==start+i*4+4) - temp_is32&=branch_regs[j].is32; - } - for(j=i;j0) - { - if((current.dirty>>hr)&((current.is32&~temp_is32)>>(r&63))&1) { - if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) - { - if(rs1[i]!=(r&63)&&rs2[i]!=(r&63)) - { - //printf("dump %d/r%d\n",hr,r); - current.regmap[hr]=-1; - if(get_reg(current.regmap,r|64)>=0) - current.regmap[get_reg(current.regmap,r|64)]=-1; - } - } - } - } - } - } - } - else if(i>16)!=0x1000&&(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)) - { - uint64_t temp_is32=current.is32; - for(j=i-1;j>=0;j--) - { - if(ba[j]==start+i*4+8) - temp_is32&=branch_regs[j].is32; - } - for(j=i;j0) - { - if((current.dirty>>hr)&((current.is32&~temp_is32)>>(r&63))&1) { - if(rs1[i]!=(r&63)&&rs2[i]!=(r&63)&&rs1[i+1]!=(r&63)&&rs2[i+1]!=(r&63)) - { - //printf("dump %d/r%d\n",hr,r); - current.regmap[hr]=-1; - if(get_reg(current.regmap,r|64)>=0) - current.regmap[get_reg(current.regmap,r|64)]=-1; - } - } - } - } - } - } - #endif if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) { if(i+12) { // GTE runs in parallel until accessed, divide by 2 for a rough guess @@ -10927,121 +9863,6 @@ int new_recompile_block(int addr) clean_registers(0,slen-1,1); /* Pass 7 - Identify 32-bit registers */ -#ifndef FORCE32 - provisional_r32(); - - u_int r32=0; - - for (i=slen-1;i>=0;i--) - { - int hr; - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) - { - if(ba[i]=(start+slen*4)) - { - // Branch out of this block, don't need anything - r32=0; - } - else - { - // Internal branch - // Need whatever matches the target - // (and doesn't get overwritten by the delay slot instruction) - r32=0; - int t=(ba[i]-start)>>2; - if(ba[i]>start+i*4) { - // Forward branch - if(!(requires_32bit[t]&~regs[i].was32)) - r32|=requires_32bit[t]&(~(1LL<>16)!=0x1000) - { - if(i0) - { - if((regs[i].was32>>us1[i+1])&1) r32|=1LL<0) - { - if((regs[i].was32>>us2[i+1])&1) r32|=1LL<>dep1[i+1])&1)) - { - if((regs[i].was32>>dep1[i+1])&1) r32|=1LL<>dep2[i+1])&1)) - { - if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<0) - { - if((regs[i].was32>>us1[i])&1) r32|=1LL<0) - { - if((regs[i].was32>>us2[i])&1) r32|=1LL<>dep1[i])&1)) - { - if((regs[i].was32>>dep1[i])&1) r32|=1LL<>dep2[i])&1)) - { - if((regs[i].was32>>dep2[i])&1) r32|=1LL<0&®s[i].regmap_entry[hr]<64) { - if((regs[i].was32>>regs[i].regmap_entry[hr])&(regs[i].wasdirty>>hr)&1) { - if(!((unneeded_reg_upper[i]>>regs[i].regmap_entry[hr])&1)) - requires_32bit[i]|=1LL<=0;i--) { if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) @@ -11054,7 +9875,6 @@ int new_recompile_block(int addr) } } } -#endif if(itype[slen-1]==SPAN) { bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception @@ -11073,26 +9893,6 @@ int new_recompile_block(int addr) else printf(" r%d",r); } } -#ifndef FORCE32 - printf(" UU:"); - for(r=1;r<=CCREG;r++) { - if(((unneeded_reg_upper[i]&~unneeded_reg[i])>>r)&1) { - if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - printf(" 32:"); - for(r=0;r<=CCREG;r++) { - //if(((is32[i]>>r)&(~unneeded_reg[i]>>r))&1) { - if((regs[i].was32>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } -#endif printf("\n"); #if defined(__i386__) || defined(__x86_64__) printf("pre: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7]); @@ -11214,18 +10014,6 @@ int new_recompile_block(int addr) #endif printf("\n"); } -#ifndef FORCE32 - printf(" 32:"); - for(r=0;r<=CCREG;r++) { - if((regs[i].is32>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - printf("\n"); -#endif /*printf(" p32:"); for(r=0;r<=CCREG;r++) { if((p32[i]>>r)&1) { @@ -11263,18 +10051,6 @@ int new_recompile_block(int addr) if((branch_regs[i].dirty>>10)&1) printf("r10 "); if((branch_regs[i].dirty>>12)&1) printf("r12 "); #endif -#ifndef FORCE32 - printf(" 32:"); - for(r=0;r<=CCREG;r++) { - if((branch_regs[i].is32>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - printf("\n"); -#endif } } #endif // DISASM @@ -11292,7 +10068,6 @@ int new_recompile_block(int addr) } u_int instr_addr0_override=0; -#ifdef PCSX if (start == 0x80030000) { // nasty hack for fastbios thing // override block entry to this code @@ -11306,7 +10081,6 @@ int new_recompile_block(int addr) emit_cmp(0,1); emit_jne((int)new_dyna_leave); } -#endif for(i=0;i>16)!=0x1000)) { - wb_sx(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,is32_pre,regs[i].was32, - unneeded_reg[i],unneeded_reg_upper[i]); wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,is32_pre, unneeded_reg[i],unneeded_reg_upper[i]); } @@ -11595,26 +10367,15 @@ int new_recompile_block(int addr) // Trap writes to any of the pages we compiled for(i=start>>12;i<=(start+slen*4)>>12;i++) { invalid_code[i]=0; -#ifndef DISABLE_TLB - memory_map[i]|=0x40000000; - if((signed int)start>=(signed int)0xC0000000) { - assert(using_tlb); - j=(((u_int)i<<12)+(memory_map[i]<<2)-(u_int)rdram+(u_int)0x80000000)>>12; - invalid_code[j]=0; - memory_map[j]|=0x40000000; - //printf("write protect physical page: %x (virtual %x)\n",j<<12,start); - } -#endif } inv_code_start=inv_code_end=~0; -#ifdef PCSX + // for PCSX we need to mark all mirrors too if(get_page(start)<(RAM_SIZE>>12)) for(i=start>>12;i<=(start+slen*4)>>12;i++) invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; -#endif /* Pass 10 - Free memory by expiring oldest blocks */ -- cgit v1.2.3 From 1edfcc68047e356a9c57c4734cc3bbe084922ce7 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 18 Sep 2016 20:04:25 +0300 Subject: drc: drop less obvious dead code --- libpcsxcore/new_dynarec/assem_arm.c | 436 ++-------------------------------- libpcsxcore/new_dynarec/new_dynarec.c | 290 ++++------------------ 2 files changed, 70 insertions(+), 656 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index c2f65ee..20a6956 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -45,9 +45,7 @@ extern int pending_exception; extern int branch_target; extern uint64_t readmem_dword; extern void *dynarec_local; -extern u_int memory_map[1048576]; extern u_int mini_ht[32][2]; -extern u_int rounding_modes[4]; void indirect_jump_indexed(); void indirect_jump(); @@ -814,26 +812,6 @@ char regname[16][4] = { "lr", "pc"}; -void output_byte(u_char byte) -{ - *(out++)=byte; -} -void output_modrm(u_char mod,u_char rm,u_char ext) -{ - assert(mod<4); - assert(rm<8); - assert(ext<8); - u_char byte=(mod<<6)|(ext<<3)|rm; - *(out++)=byte; -} -void output_sib(u_char scale,u_char index,u_char base) -{ - assert(scale<4); - assert(index<8); - assert(base<8); - u_char byte=(scale<<6)|(index<<3)|base; - *(out++)=byte; -} void output_w32(u_int word) { *((u_int *)out)=word; @@ -1207,29 +1185,7 @@ void emit_adcimm(u_int rs,int imm,u_int rt) assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm); output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); } -/*void emit_sbcimm(int imm,u_int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval); -}*/ -void emit_sbbimm(int imm,u_int rt) -{ - assem_debug("sbb $%d,%%%s\n",imm,regname[rt]); - assert(rt<8); - if(imm<128&&imm>=-128) { - output_byte(0x83); - output_modrm(3,rt,3); - output_byte(imm); - } - else - { - output_byte(0x81); - output_modrm(3,rt,3); - output_w32(imm); - } -} + void emit_rscimm(int rs,int imm,u_int rt) { assert(0); @@ -1248,13 +1204,6 @@ void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) emit_adcimm(rsh,0,rth); } -void emit_sbb(int rs1,int rs2) -{ - assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]); - output_byte(0x19); - output_modrm(3,rs1,rs2); -} - void emit_andimm(int rs,int imm,int rt) { u_int armval; @@ -1445,32 +1394,7 @@ void emit_sar(u_int rs,u_int shift,u_int rt) assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); } -void emit_shlcl(int r) -{ - assem_debug("shl %%%s,%%cl\n",regname[r]); - assert(0); -} -void emit_shrcl(int r) -{ - assem_debug("shr %%%s,%%cl\n",regname[r]); - assert(0); -} -void emit_sarcl(int r) -{ - assem_debug("sar %%%s,%%cl\n",regname[r]); - assert(0); -} -void emit_shldcl(int r1,int r2) -{ - assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]); - assert(0); -} -void emit_shrdcl(int r1,int r2) -{ - assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]); - assert(0); -} void emit_orrshl(u_int rs,u_int shift,u_int rt) { assert(rs<16); @@ -1510,21 +1434,6 @@ void emit_cmpimm(int rs,int imm) } } -void emit_cmovne(u_int *addr,int rt) -{ - assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]); - assert(0); -} -void emit_cmovl(u_int *addr,int rt) -{ - assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]); - assert(0); -} -void emit_cmovs(u_int *addr,int rt) -{ - assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]); - assert(0); -} void emit_cmovne_imm(int imm,int rt) { assem_debug("movne %s,#%d\n",regname[rt],imm); @@ -1758,31 +1667,6 @@ void emit_jcc(int a) output_w32(0x3a000000|offset); } -void emit_pushimm(int imm) -{ - assem_debug("push $%x\n",imm); - assert(0); -} -void emit_pusha() -{ - assem_debug("pusha\n"); - assert(0); -} -void emit_popa() -{ - assem_debug("popa\n"); - assert(0); -} -void emit_pushreg(u_int r) -{ - assem_debug("push %%%s\n",regname[r]); - assert(0); -} -void emit_popreg(u_int r) -{ - assem_debug("pop %%%s\n",regname[r]); - assert(0); -} void emit_callreg(u_int r) { assert(r<15); @@ -1973,17 +1857,7 @@ void emit_movzwl(int addr, int rt) assem_debug("ldrh %s,fp+%d\n",regname[rt],offset); output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_movzwl_reg(int rs, int rt) -{ - assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]); - assert(0); -} -void emit_xchg(int rs, int rt) -{ - assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]); - assert(0); -} void emit_writeword_indexed(int rt, int offset, int rs) { assert(offset>-4096&&offset<4096); @@ -2096,27 +1970,7 @@ void emit_writebyte(int rt, int addr) assem_debug("strb %s,fp+%d\n",regname[rt],offset); output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); } -void emit_writeword_imm(int imm, int addr) -{ - assem_debug("movl $%x,%x\n",imm,addr); - assert(0); -} -void emit_writebyte_imm(int imm, int addr) -{ - assem_debug("movb $%x,%x\n",imm,addr); - assert(0); -} -void emit_mul(int rs) -{ - assem_debug("mul %%%s\n",regname[rs]); - assert(0); -} -void emit_imul(int rs) -{ - assem_debug("imul %%%s\n",regname[rs]); - assert(0); -} void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) { assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); @@ -2136,22 +1990,6 @@ void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); } -void emit_div(int rs) -{ - assem_debug("div %%%s\n",regname[rs]); - assert(0); -} -void emit_idiv(int rs) -{ - assem_debug("idiv %%%s\n",regname[rs]); - assert(0); -} -void emit_cdq() -{ - assem_debug("cdq\n"); - assert(0); -} - void emit_clz(int rs,int rt) { assem_debug("clz %s,%s\n",regname[rt],regname[rs]); @@ -2304,12 +2142,6 @@ void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) } } -// special case for checking invalid_code -void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm) -{ - assert(0); -} - // special case for checking invalid_code void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) { @@ -2320,13 +2152,6 @@ void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) emit_cmpimm(HOST_TEMPREG,imm); } -// special case for tlb mapping -void emit_addsr12(int rs1,int rs2,int rt) -{ - assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2)); -} - void emit_callne(int a) { assem_debug("blne %x\n",a); @@ -2335,14 +2160,6 @@ void emit_callne(int a) } // Used to preload hash table entries -void emit_prefetch(void *addr) -{ - assem_debug("prefetch %x\n",(int)addr); - output_byte(0x0F); - output_byte(0x18); - output_modrm(0,5,1); - output_w32((int)addr); -} void emit_prefetchreg(int r) { assem_debug("pld %s\n",regname[r]); @@ -2357,168 +2174,6 @@ void emit_ldreq_indexed(int rs, u_int offset, int rt) output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset); } -void emit_flds(int r,int sr) -{ - assem_debug("flds s%d,[%s]\n",sr,regname[r]); - output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16)); -} - -void emit_vldr(int r,int vr) -{ - assem_debug("vldr d%d,[%s]\n",vr,regname[r]); - output_w32(0xed900b00|(vr<<12)|(r<<16)); -} - -void emit_fsts(int sr,int r) -{ - assem_debug("fsts s%d,[%s]\n",sr,regname[r]); - output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16)); -} - -void emit_vstr(int vr,int r) -{ - assem_debug("vstr d%d,[%s]\n",vr,regname[r]); - output_w32(0xed800b00|(vr<<12)|(r<<16)); -} - -void emit_ftosizs(int s,int d) -{ - assem_debug("ftosizs s%d,s%d\n",d,s); - output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_ftosizd(int s,int d) -{ - assem_debug("ftosizd s%d,d%d\n",d,s); - output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7)); -} - -void emit_fsitos(int s,int d) -{ - assem_debug("fsitos s%d,s%d\n",d,s); - output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fsitod(int s,int d) -{ - assem_debug("fsitod d%d,s%d\n",d,s); - output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fcvtds(int s,int d) -{ - assem_debug("fcvtds d%d,s%d\n",d,s); - output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fcvtsd(int s,int d) -{ - assem_debug("fcvtsd s%d,d%d\n",d,s); - output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7)); -} - -void emit_fsqrts(int s,int d) -{ - assem_debug("fsqrts d%d,s%d\n",d,s); - output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fsqrtd(int s,int d) -{ - assem_debug("fsqrtd s%d,d%d\n",d,s); - output_w32(0xeeb10bc0|((d&7)<<12)|(s&7)); -} - -void emit_fabss(int s,int d) -{ - assem_debug("fabss d%d,s%d\n",d,s); - output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fabsd(int s,int d) -{ - assem_debug("fabsd s%d,d%d\n",d,s); - output_w32(0xeeb00bc0|((d&7)<<12)|(s&7)); -} - -void emit_fnegs(int s,int d) -{ - assem_debug("fnegs d%d,s%d\n",d,s); - output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fnegd(int s,int d) -{ - assem_debug("fnegd s%d,d%d\n",d,s); - output_w32(0xeeb10b40|((d&7)<<12)|(s&7)); -} - -void emit_fadds(int s1,int s2,int d) -{ - assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2); - output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5)); -} - -void emit_faddd(int s1,int s2,int d) -{ - assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2); - output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7)); -} - -void emit_fsubs(int s1,int s2,int d) -{ - assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2); - output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5)); -} - -void emit_fsubd(int s1,int s2,int d) -{ - assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2); - output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7)); -} - -void emit_fmuls(int s1,int s2,int d) -{ - assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2); - output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5)); -} - -void emit_fmuld(int s1,int s2,int d) -{ - assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2); - output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7)); -} - -void emit_fdivs(int s1,int s2,int d) -{ - assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2); - output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5)); -} - -void emit_fdivd(int s1,int s2,int d) -{ - assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2); - output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7)); -} - -void emit_fcmps(int x,int y) -{ - assem_debug("fcmps s14, s15\n"); - output_w32(0xeeb47a67); -} - -void emit_fcmpd(int x,int y) -{ - assem_debug("fcmpd d6, d7\n"); - output_w32(0xeeb46b47); -} - -void emit_fmstat() -{ - assem_debug("fmstat\n"); - output_w32(0xeef1fa10); -} - void emit_bicne_imm(int rs,int imm,int rt) { u_int armval; @@ -2625,28 +2280,6 @@ static void restore_regs(u_int reglist) restore_regs_all(reglist); } -// Write back consts using r14 so we don't disturb the other registers -void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i) -{ - int hr; - for(hr=0;hr=0&&((i_dirty>>hr)&1)) { - if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) { - if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) { - int value=constmap[i][hr]; - if(value==0) { - emit_zeroreg(HOST_TEMPREG); - } - else { - emit_movimm(value,HOST_TEMPREG); - } - emit_storereg(i_regmap[hr],HOST_TEMPREG); - } - } - } - } -} - /* Stubs/epilogue */ void literal_pool(int n) @@ -3282,13 +2915,6 @@ do_cop1stub(int n) emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception); } -static int do_tlb_r(int a, ...) { return 0; } -static int do_tlb_r_branch(int a, ...) { return 0; } -static int gen_tlb_addr_r(int a, ...) { return 0; } -static int do_tlb_w(int a, ...) { return 0; } -static int do_tlb_w_branch(int a, ...) { return 0; } -static int gen_tlb_addr_w(int a, ...) { return 0; } - /* Special assem */ void shift_assemble_arm(int i,struct regstat *i_regs) @@ -3609,52 +3235,30 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) c=(i_regs->wasconst>>s)&1; if(c) { memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; } } - if(!using_tlb) { - if(!c) { - #ifdef RAM_OFFSET - map=get_reg(i_regs->regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - #endif - emit_shlimm(addr,3,temp); - if (opcode[i]==0x22||opcode[i]==0x26) { - emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR - }else{ - emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR - } - jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); + if(!c) { + #ifdef RAM_OFFSET + map=get_reg(i_regs->regmap,ROREG); + if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); + #endif + emit_shlimm(addr,3,temp); + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR + }else{ + emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR } - else { - if(ram_offset&&memtarget) { - emit_addimm(temp2,ram_offset,HOST_TEMPREG); - fastload_reg_override=HOST_TEMPREG; - } - if (opcode[i]==0x22||opcode[i]==0x26) { - emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR - }else{ - emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR - } + jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); + } + else { + if(ram_offset&&memtarget) { + emit_addimm(temp2,ram_offset,HOST_TEMPREG); + fastload_reg_override=HOST_TEMPREG; } - }else{ // using tlb - int a; - if(c) { - a=-1; - }else if (opcode[i]==0x22||opcode[i]==0x26) { - a=0xFFFFFFFC; // LWL/LWR + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR }else{ - a=0xFFFFFFF8; // LDL/LDR - } - map=get_reg(i_regs->regmap,TLREG); - assert(map>=0); - reglist&=~(1<is32|=1LL<=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU @@ -2563,7 +2521,6 @@ void load_assemble(int i,struct regstat *i_regs) c=(i_regs->wasconst>>s)&1; if (c) { memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; } } //printf("load_assemble: c=%d\n",c); @@ -2586,34 +2543,22 @@ void load_assemble(int i,struct regstat *i_regs) assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O reglist&=~(1<=0) reglist&=~(1<regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - #endif -//#define R29_HACK 1 - #ifdef R29_HACK - // Strmnnrmn's speed hack - if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) - #endif - { - jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); - } + if(!c) { + #ifdef RAM_OFFSET + map=get_reg(i_regs->regmap,ROREG); + if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); + #endif + #ifdef R29_HACK + // Strmnnrmn's speed hack + if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) + #endif + { + jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); } - else if(ram_offset&&memtarget) { - emit_addimm(addr,ram_offset,HOST_TEMPREG); - fastload_reg_override=HOST_TEMPREG; - } - }else{ // using tlb - int x=0; - if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU - if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU - map=get_reg(i_regs->regmap,TLREG); - assert(map>=0); - reglist&=~(1<regmap,rt1[i])); // ignore loads to r0 and unneeded reg if (opcode[i]==0x20) { // LB @@ -2626,7 +2571,6 @@ void load_assemble(int i,struct regstat *i_regs) #endif { //emit_xorimm(addr,3,tl); - //gen_tlb_addr_r(tl,map); //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl); int x=0,a=tl; #ifdef BIG_ENDIAN_MIPS @@ -2667,7 +2611,6 @@ void load_assemble(int i,struct regstat *i_regs) //emit_movswl_indexed_tlb(x,tl,map,tl); //else if(map>=0) { - gen_tlb_addr_r(a,map); emit_movswl_indexed(x,a,tl); }else{ #if 1 //def RAM_OFFSET @@ -2713,7 +2656,6 @@ void load_assemble(int i,struct regstat *i_regs) #endif { //emit_xorimm(addr,3,tl); - //gen_tlb_addr_r(tl,map); //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl); int x=0,a=tl; #ifdef BIG_ENDIAN_MIPS @@ -2754,7 +2696,6 @@ void load_assemble(int i,struct regstat *i_regs) //emit_movzwl_indexed_tlb(x,tl,map,tl); //#else if(map>=0) { - gen_tlb_addr_r(a,map); emit_movzwl_indexed(x,a,tl); }else{ #if 1 //def RAM_OFFSET @@ -2798,7 +2739,6 @@ void load_assemble(int i,struct regstat *i_regs) if(!dummy) { int a=addr; if(fastload_reg_override) a=fastload_reg_override; - //gen_tlb_addr_r(tl,map); //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th); //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl); #ifdef HOST_IMM_ADDR32 @@ -2874,7 +2814,6 @@ void store_assemble(int i,struct regstat *i_regs) c=(i_regs->wasconst>>s)&1; if(c) { memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; } } assert(tl>=0); @@ -2885,23 +2824,12 @@ void store_assemble(int i,struct regstat *i_regs) if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<regmap,TLREG); - assert(map>=0); - reglist&=~(1<=0) { - gen_tlb_addr_w(a,map); emit_writehword_indexed(tl,x,a); }else //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a); @@ -2975,7 +2901,7 @@ void store_assemble(int i,struct regstat *i_regs) add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); jaddr=0; } - if(!using_tlb&&!(i_regs->waswritten&(1<waswritten&(1<isconst>>s)&1; if(c) { memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; } } assert(tl>=0); @@ -3086,41 +3011,26 @@ void storelr_assemble(int i,struct regstat *i_regs) if(i_regs->regmap[hr]>=0) reglist|=1<=0); - if(!using_tlb) { - if(!c) { - emit_cmpimm(s<0||offset?temp:s,RAM_SIZE); - if(!offset&&s!=temp) emit_mov(s,temp); - jaddr=(int)out; - emit_jno(0); - } - else - { - if(!memtarget||!rs1[i]) { - jaddr=(int)out; - emit_jmp(0); - } - } - #ifdef RAM_OFFSET - int map=get_reg(i_regs->regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - gen_tlb_addr_w(temp,map); - #else - if((u_int)rdram!=0x80000000) - emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); - #endif - }else{ // using tlb - int map=get_reg(i_regs->regmap,TLREG); - assert(map>=0); - reglist&=~(1<=0) emit_mov(s,temp); - do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr); - if(!jaddr&&!memtarget) { + if(!c) { + emit_cmpimm(s<0||offset?temp:s,RAM_SIZE); + if(!offset&&s!=temp) emit_mov(s,temp); + jaddr=(int)out; + emit_jno(0); + } + else + { + if(!memtarget||!rs1[i]) { jaddr=(int)out; emit_jmp(0); } - gen_tlb_addr_w(temp,map); } + #ifdef RAM_OFFSET + int map=get_reg(i_regs->regmap,ROREG); + if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); + #else + if((u_int)rdram!=0x80000000) + emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); + #endif if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR temp2=get_reg(i_regs->regmap,FTEMP); @@ -3263,7 +3173,7 @@ void storelr_assemble(int i,struct regstat *i_regs) } if(!c||!memtarget) add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist); - if(!using_tlb&&!(i_regs->waswritten&(1<waswritten&(1<regmap,ROREG); if(map<0) map=HOST_TEMPREG; @@ -3322,7 +3232,6 @@ void c2ls_assemble(int i,struct regstat *i_regs) offset=imm[i]; assert(rs1[i]>0); assert(tl>=0); - assert(!using_tlb); for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap,rt1[i]); if(ra<0) ra=get_reg(i_regs->regmap,-1); @@ -3711,17 +3619,11 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } int rs=get_reg(i_regs->regmap,rs1[i]); - int rm=get_reg(i_regs->regmap,TLREG); if(ra>=0) { int offset=imm[i]; int c=(i_regs->wasconst>>rs)&1; if(rs1[i]==0) { // Using r0 as a base address - /*if(rm>=0) { - if(!entry||entry[rm]!=mgr) { - generate_map_const(offset,rm); - } // else did it in the previous cycle - }*/ if(!entry||entry[ra]!=agr) { if (opcode[i]==0x22||opcode[i]==0x26) { emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR @@ -3747,8 +3649,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR }else{ #ifdef HOST_IMM_ADDR32 - if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32) || // LWC1/LDC1/LWC2/LDC2 - (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000)) + if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2 #endif emit_movimm(constmap[i][rs]+offset,ra); regs[i].loadedconst|=1<=(signed int)0xC0000000)) + if((itype[i+1]!=LOAD&&(opcode[i+1]&0x3b)!=0x31&&(opcode[i+1]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2 #endif emit_movimm(constmap[i+1][rs]+offset,ra); regs[i+1].loadedconst|=1<>hr)&1)) { - #ifdef HOST_IMM_ADDR32 - if(!using_tlb||((signed int)constmap[i][hr]+imm[i+2])<(signed int)0xC0000000) return 0; - #endif // Precompute load address *value=constmap[i][hr]+imm[i+2]; return 1; @@ -3832,9 +3729,6 @@ int get_final_value(int hr, int i, int *value) } if(itype[i+1]==LOAD&&rs1[i+1]==reg&&rt1[i+1]==reg) { - #ifdef HOST_IMM_ADDR32 - if(!using_tlb||((signed int)constmap[i][hr]+imm[i+1])<(signed int)0xC0000000) return 0; - #endif // Precompute load address *value=constmap[i][hr]+imm[i+1]; //printf("c=%x imm=%x\n",(int)constmap[i][hr],imm[i+1]); @@ -9028,12 +8922,6 @@ int new_recompile_block(int addr) d1=dep1[i+1]; d2=dep2[i+1]; } - if(using_tlb) { - if(itype[i+1]==LOAD || itype[i+1]==LOADLR || - itype[i+1]==STORE || itype[i+1]==STORELR || - itype[i+1]==C1LS || itype[i+1]==C2LS) - map=TLREG; - } else if(itype[i+1]==STORE || itype[i+1]==STORELR || (opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2 map=INVCP; @@ -9088,12 +8976,7 @@ int new_recompile_block(int addr) d1=dep1[i]; d2=dep2[i]; } - if(using_tlb) { - if(itype[i]==LOAD || itype[i]==LOADLR || - itype[i]==STORE || itype[i]==STORELR || - itype[i]==C1LS || itype[i]==C2LS) - map=TLREG; - } else if(itype[i]==STORE || itype[i]==STORELR || + if(itype[i]==STORE || itype[i]==STORELR || (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2 map=INVCP; } @@ -9483,14 +9366,14 @@ int new_recompile_block(int addr) // Cache memory offset or tlb map pointer if a register is available #ifndef HOST_IMM_ADDR32 #ifndef RAM_OFFSET - if(using_tlb) + if(0) #endif { int earliest_available[HOST_REGS]; int loop_start[HOST_REGS]; int score[HOST_REGS]; int end[HOST_REGS]; - int reg=using_tlb?MMREG:ROREG; + int reg=ROREG; // Init for(hr=0;hr=0) { - int sr=get_reg(regs[i+1].regmap,rs1[i+1]); - if(sr>=0&&((regs[i+1].wasconst>>sr)&1)) { - int nr; - if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) - { - regs[i].regmap[hr]=MGEN1+((i+1)&1); - regmap_pre[i+1][hr]=MGEN1+((i+1)&1); - regs[i+1].regmap_entry[hr]=MGEN1+((i+1)&1); - regs[i].isconst&=~(1<=0) - { - // move it to another register - regs[i+1].regmap[hr]=-1; - regmap_pre[i+2][hr]=-1; - regs[i+1].regmap[nr]=TLREG; - regmap_pre[i+2][nr]=TLREG; - regs[i].regmap[nr]=MGEN1+((i+1)&1); - regmap_pre[i+1][nr]=MGEN1+((i+1)&1); - regs[i+1].regmap_entry[nr]=MGEN1+((i+1)&1); - regs[i].isconst&=~(1<>5)&1) printf("ebp "); if((needed_reg[i]>>6)&1) printf("esi "); if((needed_reg[i]>>7)&1) printf("edi "); - printf("r:"); - for(r=0;r<=CCREG;r++) { - //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) { - if((requires_32bit[i]>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } printf("\n"); - /*printf("pr:"); - for(r=0;r<=CCREG;r++) { - //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) { - if((pr32[i]>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - if(pr32[i]!=requires_32bit[i]) printf(" OOPS"); - printf("\n");*/ #if defined(__i386__) || defined(__x86_64__) printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]); printf("dirty: "); @@ -10014,17 +9835,6 @@ int new_recompile_block(int addr) #endif printf("\n"); } - /*printf(" p32:"); - for(r=0;r<=CCREG;r++) { - if((p32[i]>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - if(p32[i]!=regs[i].is32) printf(" NO MATCH\n"); - else printf("\n");*/ if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) { #if defined(__i386__) || defined(__x86_64__) printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); -- cgit v1.2.3 From 9f51b4b9aa4fffa7b9411ca274f5dfb179a6a30a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 18 Sep 2016 20:10:06 +0300 Subject: drc: strip eol blanks Because people send patches while using editors that strip blanks and the patch becomes a mess. --- libpcsxcore/new_dynarec/assem_arm.c | 44 ++++++------- libpcsxcore/new_dynarec/new_dynarec.c | 118 +++++++++++++++++----------------- 2 files changed, 81 insertions(+), 81 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 20a6956..592cc88 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -186,8 +186,8 @@ add_literal(int addr,int val) assert(literalcountu>>reg)&1) return; - + // see if it's already allocated for(hr=0;hrregmap[hr]==reg) return; } - + // Keep the same mapping if the register was already allocated in a loop preferred_reg = loop_reg(i,reg,preferred_reg); - + // Try to allocate the preferred register if(cur->regmap[preferred_reg]==-1) { cur->regmap[preferred_reg]=reg; @@ -348,7 +348,7 @@ void alloc_reg(struct regstat *cur,int i,signed char reg) cur->isconst&=~(1<uu>>reg)&1) return; - + // see if the upper half is already allocated for(hr=0;hrregmap[hr]==reg+64) return; } - + // Keep the same mapping if the register was already allocated in a loop preferred_reg = loop_reg(i,reg,preferred_reg); - + // Try to allocate the preferred register if(cur->regmap[preferred_reg]==-1) { cur->regmap[preferred_reg]=reg|64; @@ -514,7 +514,7 @@ void alloc_reg64(struct regstat *cur,int i,signed char reg) cur->isconst&=~(1<regmap[hr]==reg) return; } - + // Try to allocate any available register for(hr=HOST_REGS-1;hr>=0;hr--) { if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) { @@ -663,7 +663,7 @@ void alloc_reg_temp(struct regstat *cur,int i,signed char reg) return; } } - + // Find an unneeded register for(hr=HOST_REGS-1;hr>=0;hr--) { @@ -692,7 +692,7 @@ void alloc_reg_temp(struct regstat *cur,int i,signed char reg) } } } - + // Ok, now we have to evict someone // Pick a register we hopefully won't need soon // TODO: we might want to follow unconditional jumps here @@ -767,7 +767,7 @@ void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr) { int n; int dirty=0; - + // see if it's already allocated (and dealloc it) for(n=0;nregmap[n]=-1; } } - + cur->regmap[hr]=reg; cur->dirty&=~(1<dirty|=dirty<dirty>>hr)&1) { reg=cur->regmap[hr]; - if(reg>=64) + if(reg>=64) if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1; } } @@ -574,7 +574,7 @@ int needed_again(int r, int i) int j; int b=-1; int rn=10; - + if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(ba[i-1]start+slen*4-4) @@ -677,7 +677,7 @@ int loop_reg(int i, int r, int hr) void alloc_all(struct regstat *cur,int i) { int hr; - + for(hr=0;hrregmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&& @@ -789,7 +789,7 @@ void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift) { struct ll_entry *next; while(*head) { - if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || + if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) { inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr); @@ -885,7 +885,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) #ifdef __arm__ do_clear_cache(); #endif - + // Don't trap writes invalid_code[block]=1; @@ -3028,7 +3028,7 @@ void storelr_assemble(int i,struct regstat *i_regs) int map=get_reg(i_regs->regmap,ROREG); if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); #else - if((u_int)rdram!=0x80000000) + if((u_int)rdram!=0x80000000) emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); #endif @@ -3600,7 +3600,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) int agr=AGEN1+(i&1); if(itype[i]==LOAD) { ra=get_reg(i_regs->regmap,rt1[i]); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg(i_regs->regmap,-1); assert(ra>=0); } if(itype[i]==LOADLR) { @@ -4120,7 +4120,7 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) { return 0; } - else + else if((i_dirty>>hr)&1) { if(i_regmap[hr]>s1l)&(branch_regs[i].is32>>rs1[i])&1) emit_loadreg(rs1[i],s1l); - } + } else { if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1) emit_loadreg(rs2[i],s1l); @@ -4554,7 +4554,7 @@ void do_ccstub(int n) load_all_regs(branch_regs[i].regmap); } emit_jmp(stubs[n][2]); // return address - + /* This works but uses a lot of memory... emit_readword((int)&last_count,ECX); emit_add(HOST_CCREG,ECX,EAX); @@ -4588,7 +4588,7 @@ add_to_linker(int addr,int target,int ext) { link_addr[linkcount][0]=addr; link_addr[linkcount][1]=target; - link_addr[linkcount][2]=ext; + link_addr[linkcount][2]=ext; linkcount++; } @@ -4614,7 +4614,7 @@ static void ujump_assemble_write_ra(int i) #endif { #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -4635,10 +4635,10 @@ void ujump_assemble(int i,struct regstat *i_regs) address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); - if(rt1[i]==31&&temp>=0) + if(rt1[i]==31&&temp>=0) { int return_address=start+i*4+8; - if(get_reg(branch_regs[i].regmap,31)>0) + if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } #endif @@ -4689,7 +4689,7 @@ static void rjump_assemble_write_ra(int i) assert(rt>=0); return_address=start+i*4+8; #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -4718,7 +4718,7 @@ void rjump_assemble(int i,struct regstat *i_regs) } address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH - if(rt1[i]==31) + if(rt1[i]==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { int return_address=start+i*4+8; @@ -4850,7 +4850,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); @@ -4905,7 +4905,7 @@ void cjump_assemble(int i,struct regstat *i_regs) load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); //assem_debug("cycle count (adj)\n"); @@ -4977,7 +4977,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(0); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if(opcode[i]==4) // BEQ @@ -5107,7 +5107,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(1); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if((opcode[i]&0x2f)==4) // BEQ @@ -5290,7 +5290,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); assem_debug("cycle count (adj)\n"); @@ -5377,7 +5377,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } } // if(!only32) - + if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { @@ -5625,7 +5625,7 @@ void fjump_assemble(int i,struct regstat *i_regs) { } } // if(!only32) - + if(invert) { if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -6133,14 +6133,14 @@ void unneeded_registers(int istart,int iend,int r) { // If subroutine call, flag return address as a possible branch target if(rt1[i]==31 && i=(start+slen*4)) { // Branch out of this block, flush all regs u=1; uu=1; gte_u=gte_u_unknown; - /* Hexagon hack + /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { uu=u=0x300C00F; // Discard at, v0-v1, t6-t9 @@ -6730,7 +6730,7 @@ void clean_registers(int istart,int iend,int wr) if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<istart) { - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) + if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) { // Don't store a register immediately after writing it, // may prevent dual-issue. @@ -7140,7 +7140,7 @@ int new_recompile_block(int addr) assem_debug("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); - //if(debug) + //if(debug) //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); /*if(Count>=312978186) { @@ -7196,7 +7196,7 @@ int new_recompile_block(int addr) unsigned int type,op,op2; //printf("addr = %x source = %x %x\n", addr,source,source[0]); - + /* Pass 1 disassembly */ for(i=0;!done;i++) { @@ -7849,7 +7849,7 @@ int new_recompile_block(int addr) /* Pass 2 - Register dependencies and branch targets */ unneeded_registers(0,slen-1,0); - + /* Pass 3 - Register allocation */ struct regstat current; // Current register allocations/status @@ -7876,7 +7876,7 @@ int new_recompile_block(int addr) unneeded_reg_upper[0]=1; current.regmap[HOST_BTREG]=BTREG; } - + for(i=0;i=0;i--) { int hr; @@ -8892,7 +8892,7 @@ int new_recompile_block(int addr) } // Save it needed_reg[i]=nr; - + // Deallocate unneeded registers for(hr=0;hr=start && ba[i]<(start+i*4)) + if(ba[i]>=start && ba[i]<(start+i*4)) if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS @@ -9071,10 +9071,10 @@ int new_recompile_block(int addr) } } if(ooo[i]) { - if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; }else{ - if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; } // Avoid dirty->clean transition @@ -9244,10 +9244,10 @@ int new_recompile_block(int addr) if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { if(ooo[j]) { - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; }else{ - if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; } if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { @@ -9320,7 +9320,7 @@ int new_recompile_block(int addr) regs[k].isconst&=~(1<i&&f_regmap[HOST_CCREG]==CCREG) @@ -9362,7 +9362,7 @@ int new_recompile_block(int addr) } } } - + // Cache memory offset or tlb map pointer if a register is available #ifndef HOST_IMM_ADDR32 #ifndef RAM_OFFSET @@ -9542,7 +9542,7 @@ int new_recompile_block(int addr) } } #endif - + // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs. for(i=0;i=0) { @@ -9677,7 +9677,7 @@ int new_recompile_block(int addr) } } if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) { - if(itype[i+1]==LOAD) + if(itype[i+1]==LOAD) hr=get_reg(regs[i+1].regmap,rt1[i+1]); if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); @@ -9701,10 +9701,10 @@ int new_recompile_block(int addr) } } } - + /* Pass 6 - Optimize clean/dirty state */ clean_registers(0,slen-1,1); - + /* Pass 7 - Identify 32-bit registers */ for (i=slen-1;i>=0;i--) { @@ -10165,15 +10165,15 @@ int new_recompile_block(int addr) //printf("shadow buffer: %x-%x\n",(int)copy,(int)copy+slen*4); memcpy(copy,source,slen*4); copy+=slen*4; - + #ifdef __arm__ __clear_cache((void *)beginning,out); #endif - + // If we're within 256K of the end of the buffer, // start over from the beginning. (Is 256K enough?) if((u_int)out>(u_int)BASE_ADDR+(1<>12;i<=(start+slen*4)>>12;i++) { invalid_code[i]=0; @@ -10186,9 +10186,9 @@ int new_recompile_block(int addr) invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; - + /* Pass 10 - Free memory by expiring oldest blocks */ - + int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { @@ -10230,7 +10230,7 @@ int new_recompile_block(int addr) case 3: // Clear jump_out #ifdef __arm__ - if((expirep&2047)==0) + if((expirep&2047)==0) do_clear_cache(); #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); -- cgit v1.2.3 From e2b5e7aa45f75cd13ef238fa4ff9516891dabef5 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 18 Sep 2016 23:43:04 +0300 Subject: drc: mark things static --- libpcsxcore/new_dynarec/assem_arm.c | 521 ++++++++++++++++++++-------------- libpcsxcore/new_dynarec/new_dynarec.c | 135 ++++----- 2 files changed, 374 insertions(+), 282 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 592cc88..6bd9e49 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -38,6 +38,8 @@ char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); #define CALLER_SAVE_REGS 0x120f #endif +#define unused __attribute__((unused)) + extern int cycle_count; extern int last_count; extern int pcaddr; @@ -112,11 +114,11 @@ const u_int invalidate_addr_reg[16] = { 0, 0}; -unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; +static unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; /* Linker */ -void set_jump_target(int addr,u_int target) +static void set_jump_target(int addr,u_int target) { u_char *ptr=(u_char *)addr; u_int *ptr2=(u_int *)ptr; @@ -150,7 +152,8 @@ void set_jump_target(int addr,u_int target) // This optionally copies the instruction from the target of the branch into // the space before the branch. Works, but the difference in speed is // usually insignificant. -void set_jump_target_fillslot(int addr,u_int target,int copy) +#if 0 +static void set_jump_target_fillslot(int addr,u_int target,int copy) { u_char *ptr=(u_char *)addr; u_int *ptr2=(u_int *)ptr; @@ -179,9 +182,10 @@ void set_jump_target_fillslot(int addr,u_int target,int copy) *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); } } +#endif /* Literal pool */ -add_literal(int addr,int val) +static void add_literal(int addr,int val) { assert(literalcount=33554432) { @@ -865,96 +875,99 @@ u_int genjmp(u_int addr) return ((u_int)offset>>2)&0xffffff; } -void emit_mov(int rs,int rt) +static void emit_mov(int rs,int rt) { assem_debug("mov %s,%s\n",regname[rt],regname[rs]); output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)); } -void emit_movs(int rs,int rt) +static void emit_movs(int rs,int rt) { assem_debug("movs %s,%s\n",regname[rt],regname[rs]); output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)); } -void emit_add(int rs1,int rs2,int rt) +static void emit_add(int rs1,int rs2,int rt) { assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_adds(int rs1,int rs2,int rt) +static void emit_adds(int rs1,int rs2,int rt) { assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2)); } -void emit_adcs(int rs1,int rs2,int rt) +static void emit_adcs(int rs1,int rs2,int rt) { assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_sbc(int rs1,int rs2,int rt) +static void emit_sbc(int rs1,int rs2,int rt) { assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_sbcs(int rs1,int rs2,int rt) +static void emit_sbcs(int rs1,int rs2,int rt) { assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_neg(int rs, int rt) +static void emit_neg(int rs, int rt) { assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]); output_w32(0xe2600000|rd_rn_rm(rt,rs,0)); } -void emit_negs(int rs, int rt) +static void emit_negs(int rs, int rt) { assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); output_w32(0xe2700000|rd_rn_rm(rt,rs,0)); } -void emit_sub(int rs1,int rs2,int rt) +static void emit_sub(int rs1,int rs2,int rt) { assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2)); } -void emit_subs(int rs1,int rs2,int rt) +static void emit_subs(int rs1,int rs2,int rt) { assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2)); } -void emit_zeroreg(int rt) +static void emit_zeroreg(int rt) { assem_debug("mov %s,#0\n",regname[rt]); output_w32(0xe3a00000|rd_rn_rm(rt,0,0)); } -void emit_loadlp(u_int imm,u_int rt) +static void emit_loadlp(u_int imm,u_int rt) { add_literal((int)out,imm); assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm); output_w32(0xe5900000|rd_rn_rm(rt,15,0)); } -void emit_movw(u_int imm,u_int rt) + +static void emit_movw(u_int imm,u_int rt) { assert(imm<65536); assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm); output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000)); } -void emit_movt(u_int imm,u_int rt) + +static void emit_movt(u_int imm,u_int rt) { assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000); output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000)); } -void emit_movimm(u_int imm,u_int rt) + +static void emit_movimm(u_int imm,u_int rt) { u_int armval; if(genimm(imm,&armval)) { @@ -981,13 +994,14 @@ void emit_movimm(u_int imm,u_int rt) #endif } } -void emit_pcreladdr(u_int rt) + +static void emit_pcreladdr(u_int rt) { assem_debug("add %s,pc,#?\n",regname[rt]); output_w32(0xe2800000|rd_rn_rm(rt,15,0)); } -void emit_loadreg(int r, int hr) +static void emit_loadreg(int r, int hr) { if(r&64) { SysPrintf("64bit load in 32bit mode!\n"); @@ -1010,7 +1024,8 @@ void emit_loadreg(int r, int hr) output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset); } } -void emit_storereg(int r, int hr) + +static void emit_storereg(int r, int hr) { if(r&64) { SysPrintf("64bit store in 32bit mode!\n"); @@ -1028,13 +1043,13 @@ void emit_storereg(int r, int hr) output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset); } -void emit_test(int rs, int rt) +static void emit_test(int rs, int rt) { assem_debug("tst %s,%s\n",regname[rs],regname[rt]); output_w32(0xe1100000|rd_rn_rm(0,rs,rt)); } -void emit_testimm(int rs,int imm) +static void emit_testimm(int rs,int imm) { u_int armval; assem_debug("tst %s,#%d\n",regname[rs],imm); @@ -1042,7 +1057,7 @@ void emit_testimm(int rs,int imm) output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval); } -void emit_testeqimm(int rs,int imm) +static void emit_testeqimm(int rs,int imm) { u_int armval; assem_debug("tsteq %s,$%d\n",regname[rs],imm); @@ -1050,36 +1065,37 @@ void emit_testeqimm(int rs,int imm) output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval); } -void emit_not(int rs,int rt) +static void emit_not(int rs,int rt) { assem_debug("mvn %s,%s\n",regname[rt],regname[rs]); output_w32(0xe1e00000|rd_rn_rm(rt,0,rs)); } -void emit_mvnmi(int rs,int rt) +static void emit_mvnmi(int rs,int rt) { assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); output_w32(0x41e00000|rd_rn_rm(rt,0,rs)); } -void emit_and(u_int rs1,u_int rs2,u_int rt) +static void emit_and(u_int rs1,u_int rs2,u_int rt) { assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2)); } -void emit_or(u_int rs1,u_int rs2,u_int rt) +static void emit_or(u_int rs1,u_int rs2,u_int rt) { assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_or_and_set_flags(int rs1,int rs2,int rt) + +static void emit_or_and_set_flags(int rs1,int rs2,int rt) { assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2)); } -void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) +static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) { assert(rs<16); assert(rt<16); @@ -1088,7 +1104,7 @@ void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7)); } -void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) +static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) { assert(rs<16); assert(rt<16); @@ -1097,13 +1113,13 @@ void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7)); } -void emit_xor(u_int rs1,u_int rs2,u_int rt) +static void emit_xor(u_int rs1,u_int rs2,u_int rt) { assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2)); } -void emit_addimm(u_int rs,int imm,u_int rt) +static void emit_addimm(u_int rs,int imm,u_int rt) { assert(rs<16); assert(rt<16); @@ -1144,7 +1160,7 @@ void emit_addimm(u_int rs,int imm,u_int rt) else if(rs!=rt) emit_mov(rs,rt); } -void emit_addimm_and_set_flags(int imm,int rt) +static void emit_addimm_and_set_flags(int imm,int rt) { assert(imm>-65536&&imm<65536); u_int armval; @@ -1166,19 +1182,20 @@ void emit_addimm_and_set_flags(int imm,int rt) output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); } } -void emit_addimm_no_flags(u_int imm,u_int rt) + +static void emit_addimm_no_flags(u_int imm,u_int rt) { emit_addimm(rt,imm,rt); } -void emit_addnop(u_int r) +static void emit_addnop(u_int r) { assert(r<16); assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]); output_w32(0xe2800000|rd_rn_rm(r,r,0)); } -void emit_adcimm(u_int rs,int imm,u_int rt) +static void emit_adcimm(u_int rs,int imm,u_int rt) { u_int armval; genimm_checked(imm,&armval); @@ -1186,7 +1203,7 @@ void emit_adcimm(u_int rs,int imm,u_int rt) output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_rscimm(int rs,int imm,u_int rt) +static void emit_rscimm(int rs,int imm,u_int rt) { assert(0); u_int armval; @@ -1195,7 +1212,7 @@ void emit_rscimm(int rs,int imm,u_int rt) output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) +static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) { // TODO: if(genimm(imm,&armval)) ... // else @@ -1204,7 +1221,7 @@ void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) emit_adcimm(rsh,0,rth); } -void emit_andimm(int rs,int imm,int rt) +static void emit_andimm(int rs,int imm,int rt) { u_int armval; if(imm==0) { @@ -1240,7 +1257,7 @@ void emit_andimm(int rs,int imm,int rt) } } -void emit_orimm(int rs,int imm,int rt) +static void emit_orimm(int rs,int imm,int rt) { u_int armval; if(imm==0) { @@ -1257,7 +1274,7 @@ void emit_orimm(int rs,int imm,int rt) } } -void emit_xorimm(int rs,int imm,int rt) +static void emit_xorimm(int rs,int imm,int rt) { u_int armval; if(imm==0) { @@ -1274,7 +1291,7 @@ void emit_xorimm(int rs,int imm,int rt) } } -void emit_shlimm(int rs,u_int imm,int rt) +static void emit_shlimm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1283,7 +1300,7 @@ void emit_shlimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } -void emit_lsls_imm(int rs,int imm,int rt) +static void emit_lsls_imm(int rs,int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1291,7 +1308,7 @@ void emit_lsls_imm(int rs,int imm,int rt) output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } -void emit_lslpls_imm(int rs,int imm,int rt) +static unused void emit_lslpls_imm(int rs,int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1299,7 +1316,7 @@ void emit_lslpls_imm(int rs,int imm,int rt) output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } -void emit_shrimm(int rs,u_int imm,int rt) +static void emit_shrimm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1307,7 +1324,7 @@ void emit_shrimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); } -void emit_sarimm(int rs,u_int imm,int rt) +static void emit_sarimm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1315,7 +1332,7 @@ void emit_sarimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7)); } -void emit_rorimm(int rs,u_int imm,int rt) +static void emit_rorimm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1323,7 +1340,7 @@ void emit_rorimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7)); } -void emit_shldimm(int rs,int rs2,u_int imm,int rt) +static void emit_shldimm(int rs,int rs2,u_int imm,int rt) { assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); assert(imm>0); @@ -1335,7 +1352,7 @@ void emit_shldimm(int rs,int rs2,u_int imm,int rt) output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); } -void emit_shrdimm(int rs,int rs2,u_int imm,int rt) +static void emit_shrdimm(int rs,int rs2,u_int imm,int rt) { assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); assert(imm>0); @@ -1347,7 +1364,7 @@ void emit_shrdimm(int rs,int rs2,u_int imm,int rt) output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); } -void emit_signextend16(int rs,int rt) +static void emit_signextend16(int rs,int rt) { #ifndef HAVE_ARMV6 emit_shlimm(rs,16,rt); @@ -1358,7 +1375,7 @@ void emit_signextend16(int rs,int rt) #endif } -void emit_signextend8(int rs,int rt) +static void emit_signextend8(int rs,int rt) { #ifndef HAVE_ARMV6 emit_shlimm(rs,24,rt); @@ -1369,7 +1386,7 @@ void emit_signextend8(int rs,int rt) #endif } -void emit_shl(u_int rs,u_int shift,u_int rt) +static void emit_shl(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1378,7 +1395,8 @@ void emit_shl(u_int rs,u_int shift,u_int rt) assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8)); } -void emit_shr(u_int rs,u_int shift,u_int rt) + +static void emit_shr(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1386,7 +1404,8 @@ void emit_shr(u_int rs,u_int shift,u_int rt) assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8)); } -void emit_sar(u_int rs,u_int shift,u_int rt) + +static void emit_sar(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1395,7 +1414,7 @@ void emit_sar(u_int rs,u_int shift,u_int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); } -void emit_orrshl(u_int rs,u_int shift,u_int rt) +static void emit_orrshl(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1403,7 +1422,8 @@ void emit_orrshl(u_int rs,u_int shift,u_int rt) assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8)); } -void emit_orrshr(u_int rs,u_int shift,u_int rt) + +static void emit_orrshr(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1412,7 +1432,7 @@ void emit_orrshr(u_int rs,u_int shift,u_int rt) output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8)); } -void emit_cmpimm(int rs,int imm) +static void emit_cmpimm(int rs,int imm) { u_int armval; if(genimm(imm,&armval)) { @@ -1434,70 +1454,79 @@ void emit_cmpimm(int rs,int imm) } } -void emit_cmovne_imm(int imm,int rt) +static void emit_cmovne_imm(int imm,int rt) { assem_debug("movne %s,#%d\n",regname[rt],imm); u_int armval; genimm_checked(imm,&armval); output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval); } -void emit_cmovl_imm(int imm,int rt) + +static void emit_cmovl_imm(int imm,int rt) { assem_debug("movlt %s,#%d\n",regname[rt],imm); u_int armval; genimm_checked(imm,&armval); output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval); } -void emit_cmovb_imm(int imm,int rt) + +static void emit_cmovb_imm(int imm,int rt) { assem_debug("movcc %s,#%d\n",regname[rt],imm); u_int armval; genimm_checked(imm,&armval); output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval); } -void emit_cmovs_imm(int imm,int rt) + +static void emit_cmovs_imm(int imm,int rt) { assem_debug("movmi %s,#%d\n",regname[rt],imm); u_int armval; genimm_checked(imm,&armval); output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); } -void emit_cmove_reg(int rs,int rt) + +static void emit_cmove_reg(int rs,int rt) { assem_debug("moveq %s,%s\n",regname[rt],regname[rs]); output_w32(0x01a00000|rd_rn_rm(rt,0,rs)); } -void emit_cmovne_reg(int rs,int rt) + +static void emit_cmovne_reg(int rs,int rt) { assem_debug("movne %s,%s\n",regname[rt],regname[rs]); output_w32(0x11a00000|rd_rn_rm(rt,0,rs)); } -void emit_cmovl_reg(int rs,int rt) + +static void emit_cmovl_reg(int rs,int rt) { assem_debug("movlt %s,%s\n",regname[rt],regname[rs]); output_w32(0xb1a00000|rd_rn_rm(rt,0,rs)); } -void emit_cmovs_reg(int rs,int rt) + +static void emit_cmovs_reg(int rs,int rt) { assem_debug("movmi %s,%s\n",regname[rt],regname[rs]); output_w32(0x41a00000|rd_rn_rm(rt,0,rs)); } -void emit_slti32(int rs,int imm,int rt) +static void emit_slti32(int rs,int imm,int rt) { if(rs!=rt) emit_zeroreg(rt); emit_cmpimm(rs,imm); if(rs==rt) emit_movimm(0,rt); emit_cmovl_imm(1,rt); } -void emit_sltiu32(int rs,int imm,int rt) + +static void emit_sltiu32(int rs,int imm,int rt) { if(rs!=rt) emit_zeroreg(rt); emit_cmpimm(rs,imm); if(rs==rt) emit_movimm(0,rt); emit_cmovb_imm(1,rt); } -void emit_slti64_32(int rsh,int rsl,int imm,int rt) + +static void emit_slti64_32(int rsh,int rsl,int imm,int rt) { assert(rsh!=rt); emit_slti32(rsl,imm,rt); @@ -1514,7 +1543,8 @@ void emit_slti64_32(int rsh,int rsl,int imm,int rt) emit_cmovl_imm(1,rt); } } -void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) + +static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) { assert(rsh!=rt); emit_sltiu32(rsl,imm,rt); @@ -1530,26 +1560,29 @@ void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) } } -void emit_cmp(int rs,int rt) +static void emit_cmp(int rs,int rt) { assem_debug("cmp %s,%s\n",regname[rs],regname[rt]); output_w32(0xe1500000|rd_rn_rm(0,rs,rt)); } -void emit_set_gz32(int rs, int rt) + +static void emit_set_gz32(int rs, int rt) { //assem_debug("set_gz32\n"); emit_cmpimm(rs,1); emit_movimm(1,rt); emit_cmovl_imm(0,rt); } -void emit_set_nz32(int rs, int rt) + +static void emit_set_nz32(int rs, int rt) { //assem_debug("set_nz32\n"); if(rs!=rt) emit_movs(rs,rt); else emit_test(rs,rs); emit_cmovne_imm(1,rt); } -void emit_set_gz64_32(int rsh, int rsl, int rt) + +static void emit_set_gz64_32(int rsh, int rsl, int rt) { //assem_debug("set_gz64\n"); emit_set_gz32(rsl,rt); @@ -1557,13 +1590,15 @@ void emit_set_gz64_32(int rsh, int rsl, int rt) emit_cmovne_imm(1,rt); emit_cmovs_imm(0,rt); } -void emit_set_nz64_32(int rsh, int rsl, int rt) + +static void emit_set_nz64_32(int rsh, int rsl, int rt) { //assem_debug("set_nz64\n"); emit_or_and_set_flags(rsh,rsl,rt); emit_cmovne_imm(1,rt); } -void emit_set_if_less32(int rs1, int rs2, int rt) + +static void emit_set_if_less32(int rs1, int rs2, int rt) { //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); @@ -1571,7 +1606,8 @@ void emit_set_if_less32(int rs1, int rs2, int rt) if(rs1==rt||rs2==rt) emit_movimm(0,rt); emit_cmovl_imm(1,rt); } -void emit_set_if_carry32(int rs1, int rs2, int rt) + +static void emit_set_if_carry32(int rs1, int rs2, int rt) { //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); @@ -1579,7 +1615,8 @@ void emit_set_if_carry32(int rs1, int rs2, int rt) if(rs1==rt||rs2==rt) emit_movimm(0,rt); emit_cmovb_imm(1,rt); } -void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) + +static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) { //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); assert(u1!=rt); @@ -1589,7 +1626,8 @@ void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) emit_sbcs(u1,u2,HOST_TEMPREG); emit_cmovl_imm(1,rt); } -void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) + +static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) { //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); assert(u1!=rt); @@ -1600,86 +1638,97 @@ void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) emit_cmovb_imm(1,rt); } -void emit_call(int a) +static void emit_call(int a) { assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8); u_int offset=genjmp(a); output_w32(0xeb000000|offset); } -void emit_jmp(int a) + +static void emit_jmp(int a) { assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8); u_int offset=genjmp(a); output_w32(0xea000000|offset); } -void emit_jne(int a) + +static void emit_jne(int a) { assem_debug("bne %x\n",a); u_int offset=genjmp(a); output_w32(0x1a000000|offset); } -void emit_jeq(int a) + +static void emit_jeq(int a) { assem_debug("beq %x\n",a); u_int offset=genjmp(a); output_w32(0x0a000000|offset); } -void emit_js(int a) + +static void emit_js(int a) { assem_debug("bmi %x\n",a); u_int offset=genjmp(a); output_w32(0x4a000000|offset); } -void emit_jns(int a) + +static void emit_jns(int a) { assem_debug("bpl %x\n",a); u_int offset=genjmp(a); output_w32(0x5a000000|offset); } -void emit_jl(int a) + +static void emit_jl(int a) { assem_debug("blt %x\n",a); u_int offset=genjmp(a); output_w32(0xba000000|offset); } -void emit_jge(int a) + +static void emit_jge(int a) { assem_debug("bge %x\n",a); u_int offset=genjmp(a); output_w32(0xaa000000|offset); } -void emit_jno(int a) + +static void emit_jno(int a) { assem_debug("bvc %x\n",a); u_int offset=genjmp(a); output_w32(0x7a000000|offset); } -void emit_jc(int a) + +static void emit_jc(int a) { assem_debug("bcs %x\n",a); u_int offset=genjmp(a); output_w32(0x2a000000|offset); } -void emit_jcc(int a) + +static void emit_jcc(int a) { assem_debug("bcc %x\n",a); u_int offset=genjmp(a); output_w32(0x3a000000|offset); } -void emit_callreg(u_int r) +static void emit_callreg(u_int r) { assert(r<15); assem_debug("blx %s\n",regname[r]); output_w32(0xe12fff30|r); } -void emit_jmpreg(u_int r) + +static void emit_jmpreg(u_int r) { assem_debug("mov pc,%s\n",regname[r]); output_w32(0xe1a00000|rd_rn_rm(15,0,r)); } -void emit_readword_indexed(int offset, int rs, int rt) +static void emit_readword_indexed(int offset, int rs, int rt) { assert(offset>-4096&&offset<4096); assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1689,37 +1738,44 @@ void emit_readword_indexed(int offset, int rs, int rt) output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset)); } } -void emit_readword_dualindexedx4(int rs1, int rs2, int rt) + +static void emit_readword_dualindexedx4(int rs1, int rs2, int rt) { assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100); } -void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2)); } -void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2)); } -void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2)); } -void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); } -void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) + +static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) { if(map<0) emit_readword_indexed(addr, rs, rt); else { @@ -1727,7 +1783,8 @@ void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) emit_readword_dualindexedx4(rs, map, rt); } } -void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) + +static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) { if(map<0) { if(rh>=0) emit_readword_indexed(addr, rs, rh); @@ -1739,7 +1796,8 @@ void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) emit_readword_indexed_tlb(addr, rs, map, rl); } } -void emit_movsbl_indexed(int offset, int rs, int rt) + +static void emit_movsbl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1749,7 +1807,8 @@ void emit_movsbl_indexed(int offset, int rs, int rt) output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } -void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) + +static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) { if(map<0) emit_movsbl_indexed(addr, rs, rt); else { @@ -1765,7 +1824,8 @@ void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) } } } -void emit_movswl_indexed(int offset, int rs, int rt) + +static void emit_movswl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1775,7 +1835,8 @@ void emit_movswl_indexed(int offset, int rs, int rt) output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } -void emit_movzbl_indexed(int offset, int rs, int rt) + +static void emit_movzbl_indexed(int offset, int rs, int rt) { assert(offset>-4096&&offset<4096); assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1785,12 +1846,14 @@ void emit_movzbl_indexed(int offset, int rs, int rt) output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset)); } } -void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) + +static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) { assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100); } -void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) + +static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) { if(map<0) emit_movzbl_indexed(addr, rs, rt); else { @@ -1802,7 +1865,8 @@ void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) } } } -void emit_movzwl_indexed(int offset, int rs, int rt) + +static void emit_movzwl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1812,6 +1876,7 @@ void emit_movzwl_indexed(int offset, int rs, int rt) output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } + static void emit_ldrd(int offset, int rs, int rt) { assert(offset>-256&&offset<256); @@ -1822,35 +1887,40 @@ static void emit_ldrd(int offset, int rs, int rt) output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } -void emit_readword(int addr, int rt) + +static void emit_readword(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); assem_debug("ldr %s,fp+%d\n",regname[rt],offset); output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); } -void emit_movsbl(int addr, int rt) + +static unused void emit_movsbl(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<256); assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset); output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_movswl(int addr, int rt) + +static unused void emit_movswl(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<256); assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset); output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_movzbl(int addr, int rt) + +static unused void emit_movzbl(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); assem_debug("ldrb %s,fp+%d\n",regname[rt],offset); output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset); } -void emit_movzwl(int addr, int rt) + +static unused void emit_movzwl(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<256); @@ -1858,7 +1928,7 @@ void emit_movzwl(int addr, int rt) output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_writeword_indexed(int rt, int offset, int rs) +static void emit_writeword_indexed(int rt, int offset, int rs) { assert(offset>-4096&&offset<4096); assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1868,12 +1938,14 @@ void emit_writeword_indexed(int rt, int offset, int rs) output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset)); } } -void emit_writeword_dualindexedx4(int rt, int rs1, int rs2) + +static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2) { assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100); } -void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) + +static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) { if(map<0) emit_writeword_indexed(rt, addr, rs); else { @@ -1881,7 +1953,8 @@ void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) emit_writeword_dualindexedx4(rt, rs, map); } } -void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp) + +static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp) { if(map<0) { if(rh>=0) emit_writeword_indexed(rh, addr, rs); @@ -1897,7 +1970,8 @@ void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int } } } -void emit_writehword_indexed(int rt, int offset, int rs) + +static void emit_writehword_indexed(int rt, int offset, int rs) { assert(offset>-256&&offset<256); assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1907,7 +1981,8 @@ void emit_writehword_indexed(int rt, int offset, int rs) output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } -void emit_writebyte_indexed(int rt, int offset, int rs) + +static void emit_writebyte_indexed(int rt, int offset, int rs) { assert(offset>-4096&&offset<4096); assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1917,12 +1992,14 @@ void emit_writebyte_indexed(int rt, int offset, int rs) output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset)); } } -void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) + +static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) { assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100); } -void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) + +static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) { if(map<0) emit_writebyte_indexed(rt, addr, rs); else { @@ -1934,36 +2011,42 @@ void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) } } } -void emit_strcc_dualindexed(int rs1, int rs2, int rt) + +static void emit_strcc_dualindexed(int rs1, int rs2, int rt) { assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_strccb_dualindexed(int rs1, int rs2, int rt) + +static void emit_strccb_dualindexed(int rs1, int rs2, int rt) { assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_strcch_dualindexed(int rs1, int rs2, int rt) + +static void emit_strcch_dualindexed(int rs1, int rs2, int rt) { assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2)); } -void emit_writeword(int rt, int addr) + +static void emit_writeword(int rt, int addr) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); assem_debug("str %s,fp+%d\n",regname[rt],offset); output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); } -void emit_writehword(int rt, int addr) + +static unused void emit_writehword(int rt, int addr) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<256); assem_debug("strh %s,fp+%d\n",regname[rt],offset); output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_writebyte(int rt, int addr) + +static unused void emit_writebyte(int rt, int addr) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); @@ -1971,7 +2054,7 @@ void emit_writebyte(int rt, int addr) output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); } -void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) +static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) { assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); assert(rs1<16); @@ -1980,7 +2063,8 @@ void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) assert(lo<16); output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); } -void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) + +static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) { assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); assert(rs1<16); @@ -1990,19 +2074,19 @@ void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); } -void emit_clz(int rs,int rt) +static void emit_clz(int rs,int rt) { assem_debug("clz %s,%s\n",regname[rt],regname[rs]); output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs)); } -void emit_subcs(int rs1,int rs2,int rt) +static void emit_subcs(int rs1,int rs2,int rt) { assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2)); } -void emit_shrcc_imm(int rs,u_int imm,int rt) +static void emit_shrcc_imm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -2010,7 +2094,7 @@ void emit_shrcc_imm(int rs,u_int imm,int rt) output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); } -void emit_shrne_imm(int rs,u_int imm,int rt) +static void emit_shrne_imm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -2018,73 +2102,73 @@ void emit_shrne_imm(int rs,u_int imm,int rt) output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); } -void emit_negmi(int rs, int rt) +static void emit_negmi(int rs, int rt) { assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]); output_w32(0x42600000|rd_rn_rm(rt,rs,0)); } -void emit_negsmi(int rs, int rt) +static void emit_negsmi(int rs, int rt) { assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]); output_w32(0x42700000|rd_rn_rm(rt,rs,0)); } -void emit_orreq(u_int rs1,u_int rs2,u_int rt) +static void emit_orreq(u_int rs1,u_int rs2,u_int rt) { assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_orrne(u_int rs1,u_int rs2,u_int rt) +static void emit_orrne(u_int rs1,u_int rs2,u_int rt) { assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); } -void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); } -void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); } -void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); } -void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); } -void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); } -void emit_teq(int rs, int rt) +static void emit_teq(int rs, int rt) { assem_debug("teq %s,%s\n",regname[rs],regname[rt]); output_w32(0xe1300000|rd_rn_rm(0,rs,rt)); } -void emit_rsbimm(int rs, int imm, int rt) +static void emit_rsbimm(int rs, int imm, int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2093,7 +2177,7 @@ void emit_rsbimm(int rs, int imm, int rt) } // Load 2 immediates optimizing for small code size -void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) +static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) { emit_movimm(imm1,rt1); u_int armval; @@ -2109,7 +2193,7 @@ void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) // Conditionally select one of two immediates, optimizing for small code size // This will only be called if HAVE_CMOV_IMM is defined -void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) +static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) { u_int armval; if(genimm(imm2-imm1,&armval)) { @@ -2143,7 +2227,7 @@ void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) } // special case for checking invalid_code -void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) +static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) { assert(imm<128&&imm>=0); assert(r>=0&&r<16); @@ -2152,7 +2236,7 @@ void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) emit_cmpimm(HOST_TEMPREG,imm); } -void emit_callne(int a) +static void emit_callne(int a) { assem_debug("blne %x\n",a); u_int offset=genjmp(a); @@ -2160,21 +2244,21 @@ void emit_callne(int a) } // Used to preload hash table entries -void emit_prefetchreg(int r) +static unused void emit_prefetchreg(int r) { assem_debug("pld %s\n",regname[r]); output_w32(0xf5d0f000|rd_rn_rm(0,r,0)); } // Special case for mini_ht -void emit_ldreq_indexed(int rs, u_int offset, int rt) +static void emit_ldreq_indexed(int rs, u_int offset, int rt) { assert(offset<4096); assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset); output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset); } -void emit_bicne_imm(int rs,int imm,int rt) +static unused void emit_bicne_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2182,7 +2266,7 @@ void emit_bicne_imm(int rs,int imm,int rt) output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_biccs_imm(int rs,int imm,int rt) +static unused void emit_biccs_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2190,7 +2274,7 @@ void emit_biccs_imm(int rs,int imm,int rt) output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_bicvc_imm(int rs,int imm,int rt) +static unused void emit_bicvc_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2198,7 +2282,7 @@ void emit_bicvc_imm(int rs,int imm,int rt) output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_bichi_imm(int rs,int imm,int rt) +static unused void emit_bichi_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2206,7 +2290,7 @@ void emit_bichi_imm(int rs,int imm,int rt) output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_orrvs_imm(int rs,int imm,int rt) +static unused void emit_orrvs_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2214,7 +2298,7 @@ void emit_orrvs_imm(int rs,int imm,int rt) output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval); } -void emit_orrne_imm(int rs,int imm,int rt) +static void emit_orrne_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2222,7 +2306,7 @@ void emit_orrne_imm(int rs,int imm,int rt) output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval); } -void emit_andne_imm(int rs,int imm,int rt) +static void emit_andne_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2230,7 +2314,7 @@ void emit_andne_imm(int rs,int imm,int rt) output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval); } -void emit_addpl_imm(int rs,int imm,int rt) +static unused void emit_addpl_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2238,7 +2322,7 @@ void emit_addpl_imm(int rs,int imm,int rt) output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval); } -void emit_jno_unlikely(int a) +static void emit_jno_unlikely(int a) { //emit_jno(a); assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a); @@ -2256,6 +2340,7 @@ static void save_regs_all(u_int reglist) assem_debug("}\n"); output_w32(0xe88b0000|reglist); } + static void restore_regs_all(u_int reglist) { int i; @@ -2267,12 +2352,14 @@ static void restore_regs_all(u_int reglist) assem_debug("}\n"); output_w32(0xe89b0000|reglist); } + // Save registers before function call static void save_regs(u_int reglist) { reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12 save_regs_all(reglist); } + // Restore registers after function call static void restore_regs(u_int reglist) { @@ -2282,7 +2369,7 @@ static void restore_regs(u_int reglist) /* Stubs/epilogue */ -void literal_pool(int n) +static void literal_pool(int n) { if(!literalcount) return; if(n) { @@ -2314,7 +2401,7 @@ void literal_pool(int n) literalcount=0; } -void literal_pool_jumpover(int n) +static void literal_pool_jumpover(int n) { if(!literalcount) return; if(n) { @@ -2326,10 +2413,12 @@ void literal_pool_jumpover(int n) set_jump_target(jaddr,(int)out); } -emit_extjump2(u_int addr, int target, int linker) +static void emit_extjump2(u_int addr, int target, int linker) { u_char *ptr=(u_char *)addr; assert((ptr[3]&0x0e)==0xa); + (void)ptr; + emit_loadlp(target,0); emit_loadlp(addr,1); assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<>12]; @@ -2542,7 +2632,7 @@ u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) } } -inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,target); int rth=get_reg(regmap,target|64); @@ -2624,7 +2714,7 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i restore_regs(reglist); } -do_writestub(int n) +static void do_writestub(int n) { assem_debug("do_writestub %x\n",start+stubs[n][3]*4); literal_pool(256); @@ -2708,7 +2798,7 @@ do_writestub(int n) emit_jmp(ra); } -inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,-1); int rth=get_reg(regmap,target|64); @@ -2745,7 +2835,7 @@ inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, restore_regs(reglist); } -do_unalignedwritestub(int n) +static void do_unalignedwritestub(int n) { assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4); literal_pool(256); @@ -2834,12 +2924,7 @@ do_unalignedwritestub(int n) #endif } -void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a) -{ - printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]); -} - -do_invstub(int n) +static void do_invstub(int n) { literal_pool(20); u_int reglist=stubs[n][3]; @@ -2876,7 +2961,7 @@ int do_dirty_stub(int i) return entry; } -void do_dirty_stub_ds() +static void do_dirty_stub_ds() { // Careful about the code output here, verify_dirty needs to parse it. #ifndef HAVE_ARMV7 @@ -2894,7 +2979,7 @@ void do_dirty_stub_ds() emit_call((int)&verify_code_ds); } -do_cop1stub(int n) +static void do_cop1stub(int n) { literal_pool(256); assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4); @@ -2917,7 +3002,7 @@ do_cop1stub(int n) /* Special assem */ -void shift_assemble_arm(int i,struct regstat *i_regs) +static void shift_assemble_arm(int i,struct regstat *i_regs) { if(rt1[i]) { if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV @@ -3209,7 +3294,7 @@ static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) #define shift_assemble shift_assemble_arm -void loadlr_assemble_arm(int i,struct regstat *i_regs) +static void loadlr_assemble_arm(int i,struct regstat *i_regs) { int s,th,tl,temp,temp2,addr,map=-1; int offset; @@ -3338,7 +3423,7 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) } #define loadlr_assemble loadlr_assemble_arm -void cop0_assemble(int i,struct regstat *i_regs) +static void cop0_assemble(int i,struct regstat *i_regs) { if(opcode2[i]==0) // MFC0 { @@ -3525,7 +3610,7 @@ static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) } } -void cop2_assemble(int i,struct regstat *i_regs) +static void cop2_assemble(int i,struct regstat *i_regs) { u_int copr=(source[i]>>11)&0x1f; signed char temp=get_reg(i_regs->regmap,-1); @@ -3734,7 +3819,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) } } -void cop1_unusable(int i,struct regstat *i_regs) +static void cop1_unusable(int i,struct regstat *i_regs) { // XXX: should just just do the exception instead if(!cop1_usable) { @@ -3745,28 +3830,28 @@ void cop1_unusable(int i,struct regstat *i_regs) } } -void cop1_assemble(int i,struct regstat *i_regs) +static void cop1_assemble(int i,struct regstat *i_regs) { cop1_unusable(i, i_regs); } -void fconv_assemble_arm(int i,struct regstat *i_regs) +static void fconv_assemble_arm(int i,struct regstat *i_regs) { cop1_unusable(i, i_regs); } #define fconv_assemble fconv_assemble_arm -void fcomp_assemble(int i,struct regstat *i_regs) +static void fcomp_assemble(int i,struct regstat *i_regs) { cop1_unusable(i, i_regs); } -void float_assemble(int i,struct regstat *i_regs) +static void float_assemble(int i,struct regstat *i_regs) { cop1_unusable(i, i_regs); } -void multdiv_assemble_arm(int i,struct regstat *i_regs) +static void multdiv_assemble_arm(int i,struct regstat *i_regs) { // case 0x18: MULT // case 0x19: MULTU @@ -3891,25 +3976,25 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) } #define multdiv_assemble multdiv_assemble_arm -void do_preload_rhash(int r) { +static void do_preload_rhash(int r) { // Don't need this for ARM. On x86, this puts the value 0xf8 into the // register. On ARM the hash can be done with a single instruction (below) } -void do_preload_rhtbl(int ht) { +static void do_preload_rhtbl(int ht) { emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht); } -void do_rhash(int rs,int rh) { +static void do_rhash(int rs,int rh) { emit_andimm(rs,0xf8,rh); } -void do_miniht_load(int ht,int rh) { +static void do_miniht_load(int ht,int rh) { assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]); output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh)); } -void do_miniht_jump(int rs,int rh,int ht) { +static void do_miniht_jump(int rs,int rh,int ht) { emit_cmp(rh,rs); emit_ldreq_indexed(ht,4,15); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -3920,7 +4005,7 @@ void do_miniht_jump(int rs,int rh,int ht) { #endif } -void do_miniht_insert(u_int return_address,int rt,int temp) { +static void do_miniht_insert(u_int return_address,int rt,int temp) { #ifndef HAVE_ARMV7 emit_movimm(return_address,rt); // PC into link register add_to_linker((int)out,return_address,1); @@ -3937,7 +4022,7 @@ void do_miniht_insert(u_int return_address,int rt,int temp) { #endif } -void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) +static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) { //if(dirty_pre==dirty) return; int hr,reg,new_hr; @@ -3966,7 +4051,7 @@ void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty, /* using strd could possibly help but you'd have to allocate registers in pairs -void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu) +static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu) { int hr; int wrote=-1; @@ -4023,7 +4108,7 @@ void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint // Clearing the cache is rather slow on ARM Linux, so mark the areas // that need to be cleared, and then only clear these areas once. -void do_clear_cache() +static void do_clear_cache() { int i,j; for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 21e7fed..6501d26 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -83,22 +83,29 @@ struct ll_entry struct ll_entry *next; }; - u_int start; - u_int *source; - char insn[MAXBLOCK][10]; - u_char itype[MAXBLOCK]; - u_char opcode[MAXBLOCK]; - u_char opcode2[MAXBLOCK]; - u_char bt[MAXBLOCK]; - u_char rs1[MAXBLOCK]; - u_char rs2[MAXBLOCK]; - u_char rt1[MAXBLOCK]; - u_char rt2[MAXBLOCK]; - u_char us1[MAXBLOCK]; - u_char us2[MAXBLOCK]; - u_char dep1[MAXBLOCK]; - u_char dep2[MAXBLOCK]; - u_char lt1[MAXBLOCK]; + // used by asm: + u_char *out; + u_int hash_table[65536][4] __attribute__((aligned(16))); + struct ll_entry *jump_in[4096] __attribute__((aligned(16))); + struct ll_entry *jump_dirty[4096]; + + static struct ll_entry *jump_out[4096]; + static u_int start; + static u_int *source; + static char insn[MAXBLOCK][10]; + static u_char itype[MAXBLOCK]; + static u_char opcode[MAXBLOCK]; + static u_char opcode2[MAXBLOCK]; + static u_char bt[MAXBLOCK]; + static u_char rs1[MAXBLOCK]; + static u_char rs2[MAXBLOCK]; + static u_char rt1[MAXBLOCK]; + static u_char rt2[MAXBLOCK]; + static u_char us1[MAXBLOCK]; + static u_char us2[MAXBLOCK]; + static u_char dep1[MAXBLOCK]; + static u_char dep2[MAXBLOCK]; + static u_char lt1[MAXBLOCK]; static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; @@ -107,52 +114,47 @@ struct ll_entry static u_int smrv_weak; // same, but somewhat less likely static u_int smrv_strong_next; // same, but after current insn executes static u_int smrv_weak_next; - int imm[MAXBLOCK]; - u_int ba[MAXBLOCK]; - char likely[MAXBLOCK]; - char is_ds[MAXBLOCK]; - char ooo[MAXBLOCK]; - uint64_t unneeded_reg[MAXBLOCK]; - uint64_t unneeded_reg_upper[MAXBLOCK]; - uint64_t branch_unneeded_reg[MAXBLOCK]; - uint64_t branch_unneeded_reg_upper[MAXBLOCK]; - uint64_t pr32[MAXBLOCK]; - signed char regmap_pre[MAXBLOCK][HOST_REGS]; + static int imm[MAXBLOCK]; + static u_int ba[MAXBLOCK]; + static char likely[MAXBLOCK]; + static char is_ds[MAXBLOCK]; + static char ooo[MAXBLOCK]; + static uint64_t unneeded_reg[MAXBLOCK]; + static uint64_t unneeded_reg_upper[MAXBLOCK]; + static uint64_t branch_unneeded_reg[MAXBLOCK]; + static uint64_t branch_unneeded_reg_upper[MAXBLOCK]; + static signed char regmap_pre[MAXBLOCK][HOST_REGS]; static uint64_t current_constmap[HOST_REGS]; static uint64_t constmap[MAXBLOCK][HOST_REGS]; static struct regstat regs[MAXBLOCK]; static struct regstat branch_regs[MAXBLOCK]; - signed char minimum_free_regs[MAXBLOCK]; - u_int needed_reg[MAXBLOCK]; - u_int wont_dirty[MAXBLOCK]; - u_int will_dirty[MAXBLOCK]; - int ccadj[MAXBLOCK]; - int slen; - u_int instr_addr[MAXBLOCK]; - u_int link_addr[MAXBLOCK][3]; - int linkcount; - u_int stubs[MAXBLOCK*3][8]; - int stubcount; - u_int literals[1024][2]; - int literalcount; - int is_delayslot; - int cop1_usable; - u_char *out; - struct ll_entry *jump_in[4096] __attribute__((aligned(16))); - struct ll_entry *jump_out[4096]; - struct ll_entry *jump_dirty[4096]; - u_int hash_table[65536][4] __attribute__((aligned(16))); - char shadow[1048576] __attribute__((aligned(16))); - void *copy; - int expirep; - int new_dynarec_did_compile; - int new_dynarec_hacks; - u_int stop_after_jal; + static signed char minimum_free_regs[MAXBLOCK]; + static u_int needed_reg[MAXBLOCK]; + static u_int wont_dirty[MAXBLOCK]; + static u_int will_dirty[MAXBLOCK]; + static int ccadj[MAXBLOCK]; + static int slen; + static u_int instr_addr[MAXBLOCK]; + static u_int link_addr[MAXBLOCK][3]; + static int linkcount; + static u_int stubs[MAXBLOCK*3][8]; + static int stubcount; + static u_int literals[1024][2]; + static int literalcount; + static int is_delayslot; + static int cop1_usable; + static char shadow[1048576] __attribute__((aligned(16))); + static void *copy; + static int expirep; + static u_int stop_after_jal; #ifndef RAM_FIXED static u_int ram_offset; #else static const u_int ram_offset=0; #endif + + int new_dynarec_hacks; + int new_dynarec_did_compile; extern u_char restore_candidate[512]; extern int cycle_count; @@ -254,15 +256,20 @@ void jump_intcall(); void new_dyna_leave(); // Needed by assembler -void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32); -void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty); -void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr); -void load_all_regs(signed char i_regmap[]); -void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); -void load_regs_entry(int t); -void load_all_consts(signed char regmap[],int is32,u_int dirty,int i); +static void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32); +static void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty); +static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr); +static void load_all_regs(signed char i_regmap[]); +static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); +static void load_regs_entry(int t); +static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i); + +static int verify_dirty(u_int *ptr); +static int get_final_value(int hr, int i, int *value); +static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e); +static void add_to_linker(int addr,int target,int ext); -int tracedebug=0; +static int tracedebug=0; //#define DEBUG_CYCLE_COUNT 1 @@ -1747,7 +1754,7 @@ static void pagespan_alloc(struct regstat *current,int i) //else ... } -add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e) +static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e) { stubs[stubcount][0]=type; stubs[stubcount][1]=addr; @@ -3703,7 +3710,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } -int get_final_value(int hr, int i, int *value) +static int get_final_value(int hr, int i, int *value) { int reg=regs[i].regmap[hr]; while(iregmap; - int addr=get_reg(i_regmap,AGEN1+(i&1)); - int rth,rt; - int ds; + int rt; if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { - rth=get_reg(i_regmap,FTEMP|64); rt=get_reg(i_regmap,FTEMP); }else{ - rth=get_reg(i_regmap,rt1[i]|64); rt=get_reg(i_regmap,rt1[i]); } assert(rs>=0); @@ -2635,7 +2631,6 @@ static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_h static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,target); - int rth=get_reg(regmap,target|64); int rt=get_reg(regmap,target); if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); @@ -2725,14 +2720,10 @@ static void do_writestub(int n) struct regstat *i_regs=(struct regstat *)stubs[n][5]; u_int reglist=stubs[n][7]; signed char *i_regmap=i_regs->regmap; - int addr=get_reg(i_regmap,AGEN1+(i&1)); - int rth,rt,r; - int ds; + int rt,r; if(itype[i]==C1LS||itype[i]==C2LS) { - rth=get_reg(i_regmap,FTEMP|64); rt=get_reg(i_regmap,r=FTEMP); }else{ - rth=get_reg(i_regmap,rs2[i]|64); rt=get_reg(i_regmap,r=rs2[i]); } assert(rs>=0); @@ -2801,7 +2792,6 @@ static void do_writestub(int n) static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,-1); - int rth=get_reg(regmap,target|64); int rt=get_reg(regmap,target); assert(rs>=0); assert(rt>=0); @@ -2848,7 +2838,6 @@ static void do_unalignedwritestub(int n) signed char *i_regmap=i_regs->regmap; int temp2=get_reg(i_regmap,FTEMP); int rt; - int ds, real_rs; rt=get_reg(i_regmap,rs2[i]); assert(rt>=0); assert(addr>=0); @@ -3232,7 +3221,7 @@ static int get_ptr_mem_type(u_int a) static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) { - int jaddr,type=0; + int jaddr=0,type=0; int mr=rs1[i]; if(((smrv_strong|smrv_weak)>>mr)&1) { type=get_ptr_mem_type(smrv[mr]); @@ -3700,7 +3689,6 @@ static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) static void c2op_assemble(int i,struct regstat *i_regs) { - signed char temp=get_reg(i_regs->regmap,-1); u_int c2op=source[i]&0x3f; u_int hr,reglist_full=0,reglist; int need_flags,need_ir; @@ -4025,7 +4013,7 @@ static void do_miniht_insert(u_int return_address,int rt,int temp) { static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) { //if(dirty_pre==dirty) return; - int hr,reg,new_hr; + int hr,reg; for(hr=0;hrvaddr==vaddr) { //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); - int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; ht_bin[3]=ht_bin[1]; ht_bin[2]=ht_bin[0]; - ht_bin[1]=(int)head->addr; + ht_bin[1]=(u_int)head->addr; ht_bin[0]=vaddr; return head->addr; } @@ -334,9 +334,9 @@ void *get_addr(u_int vaddr) restore_candidate[vpage>>3]|=1<<(vpage&7); } else restore_candidate[page>>3]|=1<<(page&7); - int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; if(ht_bin[0]==vaddr) { - ht_bin[1]=(int)head->addr; // Replace existing entry + ht_bin[1]=(u_int)head->addr; // Replace existing entry } else { @@ -366,7 +366,7 @@ void *get_addr(u_int vaddr) void *get_addr_ht(u_int vaddr) { //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr); - int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; if(ht_bin[0]==vaddr) return (void *)ht_bin[1]; if(ht_bin[2]==vaddr) return (void *)ht_bin[3]; return get_addr(vaddr); @@ -634,6 +634,7 @@ int needed_again(int r, int i) } }*/ if(rn<10) return 1; + (void)b; return 0; } @@ -781,7 +782,7 @@ void *check_addr(u_int vaddr) void remove_hash(int vaddr) { //printf("remove hash: %x\n",vaddr); - int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF]; if(ht_bin[2]==vaddr) { ht_bin[2]=ht_bin[3]=-1; } @@ -817,7 +818,7 @@ void ll_clear(struct ll_entry **head) { struct ll_entry *cur; struct ll_entry *next; - if(cur=*head) { + if((cur=*head)) { *head=0; while(cur) { next=cur->next; @@ -991,7 +992,7 @@ void invalidate_addr(u_int addr) // Anything could have changed, so invalidate everything. void invalidate_all_pages() { - u_int page,n; + u_int page; for(page=0;page<4096;page++) invalidate_page(page); for(page=0;page<1048576;page++) @@ -1014,6 +1015,7 @@ void add_link(u_int vaddr,void *src) inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page); int *ptr=(int *)(src+4); assert((*ptr&0x0fff0000)==0x059f0000); + (void)ptr; ll_add(jump_out+page,vaddr,src); //int ptr=get_pointer(src); //inv_debug("add_link: Pointer is to %x\n",(int)ptr); @@ -1033,7 +1035,7 @@ void clean_blocks(u_int page) // Don't restore blocks which are about to expire from the cache if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { u_int start,end; - if(verify_dirty((int)head->addr)) { + if(verify_dirty(head->addr)) { //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr); u_int i; u_int inv=0; @@ -1054,12 +1056,12 @@ void clean_blocks(u_int page) //printf("page=%x, addr=%x\n",page,head->vaddr); //assert(head->vaddr>>12==(page|0x80000)); ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr); - int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF]; if(ht_bin[0]==head->vaddr) { - ht_bin[1]=(int)clean_addr; // Replace existing entry + ht_bin[1]=(u_int)clean_addr; // Replace existing entry } if(ht_bin[2]==head->vaddr) { - ht_bin[3]=(int)clean_addr; // Replace existing entry + ht_bin[3]=(u_int)clean_addr; // Replace existing entry } } } @@ -2333,23 +2335,25 @@ void imm16_assemble(int i,struct regstat *i_regs) emit_mov(sh,th); } } - if(opcode[i]==0x0d) //ORI - if(sl<0) { - emit_orimm(tl,imm[i],tl); - }else{ - if(!((i_regs->wasconst>>sl)&1)) - emit_orimm(sl,imm[i],tl); - else - emit_movimm(constmap[i][sl]|imm[i],tl); + if(opcode[i]==0x0d) { // ORI + if(sl<0) { + emit_orimm(tl,imm[i],tl); + }else{ + if(!((i_regs->wasconst>>sl)&1)) + emit_orimm(sl,imm[i],tl); + else + emit_movimm(constmap[i][sl]|imm[i],tl); + } } - if(opcode[i]==0x0e) //XORI - if(sl<0) { - emit_xorimm(tl,imm[i],tl); - }else{ - if(!((i_regs->wasconst>>sl)&1)) - emit_xorimm(sl,imm[i],tl); - else - emit_movimm(constmap[i][sl]^imm[i],tl); + if(opcode[i]==0x0e) { // XORI + if(sl<0) { + emit_xorimm(tl,imm[i],tl); + }else{ + if(!((i_regs->wasconst>>sl)&1)) + emit_xorimm(sl,imm[i],tl); + else + emit_movimm(constmap[i][sl]^imm[i],tl); + } } } else { @@ -2533,7 +2537,7 @@ void load_assemble(int i,struct regstat *i_regs) //printf("load_assemble: c=%d\n",c); //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset); // FIXME: Even if the load is a NOP, we should check for pagefaults... - if(tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80) + if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80)) ||rt1[i]==0) { // could be FIFO, must perform the read // ||dummy read @@ -2790,7 +2794,7 @@ void load_assemble(int i,struct regstat *i_regs) emit_call((int)memdebug); //emit_popa(); restore_regs(0x100f); - }/**/ + }*/ } #ifndef loadlr_assemble @@ -2806,7 +2810,7 @@ void store_assemble(int i,struct regstat *i_regs) int s,th,tl,map=-1; int addr,temp; int offset; - int jaddr=0,jaddr2,type; + int jaddr=0,type; int memtarget=0,c=0; int agr=AGEN1+(i&1); int faststore_reg_override=0; @@ -2925,7 +2929,7 @@ void store_assemble(int i,struct regstat *i_regs) #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) emit_callne(invalidate_addr_reg[addr]); #else - jaddr2=(int)out; + int jaddr2=(int)out; emit_jne(0); add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<regmap,CCREG); assert(ccreg==HOST_CCREG); assert(!is_delayslot); + (void)ccreg; emit_movimm(start+i*4,EAX); // Get PC emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... emit_jmp((int)jump_syscall_hle); // XXX @@ -3381,6 +3386,7 @@ void hlecall_assemble(int i,struct regstat *i_regs) signed char ccreg=get_reg(i_regs->regmap,CCREG); assert(ccreg==HOST_CCREG); assert(!is_delayslot); + (void)ccreg; emit_movimm(start+i*4+4,0); // Get PC emit_movimm((int)psxHLEt[source[i]&7],1); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX @@ -3392,6 +3398,7 @@ void intcall_assemble(int i,struct regstat *i_regs) signed char ccreg=get_reg(i_regs->regmap,CCREG); assert(ccreg==HOST_CCREG); assert(!is_delayslot); + (void)ccreg; emit_movimm(start+i*4,0); // Get PC emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); emit_jmp((int)jump_intcall); @@ -4636,7 +4643,6 @@ static void ujump_assemble_write_ra(int i) void ujump_assemble(int i,struct regstat *i_regs) { - signed char *i_regmap=i_regs->regmap; int ra_done=0; if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); @@ -4644,6 +4650,7 @@ void ujump_assemble(int i,struct regstat *i_regs) int temp=get_reg(branch_regs[i].regmap,PTEMP); if(rt1[i]==31&&temp>=0) { + signed char *i_regmap=i_regs->regmap; int return_address=start+i*4+8; if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); @@ -4709,9 +4716,8 @@ static void rjump_assemble_write_ra(int i) void rjump_assemble(int i,struct regstat *i_regs) { - signed char *i_regmap=i_regs->regmap; int temp; - int rs,cc,adj; + int rs,cc; int ra_done=0; rs=get_reg(branch_regs[i].regmap,rs1[i]); assert(rs>=0); @@ -4728,6 +4734,7 @@ void rjump_assemble(int i,struct regstat *i_regs) if(rt1[i]==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { + signed char *i_regmap=i_regs->regmap; int return_address=start+i*4+8; if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -4756,6 +4763,7 @@ void rjump_assemble(int i,struct regstat *i_regs) rjump_assemble_write_ra(i); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); + (void)cc; #ifdef USE_MINI_HT int rh=get_reg(branch_regs[i].regmap,RHASH); int ht=get_reg(branch_regs[i].regmap,RHTBL); @@ -5761,7 +5769,6 @@ static void pagespan_assemble(int i,struct regstat *i_regs) int s1h=get_reg(i_regs->regmap,rs1[i]|64); int s2l=get_reg(i_regs->regmap,rs2[i]); int s2h=get_reg(i_regs->regmap,rs2[i]|64); - void *nt_branch=NULL; int taken=0; int nottaken=0; int unconditional=0; @@ -5778,7 +5785,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) s1h=s2h=-1; } int hr=0; - int addr,alt,ntaddr; + int addr=-1,alt=-1,ntaddr=-1; if(i_regs->regmap[HOST_BTREG]<0) {addr=HOST_BTREG;} else { while(hr>r)&1));*/} + }else {/*printf("i: %x (%d) mismatch(+2): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/} } } } @@ -6783,7 +6790,7 @@ void clean_registers(int istart,int iend,int wr) if(r!=EXCLUDE_REG) { if(regs[i].regmap[r]==regmap_pre[i+1][r]) { regs[i+1].wasdirty&=wont_dirty_i|~(1<>r)&1));*/} + }else {/*printf("i: %x (%d) mismatch(+1): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/} } } } @@ -6826,7 +6833,7 @@ void clean_registers(int istart,int iend,int wr) wont_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<>r)&1));*/ + /*printf("i: %x (%d) mismatch: %d\n",start+i*4,i,r);assert(!((will_dirty>>r)&1));*/ } } } @@ -6988,7 +6995,6 @@ void new_dynarec_init() if (mprotect(out, 1<=0) { if(r!=regmap_pre[i][hr]) { @@ -10151,7 +10159,7 @@ int new_recompile_block(int addr) // replace it with the new address. // Don't add new entries. We'll insert the // ones that actually get used in check_addr(). - int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; if(ht_bin[0]==vaddr) { ht_bin[1]=entry_point; } @@ -10219,7 +10227,7 @@ int new_recompile_block(int addr) case 2: // Clear hash table for(i=0;i<32;i++) { - int *ht_bin=hash_table[((expirep&2047)<<5)+i]; + u_int *ht_bin=hash_table[((expirep&2047)<<5)+i]; if((ht_bin[3]>>shift)==(base>>shift) || ((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) { inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[2],ht_bin[3]); -- cgit v1.2.3 From 1fedf1ea555e4a6be68dd0ba384909ac21da65d0 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 19 Sep 2016 00:00:48 +0300 Subject: drc: remove unnecessary cache flushing should already be flushed or not matter at this point --- libpcsxcore/new_dynarec/new_dynarec.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ca12ea9..bfe3961 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1000,9 +1000,6 @@ void invalidate_all_pages() restore_candidate[(page&2047)>>3]|=1<<(page&7); restore_candidate[((page&2047)>>3)+256]|=1<<(page&7); } - #ifdef __arm__ - __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<>6)+8; } @@ -4094,6 +4090,17 @@ static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dir #define wb_invalidate wb_invalidate_arm */ +static void mark_clear_cache(void *target) +{ + u_long offset = (char *)target - (char *)BASE_ADDR; + u_int mask = 1u << ((offset >> 12) & 31); + if (!(needs_clear_cache[offset >> 17] & mask)) { + char *start = (char *)((u_long)target & ~4095ul); + start_tcache_write(start, start + 4096); + needs_clear_cache[offset >> 17] |= mask; + } +} + // Clearing the cache is rather slow on ARM Linux, so mark the areas // that need to be cleared, and then only clear these areas once. static void do_clear_cache() @@ -4115,7 +4122,7 @@ static void do_clear_cache() end+=4096; j++; }else{ - __clear_cache((void *)start,(void *)end); + end_tcache_write((void *)start,(void *)end); break; } } diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index 2d10ac7..acf65bd 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -5,21 +5,9 @@ #define HOST_IMM8 1 #define HAVE_CMOV_IMM 1 -#define CORTEX_A8_BRANCH_PREDICTION_HACK 1 -#define USE_MINI_HT 1 -//#define REG_PREFETCH 1 #define HAVE_CONDITIONAL_CALL 1 #define RAM_SIZE 0x200000 -#ifndef __ARM_ARCH_7A__ -//#undef CORTEX_A8_BRANCH_PREDICTION_HACK -//#undef USE_MINI_HT -#endif - -#ifndef BASE_ADDR_FIXED -#define BASE_ADDR_FIXED 0 -#endif - #define REG_SHIFT 2 /* ARM calling convention: diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 95af8b4..147b0df 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -20,6 +20,7 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #include "arm_features.h" +#include "new_dynarec_config.h" #include "linkage_offsets.h" @@ -160,9 +161,10 @@ ptr_hash_table: #endif .endm +/* r0 = virtual target address */ +/* r1 = instruction to patch */ .macro dyna_linker_main - /* r0 = virtual target address */ - /* r1 = instruction to patch */ +#ifndef NO_WRITE_EXEC load_varadr_ext r3, jump_in /* get_page */ lsr r2, r0, #12 @@ -242,6 +244,11 @@ ptr_hash_table: str r3, [r6, #12] mov pc, r1 8: +#else + /* XXX: should be able to do better than this... */ + bl get_addr_ht + mov pc, r0 +#endif .endm diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index bfe3961..ec2a6fa 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -23,7 +23,11 @@ #include #include #include +#ifdef __MACH__ +#include +#endif +#include "new_dynarec_config.h" #include "emu_if.h" //emulator interface //#define DISASM @@ -42,19 +46,6 @@ #include "assem_arm.h" #endif -#ifdef __BLACKBERRY_QNX__ -#undef __clear_cache -#define __clear_cache(start,end) msync(start, (size_t)((void*)end - (void*)start), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); -#elif defined(__MACH__) -#include -#define __clear_cache mach_clear_cache -static void __clear_cache(void *start, void *end) { - size_t len = (char *)end - (char *)start; - sys_dcache_flush(start, len); - sys_icache_invalidate(start, len); -} -#endif - #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 @@ -271,6 +262,56 @@ static void add_to_linker(int addr,int target,int ext); static int tracedebug=0; +static void mprotect_w_x(void *start, void *end, int is_x) +{ +#ifdef NO_WRITE_EXEC + u_long mstart = (u_long)start & ~4095ul; + u_long mend = (u_long)end; + if (mprotect((void *)mstart, mend - mstart, + PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE)) != 0) + SysPrintf("mprotect(%c) failed: %s\n", is_x ? 'x' : 'w', strerror(errno)); +#endif +} + +static void start_tcache_write(void *start, void *end) +{ + mprotect_w_x(start, end, 0); +} + +static void end_tcache_write(void *start, void *end) +{ +#ifdef __arm__ + size_t len = (char *)end - (char *)start; + #if defined(__BLACKBERRY_QNX__) + msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); + #elif defined(__MACH__) + sys_cache_control(kCacheFunctionPrepareForExecution, start, len); + #elif defined(VITA) + int block = sceKernelFindMemBlockByAddr(start, len); + sceKernelSyncVMDomain(block, start, len); + #else + __clear_cache(start, end); + #endif + (void)len; +#endif + + mprotect_w_x(start, end, 1); +} + +static void *start_block(void) +{ + u_char *end = out + MAX_OUTPUT_BLOCK_SIZE; + if (end > (u_char *)BASE_ADDR + (1<addr); @@ -838,10 +879,11 @@ void ll_kill_pointers(struct ll_entry *head,int addr,int shift) (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))) { inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr); - u_int host_addr=(u_int)kill_pointer(head->addr); + void *host_addr=find_extjump_insn(head->addr); #ifdef __arm__ - needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31); + mark_clear_cache(host_addr); #endif + set_jump_target((int)host_addr,(int)head->addr); } head=head->next; } @@ -865,10 +907,11 @@ void invalidate_page(u_int page) jump_out[page]=0; while(head!=NULL) { inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr); - u_int host_addr=(u_int)kill_pointer(head->addr); + void *host_addr=find_extjump_insn(head->addr); #ifdef __arm__ - needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31); + mark_clear_cache(host_addr); #endif + set_jump_target((int)host_addr,(int)head->addr); next=head->next; free(head); head=next; @@ -6936,13 +6979,14 @@ static void disassemble_inst(int i) {} static int new_dynarec_test(void) { int (*testfunc)(void) = (void *)out; + void *beginning; int ret; + + beginning = start_block(); emit_movimm(DRC_TEST_VAL,0); // test emit_jmpreg(14); literal_pool(0); -#ifdef __arm__ - __clear_cache((void *)testfunc, out); -#endif + end_block(beginning); SysPrintf("testing if we can run recompiled code..\n"); ret = testfunc(); if (ret == DRC_TEST_VAL) @@ -6987,7 +7031,7 @@ void new_dynarec_init() -1, 0) <= 0) { SysPrintf("mmap() failed: %s\n", strerror(errno)); } -#else +#elif !defined(NO_WRITE_EXEC) // not all systems allow execute in data segment by default if (mprotect(out, 1<>12]=0; emit_movimm(start,0); emit_writeword(0,(int)&pcaddr); emit_jmp((int)new_dyna_leave); literal_pool(0); -#ifdef __arm__ - __clear_cache((void *)beginning,out); -#endif + end_block(beginning); ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); return 0; } @@ -9883,7 +9926,7 @@ int new_recompile_block(int addr) cop1_usable=0; uint64_t is32_pre=0; u_int dirty_pre=0; - u_int beginning=(u_int)out; + void *beginning=start_block(); if((u_int)addr&1) { ds=1; pagespan_ds(); @@ -10173,14 +10216,12 @@ int new_recompile_block(int addr) // Align code if(((u_int)out)&7) emit_addnop(13); #endif - assert((u_int)out-beginning>3)+256]|=1<<(page&7); } #ifdef __arm__ - #if defined(VITA) - sceKernelCloseVMDomain(); - #endif __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<Country_code&0xFF) + switch (ROM_HEADER->Country_code&0xFF) { case 0x45: // U addr=0x34b30; - break; - case 0x4A: // J - addr=0x34b70; - break; - case 0x50: // E + break; + case 0x4A: // J + addr=0x34b70; + break; + case 0x50: // E addr=0x329f0; - break; - default: + break; + default: // Unknown country code addr=0; break; @@ -533,7 +526,7 @@ static void flush_dirty_uppers(struct regstat *cur) for (hr=0;hrdirty>>hr)&1) { reg=cur->regmap[hr]; - if(reg>=64) + if(reg>=64) if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1; } } @@ -690,7 +683,7 @@ int needed_again(int r, int i) int j; int b=-1; int rn=10; - + if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(ba[i-1]start+slen*4-4) @@ -793,7 +786,7 @@ int loop_reg(int i, int r, int hr) void alloc_all(struct regstat *cur,int i) { int hr; - + for(hr=0;hrregmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&& @@ -834,7 +827,7 @@ void mult64(uint64_t m1,uint64_t m2) unsigned long long int result1, result2, result3, result4; unsigned long long int temp1, temp2, temp3, temp4; int sign = 0; - + if (m1 < 0) { op2 = -m1; @@ -847,22 +840,22 @@ void mult64(uint64_t m1,uint64_t m2) sign = 1 - sign; } else op4 = m2; - + op1 = op2 & 0xFFFFFFFF; op2 = (op2 >> 32) & 0xFFFFFFFF; op3 = op4 & 0xFFFFFFFF; op4 = (op4 >> 32) & 0xFFFFFFFF; - + temp1 = op1 * op3; temp2 = (temp1 >> 32) + op1 * op4; temp3 = op2 * op3; temp4 = (temp3 >> 32) + op2 * op4; - + result1 = temp1 & 0xFFFFFFFF; result2 = temp2 + (temp3 & 0xFFFFFFFF); result3 = (result2 >> 32) + temp4; result4 = (result3 >> 32); - + lo = result1 | (result2 << 32); hi = (result3 & 0xFFFFFFFF) | (result4 << 32); if (sign) @@ -878,25 +871,25 @@ void multu64(uint64_t m1,uint64_t m2) unsigned long long int op1, op2, op3, op4; unsigned long long int result1, result2, result3, result4; unsigned long long int temp1, temp2, temp3, temp4; - + op1 = m1 & 0xFFFFFFFF; op2 = (m1 >> 32) & 0xFFFFFFFF; op3 = m2 & 0xFFFFFFFF; op4 = (m2 >> 32) & 0xFFFFFFFF; - + temp1 = op1 * op3; temp2 = (temp1 >> 32) + op1 * op4; temp3 = op2 * op3; temp4 = (temp3 >> 32) + op2 * op4; - + result1 = temp1 & 0xFFFFFFFF; result2 = temp2 + (temp3 & 0xFFFFFFFF); result3 = (result2 >> 32) + temp4; result4 = (result3 >> 32); - + lo = result1 | (result2 << 32); hi = (result3 & 0xFFFFFFFF) | (result4 << 32); - + //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32) // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); } @@ -1018,7 +1011,7 @@ void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift) { struct ll_entry *next; while(*head) { - if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || + if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) { inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr); @@ -1114,7 +1107,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) #ifdef __arm__ do_clear_cache(); #endif - + // Don't trap writes invalid_code[block]=1; #ifndef DISABLE_TLB @@ -3393,7 +3386,7 @@ void storelr_assemble(int i,struct regstat *i_regs) if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); gen_tlb_addr_w(temp,map); #else - if((u_int)rdram!=0x80000000) + if((u_int)rdram!=0x80000000) emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); #endif }else{ // using tlb @@ -4166,7 +4159,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) int mgr=MGEN1+(i&1); if(itype[i]==LOAD) { ra=get_reg(i_regs->regmap,rt1[i]); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg(i_regs->regmap,-1); assert(ra>=0); } if(itype[i]==LOADLR) { @@ -4764,7 +4757,7 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) { return 0; } - else + else if((i_dirty>>hr)&1) { if(i_regmap[hr]>s1l)&(branch_regs[i].is32>>rs1[i])&1) emit_loadreg(rs1[i],s1l); - } + } else { if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1) emit_loadreg(rs2[i],s1l); @@ -5201,7 +5194,7 @@ void do_ccstub(int n) load_all_regs(branch_regs[i].regmap); } emit_jmp(stubs[n][2]); // return address - + /* This works but uses a lot of memory... emit_readword((int)&last_count,ECX); emit_add(HOST_CCREG,ECX,EAX); @@ -5235,7 +5228,7 @@ add_to_linker(int addr,int target,int ext) { link_addr[linkcount][0]=addr; link_addr[linkcount][1]=target; - link_addr[linkcount][2]=ext; + link_addr[linkcount][2]=ext; linkcount++; } @@ -5261,7 +5254,7 @@ static void ujump_assemble_write_ra(int i) #endif { #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -5282,10 +5275,10 @@ void ujump_assemble(int i,struct regstat *i_regs) address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); - if(rt1[i]==31&&temp>=0) + if(rt1[i]==31&&temp>=0) { int return_address=start+i*4+8; - if(get_reg(branch_regs[i].regmap,31)>0) + if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } #endif @@ -5336,7 +5329,7 @@ static void rjump_assemble_write_ra(int i) assert(rt>=0); return_address=start+i*4+8; #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -5365,7 +5358,7 @@ void rjump_assemble(int i,struct regstat *i_regs) } address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH - if(rt1[i]==31) + if(rt1[i]==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { int return_address=start+i*4+8; @@ -5499,7 +5492,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); @@ -5554,7 +5547,7 @@ void cjump_assemble(int i,struct regstat *i_regs) load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); //assem_debug("cycle count (adj)\n"); @@ -5626,7 +5619,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(0); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if(opcode[i]==4) // BEQ @@ -5756,7 +5749,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(1); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if((opcode[i]&0x2f)==4) // BEQ @@ -5939,7 +5932,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); assem_debug("cycle count (adj)\n"); @@ -6026,7 +6019,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } } // if(!only32) - + if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { @@ -6274,7 +6267,7 @@ void fjump_assemble(int i,struct regstat *i_regs) { } } // if(!only32) - + if(invert) { if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -6782,14 +6775,14 @@ void unneeded_registers(int istart,int iend,int r) { // If subroutine call, flag return address as a possible branch target if(rt1[i]==31 && i=(start+slen*4)) { // Branch out of this block, flush all regs u=1; uu=1; gte_u=gte_u_unknown; - /* Hexagon hack + /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { uu=u=0x300C00F; // Discard at, v0-v1, t6-t9 @@ -7052,7 +7045,7 @@ static void provisional_32bit() int i,j; uint64_t is32=1; uint64_t lastbranch=1; - + for(i=0;i0) { @@ -7089,13 +7082,13 @@ static void provisional_32bit() uint64_t temp_is32=is32; for(j=i-1;j>=0;j--) { - if(ba[j]==start+i*4) + if(ba[j]==start+i*4) //temp_is32&=branch_regs[j].is32; temp_is32&=p32[j]; } for(j=i;j=0;i--) { int hr; @@ -7380,7 +7373,7 @@ static void provisional_r32() } //requires_32bit[i]=r32; pr32[i]=r32; - + // Dirty registers which are 32-bit, require 32-bit input // as they will be written as 32-bit values for(hr=0;hristart) { - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) + if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) { // Don't store a register immediately after writing it, // may prevent dual-issue. @@ -7981,28 +7974,17 @@ void new_dynarec_init() { SysPrintf("Init new dynarec\n"); out=(u_char *)BASE_ADDR; -#if defined(VITA) - +#if BASE_ADDR_FIXED if (mmap (out, 1< %x\n", (int)addr, (int)out); //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); - //if(debug) + //if(debug) //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); /*if(Count>=312978186) { @@ -8264,7 +8246,7 @@ int new_recompile_block(int addr) unsigned int type,op,op2; //printf("addr = %x source = %x %x\n", addr,source,source[0]); - + /* Pass 1 disassembly */ for(i=0;!done;i++) { @@ -8924,7 +8906,7 @@ int new_recompile_block(int addr) /* Pass 2 - Register dependencies and branch targets */ unneeded_registers(0,slen-1,0); - + /* Pass 3 - Register allocation */ struct regstat current; // Current register allocations/status @@ -8954,7 +8936,7 @@ int new_recompile_block(int addr) unneeded_reg_upper[0]=1; current.regmap[HOST_BTREG]=BTREG; } - + for(i=0;i=0;j--) { - if(ba[j]==start+i*4) + if(ba[j]==start+i*4) temp_is32&=branch_regs[j].is32; } for(j=i;j=0;j--) { - if(ba[j]==start+i*4+4) + if(ba[j]==start+i*4+4) temp_is32&=branch_regs[j].is32; } for(j=i;j=0) + if(get_reg(current.regmap,r|64)>=0) current.regmap[get_reg(current.regmap,r|64)]=-1; } } @@ -9077,12 +9059,12 @@ int new_recompile_block(int addr) uint64_t temp_is32=current.is32; for(j=i-1;j>=0;j--) { - if(ba[j]==start+i*4+8) + if(ba[j]==start+i*4+8) temp_is32&=branch_regs[j].is32; } for(j=i;j=0) + if(get_reg(current.regmap,r|64)>=0) current.regmap[get_reg(current.regmap,r|64)]=-1; } } @@ -9190,7 +9172,7 @@ int new_recompile_block(int addr) } } else { // First instruction expects CCREG to be allocated - if(i==0&&hr==HOST_CCREG) + if(i==0&&hr==HOST_CCREG) regs[i].regmap_entry[hr]=CCREG; else regs[i].regmap_entry[hr]=-1; @@ -9525,7 +9507,7 @@ int new_recompile_block(int addr) pagespan_alloc(¤t,i); break; } - + // Drop the upper half of registers that have become 32-bit current.uu|=current.is32&((1LL<=0;i--) { int hr; @@ -10083,7 +10065,7 @@ int new_recompile_block(int addr) } // Save it needed_reg[i]=nr; - + // Deallocate unneeded registers for(hr=0;hr=start && ba[i]<(start+i*4)) + if(ba[i]>=start && ba[i]<(start+i*4)) if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS @@ -10273,10 +10255,10 @@ int new_recompile_block(int addr) } } if(ooo[i]) { - if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; }else{ - if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; } // Avoid dirty->clean transition @@ -10446,10 +10428,10 @@ int new_recompile_block(int addr) if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { if(ooo[j]) { - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; }else{ - if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; } if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { @@ -10522,7 +10504,7 @@ int new_recompile_block(int addr) regs[k].isconst&=~(1<i&&f_regmap[HOST_CCREG]==CCREG) @@ -10564,7 +10546,7 @@ int new_recompile_block(int addr) } } } - + // Cache memory offset or tlb map pointer if a register is available #ifndef HOST_IMM_ADDR32 #ifndef RAM_OFFSET @@ -10744,7 +10726,7 @@ int new_recompile_block(int addr) } } #endif - + // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs. for(i=0;i=0) { @@ -10919,7 +10901,7 @@ int new_recompile_block(int addr) } } if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) { - if(itype[i+1]==LOAD) + if(itype[i+1]==LOAD) hr=get_reg(regs[i+1].regmap,rt1[i+1]); if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); @@ -10943,16 +10925,16 @@ int new_recompile_block(int addr) } } } - + /* Pass 6 - Optimize clean/dirty state */ clean_registers(0,slen-1,1); - + /* Pass 7 - Identify 32-bit registers */ #ifndef FORCE32 provisional_r32(); u_int r32=0; - + for (i=slen-1;i>=0;i--) { int hr; @@ -11048,7 +11030,7 @@ int new_recompile_block(int addr) if((regs[i].was32>>dep2[i])&1) r32|=1LL<(u_int)BASE_ADDR+(1<>12;i<=(start+slen*4)>>12;i++) { invalid_code[i]=0; @@ -11636,9 +11618,9 @@ int new_recompile_block(int addr) invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; #endif - + /* Pass 10 - Free memory by expiring oldest blocks */ - + int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { @@ -11680,7 +11662,7 @@ int new_recompile_block(int addr) case 3: // Clear jump_out #ifdef __arm__ - if((expirep&2047)==0) + if((expirep&2047)==0) do_clear_cache(); #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); -- cgit v1.2.3 From 186935dccdeb09590c0858b7510c769f5ccb06de Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 21 Sep 2016 01:06:32 +0300 Subject: drc: avoid MAP_FIXED it's almost never a good idea, it will override some other mapping and make things crash --- libpcsxcore/new_dynarec/new_dynarec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ec2a6fa..b0bfb23 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -7027,8 +7027,8 @@ void new_dynarec_init() #if BASE_ADDR_FIXED if (mmap (out, 1< #endif +#ifdef _3DS +#include <3ds_utils.h> +#endif +#ifdef VITA +#include +static int sceBlock; +#endif #include "new_dynarec_config.h" #include "emu_if.h" //emulator interface @@ -265,11 +272,20 @@ static int tracedebug=0; static void mprotect_w_x(void *start, void *end, int is_x) { #ifdef NO_WRITE_EXEC + #if defined(VITA) + // *Open* enables write on all memory that was + // allocated by sceKernelAllocMemBlockForVM()? + if (is_x) + sceKernelCloseVMDomain(); + else + sceKernelOpenVMDomain(); + #else u_long mstart = (u_long)start & ~4095ul; u_long mend = (u_long)end; if (mprotect((void *)mstart, mend - mstart, PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE)) != 0) SysPrintf("mprotect(%c) failed: %s\n", is_x ? 'x' : 'w', strerror(errno)); + #endif #endif } @@ -287,8 +303,9 @@ static void end_tcache_write(void *start, void *end) #elif defined(__MACH__) sys_cache_control(kCacheFunctionPrepareForExecution, start, len); #elif defined(VITA) - int block = sceKernelFindMemBlockByAddr(start, len); - sceKernelSyncVMDomain(block, start, len); + sceKernelSyncVMDomain(sceBlock, start, len); + #elif defined(_3DS) + ctr_flush_invalidate_cache(); #else __clear_cache(start, end); #endif @@ -7023,19 +7040,43 @@ void new_dynarec_clear_full() void new_dynarec_init() { SysPrintf("Init new dynarec\n"); - out=(u_char *)BASE_ADDR; -#if BASE_ADDR_FIXED - if (mmap (out, 1< static int sceBlock; +int getVMBlock(); #endif #include "new_dynarec_config.h" @@ -53,6 +54,10 @@ static int sceBlock; #include "assem_arm.h" #endif +#ifdef VITA +int _newlib_vm_size_user = 1 << TARGET_SIZE_2; +#endif + #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 @@ -7054,12 +7059,13 @@ void new_dynarec_init() } #elif defined(BASE_ADDR_DYNAMIC) #ifdef VITA - sceBlock = sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2); + sceBlock = getVMBlock();//sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2); if (sceBlock < 0) SysPrintf("sceKernelAllocMemBlockForVM failed\n"); int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&translation_cache); if (ret < 0) SysPrintf("sceKernelGetMemBlockBase failed\n"); + sceClibPrintf("translation_cache = 0x%08X \n ", translation_cache); #else translation_cache = mmap (NULL, 1 << TARGET_SIZE_2, PROT_READ | PROT_WRITE | PROT_EXEC, @@ -7097,8 +7103,8 @@ void new_dynarec_cleanup() int n; #if defined(BASE_ADDR_FIXED) || defined(BASE_ADDR_DYNAMIC) #ifdef VITA - sceKernelFreeMemBlock(sceBlock); - sceBlock = -1; + //sceKernelFreeMemBlock(sceBlock); + //sceBlock = -1; #else if (munmap ((void *)BASE_ADDR, 1<>2)|0xF00; + //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2); + } + else if(ptr[3]==0x72) { + // generated by emit_jno_unlikely + if((target-(u_int)ptr2-8)<1024) { + assert((addr&3)==0); + assert((target&3)==0); + *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00; + } + else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) { + assert((addr&3)==0); + assert((target&3)==0); + *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00; + } + else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8); + } + else { + assert((ptr[3]&0x0e)==0xa); + *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); + } +} + +// This optionally copies the instruction from the target of the branch into +// the space before the branch. Works, but the difference in speed is +// usually insignificant. +#if 0 +static void set_jump_target_fillslot(int addr,u_int target,int copy) +{ + u_char *ptr=(u_char *)addr; + u_int *ptr2=(u_int *)ptr; + assert(!copy||ptr2[-1]==0xe28dd000); + if(ptr[3]==0xe2) { + assert(!copy); + assert((target-(u_int)ptr2-8)<4096); + *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8); + } + else { + assert((ptr[3]&0x0e)==0xa); + u_int target_insn=*(u_int *)target; + if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags + copy=0; + } + if((target_insn&0x0c100000)==0x04100000) { // Load + copy=0; + } + if(target_insn&0x08000000) { + copy=0; + } + if(copy) { + ptr2[-1]=target_insn; + target+=4; + } + *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); + } +} +#endif + +/* Literal pool */ +static void add_literal(int addr,int val) +{ + assert(literalcount>6)+8; +} + +// Find the "clean" entry point from a "dirty" entry point +// by skipping past the call to verify_code +static u_int get_clean_addr(int addr) +{ + int *ptr=(int *)addr; + #ifndef HAVE_ARMV7 + ptr+=4; + #else + ptr+=6; + #endif + if((*ptr&0xFF000000)!=0xeb000000) ptr++; + assert((*ptr&0xFF000000)==0xeb000000); // bl instruction + ptr++; + if((*ptr&0xFF000000)==0xea000000) { + return (int)ptr+((*ptr<<8)>>6)+8; // follow jump + } + return (u_int)ptr; +} + +static int verify_dirty(u_int *ptr) +{ + #ifndef HAVE_ARMV7 + // get from literal pool + assert((*ptr&0xFFFF0000)==0xe59f0000); + u_int offset=*ptr&0xfff; + u_int *l_ptr=(void *)ptr+offset+8; + u_int source=l_ptr[0]; + u_int copy=l_ptr[1]; + u_int len=l_ptr[2]; + ptr+=4; + #else + // ARMv7 movw/movt + assert((*ptr&0xFFF00000)==0xe3000000); + u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); + u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); + u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); + ptr+=6; + #endif + if((*ptr&0xFF000000)!=0xeb000000) ptr++; + assert((*ptr&0xFF000000)==0xeb000000); // bl instruction + //printf("verify_dirty: %x %x %x\n",source,copy,len); + return !memcmp((void *)source,(void *)copy,len); +} + +// This doesn't necessarily find all clean entry points, just +// guarantees that it's not dirty +static int isclean(int addr) +{ + #ifndef HAVE_ARMV7 + u_int *ptr=((u_int *)addr)+4; + #else + u_int *ptr=((u_int *)addr)+6; + #endif + if((*ptr&0xFF000000)!=0xeb000000) ptr++; + if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction + if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0; + if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0; + if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0; + return 1; +} + +// get source that block at addr was compiled from (host pointers) +static void get_bounds(int addr,u_int *start,u_int *end) +{ + u_int *ptr=(u_int *)addr; + #ifndef HAVE_ARMV7 + // get from literal pool + assert((*ptr&0xFFFF0000)==0xe59f0000); + u_int offset=*ptr&0xfff; + u_int *l_ptr=(void *)ptr+offset+8; + u_int source=l_ptr[0]; + //u_int copy=l_ptr[1]; + u_int len=l_ptr[2]; + ptr+=4; + #else + // ARMv7 movw/movt + assert((*ptr&0xFFF00000)==0xe3000000); + u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); + //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); + u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); + ptr+=6; + #endif + if((*ptr&0xFF000000)!=0xeb000000) ptr++; + assert((*ptr&0xFF000000)==0xeb000000); // bl instruction + *start=source; + *end=source+len; +} + +/* Register allocation */ + +// Note: registers are allocated clean (unmodified state) +// if you intend to modify the register, you must call dirty_reg(). +static void alloc_reg(struct regstat *cur,int i,signed char reg) +{ + int r,hr; + int preferred_reg = (reg&7); + if(reg==CCREG) preferred_reg=HOST_CCREG; + if(reg==PTEMP||reg==FTEMP) preferred_reg=12; + + // Don't allocate unused registers + if((cur->u>>reg)&1) return; + + // see if it's already allocated + for(hr=0;hrregmap[hr]==reg) return; + } + + // Keep the same mapping if the register was already allocated in a loop + preferred_reg = loop_reg(i,reg,preferred_reg); + + // Try to allocate the preferred register + if(cur->regmap[preferred_reg]==-1) { + cur->regmap[preferred_reg]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; + if(r<64&&((cur->u>>r)&1)) { + cur->regmap[preferred_reg]=reg; + cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { + cur->regmap[preferred_reg]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]; + if(r>=0) { + if(r<64) { + if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} + } + else + { + if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} + } + } + } + // Try to allocate any available register, but prefer + // registers that have not been used recently. + if(i>0) { + for(hr=0;hrregmap[hr]==-1) { + if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); + //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); + if(i>0) { + // Don't evict the cycle count at entry points, otherwise the entry + // stub will have to write it. + if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; + for(j=10;j>=3;j--) + { + // Alloc preferred register if available + if(hsn[r=cur->regmap[preferred_reg]&63]==j) { + for(hr=0;hrregmap[hr]&63)==r) { + cur->regmap[hr]=-1; + cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg; + return; + } + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<=0;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<uu>>reg)&1) return; + + // see if the upper half is already allocated + for(hr=0;hrregmap[hr]==reg+64) return; + } + + // Keep the same mapping if the register was already allocated in a loop + preferred_reg = loop_reg(i,reg,preferred_reg); + + // Try to allocate the preferred register + if(cur->regmap[preferred_reg]==-1) { + cur->regmap[preferred_reg]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; + if(r<64&&((cur->u>>r)&1)) { + cur->regmap[preferred_reg]=reg|64; + cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { + cur->regmap[preferred_reg]=reg|64; + cur->dirty&=~(1<isconst&=~(1<=0;hr--) + { + r=cur->regmap[hr]; + if(r>=0) { + if(r<64) { + if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} + } + else + { + if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} + } + } + } + // Try to allocate any available register, but prefer + // registers that have not been used recently. + if(i>0) { + for(hr=0;hrregmap[hr]==-1) { + if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); + //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); + if(i>0) { + // Don't evict the cycle count at entry points, otherwise the entry + // stub will have to write it. + if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; + for(j=10;j>=3;j--) + { + // Alloc preferred register if available + if(hsn[r=cur->regmap[preferred_reg]&63]==j) { + for(hr=0;hrregmap[hr]&63)==r) { + cur->regmap[hr]=-1; + cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg|64; + return; + } + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<=0;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==reg) return; + } + + // Try to allocate any available register + for(hr=HOST_REGS-1;hr>=0;hr--) { + if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<=0;hr--) + { + r=cur->regmap[hr]; + if(r>=0) { + if(r<64) { + if((cur->u>>r)&1) { + if(i==0||((unneeded_reg[i-1]>>r)&1)) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<uu>>(r&63))&1) { + if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); + if(i>0) { + // Don't evict the cycle count at entry points, otherwise the entry + // stub will have to write it. + if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; + for(j=10;j>=3;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + for(hr=0;hr2) { + if(cur->regmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<2) { + if(cur->regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<=0;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[n]==reg) { + dirty=(cur->dirty>>n)&1; + cur->regmap[n]=-1; + } + } + + cur->regmap[hr]=reg; + cur->dirty&=~(1<dirty|=dirty<isconst&=~(1<0) + { + if(imm<256) { + *encoded=((i&30)<<7)|imm; + return 1; + } + imm=(imm>>2)|(imm<<30);i-=2; + } + return 0; +} + +static void genimm_checked(u_int imm,u_int *encoded) +{ + u_int ret=genimm(imm,encoded); + assert(ret); + (void)ret; +} + +static u_int genjmp(u_int addr) +{ + int offset=addr-(int)out-8; + if(offset<-33554432||offset>=33554432) { + if (addr>2) { + SysPrintf("genjmp: out of range: %08x\n", offset); + exit(1); + } + return 0; + } + return ((u_int)offset>>2)&0xffffff; +} + +static void emit_mov(int rs,int rt) +{ + assem_debug("mov %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_movs(int rs,int rt) +{ + assem_debug("movs %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_add(int rs1,int rs2,int rt) +{ + assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_adds(int rs1,int rs2,int rt) +{ + assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_adcs(int rs1,int rs2,int rt) +{ + assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_sbc(int rs1,int rs2,int rt) +{ + assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_sbcs(int rs1,int rs2,int rt) +{ + assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_neg(int rs, int rt) +{ + assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0xe2600000|rd_rn_rm(rt,rs,0)); +} + +static void emit_negs(int rs, int rt) +{ + assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0xe2700000|rd_rn_rm(rt,rs,0)); +} + +static void emit_sub(int rs1,int rs2,int rt) +{ + assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_subs(int rs1,int rs2,int rt) +{ + assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_zeroreg(int rt) +{ + assem_debug("mov %s,#0\n",regname[rt]); + output_w32(0xe3a00000|rd_rn_rm(rt,0,0)); +} + +static void emit_loadlp(u_int imm,u_int rt) +{ + add_literal((int)out,imm); + assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm); + output_w32(0xe5900000|rd_rn_rm(rt,15,0)); +} + +static void emit_movw(u_int imm,u_int rt) +{ + assert(imm<65536); + assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm); + output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000)); +} + +static void emit_movt(u_int imm,u_int rt) +{ + assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000); + output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000)); +} + +static void emit_movimm(u_int imm,u_int rt) +{ + u_int armval; + if(genimm(imm,&armval)) { + assem_debug("mov %s,#%d\n",regname[rt],imm); + output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); + }else if(genimm(~imm,&armval)) { + assem_debug("mvn %s,#%d\n",regname[rt],imm); + output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); + }else if(imm<65536) { + #ifndef HAVE_ARMV7 + assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00); + output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8)); + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); + output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + #else + emit_movw(imm,rt); + #endif + }else{ + #ifndef HAVE_ARMV7 + emit_loadlp(imm,rt); + #else + emit_movw(imm&0x0000FFFF,rt); + emit_movt(imm&0xFFFF0000,rt); + #endif + } +} + +static void emit_pcreladdr(u_int rt) +{ + assem_debug("add %s,pc,#?\n",regname[rt]); + output_w32(0xe2800000|rd_rn_rm(rt,15,0)); +} + +static void emit_loadreg(int r, int hr) +{ + if(r&64) { + SysPrintf("64bit load in 32bit mode!\n"); + assert(0); + return; + } + if((r&63)==0) + emit_zeroreg(hr); + else { + int addr=((int)reg)+((r&63)<>4); + if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); + if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); + if(r==CCREG) addr=(int)&cycle_count; + if(r==CSREG) addr=(int)&Status; + if(r==FSREG) addr=(int)&FCR31; + if(r==INVCP) addr=(int)&invc_ptr; + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("ldr %s,fp+%d\n",regname[hr],offset); + output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset); + } +} + +static void emit_storereg(int r, int hr) +{ + if(r&64) { + SysPrintf("64bit store in 32bit mode!\n"); + assert(0); + return; + } + int addr=((int)reg)+((r&63)<>4); + if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); + if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); + if(r==CCREG) addr=(int)&cycle_count; + if(r==FSREG) addr=(int)&FCR31; + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("str %s,fp+%d\n",regname[hr],offset); + output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset); +} + +static void emit_test(int rs, int rt) +{ + assem_debug("tst %s,%s\n",regname[rs],regname[rt]); + output_w32(0xe1100000|rd_rn_rm(0,rs,rt)); +} + +static void emit_testimm(int rs,int imm) +{ + u_int armval; + assem_debug("tst %s,#%d\n",regname[rs],imm); + genimm_checked(imm,&armval); + output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval); +} + +static void emit_testeqimm(int rs,int imm) +{ + u_int armval; + assem_debug("tsteq %s,$%d\n",regname[rs],imm); + genimm_checked(imm,&armval); + output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval); +} + +static void emit_not(int rs,int rt) +{ + assem_debug("mvn %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe1e00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_mvnmi(int rs,int rt) +{ + assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); + output_w32(0x41e00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_and(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_or(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_or_and_set_flags(int rs1,int rs2,int rt) +{ + assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(imm<32); + assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm); + output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7)); +} + +static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(imm<32); + assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm); + output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7)); +} + +static void emit_xor(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_addimm(u_int rs,int imm,u_int rt) +{ + assert(rs<16); + assert(rt<16); + if(imm!=0) { + u_int armval; + if(genimm(imm,&armval)) { + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); + }else if(genimm(-imm,&armval)) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm); + output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); + #ifdef HAVE_ARMV7 + }else if(rt!=rs&&(u_int)imm<65536) { + emit_movw(imm&0x0000ffff,rt); + emit_add(rs,rt,rt); + }else if(rt!=rs&&(u_int)-imm<65536) { + emit_movw(-imm&0x0000ffff,rt); + emit_sub(rs,rt,rt); + #endif + }else if((u_int)-imm<65536) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00); + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); + output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8)); + output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); + }else { + do { + int shift = (ffs(imm) - 1) & ~1; + int imm8 = imm & (0xff << shift); + genimm_checked(imm8,&armval); + assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8); + output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); + rs = rt; + imm &= ~imm8; + } + while (imm != 0); + } + } + else if(rs!=rt) emit_mov(rs,rt); +} + +static void emit_addimm_and_set_flags(int imm,int rt) +{ + assert(imm>-65536&&imm<65536); + u_int armval; + if(genimm(imm,&armval)) { + assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm); + output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval); + }else if(genimm(-imm,&armval)) { + assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm); + output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval); + }else if(imm<0) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00); + assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); + output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8)); + output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); + }else{ + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00); + assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); + output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8)); + output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + } +} + +static void emit_addimm_no_flags(u_int imm,u_int rt) +{ + emit_addimm(rt,imm,rt); +} + +static void emit_addnop(u_int r) +{ + assert(r<16); + assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]); + output_w32(0xe2800000|rd_rn_rm(r,r,0)); +} + +static void emit_adcimm(u_int rs,int imm,u_int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_rscimm(int rs,int imm,u_int rt) +{ + assert(0); + u_int armval; + genimm_checked(imm,&armval); + assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) +{ + // TODO: if(genimm(imm,&armval)) ... + // else + emit_movimm(imm,HOST_TEMPREG); + emit_adds(HOST_TEMPREG,rsl,rtl); + emit_adcimm(rsh,0,rth); +} + +static void emit_andimm(int rs,int imm,int rt) +{ + u_int armval; + if(imm==0) { + emit_zeroreg(rt); + }else if(genimm(imm,&armval)) { + assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval); + }else if(genimm(~imm,&armval)) { + assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval); + }else if(imm==65535) { + #ifndef HAVE_ARMV6 + assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]); + output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF); + assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]); + output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF); + #else + assem_debug("uxth %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs)); + #endif + }else{ + assert(imm>0&&imm<65535); + #ifndef HAVE_ARMV7 + assem_debug("mov r14,#%d\n",imm&0xFF00); + output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8)); + assem_debug("add r14,r14,#%d\n",imm&0xFF); + output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0)); + #else + emit_movw(imm,HOST_TEMPREG); + #endif + assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]); + output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG)); + } +} + +static void emit_orimm(int rs,int imm,int rt) +{ + u_int armval; + if(imm==0) { + if(rs!=rt) emit_mov(rs,rt); + }else if(genimm(imm,&armval)) { + assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval); + }else{ + assert(imm>0&&imm<65536); + assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); + assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF); + output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8)); + output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + } +} + +static void emit_xorimm(int rs,int imm,int rt) +{ + u_int armval; + if(imm==0) { + if(rs!=rt) emit_mov(rs,rt); + }else if(genimm(imm,&armval)) { + assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval); + }else{ + assert(imm>0&&imm<65536); + assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); + assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF); + output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8)); + output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + } +} + +static void emit_shlimm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + //if(imm==1) ... + assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); +} + +static void emit_lsls_imm(int rs,int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); +} + +static unused void emit_lslpls_imm(int rs,int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); +} + +static void emit_shrimm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); +} + +static void emit_sarimm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7)); +} + +static void emit_rorimm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7)); +} + +static void emit_shldimm(int rs,int rs2,u_int imm,int rt) +{ + assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); + assert(imm>0); + assert(imm<32); + //if(imm==1) ... + assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); + assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); + output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); +} + +static void emit_shrdimm(int rs,int rs2,u_int imm,int rt) +{ + assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); + assert(imm>0); + assert(imm<32); + //if(imm==1) ... + assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7)); + assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); + output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); +} + +static void emit_signextend16(int rs,int rt) +{ + #ifndef HAVE_ARMV6 + emit_shlimm(rs,16,rt); + emit_sarimm(rt,16,rt); + #else + assem_debug("sxth %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs)); + #endif +} + +static void emit_signextend8(int rs,int rt) +{ + #ifndef HAVE_ARMV6 + emit_shlimm(rs,24,rt); + emit_sarimm(rt,24,rt); + #else + assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe6af0070|rd_rn_rm(rt,0,rs)); + #endif +} + +static void emit_shl(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + //if(imm==1) ... + assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8)); +} + +static void emit_shr(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8)); +} + +static void emit_sar(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); +} + +static void emit_orrshl(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); + output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8)); +} + +static void emit_orrshr(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); + output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8)); +} + +static void emit_cmpimm(int rs,int imm) +{ + u_int armval; + if(genimm(imm,&armval)) { + assem_debug("cmp %s,#%d\n",regname[rs],imm); + output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval); + }else if(genimm(-imm,&armval)) { + assem_debug("cmn %s,#%d\n",regname[rs],imm); + output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval); + }else if(imm>0) { + assert(imm<65536); + emit_movimm(imm,HOST_TEMPREG); + assem_debug("cmp %s,r14\n",regname[rs]); + output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG)); + }else{ + assert(imm>-65536); + emit_movimm(-imm,HOST_TEMPREG); + assem_debug("cmn %s,r14\n",regname[rs]); + output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG)); + } +} + +static void emit_cmovne_imm(int imm,int rt) +{ + assem_debug("movne %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval); +} + +static void emit_cmovl_imm(int imm,int rt) +{ + assem_debug("movlt %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval); +} + +static void emit_cmovb_imm(int imm,int rt) +{ + assem_debug("movcc %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval); +} + +static void emit_cmovs_imm(int imm,int rt) +{ + assem_debug("movmi %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); +} + +static void emit_cmove_reg(int rs,int rt) +{ + assem_debug("moveq %s,%s\n",regname[rt],regname[rs]); + output_w32(0x01a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_cmovne_reg(int rs,int rt) +{ + assem_debug("movne %s,%s\n",regname[rt],regname[rs]); + output_w32(0x11a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_cmovl_reg(int rs,int rt) +{ + assem_debug("movlt %s,%s\n",regname[rt],regname[rs]); + output_w32(0xb1a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_cmovs_reg(int rs,int rt) +{ + assem_debug("movmi %s,%s\n",regname[rt],regname[rs]); + output_w32(0x41a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_slti32(int rs,int imm,int rt) +{ + if(rs!=rt) emit_zeroreg(rt); + emit_cmpimm(rs,imm); + if(rs==rt) emit_movimm(0,rt); + emit_cmovl_imm(1,rt); +} + +static void emit_sltiu32(int rs,int imm,int rt) +{ + if(rs!=rt) emit_zeroreg(rt); + emit_cmpimm(rs,imm); + if(rs==rt) emit_movimm(0,rt); + emit_cmovb_imm(1,rt); +} + +static void emit_slti64_32(int rsh,int rsl,int imm,int rt) +{ + assert(rsh!=rt); + emit_slti32(rsl,imm,rt); + if(imm>=0) + { + emit_test(rsh,rsh); + emit_cmovne_imm(0,rt); + emit_cmovs_imm(1,rt); + } + else + { + emit_cmpimm(rsh,-1); + emit_cmovne_imm(0,rt); + emit_cmovl_imm(1,rt); + } +} + +static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) +{ + assert(rsh!=rt); + emit_sltiu32(rsl,imm,rt); + if(imm>=0) + { + emit_test(rsh,rsh); + emit_cmovne_imm(0,rt); + } + else + { + emit_cmpimm(rsh,-1); + emit_cmovne_imm(1,rt); + } +} + +static void emit_cmp(int rs,int rt) +{ + assem_debug("cmp %s,%s\n",regname[rs],regname[rt]); + output_w32(0xe1500000|rd_rn_rm(0,rs,rt)); +} + +static void emit_set_gz32(int rs, int rt) +{ + //assem_debug("set_gz32\n"); + emit_cmpimm(rs,1); + emit_movimm(1,rt); + emit_cmovl_imm(0,rt); +} + +static void emit_set_nz32(int rs, int rt) +{ + //assem_debug("set_nz32\n"); + if(rs!=rt) emit_movs(rs,rt); + else emit_test(rs,rs); + emit_cmovne_imm(1,rt); +} + +static void emit_set_gz64_32(int rsh, int rsl, int rt) +{ + //assem_debug("set_gz64\n"); + emit_set_gz32(rsl,rt); + emit_test(rsh,rsh); + emit_cmovne_imm(1,rt); + emit_cmovs_imm(0,rt); +} + +static void emit_set_nz64_32(int rsh, int rsl, int rt) +{ + //assem_debug("set_nz64\n"); + emit_or_and_set_flags(rsh,rsl,rt); + emit_cmovne_imm(1,rt); +} + +static void emit_set_if_less32(int rs1, int rs2, int rt) +{ + //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); + if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); + emit_cmp(rs1,rs2); + if(rs1==rt||rs2==rt) emit_movimm(0,rt); + emit_cmovl_imm(1,rt); +} + +static void emit_set_if_carry32(int rs1, int rs2, int rt) +{ + //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); + if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); + emit_cmp(rs1,rs2); + if(rs1==rt||rs2==rt) emit_movimm(0,rt); + emit_cmovb_imm(1,rt); +} + +static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) +{ + //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); + assert(u1!=rt); + assert(u2!=rt); + emit_cmp(l1,l2); + emit_movimm(0,rt); + emit_sbcs(u1,u2,HOST_TEMPREG); + emit_cmovl_imm(1,rt); +} + +static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) +{ + //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); + assert(u1!=rt); + assert(u2!=rt); + emit_cmp(l1,l2); + emit_movimm(0,rt); + emit_sbcs(u1,u2,HOST_TEMPREG); + emit_cmovb_imm(1,rt); +} + +static void emit_call(int a) +{ + assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8); + u_int offset=genjmp(a); + output_w32(0xeb000000|offset); +} + +static void emit_jmp(int a) +{ + assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8); + u_int offset=genjmp(a); + output_w32(0xea000000|offset); +} + +static void emit_jne(int a) +{ + assem_debug("bne %x\n",a); + u_int offset=genjmp(a); + output_w32(0x1a000000|offset); +} + +static void emit_jeq(int a) +{ + assem_debug("beq %x\n",a); + u_int offset=genjmp(a); + output_w32(0x0a000000|offset); +} + +static void emit_js(int a) +{ + assem_debug("bmi %x\n",a); + u_int offset=genjmp(a); + output_w32(0x4a000000|offset); +} + +static void emit_jns(int a) +{ + assem_debug("bpl %x\n",a); + u_int offset=genjmp(a); + output_w32(0x5a000000|offset); +} + +static void emit_jl(int a) +{ + assem_debug("blt %x\n",a); + u_int offset=genjmp(a); + output_w32(0xba000000|offset); +} + +static void emit_jge(int a) +{ + assem_debug("bge %x\n",a); + u_int offset=genjmp(a); + output_w32(0xaa000000|offset); +} + +static void emit_jno(int a) +{ + assem_debug("bvc %x\n",a); + u_int offset=genjmp(a); + output_w32(0x7a000000|offset); +} + +static void emit_jc(int a) +{ + assem_debug("bcs %x\n",a); + u_int offset=genjmp(a); + output_w32(0x2a000000|offset); +} + +static void emit_jcc(int a) +{ + assem_debug("bcc %x\n",a); + u_int offset=genjmp(a); + output_w32(0x3a000000|offset); +} + +static void emit_callreg(u_int r) +{ + assert(r<15); + assem_debug("blx %s\n",regname[r]); + output_w32(0xe12fff30|r); +} + +static void emit_jmpreg(u_int r) +{ + assem_debug("mov pc,%s\n",regname[r]); + output_w32(0xe1a00000|rd_rn_rm(15,0,r)); +} + +static void emit_readword_indexed(int offset, int rs, int rt) +{ + assert(offset>-4096&&offset<4096); + assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset); + }else{ + output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset)); + } +} + +static void emit_readword_dualindexedx4(int rs1, int rs2, int rt) +{ + assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100); +} + +static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) +{ + if(map<0) emit_readword_indexed(addr, rs, rt); + else { + assert(addr==0); + emit_readword_dualindexedx4(rs, map, rt); + } +} + +static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) +{ + if(map<0) { + if(rh>=0) emit_readword_indexed(addr, rs, rh); + emit_readword_indexed(addr+4, rs, rl); + }else{ + assert(rh!=rs); + if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh); + emit_addimm(map,1,map); + emit_readword_indexed_tlb(addr, rs, map, rl); + } +} + +static void emit_movsbl_indexed(int offset, int rs, int rt) +{ + assert(offset>-256&&offset<256); + assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) +{ + if(map<0) emit_movsbl_indexed(addr, rs, rt); + else { + if(addr==0) { + emit_shlimm(map,2,map); + assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]); + output_w32(0xe19000d0|rd_rn_rm(rt,rs,map)); + }else{ + assert(addr>-256&&addr<256); + assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]); + output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7)); + emit_movsbl_indexed(addr, rt, rt); + } + } +} + +static void emit_movswl_indexed(int offset, int rs, int rt) +{ + assert(offset>-256&&offset<256); + assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_movzbl_indexed(int offset, int rs, int rt) +{ + assert(offset>-4096&&offset<4096); + assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset); + }else{ + output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset)); + } +} + +static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) +{ + assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100); +} + +static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) +{ + if(map<0) emit_movzbl_indexed(addr, rs, rt); + else { + if(addr==0) { + emit_movzbl_dualindexedx4(rs, map, rt); + }else{ + emit_addimm(rs,addr,rt); + emit_movzbl_dualindexedx4(rt, map, rt); + } + } +} + +static void emit_movzwl_indexed(int offset, int rs, int rt) +{ + assert(offset>-256&&offset<256); + assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_ldrd(int offset, int rs, int rt) +{ + assert(offset>-256&&offset<256); + assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_readword(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("ldr %s,fp+%d\n",regname[rt],offset); + output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); +} + +static unused void emit_movsbl(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<256); + assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset); + output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); +} + +static unused void emit_movswl(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<256); + assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset); + output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); +} + +static unused void emit_movzbl(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("ldrb %s,fp+%d\n",regname[rt],offset); + output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset); +} + +static unused void emit_movzwl(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<256); + assem_debug("ldrh %s,fp+%d\n",regname[rt],offset); + output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); +} + +static void emit_writeword_indexed(int rt, int offset, int rs) +{ + assert(offset>-4096&&offset<4096); + assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset); + }else{ + output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset)); + } +} + +static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2) +{ + assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100); +} + +static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) +{ + if(map<0) emit_writeword_indexed(rt, addr, rs); + else { + assert(addr==0); + emit_writeword_dualindexedx4(rt, rs, map); + } +} + +static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp) +{ + if(map<0) { + if(rh>=0) emit_writeword_indexed(rh, addr, rs); + emit_writeword_indexed(rl, addr+4, rs); + }else{ + assert(rh>=0); + if(temp!=rs) emit_addimm(map,1,temp); + emit_writeword_indexed_tlb(rh, addr, rs, map, temp); + if(temp!=rs) emit_writeword_indexed_tlb(rl, addr, rs, temp, temp); + else { + emit_addimm(rs,4,rs); + emit_writeword_indexed_tlb(rl, addr, rs, map, temp); + } + } +} + +static void emit_writehword_indexed(int rt, int offset, int rs) +{ + assert(offset>-256&&offset<256); + assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_writebyte_indexed(int rt, int offset, int rs) +{ + assert(offset>-4096&&offset<4096); + assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset); + }else{ + output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset)); + } +} + +static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) +{ + assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100); +} + +static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) +{ + if(map<0) emit_writebyte_indexed(rt, addr, rs); + else { + if(addr==0) { + emit_writebyte_dualindexedx4(rt, rs, map); + }else{ + emit_addimm(rs,addr,temp); + emit_writebyte_dualindexedx4(rt, temp, map); + } + } +} + +static void emit_strcc_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_strccb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_strcch_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_writeword(int rt, int addr) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("str %s,fp+%d\n",regname[rt],offset); + output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); +} + +static unused void emit_writehword(int rt, int addr) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<256); + assem_debug("strh %s,fp+%d\n",regname[rt],offset); + output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); +} + +static unused void emit_writebyte(int rt, int addr) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("strb %s,fp+%d\n",regname[rt],offset); + output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); +} + +static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) +{ + assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); + assert(rs1<16); + assert(rs2<16); + assert(hi<16); + assert(lo<16); + output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} + +static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) +{ + assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); + assert(rs1<16); + assert(rs2<16); + assert(hi<16); + assert(lo<16); + output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} + +static void emit_clz(int rs,int rt) +{ + assem_debug("clz %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs)); +} + +static void emit_subcs(int rs1,int rs2,int rt) +{ + assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_shrcc_imm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); +} + +static void emit_shrne_imm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); +} + +static void emit_negmi(int rs, int rt) +{ + assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0x42600000|rd_rn_rm(rt,rs,0)); +} + +static void emit_negsmi(int rs, int rt) +{ + assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0x42700000|rd_rn_rm(rt,rs,0)); +} + +static void emit_orreq(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_orrne(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); +} + +static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); +} + +static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); +} + +static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); +} + +static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); +} + +static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); +} + +static void emit_teq(int rs, int rt) +{ + assem_debug("teq %s,%s\n",regname[rs],regname[rt]); + output_w32(0xe1300000|rd_rn_rm(0,rs,rt)); +} + +static void emit_rsbimm(int rs, int imm, int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval); +} + +// Load 2 immediates optimizing for small code size +static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) +{ + emit_movimm(imm1,rt1); + u_int armval; + if(genimm(imm2-imm1,&armval)) { + assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1); + output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval); + }else if(genimm(imm1-imm2,&armval)) { + assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2); + output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval); + } + else emit_movimm(imm2,rt2); +} + +// Conditionally select one of two immediates, optimizing for small code size +// This will only be called if HAVE_CMOV_IMM is defined +static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) +{ + u_int armval; + if(genimm(imm2-imm1,&armval)) { + emit_movimm(imm1,rt); + assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1); + output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval); + }else if(genimm(imm1-imm2,&armval)) { + emit_movimm(imm1,rt); + assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2); + output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval); + } + else { + #ifndef HAVE_ARMV7 + emit_movimm(imm1,rt); + add_literal((int)out,imm2); + assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2); + output_w32(0x15900000|rd_rn_rm(rt,15,0)); + #else + emit_movw(imm1&0x0000FFFF,rt); + if((imm1&0xFFFF)!=(imm2&0xFFFF)) { + assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF); + output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000)); + } + emit_movt(imm1&0xFFFF0000,rt); + if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) { + assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000); + output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000)); + } + #endif + } +} + +// special case for checking invalid_code +static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) +{ + assert(imm<128&&imm>=0); + assert(r>=0&&r<16); + assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]); + output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620); + emit_cmpimm(HOST_TEMPREG,imm); +} + +static void emit_callne(int a) +{ + assem_debug("blne %x\n",a); + u_int offset=genjmp(a); + output_w32(0x1b000000|offset); +} + +// Used to preload hash table entries +static unused void emit_prefetchreg(int r) +{ + assem_debug("pld %s\n",regname[r]); + output_w32(0xf5d0f000|rd_rn_rm(0,r,0)); +} + +// Special case for mini_ht +static void emit_ldreq_indexed(int rs, u_int offset, int rt) +{ + assert(offset<4096); + assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset); + output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset); +} + +static unused void emit_bicne_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_biccs_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_bicvc_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_bichi_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_orrvs_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_orrne_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_andne_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_addpl_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_jno_unlikely(int a) +{ + //emit_jno(a); + assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a); + output_w32(0x72800000|rd_rn_rm(15,15,0)); +} + +static void save_regs_all(u_int reglist) +{ + int i; + if(!reglist) return; + assem_debug("stmia fp,{"); + for(i=0;i<16;i++) + if(reglist&(1<=BASE_ADDR&&addr<(BASE_ADDR+(1<=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000)); +//DEBUG > +#ifdef DEBUG_CYCLE_COUNT + emit_readword((int)&last_count,ECX); + emit_add(HOST_CCREG,ECX,HOST_CCREG); + emit_readword((int)&next_interupt,ECX); + emit_writeword(HOST_CCREG,(int)&Count); + emit_sub(HOST_CCREG,ECX,HOST_CCREG); + emit_writeword(ECX,(int)&last_count); +#endif +//DEBUG < + emit_jmp(linker); +} + +static void emit_extjump(int addr, int target) +{ + emit_extjump2(addr, target, (int)dyna_linker); +} + +static void emit_extjump_ds(int addr, int target) +{ + emit_extjump2(addr, target, (int)dyna_linker_ds); +} + +// put rt_val into rt, potentially making use of rs with value rs_val +static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt) +{ + u_int armval; + int diff; + if(genimm(rt_val,&armval)) { + assem_debug("mov %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); + return; + } + if(genimm(~rt_val,&armval)) { + assem_debug("mvn %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); + return; + } + diff=rt_val-rs_val; + if(genimm(diff,&armval)) { + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff); + output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); + return; + }else if(genimm(-diff,&armval)) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff); + output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); + return; + } + emit_movimm(rt_val,rt); +} + +// return 1 if above function can do it's job cheaply +static int is_similar_value(u_int v1,u_int v2) +{ + u_int xs; + int diff; + if(v1==v2) return 1; + diff=v2-v1; + for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2) + ; + if(xs<0x100) return 1; + for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2) + ; + if(xs<0x100) return 1; + return 0; +} + +// trashes r2 +static void pass_args(int a0, int a1) +{ + if(a0==1&&a1==0) { + // must swap + emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0); + } + else if(a0!=0&&a1==0) { + emit_mov(a1,1); + if (a0>=0) emit_mov(a0,0); + } + else { + if(a0>=0&&a0!=0) emit_mov(a0,0); + if(a1>=0&&a1!=1) emit_mov(a1,1); + } +} + +static void mov_loadtype_adj(int type,int rs,int rt) +{ + switch(type) { + case LOADB_STUB: emit_signextend8(rs,rt); break; + case LOADBU_STUB: emit_andimm(rs,0xff,rt); break; + case LOADH_STUB: emit_signextend16(rs,rt); break; + case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break; + case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break; + default: assert(0); + } +} + +#include "../backends/psx/pcsxmem.h" +#include "../backends/psx/pcsxmem_inline.c" + +static void do_readstub(int n) +{ + assem_debug("do_readstub %x\n",start+stubs[n][3]*4); + literal_pool(256); + set_jump_target(stubs[n][1],(int)out); + int type=stubs[n][0]; + int i=stubs[n][3]; + int rs=stubs[n][4]; + struct regstat *i_regs=(struct regstat *)stubs[n][5]; + u_int reglist=stubs[n][7]; + signed char *i_regmap=i_regs->regmap; + int rt; + if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { + rt=get_reg(i_regmap,FTEMP); + }else{ + rt=get_reg(i_regmap,rt1[i]); + } + assert(rs>=0); + int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0; + reglist|=(1<=0&&rt1[i]!=0) + reglist&=~(1<=0&&rt1[i]!=0)) { + switch(type) { + case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break; + case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break; + case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break; + case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break; + case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break; + } + } + if(regs_saved) { + restore_jump=(int)out; + emit_jcc(0); // jump to reg restore + } + else + emit_jcc(stubs[n][2]); // return address + + if(!regs_saved) + save_regs(reglist); + int handler=0; + if(type==LOADB_STUB||type==LOADBU_STUB) + handler=(int)jump_handler_read8; + if(type==LOADH_STUB||type==LOADHU_STUB) + handler=(int)jump_handler_read16; + if(type==LOADW_STUB) + handler=(int)jump_handler_read32; + assert(handler!=0); + pass_args(rs,temp2); + int cc=get_reg(i_regmap,CCREG); + if(cc<0) + emit_loadreg(CCREG,2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2); + emit_call(handler); + if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { + mov_loadtype_adj(type,0,rt); + } + if(restore_jump) + set_jump_target(restore_jump,(int)out); + restore_regs(reglist); + emit_jmp(stubs[n][2]); // return address +} + +// return memhandler, or get directly accessable address and return 0 +static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) +{ + u_int l1,l2=0; + l1=((u_int *)table)[addr>>12]; + if((l1&(1<<31))==0) { + u_int v=l1<<1; + *addr_host=v+addr; + return 0; + } + else { + l1<<=1; + if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB) + l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)]; + else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB) + l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2]; + else + l2=((u_int *)l1)[(addr&0xfff)/4]; + if((l2&(1<<31))==0) { + u_int v=l2<<1; + *addr_host=v+(addr&0xfff); + return 0; + } + return l2<<1; + } +} + +static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +{ + int rs=get_reg(regmap,target); + int rt=get_reg(regmap,target); + if(rs<0) rs=get_reg(regmap,-1); + assert(rs>=0); + u_int handler,host_addr=0,is_dynamic,far_call=0; + int cc=get_reg(regmap,CCREG); + if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) + return; + handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr); + if (handler==0) { + if(rt<0||rt1[i]==0) + return; + if(addr!=host_addr) + emit_movimm_from(addr,rs,host_addr,rs); + switch(type) { + case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break; + case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; + case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break; + case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break; + case LOADW_STUB: emit_readword_indexed(0,rs,rt); break; + default: assert(0); + } + return; + } + is_dynamic=pcsxmem_is_handler_dynamic(addr); + if(is_dynamic) { + if(type==LOADB_STUB||type==LOADBU_STUB) + handler=(int)jump_handler_read8; + if(type==LOADH_STUB||type==LOADHU_STUB) + handler=(int)jump_handler_read16; + if(type==LOADW_STUB) + handler=(int)jump_handler_read32; + } + + // call a memhandler + if(rt>=0&&rt1[i]!=0) + reglist&=~(1<=33554432) { + // unreachable memhandler, a plugin func perhaps + emit_movimm(handler,12); + far_call=1; + } + if(cc<0) + emit_loadreg(CCREG,2); + if(is_dynamic) { + emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + } + else { + emit_readword((int)&last_count,3); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + emit_add(2,3,2); + emit_writeword(2,(int)&Count); + } + + if(far_call) + emit_callreg(12); + else + emit_call(handler); + + if(rt>=0&&rt1[i]!=0) { + switch(type) { + case LOADB_STUB: emit_signextend8(0,rt); break; + case LOADBU_STUB: emit_andimm(0,0xff,rt); break; + case LOADH_STUB: emit_signextend16(0,rt); break; + case LOADHU_STUB: emit_andimm(0,0xffff,rt); break; + case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break; + default: assert(0); + } + } + restore_regs(reglist); +} + +static void do_writestub(int n) +{ + assem_debug("do_writestub %x\n",start+stubs[n][3]*4); + literal_pool(256); + set_jump_target(stubs[n][1],(int)out); + int type=stubs[n][0]; + int i=stubs[n][3]; + int rs=stubs[n][4]; + struct regstat *i_regs=(struct regstat *)stubs[n][5]; + u_int reglist=stubs[n][7]; + signed char *i_regmap=i_regs->regmap; + int rt,r; + if(itype[i]==C1LS||itype[i]==C2LS) { + rt=get_reg(i_regmap,r=FTEMP); + }else{ + rt=get_reg(i_regmap,r=rs2[i]); + } + assert(rs>=0); + assert(rt>=0); + int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra; + int reglist2=reglist|(1<=0); + assert(rt>=0); + u_int handler,host_addr=0; + handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr); + if (handler==0) { + if(addr!=host_addr) + emit_movimm_from(addr,rs,host_addr,rs); + switch(type) { + case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break; + case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break; + case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break; + default: assert(0); + } + return; + } + + // call a memhandler + save_regs(reglist); + pass_args(rs,rt); + int cc=get_reg(regmap,CCREG); + if(cc<0) + emit_loadreg(CCREG,2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + emit_movimm(handler,3); + // returns new cycle_count + emit_call((int)jump_handler_write_h); + emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc); + if(cc<0) + emit_storereg(CCREG,2); + restore_regs(reglist); +} + +static void do_unalignedwritestub(int n) +{ + assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4); + literal_pool(256); + set_jump_target(stubs[n][1],(int)out); + + int i=stubs[n][3]; + struct regstat *i_regs=(struct regstat *)stubs[n][4]; + int addr=stubs[n][5]; + u_int reglist=stubs[n][7]; + signed char *i_regmap=i_regs->regmap; + int temp2=get_reg(i_regmap,FTEMP); + int rt; + rt=get_reg(i_regmap,rs2[i]); + assert(rt>=0); + assert(addr>=0); + assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented + reglist|=(1<regmap_entry,i_regs->was32,i_regs->wasdirty); + if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); + emit_movimm(start+(i-ds)*4,EAX); // Get PC + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... + emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception); +} + +/* Special assem */ + +static void shift_assemble_arm(int i,struct regstat *i_regs) +{ + if(rt1[i]) { + if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV + { + signed char s,t,shift; + t=get_reg(i_regs->regmap,rt1[i]); + s=get_reg(i_regs->regmap,rs1[i]); + shift=get_reg(i_regs->regmap,rs2[i]); + if(t>=0){ + if(rs1[i]==0) + { + emit_zeroreg(t); + } + else if(rs2[i]==0) + { + assert(s>=0); + if(s!=t) emit_mov(s,t); + } + else + { + emit_andimm(shift,31,HOST_TEMPREG); + if(opcode2[i]==4) // SLLV + { + emit_shl(s,HOST_TEMPREG,t); + } + if(opcode2[i]==6) // SRLV + { + emit_shr(s,HOST_TEMPREG,t); + } + if(opcode2[i]==7) // SRAV + { + emit_sar(s,HOST_TEMPREG,t); + } + } + } + } else { // DSLLV/DSRLV/DSRAV + signed char sh,sl,th,tl,shift; + th=get_reg(i_regs->regmap,rt1[i]|64); + tl=get_reg(i_regs->regmap,rt1[i]); + sh=get_reg(i_regs->regmap,rs1[i]|64); + sl=get_reg(i_regs->regmap,rs1[i]); + shift=get_reg(i_regs->regmap,rs2[i]); + if(tl>=0){ + if(rs1[i]==0) + { + emit_zeroreg(tl); + if(th>=0) emit_zeroreg(th); + } + else if(rs2[i]==0) + { + assert(sl>=0); + if(sl!=tl) emit_mov(sl,tl); + if(th>=0&&sh!=th) emit_mov(sh,th); + } + else + { + // FIXME: What if shift==tl ? + assert(shift!=tl); + int temp=get_reg(i_regs->regmap,-1); + int real_th=th; + if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register + assert(sl>=0); + assert(sh>=0); + emit_andimm(shift,31,HOST_TEMPREG); + if(opcode2[i]==0x14) // DSLLV + { + if(th>=0) emit_shl(sh,HOST_TEMPREG,th); + emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); + emit_orrshr(sl,HOST_TEMPREG,th); + emit_andimm(shift,31,HOST_TEMPREG); + emit_testimm(shift,32); + emit_shl(sl,HOST_TEMPREG,tl); + if(th>=0) emit_cmovne_reg(tl,th); + emit_cmovne_imm(0,tl); + } + if(opcode2[i]==0x16) // DSRLV + { + assert(th>=0); + emit_shr(sl,HOST_TEMPREG,tl); + emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); + emit_orrshl(sh,HOST_TEMPREG,tl); + emit_andimm(shift,31,HOST_TEMPREG); + emit_testimm(shift,32); + emit_shr(sh,HOST_TEMPREG,th); + emit_cmovne_reg(th,tl); + if(real_th>=0) emit_cmovne_imm(0,th); + } + if(opcode2[i]==0x17) // DSRAV + { + assert(th>=0); + emit_shr(sl,HOST_TEMPREG,tl); + emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); + if(real_th>=0) { + assert(temp>=0); + emit_sarimm(th,31,temp); + } + emit_orrshl(sh,HOST_TEMPREG,tl); + emit_andimm(shift,31,HOST_TEMPREG); + emit_testimm(shift,32); + emit_sar(sh,HOST_TEMPREG,th); + emit_cmovne_reg(th,tl); + if(real_th>=0) emit_cmovne_reg(temp,th); + } + } + } + } + } +} + +static void speculate_mov(int rs,int rt) +{ + if(rt!=0) { + smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); + else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]); + else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); + else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]); + else { + smrv_strong_next&=~(1<=0) { + if(get_final_value(hr,i,&value)) + smrv[rt1[i]]=value; + else smrv[rt1[i]]=constmap[i][hr]; + smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); + else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); + } + break; + case LOAD: + if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) { + // special case for BIOS + smrv[rt1[i]]=0xa0000000; + smrv_strong_next|=1<>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); +#endif +} + +enum { + MTYPE_8000 = 0, + MTYPE_8020, + MTYPE_0000, + MTYPE_A000, + MTYPE_1F80, +}; + +static int get_ptr_mem_type(u_int a) +{ + if(a < 0x00200000) { + if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0)) + // return wrong, must use memhandler for BIOS self-test to pass + // 007 does similar stuff from a00 mirror, weird stuff + return MTYPE_8000; + return MTYPE_0000; + } + if(0x1f800000 <= a && a < 0x1f801000) + return MTYPE_1F80; + if(0x80200000 <= a && a < 0x80800000) + return MTYPE_8020; + if(0xa0000000 <= a && a < 0xa0200000) + return MTYPE_A000; + return MTYPE_8000; +} + +static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) +{ + int jaddr=0,type=0; + int mr=rs1[i]; + if(((smrv_strong|smrv_weak)>>mr)&1) { + type=get_ptr_mem_type(smrv[mr]); + //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); + } + else { + // use the mirror we are running on + type=get_ptr_mem_type(start); + //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type); + } + + if(type==MTYPE_8020) { // RAM 80200000+ mirror + emit_andimm(addr,~0x00e00000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_0000) { // RAM 0 mirror + emit_orimm(addr,0x80000000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_A000) { // RAM A mirror + emit_andimm(addr,~0x20000000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_1F80) { // scratchpad + if (psxH == (void *)0x1f800000) { + emit_addimm(addr,-0x1f800000,HOST_TEMPREG); + emit_cmpimm(HOST_TEMPREG,0x1000); + jaddr=(int)out; + emit_jc(0); + } + else { + // do usual RAM check, jump will go to the right handler + type=0; + } + } + + if(type==0) + { + emit_cmpimm(addr,RAM_SIZE); + jaddr=(int)out; + #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK + // Hint to branch predictor that the branch is unlikely to be taken + if(rs1[i]>=28) + emit_jno_unlikely(0); + else + #endif + emit_jno(0); + if(ram_offset!=0) { + emit_addimm(addr,ram_offset,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + } + } + + return jaddr; +} + +#define shift_assemble shift_assemble_arm + +static void loadlr_assemble_arm(int i,struct regstat *i_regs) +{ + int s,th,tl,temp,temp2,addr,map=-1; + int offset; + int jaddr=0; + int memtarget=0,c=0; + int fastload_reg_override=0; + u_int hr,reglist=0; + th=get_reg(i_regs->regmap,rt1[i]|64); + tl=get_reg(i_regs->regmap,rt1[i]); + s=get_reg(i_regs->regmap,rs1[i]); + temp=get_reg(i_regs->regmap,-1); + temp2=get_reg(i_regs->regmap,FTEMP); + addr=get_reg(i_regs->regmap,AGEN1+(i&1)); + assert(addr<0); + offset=imm[i]; + for(hr=0;hrregmap[hr]>=0) reglist|=1<=0) { + c=(i_regs->wasconst>>s)&1; + if(c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + } + } + if(!c) { + #ifdef RAM_OFFSET + map=get_reg(i_regs->regmap,ROREG); + if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); + #endif + emit_shlimm(addr,3,temp); + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR + }else{ + emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR + } + jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); + } + else { + if(ram_offset&&memtarget) { + emit_addimm(temp2,ram_offset,HOST_TEMPREG); + fastload_reg_override=HOST_TEMPREG; + } + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR + }else{ + emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR + } + } + if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR + if(!c||memtarget) { + int a=temp2; + if(fastload_reg_override) a=fastload_reg_override; + //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2); + emit_readword_indexed_tlb(0,a,map,temp2); + if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); + } + else + inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist); + if(rt1[i]) { + assert(tl>=0); + emit_andimm(temp,24,temp); +#ifdef BIG_ENDIAN_MIPS + if (opcode[i]==0x26) // LWR +#else + if (opcode[i]==0x22) // LWL +#endif + emit_xorimm(temp,24,temp); + emit_movimm(-1,HOST_TEMPREG); + if (opcode[i]==0x26) { + emit_shr(temp2,temp,temp2); + emit_bic_lsr(tl,HOST_TEMPREG,temp,tl); + }else{ + emit_shl(temp2,temp,temp2); + emit_bic_lsl(tl,HOST_TEMPREG,temp,tl); + } + emit_or(temp2,tl,tl); + } + //emit_storereg(rt1[i],tl); // DEBUG + } + if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR + // FIXME: little endian, fastload_reg_override + int temp2h=get_reg(i_regs->regmap,FTEMP|64); + if(!c||memtarget) { + //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h); + //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2); + emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2); + if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); + } + else + inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist); + if(rt1[i]) { + assert(th>=0); + assert(tl>=0); + emit_testimm(temp,32); + emit_andimm(temp,24,temp); + if (opcode[i]==0x1A) { // LDL + emit_rsbimm(temp,32,HOST_TEMPREG); + emit_shl(temp2h,temp,temp2h); + emit_orrshr(temp2,HOST_TEMPREG,temp2h); + emit_movimm(-1,HOST_TEMPREG); + emit_shl(temp2,temp,temp2); + emit_cmove_reg(temp2h,th); + emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl); + emit_bicne_lsl(th,HOST_TEMPREG,temp,th); + emit_orreq(temp2,tl,tl); + emit_orrne(temp2,th,th); + } + if (opcode[i]==0x1B) { // LDR + emit_xorimm(temp,24,temp); + emit_rsbimm(temp,32,HOST_TEMPREG); + emit_shr(temp2,temp,temp2); + emit_orrshl(temp2h,HOST_TEMPREG,temp2); + emit_movimm(-1,HOST_TEMPREG); + emit_shr(temp2h,temp,temp2h); + emit_cmovne_reg(temp2,tl); + emit_bicne_lsr(th,HOST_TEMPREG,temp,th); + emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl); + emit_orrne(temp2h,th,th); + emit_orreq(temp2h,tl,tl); + } + } + } +} +#define loadlr_assemble loadlr_assemble_arm + +static void cop0_assemble(int i,struct regstat *i_regs) +{ + if(opcode2[i]==0) // MFC0 + { + signed char t=get_reg(i_regs->regmap,rt1[i]); + char copr=(source[i]>>11)&0x1f; + //assert(t>=0); // Why does this happen? OOT is weird + if(t>=0&&rt1[i]!=0) { + emit_readword((int)®_cop0+copr*4,t); + } + } + else if(opcode2[i]==4) // MTC0 + { + signed char s=get_reg(i_regs->regmap,rs1[i]); + char copr=(source[i]>>11)&0x1f; + assert(s>=0); + wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); + if(copr==9||copr==11||copr==12||copr==13) { + emit_readword((int)&last_count,HOST_TEMPREG); + emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc + emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); + emit_writeword(HOST_CCREG,(int)&Count); + } + // What a mess. The status register (12) can enable interrupts, + // so needs a special case to handle a pending interrupt. + // The interrupt must be taken immediately, because a subsequent + // instruction might disable interrupts again. + if(copr==12||copr==13) { + if (is_delayslot) { + // burn cycles to cause cc_interrupt, which will + // reschedule next_interupt. Relies on CCREG from above. + assem_debug("MTC0 DS %d\n", copr); + emit_writeword(HOST_CCREG,(int)&last_count); + emit_movimm(0,HOST_CCREG); + emit_storereg(CCREG,HOST_CCREG); + emit_loadreg(rs1[i],1); + emit_movimm(copr,0); + emit_call((int)pcsx_mtc0_ds); + emit_loadreg(rs1[i],s); + return; + } + emit_movimm(start+i*4+4,HOST_TEMPREG); + emit_writeword(HOST_TEMPREG,(int)&pcaddr); + emit_movimm(0,HOST_TEMPREG); + emit_writeword(HOST_TEMPREG,(int)&pending_exception); + } + //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); + //else + if(s==HOST_CCREG) + emit_loadreg(rs1[i],1); + else if(s!=1) + emit_mov(s,1); + emit_movimm(copr,0); + emit_call((int)pcsx_mtc0); + if(copr==9||copr==11||copr==12||copr==13) { + emit_readword((int)&Count,HOST_CCREG); + emit_readword((int)&next_interupt,HOST_TEMPREG); + emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); + emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); + emit_writeword(HOST_TEMPREG,(int)&last_count); + emit_storereg(CCREG,HOST_CCREG); + } + if(copr==12||copr==13) { + assert(!is_delayslot); + emit_readword((int)&pending_exception,14); + emit_test(14,14); + emit_jne((int)&do_interrupt); + } + emit_loadreg(rs1[i],s); + if(get_reg(i_regs->regmap,rs1[i]|64)>=0) + emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); + cop1_usable=0; + } + else + { + assert(opcode2[i]==0x10); + if((source[i]&0x3f)==0x10) // RFE + { + emit_readword((int)&Status,0); + emit_andimm(0,0x3c,1); + emit_andimm(0,~0xf,0); + emit_orrshr_imm(1,2,0); + emit_writeword(0,(int)&Status); + } + } +} + +static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) +{ + switch (copr) { + case 1: + case 3: + case 5: + case 8: + case 9: + case 10: + case 11: + emit_readword((int)®_cop2d[copr],tl); + emit_signextend16(tl,tl); + emit_writeword(tl,(int)®_cop2d[copr]); // hmh + break; + case 7: + case 16: + case 17: + case 18: + case 19: + emit_readword((int)®_cop2d[copr],tl); + emit_andimm(tl,0xffff,tl); + emit_writeword(tl,(int)®_cop2d[copr]); + break; + case 15: + emit_readword((int)®_cop2d[14],tl); // SXY2 + emit_writeword(tl,(int)®_cop2d[copr]); + break; + case 28: + case 29: + emit_readword((int)®_cop2d[9],temp); + emit_testimm(temp,0x8000); // do we need this? + emit_andimm(temp,0xf80,temp); + emit_andne_imm(temp,0,temp); + emit_shrimm(temp,7,tl); + emit_readword((int)®_cop2d[10],temp); + emit_testimm(temp,0x8000); + emit_andimm(temp,0xf80,temp); + emit_andne_imm(temp,0,temp); + emit_orrshr_imm(temp,2,tl); + emit_readword((int)®_cop2d[11],temp); + emit_testimm(temp,0x8000); + emit_andimm(temp,0xf80,temp); + emit_andne_imm(temp,0,temp); + emit_orrshl_imm(temp,3,tl); + emit_writeword(tl,(int)®_cop2d[copr]); + break; + default: + emit_readword((int)®_cop2d[copr],tl); + break; + } +} + +static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) +{ + switch (copr) { + case 15: + emit_readword((int)®_cop2d[13],temp); // SXY1 + emit_writeword(sl,(int)®_cop2d[copr]); + emit_writeword(temp,(int)®_cop2d[12]); // SXY0 + emit_readword((int)®_cop2d[14],temp); // SXY2 + emit_writeword(sl,(int)®_cop2d[14]); + emit_writeword(temp,(int)®_cop2d[13]); // SXY1 + break; + case 28: + emit_andimm(sl,0x001f,temp); + emit_shlimm(temp,7,temp); + emit_writeword(temp,(int)®_cop2d[9]); + emit_andimm(sl,0x03e0,temp); + emit_shlimm(temp,2,temp); + emit_writeword(temp,(int)®_cop2d[10]); + emit_andimm(sl,0x7c00,temp); + emit_shrimm(temp,3,temp); + emit_writeword(temp,(int)®_cop2d[11]); + emit_writeword(sl,(int)®_cop2d[28]); + break; + case 30: + emit_movs(sl,temp); + emit_mvnmi(temp,temp); +#ifdef HAVE_ARMV5 + emit_clz(temp,temp); +#else + emit_movs(temp,HOST_TEMPREG); + emit_movimm(0,temp); + emit_jeq((int)out+4*4); + emit_addpl_imm(temp,1,temp); + emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_jns((int)out-2*4); +#endif + emit_writeword(sl,(int)®_cop2d[30]); + emit_writeword(temp,(int)®_cop2d[31]); + break; + case 31: + break; + default: + emit_writeword(sl,(int)®_cop2d[copr]); + break; + } +} + +static void cop2_assemble(int i,struct regstat *i_regs) +{ + u_int copr=(source[i]>>11)&0x1f; + signed char temp=get_reg(i_regs->regmap,-1); + if (opcode2[i]==0) { // MFC2 + signed char tl=get_reg(i_regs->regmap,rt1[i]); + if(tl>=0&&rt1[i]!=0) + cop2_get_dreg(copr,tl,temp); + } + else if (opcode2[i]==4) { // MTC2 + signed char sl=get_reg(i_regs->regmap,rs1[i]); + cop2_put_dreg(copr,sl,temp); + } + else if (opcode2[i]==2) // CFC2 + { + signed char tl=get_reg(i_regs->regmap,rt1[i]); + if(tl>=0&&rt1[i]!=0) + emit_readword((int)®_cop2c[copr],tl); + } + else if (opcode2[i]==6) // CTC2 + { + signed char sl=get_reg(i_regs->regmap,rs1[i]); + switch(copr) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + emit_signextend16(sl,temp); + break; + case 31: + //value = value & 0x7ffff000; + //if (value & 0x7f87e000) value |= 0x80000000; + emit_shrimm(sl,12,temp); + emit_shlimm(temp,12,temp); + emit_testimm(temp,0x7f000000); + emit_testeqimm(temp,0x00870000); + emit_testeqimm(temp,0x0000e000); + emit_orrne_imm(temp,0x80000000,temp); + break; + default: + temp=sl; + break; + } + emit_writeword(temp,(int)®_cop2c[copr]); + assert(sl>=0); + } +} + +static void c2op_prologue(u_int op,u_int reglist) +{ + save_regs_all(reglist); +#ifdef PCNT + emit_movimm(op,0); + emit_call((int)pcnt_gte_start); +#endif + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs +} + +static void c2op_epilogue(u_int op,u_int reglist) +{ +#ifdef PCNT + emit_movimm(op,0); + emit_call((int)pcnt_gte_end); +#endif + restore_regs_all(reglist); +} + +static void c2op_call_MACtoIR(int lm,int need_flags) +{ + if(need_flags) + emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); + else + emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); +} + +static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) +{ + emit_call((int)func); + // func is C code and trashes r0 + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); + emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); +} + +static void c2op_assemble(int i,struct regstat *i_regs) +{ + u_int c2op=source[i]&0x3f; + u_int hr,reglist_full=0,reglist; + int need_flags,need_ir; + for(hr=0;hrregmap[hr]>=0) reglist_full|=1<>63); // +1 because of how liveness detection works + need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; + assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n", + source[i],gte_unneeded[i+1],need_flags,need_ir); + if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) + need_flags=0; + int shift = (source[i] >> 19) & 1; + int lm = (source[i] >> 10) & 1; + switch(c2op) { +#ifndef DRC_DBG + case GTE_MVMVA: { +#ifdef HAVE_ARMV5 + int v = (source[i] >> 15) & 3; + int cv = (source[i] >> 13) & 3; + int mx = (source[i] >> 17) & 3; + reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7} + c2op_prologue(c2op,reglist); + /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ + if(v<3) + emit_ldrd(v*8,0,4); + else { + emit_movzwl_indexed(9*4,0,4); // gteIR + emit_movzwl_indexed(10*4,0,6); + emit_movzwl_indexed(11*4,0,5); + emit_orrshl_imm(6,16,4); + } + if(mx<3) + emit_addimm(0,32*4+mx*8*4,6); + else + emit_readword((int)&zeromem_ptr,6); + if(cv<3) + emit_addimm(0,32*4+(cv*8+5)*4,7); + else + emit_readword((int)&zeromem_ptr,7); +#ifdef __ARM_NEON__ + emit_movimm(source[i],1); // opcode + emit_call((int)gteMVMVA_part_neon); + if(need_flags) { + emit_movimm(lm,1); + emit_call((int)gteMACtoIR_flags_neon); + } +#else + if(cv==3&&shift) + emit_call((int)gteMVMVA_part_cv3sh12_arm); + else { + emit_movimm(shift,1); + emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); + } + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); +#endif +#else /* if not HAVE_ARMV5 */ + c2op_prologue(c2op,reglist); + emit_movimm(source[i],1); // opcode + emit_writeword(1,(int)&psxRegs.code); + emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); +#endif + break; + } + case GTE_OP: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DPCS: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); + break; + case GTE_INTPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); + break; + case GTE_SQR: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DCPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); + break; + case GTE_GPF: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); + break; + case GTE_GPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); + break; +#endif + default: + c2op_prologue(c2op,reglist); +#ifdef DRC_DBG + emit_movimm(source[i],1); // opcode + emit_writeword(1,(int)&psxRegs.code); +#endif + emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); + break; + } + c2op_epilogue(c2op,reglist); + } +} + +static void cop1_unusable(int i,struct regstat *i_regs) +{ + // XXX: should just just do the exception instead + if(!cop1_usable) { + int jaddr=(int)out; + emit_jmp(0); + add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0); + cop1_usable=1; + } +} + +static void cop1_assemble(int i,struct regstat *i_regs) +{ + cop1_unusable(i, i_regs); +} + +static void fconv_assemble_arm(int i,struct regstat *i_regs) +{ + cop1_unusable(i, i_regs); +} +#define fconv_assemble fconv_assemble_arm + +static void fcomp_assemble(int i,struct regstat *i_regs) +{ + cop1_unusable(i, i_regs); +} + +static void float_assemble(int i,struct regstat *i_regs) +{ + cop1_unusable(i, i_regs); +} + +static void multdiv_assemble_arm(int i,struct regstat *i_regs) +{ + // case 0x18: MULT + // case 0x19: MULTU + // case 0x1A: DIV + // case 0x1B: DIVU + // case 0x1C: DMULT + // case 0x1D: DMULTU + // case 0x1E: DDIV + // case 0x1F: DDIVU + if(rs1[i]&&rs2[i]) + { + if((opcode2[i]&4)==0) // 32-bit + { + if(opcode2[i]==0x18) // MULT + { + signed char m1=get_reg(i_regs->regmap,rs1[i]); + signed char m2=get_reg(i_regs->regmap,rs2[i]); + signed char hi=get_reg(i_regs->regmap,HIREG); + signed char lo=get_reg(i_regs->regmap,LOREG); + assert(m1>=0); + assert(m2>=0); + assert(hi>=0); + assert(lo>=0); + emit_smull(m1,m2,hi,lo); + } + if(opcode2[i]==0x19) // MULTU + { + signed char m1=get_reg(i_regs->regmap,rs1[i]); + signed char m2=get_reg(i_regs->regmap,rs2[i]); + signed char hi=get_reg(i_regs->regmap,HIREG); + signed char lo=get_reg(i_regs->regmap,LOREG); + assert(m1>=0); + assert(m2>=0); + assert(hi>=0); + assert(lo>=0); + emit_umull(m1,m2,hi,lo); + } + if(opcode2[i]==0x1A) // DIV + { + signed char d1=get_reg(i_regs->regmap,rs1[i]); + signed char d2=get_reg(i_regs->regmap,rs2[i]); + assert(d1>=0); + assert(d2>=0); + signed char quotient=get_reg(i_regs->regmap,LOREG); + signed char remainder=get_reg(i_regs->regmap,HIREG); + assert(quotient>=0); + assert(remainder>=0); + emit_movs(d1,remainder); + emit_movimm(0xffffffff,quotient); + emit_negmi(quotient,quotient); // .. quotient and .. + emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump) + emit_movs(d2,HOST_TEMPREG); + emit_jeq((int)out+52); // Division by zero + emit_negsmi(HOST_TEMPREG,HOST_TEMPREG); +#ifdef HAVE_ARMV5 + emit_clz(HOST_TEMPREG,quotient); + emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); +#else + emit_movimm(0,quotient); + emit_addpl_imm(quotient,1,quotient); + emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_jns((int)out-2*4); +#endif + emit_orimm(quotient,1<<31,quotient); + emit_shr(quotient,quotient,quotient); + emit_cmp(remainder,HOST_TEMPREG); + emit_subcs(remainder,HOST_TEMPREG,remainder); + emit_adcs(quotient,quotient,quotient); + emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_jcc((int)out-16); // -4 + emit_teq(d1,d2); + emit_negmi(quotient,quotient); + emit_test(d1,d1); + emit_negmi(remainder,remainder); + } + if(opcode2[i]==0x1B) // DIVU + { + signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend + signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor + assert(d1>=0); + assert(d2>=0); + signed char quotient=get_reg(i_regs->regmap,LOREG); + signed char remainder=get_reg(i_regs->regmap,HIREG); + assert(quotient>=0); + assert(remainder>=0); + emit_mov(d1,remainder); + emit_movimm(0xffffffff,quotient); // div0 case + emit_test(d2,d2); + emit_jeq((int)out+40); // Division by zero +#ifdef HAVE_ARMV5 + emit_clz(d2,HOST_TEMPREG); + emit_movimm(1<<31,quotient); + emit_shl(d2,HOST_TEMPREG,d2); +#else + emit_movimm(0,HOST_TEMPREG); + emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_lslpls_imm(d2,1,d2); + emit_jns((int)out-2*4); + emit_movimm(1<<31,quotient); +#endif + emit_shr(quotient,HOST_TEMPREG,quotient); + emit_cmp(remainder,d2); + emit_subcs(remainder,d2,remainder); + emit_adcs(quotient,quotient,quotient); + emit_shrcc_imm(d2,1,d2); + emit_jcc((int)out-16); // -4 + } + } + else // 64-bit + assert(0); + } + else + { + // Multiply by zero is zero. + // MIPS does not have a divide by zero exception. + // The result is undefined, we return zero. + signed char hr=get_reg(i_regs->regmap,HIREG); + signed char lr=get_reg(i_regs->regmap,LOREG); + if(hr>=0) emit_zeroreg(hr); + if(lr>=0) emit_zeroreg(lr); + } +} +#define multdiv_assemble multdiv_assemble_arm + +static void do_preload_rhash(int r) { + // Don't need this for ARM. On x86, this puts the value 0xf8 into the + // register. On ARM the hash can be done with a single instruction (below) +} + +static void do_preload_rhtbl(int ht) { + emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht); +} + +static void do_rhash(int rs,int rh) { + emit_andimm(rs,0xf8,rh); +} + +static void do_miniht_load(int ht,int rh) { + assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]); + output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh)); +} + +static void do_miniht_jump(int rs,int rh,int ht) { + emit_cmp(rh,rs); + emit_ldreq_indexed(ht,4,15); + #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK + emit_mov(rs,7); + emit_jmp(jump_vaddr_reg[7]); + #else + emit_jmp(jump_vaddr_reg[rs]); + #endif +} + +static void do_miniht_insert(u_int return_address,int rt,int temp) { + #ifndef HAVE_ARMV7 + emit_movimm(return_address,rt); // PC into link register + add_to_linker((int)out,return_address,1); + emit_pcreladdr(temp); + emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); + emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); + #else + emit_movw(return_address&0x0000FFFF,rt); + add_to_linker((int)out,return_address,1); + emit_pcreladdr(temp); + emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); + emit_movt(return_address&0xFFFF0000,rt); + emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); + #endif +} + +static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) +{ + //if(dirty_pre==dirty) return; + int hr,reg; + for(hr=0;hr>(reg&63))&1) { + if(reg>0) { + if(((dirty_pre&~dirty)>>hr)&1) { + if(reg>0&®<34) { + emit_storereg(reg,hr); + if( ((is32_pre&~uu)>>reg)&1 ) { + emit_sarimm(hr,31,HOST_TEMPREG); + emit_storereg(reg|64,HOST_TEMPREG); + } + } + else if(reg>=64) { + emit_storereg(reg,hr); + } + } + } + } + } + } +} + + +/* using strd could possibly help but you'd have to allocate registers in pairs +static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu) +{ + int hr; + int wrote=-1; + for(hr=HOST_REGS-1;hr>=0;hr--) { + if(hr!=EXCLUDE_REG) { + if(pre[hr]!=entry[hr]) { + if(pre[hr]>=0) { + if((dirty>>hr)&1) { + if(get_reg(entry,pre[hr])<0) { + if(pre[hr]<64) { + if(!((u>>pre[hr])&1)) { + if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) { + if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { + emit_sarimm(hr,31,hr+1); + emit_strdreg(pre[hr],hr); + } + else + emit_storereg(pre[hr],hr); + }else{ + emit_storereg(pre[hr],hr); + if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { + emit_sarimm(hr,31,hr); + emit_storereg(pre[hr]|64,hr); + } + } + } + }else{ + if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) { + emit_storereg(pre[hr],hr); + } + } + wrote=hr; + } + } + } + } + } + } + for(hr=0;hr=0) { + int nr; + if((nr=get_reg(entry,pre[hr]))>=0) { + emit_mov(hr,nr); + } + } + } + } + } +} +#define wb_invalidate wb_invalidate_arm +*/ + +static void mark_clear_cache(void *target) +{ + u_long offset = (char *)target - (char *)BASE_ADDR; + u_int mask = 1u << ((offset >> 12) & 31); + if (!(needs_clear_cache[offset >> 17] & mask)) { + char *start = (char *)((u_long)target & ~4095ul); + start_tcache_write(start, start + 4096); + needs_clear_cache[offset >> 17] |= mask; + } +} + +// Clearing the cache is rather slow on ARM Linux, so mark the areas +// that need to be cleared, and then only clear these areas once. +static void do_clear_cache() +{ + int i,j; + for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) + { + u_int bitmap=needs_clear_cache[i]; + if(bitmap) { + u_int start,end; + for(j=0;j<32;j++) + { + if(bitmap&(1<> 16) +#else + mov \reg, #(\imm & 0x0000ff) + orr \reg, #(\imm & 0x00ff00) + orr \reg, #(\imm & 0xff0000) +#endif +.endm + +/* r0 = virtual target address */ +/* r1 = instruction to patch */ +.macro dyna_linker_main +#ifndef NO_WRITE_EXEC + load_varadr_ext r3, jump_in + /* get_page */ + lsr r2, r0, #12 + mov r6, #4096 + bic r2, r2, #0xe0000 + sub r6, r6, #1 + cmp r2, #0x1000 + ldr r7, [r1] + biclt r2, #0x0e00 + and r6, r6, r2 + cmp r2, #2048 + add r12, r7, #2 + orrcs r2, r6, #2048 + ldr r5, [r3, r2, lsl #2] + lsl r12, r12, #8 + add r6, r1, r12, asr #6 + mov r8, #0 + /* jump_in lookup */ +1: + movs r4, r5 + beq 2f + ldr r3, [r5] /* ll_entry .vaddr */ + ldrd r4, r5, [r4, #8] /* ll_entry .next, .addr */ + teq r3, r0 + bne 1b + teq r4, r6 + moveq pc, r4 /* Stale i-cache */ + mov r8, r4 + b 1b /* jump_in may have dupes, continue search */ +2: + tst r8, r8 + beq 3f /* r0 not in jump_in */ + + mov r5, r1 + mov r1, r6 + bl add_link + sub r2, r8, r5 + and r1, r7, #0xff000000 + lsl r2, r2, #6 + sub r1, r1, #2 + add r1, r1, r2, lsr #8 + str r1, [r5] + mov pc, r8 +3: + /* hash_table lookup */ + cmp r2, #2048 + load_varadr_ext r3, jump_dirty + eor r4, r0, r0, lsl #16 + lslcc r2, r0, #9 + load_varadr_ext r6, hash_table + lsr r4, r4, #12 + lsrcc r2, r2, #21 + bic r4, r4, #15 + ldr r5, [r3, r2, lsl #2] + ldr r7, [r6, r4]! + teq r7, r0 + ldreq pc, [r6, #4] + ldr r7, [r6, #8] + teq r7, r0 + ldreq pc, [r6, #12] + /* jump_dirty lookup */ +6: + movs r4, r5 + beq 8f + ldr r3, [r5] + ldr r5, [r4, #12] + teq r3, r0 + bne 6b +7: + ldr r1, [r4, #8] + /* hash_table insert */ + ldr r2, [r6] + ldr r3, [r6, #4] + str r0, [r6] + str r1, [r6, #4] + str r2, [r6, #8] + str r3, [r6, #12] + mov pc, r1 +8: +#else + /* XXX: should be able to do better than this... */ + bl get_addr_ht + mov pc, r0 +#endif +.endm + + +FUNCTION(dyna_linker): + /* r0 = virtual target address */ + /* r1 = instruction to patch */ + dyna_linker_main + + mov r4, r0 + mov r5, r1 + bl new_recompile_block + tst r0, r0 + mov r0, r4 + mov r1, r5 + beq dyna_linker + /* pagefault */ + mov r1, r0 + mov r2, #8 + .size dyna_linker, .-dyna_linker + +FUNCTION(exec_pagefault): + /* r0 = instruction pointer */ + /* r1 = fault address */ + /* r2 = cause */ + ldr r3, [fp, #LO_reg_cop0+48] /* Status */ + mvn r6, #0xF000000F + ldr r4, [fp, #LO_reg_cop0+16] /* Context */ + bic r6, r6, #0x0F800000 + str r0, [fp, #LO_reg_cop0+56] /* EPC */ + orr r3, r3, #2 + str r1, [fp, #LO_reg_cop0+32] /* BadVAddr */ + bic r4, r4, r6 + str r3, [fp, #LO_reg_cop0+48] /* Status */ + and r5, r6, r1, lsr #9 + str r2, [fp, #LO_reg_cop0+52] /* Cause */ + and r1, r1, r6, lsl #9 + str r1, [fp, #LO_reg_cop0+40] /* EntryHi */ + orr r4, r4, r5 + str r4, [fp, #LO_reg_cop0+16] /* Context */ + mov r0, #0x80000000 + bl get_addr_ht + mov pc, r0 + .size exec_pagefault, .-exec_pagefault + +/* Special dynamic linker for the case where a page fault + may occur in a branch delay slot */ +FUNCTION(dyna_linker_ds): + /* r0 = virtual target address */ + /* r1 = instruction to patch */ + dyna_linker_main + + mov r4, r0 + bic r0, r0, #7 + mov r5, r1 + orr r0, r0, #1 + bl new_recompile_block + tst r0, r0 + mov r0, r4 + mov r1, r5 + beq dyna_linker_ds + /* pagefault */ + bic r1, r0, #7 + mov r2, #0x80000008 /* High bit set indicates pagefault in delay slot */ + sub r0, r1, #4 + b exec_pagefault + .size dyna_linker_ds, .-dyna_linker_ds + + .align 2 + +FUNCTION(jump_vaddr_r0): + eor r2, r0, r0, lsl #16 + b jump_vaddr + .size jump_vaddr_r0, .-jump_vaddr_r0 +FUNCTION(jump_vaddr_r1): + eor r2, r1, r1, lsl #16 + mov r0, r1 + b jump_vaddr + .size jump_vaddr_r1, .-jump_vaddr_r1 +FUNCTION(jump_vaddr_r2): + mov r0, r2 + eor r2, r2, r2, lsl #16 + b jump_vaddr + .size jump_vaddr_r2, .-jump_vaddr_r2 +FUNCTION(jump_vaddr_r3): + eor r2, r3, r3, lsl #16 + mov r0, r3 + b jump_vaddr + .size jump_vaddr_r3, .-jump_vaddr_r3 +FUNCTION(jump_vaddr_r4): + eor r2, r4, r4, lsl #16 + mov r0, r4 + b jump_vaddr + .size jump_vaddr_r4, .-jump_vaddr_r4 +FUNCTION(jump_vaddr_r5): + eor r2, r5, r5, lsl #16 + mov r0, r5 + b jump_vaddr + .size jump_vaddr_r5, .-jump_vaddr_r5 +FUNCTION(jump_vaddr_r6): + eor r2, r6, r6, lsl #16 + mov r0, r6 + b jump_vaddr + .size jump_vaddr_r6, .-jump_vaddr_r6 +FUNCTION(jump_vaddr_r8): + eor r2, r8, r8, lsl #16 + mov r0, r8 + b jump_vaddr + .size jump_vaddr_r8, .-jump_vaddr_r8 +FUNCTION(jump_vaddr_r9): + eor r2, r9, r9, lsl #16 + mov r0, r9 + b jump_vaddr + .size jump_vaddr_r9, .-jump_vaddr_r9 +FUNCTION(jump_vaddr_r10): + eor r2, r10, r10, lsl #16 + mov r0, r10 + b jump_vaddr + .size jump_vaddr_r10, .-jump_vaddr_r10 +FUNCTION(jump_vaddr_r12): + eor r2, r12, r12, lsl #16 + mov r0, r12 + b jump_vaddr + .size jump_vaddr_r12, .-jump_vaddr_r12 +FUNCTION(jump_vaddr_r7): + eor r2, r7, r7, lsl #16 + add r0, r7, #0 + .size jump_vaddr_r7, .-jump_vaddr_r7 +FUNCTION(jump_vaddr): + load_varadr_ext r1, hash_table + mvn r3, #15 + and r2, r3, r2, lsr #12 + ldr r2, [r1, r2]! + teq r2, r0 + ldreq pc, [r1, #4] + ldr r2, [r1, #8] + teq r2, r0 + ldreq pc, [r1, #12] + str r10, [fp, #LO_cycle_count] + bl get_addr + ldr r10, [fp, #LO_cycle_count] + mov pc, r0 + .size jump_vaddr, .-jump_vaddr + + .align 2 + +FUNCTION(verify_code_ds): + str r8, [fp, #LO_branch_target] +FUNCTION(verify_code_vm): +FUNCTION(verify_code): + /* r1 = source */ + /* r2 = target */ + /* r3 = length */ + tst r3, #4 + mov r4, #0 + add r3, r1, r3 + mov r5, #0 + ldrne r4, [r1], #4 + mov r12, #0 + ldrne r5, [r2], #4 + teq r1, r3 + beq .D3 +.D2: + ldr r7, [r1], #4 + eor r9, r4, r5 + ldr r8, [r2], #4 + orrs r9, r9, r12 + bne .D4 + ldr r4, [r1], #4 + eor r12, r7, r8 + ldr r5, [r2], #4 + cmp r1, r3 + bcc .D2 + teq r7, r8 +.D3: + teqeq r4, r5 +.D4: + ldr r8, [fp, #LO_branch_target] + moveq pc, lr +.D5: + bl get_addr + mov pc, r0 + .size verify_code, .-verify_code + .size verify_code_vm, .-verify_code_vm + + .align 2 +FUNCTION(cc_interrupt): + ldr r0, [fp, #LO_last_count] + mov r1, #0 + mov r2, #0x1fc + add r10, r0, r10 + str r1, [fp, #LO_pending_exception] + and r2, r2, r10, lsr #17 + add r3, fp, #LO_restore_candidate + str r10, [fp, #LO_cycle] /* PCSX cycles */ +@@ str r10, [fp, #LO_reg_cop0+36] /* Count */ + ldr r4, [r2, r3] + mov r10, lr + tst r4, r4 + bne .E4 +.E1: + bl gen_interupt + mov lr, r10 + ldr r10, [fp, #LO_cycle] + ldr r0, [fp, #LO_next_interupt] + ldr r1, [fp, #LO_pending_exception] + ldr r2, [fp, #LO_stop] + str r0, [fp, #LO_last_count] + sub r10, r10, r0 + tst r2, r2 + ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} + tst r1, r1 + moveq pc, lr +.E2: + ldr r0, [fp, #LO_pcaddr] + bl get_addr_ht + mov pc, r0 +.E4: + /* Move 'dirty' blocks to the 'clean' list */ + lsl r5, r2, #3 + str r1, [r2, r3] +.E5: + lsrs r4, r4, #1 + mov r0, r5 + add r5, r5, #1 + blcs clean_blocks + tst r5, #31 + bne .E5 + b .E1 + .size cc_interrupt, .-cc_interrupt + + .align 2 +FUNCTION(do_interrupt): + ldr r0, [fp, #LO_pcaddr] + bl get_addr_ht + add r10, r10, #2 + mov pc, r0 + .size do_interrupt, .-do_interrupt + + .align 2 +FUNCTION(fp_exception): + mov r2, #0x10000000 +.E7: + ldr r1, [fp, #LO_reg_cop0+48] /* Status */ + mov r3, #0x80000000 + str r0, [fp, #LO_reg_cop0+56] /* EPC */ + orr r1, #2 + add r2, r2, #0x2c + str r1, [fp, #LO_reg_cop0+48] /* Status */ + str r2, [fp, #LO_reg_cop0+52] /* Cause */ + add r0, r3, #0x80 + bl get_addr_ht + mov pc, r0 + .size fp_exception, .-fp_exception + .align 2 +FUNCTION(fp_exception_ds): + mov r2, #0x90000000 /* Set high bit if delay slot */ + b .E7 + .size fp_exception_ds, .-fp_exception_ds + + .align 2 +FUNCTION(jump_syscall): + ldr r1, [fp, #LO_reg_cop0+48] /* Status */ + mov r3, #0x80000000 + str r0, [fp, #LO_reg_cop0+56] /* EPC */ + orr r1, #2 + mov r2, #0x20 + str r1, [fp, #LO_reg_cop0+48] /* Status */ + str r2, [fp, #LO_reg_cop0+52] /* Cause */ + add r0, r3, #0x80 + bl get_addr_ht + mov pc, r0 + .size jump_syscall, .-jump_syscall + .align 2 + + .align 2 +FUNCTION(jump_syscall_hle): + str r0, [fp, #LO_pcaddr] /* PC must be set to EPC for psxException */ + ldr r2, [fp, #LO_last_count] + mov r1, #0 /* in delay slot */ + add r2, r2, r10 + mov r0, #0x20 /* cause */ + str r2, [fp, #LO_cycle] /* PCSX cycle counter */ + bl psxException + + /* note: psxException might do recursive recompiler call from it's HLE code, + * so be ready for this */ +pcsx_return: + ldr r1, [fp, #LO_next_interupt] + ldr r10, [fp, #LO_cycle] + ldr r0, [fp, #LO_pcaddr] + sub r10, r10, r1 + str r1, [fp, #LO_last_count] + bl get_addr_ht + mov pc, r0 + .size jump_syscall_hle, .-jump_syscall_hle + + .align 2 +FUNCTION(jump_hlecall): + ldr r2, [fp, #LO_last_count] + str r0, [fp, #LO_pcaddr] + add r2, r2, r10 + adr lr, pcsx_return + str r2, [fp, #LO_cycle] /* PCSX cycle counter */ + bx r1 + .size jump_hlecall, .-jump_hlecall + + .align 2 +FUNCTION(jump_intcall): + ldr r2, [fp, #LO_last_count] + str r0, [fp, #LO_pcaddr] + add r2, r2, r10 + adr lr, pcsx_return + str r2, [fp, #LO_cycle] /* PCSX cycle counter */ + b execI + .size jump_hlecall, .-jump_hlecall + + .align 2 +FUNCTION(new_dyna_leave): + ldr r0, [fp, #LO_last_count] + add r12, fp, #28 + add r10, r0, r10 + str r10, [fp, #LO_cycle] + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} + .size new_dyna_leave, .-new_dyna_leave + + .align 2 +FUNCTION(invalidate_addr_r0): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + b invalidate_addr_call + .size invalidate_addr_r0, .-invalidate_addr_r0 + .align 2 +FUNCTION(invalidate_addr_r1): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r1 + b invalidate_addr_call + .size invalidate_addr_r1, .-invalidate_addr_r1 + .align 2 +FUNCTION(invalidate_addr_r2): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r2 + b invalidate_addr_call + .size invalidate_addr_r2, .-invalidate_addr_r2 + .align 2 +FUNCTION(invalidate_addr_r3): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r3 + b invalidate_addr_call + .size invalidate_addr_r3, .-invalidate_addr_r3 + .align 2 +FUNCTION(invalidate_addr_r4): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r4 + b invalidate_addr_call + .size invalidate_addr_r4, .-invalidate_addr_r4 + .align 2 +FUNCTION(invalidate_addr_r5): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r5 + b invalidate_addr_call + .size invalidate_addr_r5, .-invalidate_addr_r5 + .align 2 +FUNCTION(invalidate_addr_r6): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r6 + b invalidate_addr_call + .size invalidate_addr_r6, .-invalidate_addr_r6 + .align 2 +FUNCTION(invalidate_addr_r7): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r7 + b invalidate_addr_call + .size invalidate_addr_r7, .-invalidate_addr_r7 + .align 2 +FUNCTION(invalidate_addr_r8): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r8 + b invalidate_addr_call + .size invalidate_addr_r8, .-invalidate_addr_r8 + .align 2 +FUNCTION(invalidate_addr_r9): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r9 + b invalidate_addr_call + .size invalidate_addr_r9, .-invalidate_addr_r9 + .align 2 +FUNCTION(invalidate_addr_r10): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r10 + b invalidate_addr_call + .size invalidate_addr_r10, .-invalidate_addr_r10 + .align 2 +FUNCTION(invalidate_addr_r12): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r12 + .size invalidate_addr_r12, .-invalidate_addr_r12 + .align 2 +invalidate_addr_call: + ldr r12, [fp, #LO_inv_code_start] + ldr lr, [fp, #LO_inv_code_end] + cmp r0, r12 + cmpcs lr, r0 + blcc invalidate_addr + ldmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, pc} + .size invalidate_addr_call, .-invalidate_addr_call + + .align 2 +FUNCTION(new_dyna_start): + /* ip is stored to conform EABI alignment */ + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} + load_varadr fp, dynarec_local + ldr r0, [fp, #LO_pcaddr] + bl get_addr_ht + ldr r1, [fp, #LO_next_interupt] + ldr r10, [fp, #LO_cycle] + str r1, [fp, #LO_last_count] + sub r10, r10, r1 + mov pc, r0 + .size new_dyna_start, .-new_dyna_start + +/* --------------------------------------- */ + +.align 2 + +.macro pcsx_read_mem readop tab_shift + /* r0 = address, r1 = handler_tab, r2 = cycles */ + lsl r3, r0, #20 + lsr r3, #(20+\tab_shift) + ldr r12, [fp, #LO_last_count] + ldr r1, [r1, r3, lsl #2] + add r2, r2, r12 + lsls r1, #1 +.if \tab_shift == 1 + lsl r3, #1 + \readop r0, [r1, r3] +.else + \readop r0, [r1, r3, lsl #\tab_shift] +.endif + movcc pc, lr + str r2, [fp, #LO_cycle] + bx r1 +.endm + +FUNCTION(jump_handler_read8): + add r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part + pcsx_read_mem ldrbcc, 0 + +FUNCTION(jump_handler_read16): + add r1, #0x1000/4*4 @ shift to r16 part + pcsx_read_mem ldrhcc, 1 + +FUNCTION(jump_handler_read32): + pcsx_read_mem ldrcc, 2 + + +.macro pcsx_write_mem wrtop tab_shift + /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */ + lsl r12,r0, #20 + lsr r12, #(20+\tab_shift) + ldr r3, [r3, r12, lsl #2] + str r0, [fp, #LO_address] @ some handlers still need it.. + lsls r3, #1 + mov r0, r2 @ cycle return in case of direct store +.if \tab_shift == 1 + lsl r12, #1 + \wrtop r1, [r3, r12] +.else + \wrtop r1, [r3, r12, lsl #\tab_shift] +.endif + movcc pc, lr + ldr r12, [fp, #LO_last_count] + mov r0, r1 + add r2, r2, r12 + push {r2, lr} + str r2, [fp, #LO_cycle] + blx r3 + + ldr r0, [fp, #LO_next_interupt] + pop {r2, r3} + str r0, [fp, #LO_last_count] + sub r0, r2, r0 + bx r3 +.endm + +FUNCTION(jump_handler_write8): + add r3, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part + pcsx_write_mem strbcc, 0 + +FUNCTION(jump_handler_write16): + add r3, #0x1000/4*4 @ shift to r16 part + pcsx_write_mem strhcc, 1 + +FUNCTION(jump_handler_write32): + pcsx_write_mem strcc, 2 + +FUNCTION(jump_handler_write_h): + /* r0 = address, r1 = data, r2 = cycles, r3 = handler */ + ldr r12, [fp, #LO_last_count] + str r0, [fp, #LO_address] @ some handlers still need it.. + add r2, r2, r12 + mov r0, r1 + push {r2, lr} + str r2, [fp, #LO_cycle] + blx r3 + + ldr r0, [fp, #LO_next_interupt] + pop {r2, r3} + str r0, [fp, #LO_last_count] + sub r0, r2, r0 + bx r3 + +FUNCTION(jump_handle_swl): + /* r0 = address, r1 = data, r2 = cycles */ + ldr r3, [fp, #LO_mem_wtab] + mov r12,r0,lsr #12 + ldr r3, [r3, r12, lsl #2] + lsls r3, #1 + bcs 4f + add r3, r0, r3 + mov r0, r2 + tst r3, #2 + beq 101f + tst r3, #1 + beq 2f +3: + str r1, [r3, #-3] + bx lr +2: + lsr r2, r1, #8 + lsr r1, #24 + strh r2, [r3, #-2] + strb r1, [r3] + bx lr +101: + tst r3, #1 + lsrne r1, #16 @ 1 + lsreq r12, r1, #24 @ 0 + strhne r1, [r3, #-1] + strbeq r12, [r3] + bx lr +4: + mov r0, r2 +@ b abort + bx lr @ TODO? + + +FUNCTION(jump_handle_swr): + /* r0 = address, r1 = data, r2 = cycles */ + ldr r3, [fp, #LO_mem_wtab] + mov r12,r0,lsr #12 + ldr r3, [r3, r12, lsl #2] + lsls r3, #1 + bcs 4f + add r3, r0, r3 + and r12,r3, #3 + mov r0, r2 + cmp r12,#2 + strbgt r1, [r3] @ 3 + strheq r1, [r3] @ 2 + cmp r12,#1 + strlt r1, [r3] @ 0 + bxne lr + lsr r2, r1, #8 @ 1 + strb r1, [r3] + strh r2, [r3, #1] + bx lr +4: + mov r0, r2 +@ b abort + bx lr @ TODO? + + +.macro rcntx_read_mode0 num + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #LO_rcnts+6*4+7*4*\num] @ cycleStart + mov r0, r2, lsl #16 + sub r0, r0, r3, lsl #16 + lsr r0, #16 + bx lr +.endm + +FUNCTION(rcnt0_read_count_m0): + rcntx_read_mode0 0 + +FUNCTION(rcnt1_read_count_m0): + rcntx_read_mode0 1 + +FUNCTION(rcnt2_read_count_m0): + rcntx_read_mode0 2 + +FUNCTION(rcnt0_read_count_m1): + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #LO_rcnts+6*4+7*4*0] @ cycleStart + mov_16 r1, 0x3334 + sub r2, r2, r3 + mul r0, r1, r2 @ /= 5 + lsr r0, #16 + bx lr + +FUNCTION(rcnt1_read_count_m1): + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #LO_rcnts+6*4+7*4*1] + mov_24 r1, 0x1e6cde + sub r2, r2, r3 + umull r3, r0, r1, r2 @ ~ /= hsync_cycles, max ~0x1e6cdd + bx lr + +FUNCTION(rcnt2_read_count_m1): + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #LO_rcnts+6*4+7*4*2] + mov r0, r2, lsl #16-3 + sub r0, r0, r3, lsl #16-3 + lsr r0, #16 @ /= 8 + bx lr + +@ vim:filetype=armasm diff --git a/libpcsxcore/new_dynarec/arm/linkage_offsets.h b/libpcsxcore/new_dynarec/arm/linkage_offsets.h new file mode 100644 index 0000000..f7e1911 --- /dev/null +++ b/libpcsxcore/new_dynarec/arm/linkage_offsets.h @@ -0,0 +1,41 @@ + +#define LO_next_interupt 64 +#define LO_cycle_count (LO_next_interupt + 4) +#define LO_last_count (LO_cycle_count + 4) +#define LO_pending_exception (LO_last_count + 4) +#define LO_stop (LO_pending_exception + 4) +#define LO_invc_ptr (LO_stop + 4) +#define LO_address (LO_invc_ptr + 4) +#define LO_psxRegs (LO_address + 4) +#define LO_reg (LO_psxRegs) +#define LO_lo (LO_reg + 128) +#define LO_hi (LO_lo + 4) +#define LO_reg_cop0 (LO_hi + 4) +#define LO_reg_cop2d (LO_reg_cop0 + 128) +#define LO_reg_cop2c (LO_reg_cop2d + 128) +#define LO_PC (LO_reg_cop2c + 128) +#define LO_pcaddr (LO_PC) +#define LO_code (LO_PC + 4) +#define LO_cycle (LO_code + 4) +#define LO_interrupt (LO_cycle + 4) +#define LO_intCycle (LO_interrupt + 4) +#define LO_psxRegs_end (LO_intCycle + 256) +#define LO_rcnts (LO_psxRegs_end) +#define LO_rcnts_end (LO_rcnts + 7*4*4) +#define LO_mem_rtab (LO_rcnts_end) +#define LO_mem_wtab (LO_mem_rtab + 4) +#define LO_psxH_ptr (LO_mem_wtab + 4) +#define LO_zeromem_ptr (LO_psxH_ptr + 4) +#define LO_inv_code_start (LO_zeromem_ptr + 4) +#define LO_inv_code_end (LO_inv_code_start + 4) +#define LO_branch_target (LO_inv_code_end + 4) +#define LO_scratch_buf_ptr (LO_branch_target + 4) +#define LO_align0 (LO_scratch_buf_ptr + 4) +#define LO_mini_ht (LO_align0 + 12) +#define LO_restore_candidate (LO_mini_ht + 256) +#define LO_dynarec_local_size (LO_restore_candidate + 512) + +#define LO_FCR0 (LO_align0) +#define LO_FCR31 (LO_align0) + +#define LO_cop2_to_scratch_buf (LO_scratch_buf_ptr - LO_reg_cop2d) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c deleted file mode 100644 index 21640f8..0000000 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ /dev/null @@ -1,4143 +0,0 @@ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Mupen64plus/PCSX - assem_arm.c * - * Copyright (C) 2009-2011 Ari64 * - * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#include "../gte.h" -#define FLAGLESS -#include "../gte.h" -#undef FLAGLESS -#include "../gte_arm.h" -#include "../gte_neon.h" -#include "pcnt.h" -#include "arm_features.h" - -#if defined(BASE_ADDR_FIXED) -#elif defined(BASE_ADDR_DYNAMIC) -char *translation_cache; -#else -char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); -#endif - -#ifndef __MACH__ -#define CALLER_SAVE_REGS 0x100f -#else -#define CALLER_SAVE_REGS 0x120f -#endif - -#define unused __attribute__((unused)) - -extern int cycle_count; -extern int last_count; -extern int pcaddr; -extern int pending_exception; -extern int branch_target; -extern uint64_t readmem_dword; -extern void *dynarec_local; -extern u_int mini_ht[32][2]; - -void indirect_jump_indexed(); -void indirect_jump(); -void do_interrupt(); -void jump_vaddr_r0(); -void jump_vaddr_r1(); -void jump_vaddr_r2(); -void jump_vaddr_r3(); -void jump_vaddr_r4(); -void jump_vaddr_r5(); -void jump_vaddr_r6(); -void jump_vaddr_r7(); -void jump_vaddr_r8(); -void jump_vaddr_r9(); -void jump_vaddr_r10(); -void jump_vaddr_r12(); - -const u_int jump_vaddr_reg[16] = { - (int)jump_vaddr_r0, - (int)jump_vaddr_r1, - (int)jump_vaddr_r2, - (int)jump_vaddr_r3, - (int)jump_vaddr_r4, - (int)jump_vaddr_r5, - (int)jump_vaddr_r6, - (int)jump_vaddr_r7, - (int)jump_vaddr_r8, - (int)jump_vaddr_r9, - (int)jump_vaddr_r10, - 0, - (int)jump_vaddr_r12, - 0, - 0, - 0}; - -void invalidate_addr_r0(); -void invalidate_addr_r1(); -void invalidate_addr_r2(); -void invalidate_addr_r3(); -void invalidate_addr_r4(); -void invalidate_addr_r5(); -void invalidate_addr_r6(); -void invalidate_addr_r7(); -void invalidate_addr_r8(); -void invalidate_addr_r9(); -void invalidate_addr_r10(); -void invalidate_addr_r12(); - -const u_int invalidate_addr_reg[16] = { - (int)invalidate_addr_r0, - (int)invalidate_addr_r1, - (int)invalidate_addr_r2, - (int)invalidate_addr_r3, - (int)invalidate_addr_r4, - (int)invalidate_addr_r5, - (int)invalidate_addr_r6, - (int)invalidate_addr_r7, - (int)invalidate_addr_r8, - (int)invalidate_addr_r9, - (int)invalidate_addr_r10, - 0, - (int)invalidate_addr_r12, - 0, - 0, - 0}; - -static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; - -/* Linker */ - -static void set_jump_target(int addr,u_int target) -{ - u_char *ptr=(u_char *)addr; - u_int *ptr2=(u_int *)ptr; - if(ptr[3]==0xe2) { - assert((target-(u_int)ptr2-8)<1024); - assert((addr&3)==0); - assert((target&3)==0); - *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00; - //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2); - } - else if(ptr[3]==0x72) { - // generated by emit_jno_unlikely - if((target-(u_int)ptr2-8)<1024) { - assert((addr&3)==0); - assert((target&3)==0); - *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00; - } - else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) { - assert((addr&3)==0); - assert((target&3)==0); - *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00; - } - else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8); - } - else { - assert((ptr[3]&0x0e)==0xa); - *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); - } -} - -// This optionally copies the instruction from the target of the branch into -// the space before the branch. Works, but the difference in speed is -// usually insignificant. -#if 0 -static void set_jump_target_fillslot(int addr,u_int target,int copy) -{ - u_char *ptr=(u_char *)addr; - u_int *ptr2=(u_int *)ptr; - assert(!copy||ptr2[-1]==0xe28dd000); - if(ptr[3]==0xe2) { - assert(!copy); - assert((target-(u_int)ptr2-8)<4096); - *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8); - } - else { - assert((ptr[3]&0x0e)==0xa); - u_int target_insn=*(u_int *)target; - if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags - copy=0; - } - if((target_insn&0x0c100000)==0x04100000) { // Load - copy=0; - } - if(target_insn&0x08000000) { - copy=0; - } - if(copy) { - ptr2[-1]=target_insn; - target+=4; - } - *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); - } -} -#endif - -/* Literal pool */ -static void add_literal(int addr,int val) -{ - assert(literalcount>6)+8; -} - -// Find the "clean" entry point from a "dirty" entry point -// by skipping past the call to verify_code -static u_int get_clean_addr(int addr) -{ - int *ptr=(int *)addr; - #ifndef HAVE_ARMV7 - ptr+=4; - #else - ptr+=6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - assert((*ptr&0xFF000000)==0xeb000000); // bl instruction - ptr++; - if((*ptr&0xFF000000)==0xea000000) { - return (int)ptr+((*ptr<<8)>>6)+8; // follow jump - } - return (u_int)ptr; -} - -static int verify_dirty(u_int *ptr) -{ - #ifndef HAVE_ARMV7 - // get from literal pool - assert((*ptr&0xFFFF0000)==0xe59f0000); - u_int offset=*ptr&0xfff; - u_int *l_ptr=(void *)ptr+offset+8; - u_int source=l_ptr[0]; - u_int copy=l_ptr[1]; - u_int len=l_ptr[2]; - ptr+=4; - #else - // ARMv7 movw/movt - assert((*ptr&0xFFF00000)==0xe3000000); - u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); - u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); - u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); - ptr+=6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - assert((*ptr&0xFF000000)==0xeb000000); // bl instruction - //printf("verify_dirty: %x %x %x\n",source,copy,len); - return !memcmp((void *)source,(void *)copy,len); -} - -// This doesn't necessarily find all clean entry points, just -// guarantees that it's not dirty -static int isclean(int addr) -{ - #ifndef HAVE_ARMV7 - u_int *ptr=((u_int *)addr)+4; - #else - u_int *ptr=((u_int *)addr)+6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction - if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0; - if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0; - if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0; - return 1; -} - -// get source that block at addr was compiled from (host pointers) -static void get_bounds(int addr,u_int *start,u_int *end) -{ - u_int *ptr=(u_int *)addr; - #ifndef HAVE_ARMV7 - // get from literal pool - assert((*ptr&0xFFFF0000)==0xe59f0000); - u_int offset=*ptr&0xfff; - u_int *l_ptr=(void *)ptr+offset+8; - u_int source=l_ptr[0]; - //u_int copy=l_ptr[1]; - u_int len=l_ptr[2]; - ptr+=4; - #else - // ARMv7 movw/movt - assert((*ptr&0xFFF00000)==0xe3000000); - u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); - //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); - u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); - ptr+=6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - assert((*ptr&0xFF000000)==0xeb000000); // bl instruction - *start=source; - *end=source+len; -} - -/* Register allocation */ - -// Note: registers are allocated clean (unmodified state) -// if you intend to modify the register, you must call dirty_reg(). -static void alloc_reg(struct regstat *cur,int i,signed char reg) -{ - int r,hr; - int preferred_reg = (reg&7); - if(reg==CCREG) preferred_reg=HOST_CCREG; - if(reg==PTEMP||reg==FTEMP) preferred_reg=12; - - // Don't allocate unused registers - if((cur->u>>reg)&1) return; - - // see if it's already allocated - for(hr=0;hrregmap[hr]==reg) return; - } - - // Keep the same mapping if the register was already allocated in a loop - preferred_reg = loop_reg(i,reg,preferred_reg); - - // Try to allocate the preferred register - if(cur->regmap[preferred_reg]==-1) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; - if(r<64&&((cur->u>>r)&1)) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]; - if(r>=0) { - if(r<64) { - if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} - } - else - { - if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} - } - } - } - // Try to allocate any available register, but prefer - // registers that have not been used recently. - if(i>0) { - for(hr=0;hrregmap[hr]==-1) { - if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); - //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. - if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - // Alloc preferred register if available - if(hsn[r=cur->regmap[preferred_reg]&63]==j) { - for(hr=0;hrregmap[hr]&63)==r) { - cur->regmap[hr]=-1; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg; - return; - } - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<uu>>reg)&1) return; - - // see if the upper half is already allocated - for(hr=0;hrregmap[hr]==reg+64) return; - } - - // Keep the same mapping if the register was already allocated in a loop - preferred_reg = loop_reg(i,reg,preferred_reg); - - // Try to allocate the preferred register - if(cur->regmap[preferred_reg]==-1) { - cur->regmap[preferred_reg]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; - if(r<64&&((cur->u>>r)&1)) { - cur->regmap[preferred_reg]=reg|64; - cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { - cur->regmap[preferred_reg]=reg|64; - cur->dirty&=~(1<isconst&=~(1<=0;hr--) - { - r=cur->regmap[hr]; - if(r>=0) { - if(r<64) { - if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} - } - else - { - if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} - } - } - } - // Try to allocate any available register, but prefer - // registers that have not been used recently. - if(i>0) { - for(hr=0;hrregmap[hr]==-1) { - if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); - //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. - if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - // Alloc preferred register if available - if(hsn[r=cur->regmap[preferred_reg]&63]==j) { - for(hr=0;hrregmap[hr]&63)==r) { - cur->regmap[hr]=-1; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg|64; - return; - } - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==reg) return; - } - - // Try to allocate any available register - for(hr=HOST_REGS-1;hr>=0;hr--) { - if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;hr--) - { - r=cur->regmap[hr]; - if(r>=0) { - if(r<64) { - if((cur->u>>r)&1) { - if(i==0||((unneeded_reg[i-1]>>r)&1)) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<uu>>(r&63))&1) { - if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. - if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hr2) { - if(cur->regmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<2) { - if(cur->regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[n]==reg) { - dirty=(cur->dirty>>n)&1; - cur->regmap[n]=-1; - } - } - - cur->regmap[hr]=reg; - cur->dirty&=~(1<dirty|=dirty<isconst&=~(1<0) - { - if(imm<256) { - *encoded=((i&30)<<7)|imm; - return 1; - } - imm=(imm>>2)|(imm<<30);i-=2; - } - return 0; -} - -static void genimm_checked(u_int imm,u_int *encoded) -{ - u_int ret=genimm(imm,encoded); - assert(ret); - (void)ret; -} - -static u_int genjmp(u_int addr) -{ - int offset=addr-(int)out-8; - if(offset<-33554432||offset>=33554432) { - if (addr>2) { - SysPrintf("genjmp: out of range: %08x\n", offset); - exit(1); - } - return 0; - } - return ((u_int)offset>>2)&0xffffff; -} - -static void emit_mov(int rs,int rt) -{ - assem_debug("mov %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_movs(int rs,int rt) -{ - assem_debug("movs %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_add(int rs1,int rs2,int rt) -{ - assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_adds(int rs1,int rs2,int rt) -{ - assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_adcs(int rs1,int rs2,int rt) -{ - assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_sbc(int rs1,int rs2,int rt) -{ - assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_sbcs(int rs1,int rs2,int rt) -{ - assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_neg(int rs, int rt) -{ - assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]); - output_w32(0xe2600000|rd_rn_rm(rt,rs,0)); -} - -static void emit_negs(int rs, int rt) -{ - assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); - output_w32(0xe2700000|rd_rn_rm(rt,rs,0)); -} - -static void emit_sub(int rs1,int rs2,int rt) -{ - assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_subs(int rs1,int rs2,int rt) -{ - assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_zeroreg(int rt) -{ - assem_debug("mov %s,#0\n",regname[rt]); - output_w32(0xe3a00000|rd_rn_rm(rt,0,0)); -} - -static void emit_loadlp(u_int imm,u_int rt) -{ - add_literal((int)out,imm); - assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm); - output_w32(0xe5900000|rd_rn_rm(rt,15,0)); -} - -static void emit_movw(u_int imm,u_int rt) -{ - assert(imm<65536); - assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm); - output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000)); -} - -static void emit_movt(u_int imm,u_int rt) -{ - assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000); - output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000)); -} - -static void emit_movimm(u_int imm,u_int rt) -{ - u_int armval; - if(genimm(imm,&armval)) { - assem_debug("mov %s,#%d\n",regname[rt],imm); - output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); - }else if(genimm(~imm,&armval)) { - assem_debug("mvn %s,#%d\n",regname[rt],imm); - output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); - }else if(imm<65536) { - #ifndef HAVE_ARMV7 - assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00); - output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8)); - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); - output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); - #else - emit_movw(imm,rt); - #endif - }else{ - #ifndef HAVE_ARMV7 - emit_loadlp(imm,rt); - #else - emit_movw(imm&0x0000FFFF,rt); - emit_movt(imm&0xFFFF0000,rt); - #endif - } -} - -static void emit_pcreladdr(u_int rt) -{ - assem_debug("add %s,pc,#?\n",regname[rt]); - output_w32(0xe2800000|rd_rn_rm(rt,15,0)); -} - -static void emit_loadreg(int r, int hr) -{ - if(r&64) { - SysPrintf("64bit load in 32bit mode!\n"); - assert(0); - return; - } - if((r&63)==0) - emit_zeroreg(hr); - else { - int addr=((int)reg)+((r&63)<>4); - if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); - if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); - if(r==CCREG) addr=(int)&cycle_count; - if(r==CSREG) addr=(int)&Status; - if(r==FSREG) addr=(int)&FCR31; - if(r==INVCP) addr=(int)&invc_ptr; - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("ldr %s,fp+%d\n",regname[hr],offset); - output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset); - } -} - -static void emit_storereg(int r, int hr) -{ - if(r&64) { - SysPrintf("64bit store in 32bit mode!\n"); - assert(0); - return; - } - int addr=((int)reg)+((r&63)<>4); - if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); - if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); - if(r==CCREG) addr=(int)&cycle_count; - if(r==FSREG) addr=(int)&FCR31; - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("str %s,fp+%d\n",regname[hr],offset); - output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset); -} - -static void emit_test(int rs, int rt) -{ - assem_debug("tst %s,%s\n",regname[rs],regname[rt]); - output_w32(0xe1100000|rd_rn_rm(0,rs,rt)); -} - -static void emit_testimm(int rs,int imm) -{ - u_int armval; - assem_debug("tst %s,#%d\n",regname[rs],imm); - genimm_checked(imm,&armval); - output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval); -} - -static void emit_testeqimm(int rs,int imm) -{ - u_int armval; - assem_debug("tsteq %s,$%d\n",regname[rs],imm); - genimm_checked(imm,&armval); - output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval); -} - -static void emit_not(int rs,int rt) -{ - assem_debug("mvn %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe1e00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_mvnmi(int rs,int rt) -{ - assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); - output_w32(0x41e00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_and(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_or(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_or_and_set_flags(int rs1,int rs2,int rt) -{ - assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(imm<32); - assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm); - output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7)); -} - -static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(imm<32); - assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm); - output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7)); -} - -static void emit_xor(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_addimm(u_int rs,int imm,u_int rt) -{ - assert(rs<16); - assert(rt<16); - if(imm!=0) { - u_int armval; - if(genimm(imm,&armval)) { - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); - }else if(genimm(-imm,&armval)) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm); - output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); - #ifdef HAVE_ARMV7 - }else if(rt!=rs&&(u_int)imm<65536) { - emit_movw(imm&0x0000ffff,rt); - emit_add(rs,rt,rt); - }else if(rt!=rs&&(u_int)-imm<65536) { - emit_movw(-imm&0x0000ffff,rt); - emit_sub(rs,rt,rt); - #endif - }else if((u_int)-imm<65536) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00); - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); - output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8)); - output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); - }else { - do { - int shift = (ffs(imm) - 1) & ~1; - int imm8 = imm & (0xff << shift); - genimm_checked(imm8,&armval); - assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8); - output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); - rs = rt; - imm &= ~imm8; - } - while (imm != 0); - } - } - else if(rs!=rt) emit_mov(rs,rt); -} - -static void emit_addimm_and_set_flags(int imm,int rt) -{ - assert(imm>-65536&&imm<65536); - u_int armval; - if(genimm(imm,&armval)) { - assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval); - }else if(genimm(-imm,&armval)) { - assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval); - }else if(imm<0) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00); - assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); - output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8)); - output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); - }else{ - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00); - assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); - output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8)); - output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); - } -} - -static void emit_addimm_no_flags(u_int imm,u_int rt) -{ - emit_addimm(rt,imm,rt); -} - -static void emit_addnop(u_int r) -{ - assert(r<16); - assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]); - output_w32(0xe2800000|rd_rn_rm(r,r,0)); -} - -static void emit_adcimm(u_int rs,int imm,u_int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_rscimm(int rs,int imm,u_int rt) -{ - assert(0); - u_int armval; - genimm_checked(imm,&armval); - assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) -{ - // TODO: if(genimm(imm,&armval)) ... - // else - emit_movimm(imm,HOST_TEMPREG); - emit_adds(HOST_TEMPREG,rsl,rtl); - emit_adcimm(rsh,0,rth); -} - -static void emit_andimm(int rs,int imm,int rt) -{ - u_int armval; - if(imm==0) { - emit_zeroreg(rt); - }else if(genimm(imm,&armval)) { - assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval); - }else if(genimm(~imm,&armval)) { - assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval); - }else if(imm==65535) { - #ifndef HAVE_ARMV6 - assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]); - output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF); - assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]); - output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF); - #else - assem_debug("uxth %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs)); - #endif - }else{ - assert(imm>0&&imm<65535); - #ifndef HAVE_ARMV7 - assem_debug("mov r14,#%d\n",imm&0xFF00); - output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8)); - assem_debug("add r14,r14,#%d\n",imm&0xFF); - output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0)); - #else - emit_movw(imm,HOST_TEMPREG); - #endif - assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]); - output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG)); - } -} - -static void emit_orimm(int rs,int imm,int rt) -{ - u_int armval; - if(imm==0) { - if(rs!=rt) emit_mov(rs,rt); - }else if(genimm(imm,&armval)) { - assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval); - }else{ - assert(imm>0&&imm<65536); - assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); - assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF); - output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8)); - output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); - } -} - -static void emit_xorimm(int rs,int imm,int rt) -{ - u_int armval; - if(imm==0) { - if(rs!=rt) emit_mov(rs,rt); - }else if(genimm(imm,&armval)) { - assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval); - }else{ - assert(imm>0&&imm<65536); - assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); - assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF); - output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8)); - output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); - } -} - -static void emit_shlimm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - //if(imm==1) ... - assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); -} - -static void emit_lsls_imm(int rs,int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); -} - -static unused void emit_lslpls_imm(int rs,int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); -} - -static void emit_shrimm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); -} - -static void emit_sarimm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7)); -} - -static void emit_rorimm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7)); -} - -static void emit_shldimm(int rs,int rs2,u_int imm,int rt) -{ - assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); - assert(imm>0); - assert(imm<32); - //if(imm==1) ... - assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); - assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); - output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); -} - -static void emit_shrdimm(int rs,int rs2,u_int imm,int rt) -{ - assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); - assert(imm>0); - assert(imm<32); - //if(imm==1) ... - assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7)); - assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); - output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); -} - -static void emit_signextend16(int rs,int rt) -{ - #ifndef HAVE_ARMV6 - emit_shlimm(rs,16,rt); - emit_sarimm(rt,16,rt); - #else - assem_debug("sxth %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs)); - #endif -} - -static void emit_signextend8(int rs,int rt) -{ - #ifndef HAVE_ARMV6 - emit_shlimm(rs,24,rt); - emit_sarimm(rt,24,rt); - #else - assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe6af0070|rd_rn_rm(rt,0,rs)); - #endif -} - -static void emit_shl(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - //if(imm==1) ... - assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8)); -} - -static void emit_shr(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8)); -} - -static void emit_sar(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); -} - -static void emit_orrshl(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); - output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8)); -} - -static void emit_orrshr(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); - output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8)); -} - -static void emit_cmpimm(int rs,int imm) -{ - u_int armval; - if(genimm(imm,&armval)) { - assem_debug("cmp %s,#%d\n",regname[rs],imm); - output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval); - }else if(genimm(-imm,&armval)) { - assem_debug("cmn %s,#%d\n",regname[rs],imm); - output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval); - }else if(imm>0) { - assert(imm<65536); - emit_movimm(imm,HOST_TEMPREG); - assem_debug("cmp %s,r14\n",regname[rs]); - output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG)); - }else{ - assert(imm>-65536); - emit_movimm(-imm,HOST_TEMPREG); - assem_debug("cmn %s,r14\n",regname[rs]); - output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG)); - } -} - -static void emit_cmovne_imm(int imm,int rt) -{ - assem_debug("movne %s,#%d\n",regname[rt],imm); - u_int armval; - genimm_checked(imm,&armval); - output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval); -} - -static void emit_cmovl_imm(int imm,int rt) -{ - assem_debug("movlt %s,#%d\n",regname[rt],imm); - u_int armval; - genimm_checked(imm,&armval); - output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval); -} - -static void emit_cmovb_imm(int imm,int rt) -{ - assem_debug("movcc %s,#%d\n",regname[rt],imm); - u_int armval; - genimm_checked(imm,&armval); - output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval); -} - -static void emit_cmovs_imm(int imm,int rt) -{ - assem_debug("movmi %s,#%d\n",regname[rt],imm); - u_int armval; - genimm_checked(imm,&armval); - output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); -} - -static void emit_cmove_reg(int rs,int rt) -{ - assem_debug("moveq %s,%s\n",regname[rt],regname[rs]); - output_w32(0x01a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_cmovne_reg(int rs,int rt) -{ - assem_debug("movne %s,%s\n",regname[rt],regname[rs]); - output_w32(0x11a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_cmovl_reg(int rs,int rt) -{ - assem_debug("movlt %s,%s\n",regname[rt],regname[rs]); - output_w32(0xb1a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_cmovs_reg(int rs,int rt) -{ - assem_debug("movmi %s,%s\n",regname[rt],regname[rs]); - output_w32(0x41a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_slti32(int rs,int imm,int rt) -{ - if(rs!=rt) emit_zeroreg(rt); - emit_cmpimm(rs,imm); - if(rs==rt) emit_movimm(0,rt); - emit_cmovl_imm(1,rt); -} - -static void emit_sltiu32(int rs,int imm,int rt) -{ - if(rs!=rt) emit_zeroreg(rt); - emit_cmpimm(rs,imm); - if(rs==rt) emit_movimm(0,rt); - emit_cmovb_imm(1,rt); -} - -static void emit_slti64_32(int rsh,int rsl,int imm,int rt) -{ - assert(rsh!=rt); - emit_slti32(rsl,imm,rt); - if(imm>=0) - { - emit_test(rsh,rsh); - emit_cmovne_imm(0,rt); - emit_cmovs_imm(1,rt); - } - else - { - emit_cmpimm(rsh,-1); - emit_cmovne_imm(0,rt); - emit_cmovl_imm(1,rt); - } -} - -static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) -{ - assert(rsh!=rt); - emit_sltiu32(rsl,imm,rt); - if(imm>=0) - { - emit_test(rsh,rsh); - emit_cmovne_imm(0,rt); - } - else - { - emit_cmpimm(rsh,-1); - emit_cmovne_imm(1,rt); - } -} - -static void emit_cmp(int rs,int rt) -{ - assem_debug("cmp %s,%s\n",regname[rs],regname[rt]); - output_w32(0xe1500000|rd_rn_rm(0,rs,rt)); -} - -static void emit_set_gz32(int rs, int rt) -{ - //assem_debug("set_gz32\n"); - emit_cmpimm(rs,1); - emit_movimm(1,rt); - emit_cmovl_imm(0,rt); -} - -static void emit_set_nz32(int rs, int rt) -{ - //assem_debug("set_nz32\n"); - if(rs!=rt) emit_movs(rs,rt); - else emit_test(rs,rs); - emit_cmovne_imm(1,rt); -} - -static void emit_set_gz64_32(int rsh, int rsl, int rt) -{ - //assem_debug("set_gz64\n"); - emit_set_gz32(rsl,rt); - emit_test(rsh,rsh); - emit_cmovne_imm(1,rt); - emit_cmovs_imm(0,rt); -} - -static void emit_set_nz64_32(int rsh, int rsl, int rt) -{ - //assem_debug("set_nz64\n"); - emit_or_and_set_flags(rsh,rsl,rt); - emit_cmovne_imm(1,rt); -} - -static void emit_set_if_less32(int rs1, int rs2, int rt) -{ - //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); - if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); - emit_cmp(rs1,rs2); - if(rs1==rt||rs2==rt) emit_movimm(0,rt); - emit_cmovl_imm(1,rt); -} - -static void emit_set_if_carry32(int rs1, int rs2, int rt) -{ - //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); - if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); - emit_cmp(rs1,rs2); - if(rs1==rt||rs2==rt) emit_movimm(0,rt); - emit_cmovb_imm(1,rt); -} - -static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) -{ - //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); - assert(u1!=rt); - assert(u2!=rt); - emit_cmp(l1,l2); - emit_movimm(0,rt); - emit_sbcs(u1,u2,HOST_TEMPREG); - emit_cmovl_imm(1,rt); -} - -static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) -{ - //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); - assert(u1!=rt); - assert(u2!=rt); - emit_cmp(l1,l2); - emit_movimm(0,rt); - emit_sbcs(u1,u2,HOST_TEMPREG); - emit_cmovb_imm(1,rt); -} - -static void emit_call(int a) -{ - assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8); - u_int offset=genjmp(a); - output_w32(0xeb000000|offset); -} - -static void emit_jmp(int a) -{ - assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8); - u_int offset=genjmp(a); - output_w32(0xea000000|offset); -} - -static void emit_jne(int a) -{ - assem_debug("bne %x\n",a); - u_int offset=genjmp(a); - output_w32(0x1a000000|offset); -} - -static void emit_jeq(int a) -{ - assem_debug("beq %x\n",a); - u_int offset=genjmp(a); - output_w32(0x0a000000|offset); -} - -static void emit_js(int a) -{ - assem_debug("bmi %x\n",a); - u_int offset=genjmp(a); - output_w32(0x4a000000|offset); -} - -static void emit_jns(int a) -{ - assem_debug("bpl %x\n",a); - u_int offset=genjmp(a); - output_w32(0x5a000000|offset); -} - -static void emit_jl(int a) -{ - assem_debug("blt %x\n",a); - u_int offset=genjmp(a); - output_w32(0xba000000|offset); -} - -static void emit_jge(int a) -{ - assem_debug("bge %x\n",a); - u_int offset=genjmp(a); - output_w32(0xaa000000|offset); -} - -static void emit_jno(int a) -{ - assem_debug("bvc %x\n",a); - u_int offset=genjmp(a); - output_w32(0x7a000000|offset); -} - -static void emit_jc(int a) -{ - assem_debug("bcs %x\n",a); - u_int offset=genjmp(a); - output_w32(0x2a000000|offset); -} - -static void emit_jcc(int a) -{ - assem_debug("bcc %x\n",a); - u_int offset=genjmp(a); - output_w32(0x3a000000|offset); -} - -static void emit_callreg(u_int r) -{ - assert(r<15); - assem_debug("blx %s\n",regname[r]); - output_w32(0xe12fff30|r); -} - -static void emit_jmpreg(u_int r) -{ - assem_debug("mov pc,%s\n",regname[r]); - output_w32(0xe1a00000|rd_rn_rm(15,0,r)); -} - -static void emit_readword_indexed(int offset, int rs, int rt) -{ - assert(offset>-4096&&offset<4096); - assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset); - }else{ - output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset)); - } -} - -static void emit_readword_dualindexedx4(int rs1, int rs2, int rt) -{ - assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) -{ - if(map<0) emit_readword_indexed(addr, rs, rt); - else { - assert(addr==0); - emit_readword_dualindexedx4(rs, map, rt); - } -} - -static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) -{ - if(map<0) { - if(rh>=0) emit_readword_indexed(addr, rs, rh); - emit_readword_indexed(addr+4, rs, rl); - }else{ - assert(rh!=rs); - if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh); - emit_addimm(map,1,map); - emit_readword_indexed_tlb(addr, rs, map, rl); - } -} - -static void emit_movsbl_indexed(int offset, int rs, int rt) -{ - assert(offset>-256&&offset<256); - assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) -{ - if(map<0) emit_movsbl_indexed(addr, rs, rt); - else { - if(addr==0) { - emit_shlimm(map,2,map); - assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]); - output_w32(0xe19000d0|rd_rn_rm(rt,rs,map)); - }else{ - assert(addr>-256&&addr<256); - assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]); - output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7)); - emit_movsbl_indexed(addr, rt, rt); - } - } -} - -static void emit_movswl_indexed(int offset, int rs, int rt) -{ - assert(offset>-256&&offset<256); - assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_movzbl_indexed(int offset, int rs, int rt) -{ - assert(offset>-4096&&offset<4096); - assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset); - }else{ - output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset)); - } -} - -static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) -{ - assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) -{ - if(map<0) emit_movzbl_indexed(addr, rs, rt); - else { - if(addr==0) { - emit_movzbl_dualindexedx4(rs, map, rt); - }else{ - emit_addimm(rs,addr,rt); - emit_movzbl_dualindexedx4(rt, map, rt); - } - } -} - -static void emit_movzwl_indexed(int offset, int rs, int rt) -{ - assert(offset>-256&&offset<256); - assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_ldrd(int offset, int rs, int rt) -{ - assert(offset>-256&&offset<256); - assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_readword(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("ldr %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); -} - -static unused void emit_movsbl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static unused void emit_movswl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static unused void emit_movzbl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("ldrb %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset); -} - -static unused void emit_movzwl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("ldrh %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static void emit_writeword_indexed(int rt, int offset, int rs) -{ - assert(offset>-4096&&offset<4096); - assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset); - }else{ - output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset)); - } -} - -static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2) -{ - assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) -{ - if(map<0) emit_writeword_indexed(rt, addr, rs); - else { - assert(addr==0); - emit_writeword_dualindexedx4(rt, rs, map); - } -} - -static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp) -{ - if(map<0) { - if(rh>=0) emit_writeword_indexed(rh, addr, rs); - emit_writeword_indexed(rl, addr+4, rs); - }else{ - assert(rh>=0); - if(temp!=rs) emit_addimm(map,1,temp); - emit_writeword_indexed_tlb(rh, addr, rs, map, temp); - if(temp!=rs) emit_writeword_indexed_tlb(rl, addr, rs, temp, temp); - else { - emit_addimm(rs,4,rs); - emit_writeword_indexed_tlb(rl, addr, rs, map, temp); - } - } -} - -static void emit_writehword_indexed(int rt, int offset, int rs) -{ - assert(offset>-256&&offset<256); - assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_writebyte_indexed(int rt, int offset, int rs) -{ - assert(offset>-4096&&offset<4096); - assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset); - }else{ - output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset)); - } -} - -static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) -{ - assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) -{ - if(map<0) emit_writebyte_indexed(rt, addr, rs); - else { - if(addr==0) { - emit_writebyte_dualindexedx4(rt, rs, map); - }else{ - emit_addimm(rs,addr,temp); - emit_writebyte_dualindexedx4(rt, temp, map); - } - } -} - -static void emit_strcc_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_strccb_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_strcch_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_writeword(int rt, int addr) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("str %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); -} - -static unused void emit_writehword(int rt, int addr) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("strh %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static unused void emit_writebyte(int rt, int addr) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("strb %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); -} - -static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) -{ - assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); - assert(rs1<16); - assert(rs2<16); - assert(hi<16); - assert(lo<16); - output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); -} - -static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) -{ - assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); - assert(rs1<16); - assert(rs2<16); - assert(hi<16); - assert(lo<16); - output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); -} - -static void emit_clz(int rs,int rt) -{ - assem_debug("clz %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs)); -} - -static void emit_subcs(int rs1,int rs2,int rt) -{ - assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_shrcc_imm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); -} - -static void emit_shrne_imm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); -} - -static void emit_negmi(int rs, int rt) -{ - assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]); - output_w32(0x42600000|rd_rn_rm(rt,rs,0)); -} - -static void emit_negsmi(int rs, int rt) -{ - assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]); - output_w32(0x42700000|rd_rn_rm(rt,rs,0)); -} - -static void emit_orreq(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_orrne(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); -} - -static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); -} - -static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); -} - -static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); -} - -static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); -} - -static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); -} - -static void emit_teq(int rs, int rt) -{ - assem_debug("teq %s,%s\n",regname[rs],regname[rt]); - output_w32(0xe1300000|rd_rn_rm(0,rs,rt)); -} - -static void emit_rsbimm(int rs, int imm, int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval); -} - -// Load 2 immediates optimizing for small code size -static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) -{ - emit_movimm(imm1,rt1); - u_int armval; - if(genimm(imm2-imm1,&armval)) { - assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1); - output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval); - }else if(genimm(imm1-imm2,&armval)) { - assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2); - output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval); - } - else emit_movimm(imm2,rt2); -} - -// Conditionally select one of two immediates, optimizing for small code size -// This will only be called if HAVE_CMOV_IMM is defined -static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) -{ - u_int armval; - if(genimm(imm2-imm1,&armval)) { - emit_movimm(imm1,rt); - assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1); - output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval); - }else if(genimm(imm1-imm2,&armval)) { - emit_movimm(imm1,rt); - assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2); - output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval); - } - else { - #ifndef HAVE_ARMV7 - emit_movimm(imm1,rt); - add_literal((int)out,imm2); - assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2); - output_w32(0x15900000|rd_rn_rm(rt,15,0)); - #else - emit_movw(imm1&0x0000FFFF,rt); - if((imm1&0xFFFF)!=(imm2&0xFFFF)) { - assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF); - output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000)); - } - emit_movt(imm1&0xFFFF0000,rt); - if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) { - assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000); - output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000)); - } - #endif - } -} - -// special case for checking invalid_code -static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) -{ - assert(imm<128&&imm>=0); - assert(r>=0&&r<16); - assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]); - output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620); - emit_cmpimm(HOST_TEMPREG,imm); -} - -static void emit_callne(int a) -{ - assem_debug("blne %x\n",a); - u_int offset=genjmp(a); - output_w32(0x1b000000|offset); -} - -// Used to preload hash table entries -static unused void emit_prefetchreg(int r) -{ - assem_debug("pld %s\n",regname[r]); - output_w32(0xf5d0f000|rd_rn_rm(0,r,0)); -} - -// Special case for mini_ht -static void emit_ldreq_indexed(int rs, u_int offset, int rt) -{ - assert(offset<4096); - assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset); - output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset); -} - -static unused void emit_bicne_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_biccs_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_bicvc_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_bichi_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_orrvs_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_orrne_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_andne_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_addpl_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_jno_unlikely(int a) -{ - //emit_jno(a); - assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a); - output_w32(0x72800000|rd_rn_rm(15,15,0)); -} - -static void save_regs_all(u_int reglist) -{ - int i; - if(!reglist) return; - assem_debug("stmia fp,{"); - for(i=0;i<16;i++) - if(reglist&(1<=BASE_ADDR&&addr<(BASE_ADDR+(1<=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000)); -//DEBUG > -#ifdef DEBUG_CYCLE_COUNT - emit_readword((int)&last_count,ECX); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_readword((int)&next_interupt,ECX); - emit_writeword(HOST_CCREG,(int)&Count); - emit_sub(HOST_CCREG,ECX,HOST_CCREG); - emit_writeword(ECX,(int)&last_count); -#endif -//DEBUG < - emit_jmp(linker); -} - -static void emit_extjump(int addr, int target) -{ - emit_extjump2(addr, target, (int)dyna_linker); -} - -static void emit_extjump_ds(int addr, int target) -{ - emit_extjump2(addr, target, (int)dyna_linker_ds); -} - -// put rt_val into rt, potentially making use of rs with value rs_val -static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt) -{ - u_int armval; - int diff; - if(genimm(rt_val,&armval)) { - assem_debug("mov %s,#%d\n",regname[rt],rt_val); - output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); - return; - } - if(genimm(~rt_val,&armval)) { - assem_debug("mvn %s,#%d\n",regname[rt],rt_val); - output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); - return; - } - diff=rt_val-rs_val; - if(genimm(diff,&armval)) { - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff); - output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); - return; - }else if(genimm(-diff,&armval)) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff); - output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); - return; - } - emit_movimm(rt_val,rt); -} - -// return 1 if above function can do it's job cheaply -static int is_similar_value(u_int v1,u_int v2) -{ - u_int xs; - int diff; - if(v1==v2) return 1; - diff=v2-v1; - for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2) - ; - if(xs<0x100) return 1; - for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2) - ; - if(xs<0x100) return 1; - return 0; -} - -// trashes r2 -static void pass_args(int a0, int a1) -{ - if(a0==1&&a1==0) { - // must swap - emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0); - } - else if(a0!=0&&a1==0) { - emit_mov(a1,1); - if (a0>=0) emit_mov(a0,0); - } - else { - if(a0>=0&&a0!=0) emit_mov(a0,0); - if(a1>=0&&a1!=1) emit_mov(a1,1); - } -} - -static void mov_loadtype_adj(int type,int rs,int rt) -{ - switch(type) { - case LOADB_STUB: emit_signextend8(rs,rt); break; - case LOADBU_STUB: emit_andimm(rs,0xff,rt); break; - case LOADH_STUB: emit_signextend16(rs,rt); break; - case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break; - case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break; - default: assert(0); - } -} - -#include "pcsxmem.h" -#include "pcsxmem_inline.c" - -static void do_readstub(int n) -{ - assem_debug("do_readstub %x\n",start+stubs[n][3]*4); - literal_pool(256); - set_jump_target(stubs[n][1],(int)out); - int type=stubs[n][0]; - int i=stubs[n][3]; - int rs=stubs[n][4]; - struct regstat *i_regs=(struct regstat *)stubs[n][5]; - u_int reglist=stubs[n][7]; - signed char *i_regmap=i_regs->regmap; - int rt; - if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { - rt=get_reg(i_regmap,FTEMP); - }else{ - rt=get_reg(i_regmap,rt1[i]); - } - assert(rs>=0); - int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0; - reglist|=(1<=0&&rt1[i]!=0) - reglist&=~(1<=0&&rt1[i]!=0)) { - switch(type) { - case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break; - case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break; - case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break; - case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break; - case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break; - } - } - if(regs_saved) { - restore_jump=(int)out; - emit_jcc(0); // jump to reg restore - } - else - emit_jcc(stubs[n][2]); // return address - - if(!regs_saved) - save_regs(reglist); - int handler=0; - if(type==LOADB_STUB||type==LOADBU_STUB) - handler=(int)jump_handler_read8; - if(type==LOADH_STUB||type==LOADHU_STUB) - handler=(int)jump_handler_read16; - if(type==LOADW_STUB) - handler=(int)jump_handler_read32; - assert(handler!=0); - pass_args(rs,temp2); - int cc=get_reg(i_regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2); - emit_call(handler); - if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { - mov_loadtype_adj(type,0,rt); - } - if(restore_jump) - set_jump_target(restore_jump,(int)out); - restore_regs(reglist); - emit_jmp(stubs[n][2]); // return address -} - -// return memhandler, or get directly accessable address and return 0 -static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) -{ - u_int l1,l2=0; - l1=((u_int *)table)[addr>>12]; - if((l1&(1<<31))==0) { - u_int v=l1<<1; - *addr_host=v+addr; - return 0; - } - else { - l1<<=1; - if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB) - l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)]; - else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB) - l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2]; - else - l2=((u_int *)l1)[(addr&0xfff)/4]; - if((l2&(1<<31))==0) { - u_int v=l2<<1; - *addr_host=v+(addr&0xfff); - return 0; - } - return l2<<1; - } -} - -static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) -{ - int rs=get_reg(regmap,target); - int rt=get_reg(regmap,target); - if(rs<0) rs=get_reg(regmap,-1); - assert(rs>=0); - u_int handler,host_addr=0,is_dynamic,far_call=0; - int cc=get_reg(regmap,CCREG); - if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) - return; - handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr); - if (handler==0) { - if(rt<0||rt1[i]==0) - return; - if(addr!=host_addr) - emit_movimm_from(addr,rs,host_addr,rs); - switch(type) { - case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break; - case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; - case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break; - case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break; - case LOADW_STUB: emit_readword_indexed(0,rs,rt); break; - default: assert(0); - } - return; - } - is_dynamic=pcsxmem_is_handler_dynamic(addr); - if(is_dynamic) { - if(type==LOADB_STUB||type==LOADBU_STUB) - handler=(int)jump_handler_read8; - if(type==LOADH_STUB||type==LOADHU_STUB) - handler=(int)jump_handler_read16; - if(type==LOADW_STUB) - handler=(int)jump_handler_read32; - } - - // call a memhandler - if(rt>=0&&rt1[i]!=0) - reglist&=~(1<=33554432) { - // unreachable memhandler, a plugin func perhaps - emit_movimm(handler,12); - far_call=1; - } - if(cc<0) - emit_loadreg(CCREG,2); - if(is_dynamic) { - emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); - } - else { - emit_readword((int)&last_count,3); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); - emit_add(2,3,2); - emit_writeword(2,(int)&Count); - } - - if(far_call) - emit_callreg(12); - else - emit_call(handler); - - if(rt>=0&&rt1[i]!=0) { - switch(type) { - case LOADB_STUB: emit_signextend8(0,rt); break; - case LOADBU_STUB: emit_andimm(0,0xff,rt); break; - case LOADH_STUB: emit_signextend16(0,rt); break; - case LOADHU_STUB: emit_andimm(0,0xffff,rt); break; - case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break; - default: assert(0); - } - } - restore_regs(reglist); -} - -static void do_writestub(int n) -{ - assem_debug("do_writestub %x\n",start+stubs[n][3]*4); - literal_pool(256); - set_jump_target(stubs[n][1],(int)out); - int type=stubs[n][0]; - int i=stubs[n][3]; - int rs=stubs[n][4]; - struct regstat *i_regs=(struct regstat *)stubs[n][5]; - u_int reglist=stubs[n][7]; - signed char *i_regmap=i_regs->regmap; - int rt,r; - if(itype[i]==C1LS||itype[i]==C2LS) { - rt=get_reg(i_regmap,r=FTEMP); - }else{ - rt=get_reg(i_regmap,r=rs2[i]); - } - assert(rs>=0); - assert(rt>=0); - int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra; - int reglist2=reglist|(1<=0); - assert(rt>=0); - u_int handler,host_addr=0; - handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr); - if (handler==0) { - if(addr!=host_addr) - emit_movimm_from(addr,rs,host_addr,rs); - switch(type) { - case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break; - case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break; - case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break; - default: assert(0); - } - return; - } - - // call a memhandler - save_regs(reglist); - pass_args(rs,rt); - int cc=get_reg(regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); - emit_movimm(handler,3); - // returns new cycle_count - emit_call((int)jump_handler_write_h); - emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc); - if(cc<0) - emit_storereg(CCREG,2); - restore_regs(reglist); -} - -static void do_unalignedwritestub(int n) -{ - assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4); - literal_pool(256); - set_jump_target(stubs[n][1],(int)out); - - int i=stubs[n][3]; - struct regstat *i_regs=(struct regstat *)stubs[n][4]; - int addr=stubs[n][5]; - u_int reglist=stubs[n][7]; - signed char *i_regmap=i_regs->regmap; - int temp2=get_reg(i_regmap,FTEMP); - int rt; - rt=get_reg(i_regmap,rs2[i]); - assert(rt>=0); - assert(addr>=0); - assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented - reglist|=(1<regmap_entry,i_regs->was32,i_regs->wasdirty); - if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_movimm(start+(i-ds)*4,EAX); // Get PC - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... - emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception); -} - -/* Special assem */ - -static void shift_assemble_arm(int i,struct regstat *i_regs) -{ - if(rt1[i]) { - if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV - { - signed char s,t,shift; - t=get_reg(i_regs->regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); - shift=get_reg(i_regs->regmap,rs2[i]); - if(t>=0){ - if(rs1[i]==0) - { - emit_zeroreg(t); - } - else if(rs2[i]==0) - { - assert(s>=0); - if(s!=t) emit_mov(s,t); - } - else - { - emit_andimm(shift,31,HOST_TEMPREG); - if(opcode2[i]==4) // SLLV - { - emit_shl(s,HOST_TEMPREG,t); - } - if(opcode2[i]==6) // SRLV - { - emit_shr(s,HOST_TEMPREG,t); - } - if(opcode2[i]==7) // SRAV - { - emit_sar(s,HOST_TEMPREG,t); - } - } - } - } else { // DSLLV/DSRLV/DSRAV - signed char sh,sl,th,tl,shift; - th=get_reg(i_regs->regmap,rt1[i]|64); - tl=get_reg(i_regs->regmap,rt1[i]); - sh=get_reg(i_regs->regmap,rs1[i]|64); - sl=get_reg(i_regs->regmap,rs1[i]); - shift=get_reg(i_regs->regmap,rs2[i]); - if(tl>=0){ - if(rs1[i]==0) - { - emit_zeroreg(tl); - if(th>=0) emit_zeroreg(th); - } - else if(rs2[i]==0) - { - assert(sl>=0); - if(sl!=tl) emit_mov(sl,tl); - if(th>=0&&sh!=th) emit_mov(sh,th); - } - else - { - // FIXME: What if shift==tl ? - assert(shift!=tl); - int temp=get_reg(i_regs->regmap,-1); - int real_th=th; - if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register - assert(sl>=0); - assert(sh>=0); - emit_andimm(shift,31,HOST_TEMPREG); - if(opcode2[i]==0x14) // DSLLV - { - if(th>=0) emit_shl(sh,HOST_TEMPREG,th); - emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); - emit_orrshr(sl,HOST_TEMPREG,th); - emit_andimm(shift,31,HOST_TEMPREG); - emit_testimm(shift,32); - emit_shl(sl,HOST_TEMPREG,tl); - if(th>=0) emit_cmovne_reg(tl,th); - emit_cmovne_imm(0,tl); - } - if(opcode2[i]==0x16) // DSRLV - { - assert(th>=0); - emit_shr(sl,HOST_TEMPREG,tl); - emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); - emit_orrshl(sh,HOST_TEMPREG,tl); - emit_andimm(shift,31,HOST_TEMPREG); - emit_testimm(shift,32); - emit_shr(sh,HOST_TEMPREG,th); - emit_cmovne_reg(th,tl); - if(real_th>=0) emit_cmovne_imm(0,th); - } - if(opcode2[i]==0x17) // DSRAV - { - assert(th>=0); - emit_shr(sl,HOST_TEMPREG,tl); - emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); - if(real_th>=0) { - assert(temp>=0); - emit_sarimm(th,31,temp); - } - emit_orrshl(sh,HOST_TEMPREG,tl); - emit_andimm(shift,31,HOST_TEMPREG); - emit_testimm(shift,32); - emit_sar(sh,HOST_TEMPREG,th); - emit_cmovne_reg(th,tl); - if(real_th>=0) emit_cmovne_reg(temp,th); - } - } - } - } - } -} - -static void speculate_mov(int rs,int rt) -{ - if(rt!=0) { - smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); - else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]); - else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); - else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]); - else { - smrv_strong_next&=~(1<=0) { - if(get_final_value(hr,i,&value)) - smrv[rt1[i]]=value; - else smrv[rt1[i]]=constmap[i][hr]; - smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); - else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); - } - break; - case LOAD: - if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) { - // special case for BIOS - smrv[rt1[i]]=0xa0000000; - smrv_strong_next|=1<>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); -#endif -} - -enum { - MTYPE_8000 = 0, - MTYPE_8020, - MTYPE_0000, - MTYPE_A000, - MTYPE_1F80, -}; - -static int get_ptr_mem_type(u_int a) -{ - if(a < 0x00200000) { - if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0)) - // return wrong, must use memhandler for BIOS self-test to pass - // 007 does similar stuff from a00 mirror, weird stuff - return MTYPE_8000; - return MTYPE_0000; - } - if(0x1f800000 <= a && a < 0x1f801000) - return MTYPE_1F80; - if(0x80200000 <= a && a < 0x80800000) - return MTYPE_8020; - if(0xa0000000 <= a && a < 0xa0200000) - return MTYPE_A000; - return MTYPE_8000; -} - -static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) -{ - int jaddr=0,type=0; - int mr=rs1[i]; - if(((smrv_strong|smrv_weak)>>mr)&1) { - type=get_ptr_mem_type(smrv[mr]); - //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); - } - else { - // use the mirror we are running on - type=get_ptr_mem_type(start); - //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type); - } - - if(type==MTYPE_8020) { // RAM 80200000+ mirror - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - type=0; - } - else if(type==MTYPE_0000) { // RAM 0 mirror - emit_orimm(addr,0x80000000,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - type=0; - } - else if(type==MTYPE_A000) { // RAM A mirror - emit_andimm(addr,~0x20000000,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - type=0; - } - else if(type==MTYPE_1F80) { // scratchpad - if (psxH == (void *)0x1f800000) { - emit_addimm(addr,-0x1f800000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,0x1000); - jaddr=(int)out; - emit_jc(0); - } - else { - // do usual RAM check, jump will go to the right handler - type=0; - } - } - - if(type==0) - { - emit_cmpimm(addr,RAM_SIZE); - jaddr=(int)out; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Hint to branch predictor that the branch is unlikely to be taken - if(rs1[i]>=28) - emit_jno_unlikely(0); - else - #endif - emit_jno(0); - if(ram_offset!=0) { - emit_addimm(addr,ram_offset,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - } - } - - return jaddr; -} - -#define shift_assemble shift_assemble_arm - -static void loadlr_assemble_arm(int i,struct regstat *i_regs) -{ - int s,th,tl,temp,temp2,addr,map=-1; - int offset; - int jaddr=0; - int memtarget=0,c=0; - int fastload_reg_override=0; - u_int hr,reglist=0; - th=get_reg(i_regs->regmap,rt1[i]|64); - tl=get_reg(i_regs->regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); - temp=get_reg(i_regs->regmap,-1); - temp2=get_reg(i_regs->regmap,FTEMP); - addr=get_reg(i_regs->regmap,AGEN1+(i&1)); - assert(addr<0); - offset=imm[i]; - for(hr=0;hrregmap[hr]>=0) reglist|=1<=0) { - c=(i_regs->wasconst>>s)&1; - if(c) { - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - } - } - if(!c) { - #ifdef RAM_OFFSET - map=get_reg(i_regs->regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - #endif - emit_shlimm(addr,3,temp); - if (opcode[i]==0x22||opcode[i]==0x26) { - emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR - }else{ - emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR - } - jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); - } - else { - if(ram_offset&&memtarget) { - emit_addimm(temp2,ram_offset,HOST_TEMPREG); - fastload_reg_override=HOST_TEMPREG; - } - if (opcode[i]==0x22||opcode[i]==0x26) { - emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR - }else{ - emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR - } - } - if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR - if(!c||memtarget) { - int a=temp2; - if(fastload_reg_override) a=fastload_reg_override; - //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2); - emit_readword_indexed_tlb(0,a,map,temp2); - if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); - } - else - inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist); - if(rt1[i]) { - assert(tl>=0); - emit_andimm(temp,24,temp); -#ifdef BIG_ENDIAN_MIPS - if (opcode[i]==0x26) // LWR -#else - if (opcode[i]==0x22) // LWL -#endif - emit_xorimm(temp,24,temp); - emit_movimm(-1,HOST_TEMPREG); - if (opcode[i]==0x26) { - emit_shr(temp2,temp,temp2); - emit_bic_lsr(tl,HOST_TEMPREG,temp,tl); - }else{ - emit_shl(temp2,temp,temp2); - emit_bic_lsl(tl,HOST_TEMPREG,temp,tl); - } - emit_or(temp2,tl,tl); - } - //emit_storereg(rt1[i],tl); // DEBUG - } - if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR - // FIXME: little endian, fastload_reg_override - int temp2h=get_reg(i_regs->regmap,FTEMP|64); - if(!c||memtarget) { - //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h); - //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2); - emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2); - if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); - } - else - inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist); - if(rt1[i]) { - assert(th>=0); - assert(tl>=0); - emit_testimm(temp,32); - emit_andimm(temp,24,temp); - if (opcode[i]==0x1A) { // LDL - emit_rsbimm(temp,32,HOST_TEMPREG); - emit_shl(temp2h,temp,temp2h); - emit_orrshr(temp2,HOST_TEMPREG,temp2h); - emit_movimm(-1,HOST_TEMPREG); - emit_shl(temp2,temp,temp2); - emit_cmove_reg(temp2h,th); - emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl); - emit_bicne_lsl(th,HOST_TEMPREG,temp,th); - emit_orreq(temp2,tl,tl); - emit_orrne(temp2,th,th); - } - if (opcode[i]==0x1B) { // LDR - emit_xorimm(temp,24,temp); - emit_rsbimm(temp,32,HOST_TEMPREG); - emit_shr(temp2,temp,temp2); - emit_orrshl(temp2h,HOST_TEMPREG,temp2); - emit_movimm(-1,HOST_TEMPREG); - emit_shr(temp2h,temp,temp2h); - emit_cmovne_reg(temp2,tl); - emit_bicne_lsr(th,HOST_TEMPREG,temp,th); - emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl); - emit_orrne(temp2h,th,th); - emit_orreq(temp2h,tl,tl); - } - } - } -} -#define loadlr_assemble loadlr_assemble_arm - -static void cop0_assemble(int i,struct regstat *i_regs) -{ - if(opcode2[i]==0) // MFC0 - { - signed char t=get_reg(i_regs->regmap,rt1[i]); - char copr=(source[i]>>11)&0x1f; - //assert(t>=0); // Why does this happen? OOT is weird - if(t>=0&&rt1[i]!=0) { - emit_readword((int)®_cop0+copr*4,t); - } - } - else if(opcode2[i]==4) // MTC0 - { - signed char s=get_reg(i_regs->regmap,rs1[i]); - char copr=(source[i]>>11)&0x1f; - assert(s>=0); - wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); - if(copr==9||copr==11||copr==12||copr==13) { - emit_readword((int)&last_count,HOST_TEMPREG); - emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc - emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - } - // What a mess. The status register (12) can enable interrupts, - // so needs a special case to handle a pending interrupt. - // The interrupt must be taken immediately, because a subsequent - // instruction might disable interrupts again. - if(copr==12||copr==13) { - if (is_delayslot) { - // burn cycles to cause cc_interrupt, which will - // reschedule next_interupt. Relies on CCREG from above. - assem_debug("MTC0 DS %d\n", copr); - emit_writeword(HOST_CCREG,(int)&last_count); - emit_movimm(0,HOST_CCREG); - emit_storereg(CCREG,HOST_CCREG); - emit_loadreg(rs1[i],1); - emit_movimm(copr,0); - emit_call((int)pcsx_mtc0_ds); - emit_loadreg(rs1[i],s); - return; - } - emit_movimm(start+i*4+4,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,(int)&pcaddr); - emit_movimm(0,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,(int)&pending_exception); - } - //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); - //else - if(s==HOST_CCREG) - emit_loadreg(rs1[i],1); - else if(s!=1) - emit_mov(s,1); - emit_movimm(copr,0); - emit_call((int)pcsx_mtc0); - if(copr==9||copr==11||copr==12||copr==13) { - emit_readword((int)&Count,HOST_CCREG); - emit_readword((int)&next_interupt,HOST_TEMPREG); - emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_writeword(HOST_TEMPREG,(int)&last_count); - emit_storereg(CCREG,HOST_CCREG); - } - if(copr==12||copr==13) { - assert(!is_delayslot); - emit_readword((int)&pending_exception,14); - emit_test(14,14); - emit_jne((int)&do_interrupt); - } - emit_loadreg(rs1[i],s); - if(get_reg(i_regs->regmap,rs1[i]|64)>=0) - emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); - cop1_usable=0; - } - else - { - assert(opcode2[i]==0x10); - if((source[i]&0x3f)==0x10) // RFE - { - emit_readword((int)&Status,0); - emit_andimm(0,0x3c,1); - emit_andimm(0,~0xf,0); - emit_orrshr_imm(1,2,0); - emit_writeword(0,(int)&Status); - } - } -} - -static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) -{ - switch (copr) { - case 1: - case 3: - case 5: - case 8: - case 9: - case 10: - case 11: - emit_readword((int)®_cop2d[copr],tl); - emit_signextend16(tl,tl); - emit_writeword(tl,(int)®_cop2d[copr]); // hmh - break; - case 7: - case 16: - case 17: - case 18: - case 19: - emit_readword((int)®_cop2d[copr],tl); - emit_andimm(tl,0xffff,tl); - emit_writeword(tl,(int)®_cop2d[copr]); - break; - case 15: - emit_readword((int)®_cop2d[14],tl); // SXY2 - emit_writeword(tl,(int)®_cop2d[copr]); - break; - case 28: - case 29: - emit_readword((int)®_cop2d[9],temp); - emit_testimm(temp,0x8000); // do we need this? - emit_andimm(temp,0xf80,temp); - emit_andne_imm(temp,0,temp); - emit_shrimm(temp,7,tl); - emit_readword((int)®_cop2d[10],temp); - emit_testimm(temp,0x8000); - emit_andimm(temp,0xf80,temp); - emit_andne_imm(temp,0,temp); - emit_orrshr_imm(temp,2,tl); - emit_readword((int)®_cop2d[11],temp); - emit_testimm(temp,0x8000); - emit_andimm(temp,0xf80,temp); - emit_andne_imm(temp,0,temp); - emit_orrshl_imm(temp,3,tl); - emit_writeword(tl,(int)®_cop2d[copr]); - break; - default: - emit_readword((int)®_cop2d[copr],tl); - break; - } -} - -static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) -{ - switch (copr) { - case 15: - emit_readword((int)®_cop2d[13],temp); // SXY1 - emit_writeword(sl,(int)®_cop2d[copr]); - emit_writeword(temp,(int)®_cop2d[12]); // SXY0 - emit_readword((int)®_cop2d[14],temp); // SXY2 - emit_writeword(sl,(int)®_cop2d[14]); - emit_writeword(temp,(int)®_cop2d[13]); // SXY1 - break; - case 28: - emit_andimm(sl,0x001f,temp); - emit_shlimm(temp,7,temp); - emit_writeword(temp,(int)®_cop2d[9]); - emit_andimm(sl,0x03e0,temp); - emit_shlimm(temp,2,temp); - emit_writeword(temp,(int)®_cop2d[10]); - emit_andimm(sl,0x7c00,temp); - emit_shrimm(temp,3,temp); - emit_writeword(temp,(int)®_cop2d[11]); - emit_writeword(sl,(int)®_cop2d[28]); - break; - case 30: - emit_movs(sl,temp); - emit_mvnmi(temp,temp); -#ifdef HAVE_ARMV5 - emit_clz(temp,temp); -#else - emit_movs(temp,HOST_TEMPREG); - emit_movimm(0,temp); - emit_jeq((int)out+4*4); - emit_addpl_imm(temp,1,temp); - emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_jns((int)out-2*4); -#endif - emit_writeword(sl,(int)®_cop2d[30]); - emit_writeword(temp,(int)®_cop2d[31]); - break; - case 31: - break; - default: - emit_writeword(sl,(int)®_cop2d[copr]); - break; - } -} - -static void cop2_assemble(int i,struct regstat *i_regs) -{ - u_int copr=(source[i]>>11)&0x1f; - signed char temp=get_reg(i_regs->regmap,-1); - if (opcode2[i]==0) { // MFC2 - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0&&rt1[i]!=0) - cop2_get_dreg(copr,tl,temp); - } - else if (opcode2[i]==4) { // MTC2 - signed char sl=get_reg(i_regs->regmap,rs1[i]); - cop2_put_dreg(copr,sl,temp); - } - else if (opcode2[i]==2) // CFC2 - { - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0&&rt1[i]!=0) - emit_readword((int)®_cop2c[copr],tl); - } - else if (opcode2[i]==6) // CTC2 - { - signed char sl=get_reg(i_regs->regmap,rs1[i]); - switch(copr) { - case 4: - case 12: - case 20: - case 26: - case 27: - case 29: - case 30: - emit_signextend16(sl,temp); - break; - case 31: - //value = value & 0x7ffff000; - //if (value & 0x7f87e000) value |= 0x80000000; - emit_shrimm(sl,12,temp); - emit_shlimm(temp,12,temp); - emit_testimm(temp,0x7f000000); - emit_testeqimm(temp,0x00870000); - emit_testeqimm(temp,0x0000e000); - emit_orrne_imm(temp,0x80000000,temp); - break; - default: - temp=sl; - break; - } - emit_writeword(temp,(int)®_cop2c[copr]); - assert(sl>=0); - } -} - -static void c2op_prologue(u_int op,u_int reglist) -{ - save_regs_all(reglist); -#ifdef PCNT - emit_movimm(op,0); - emit_call((int)pcnt_gte_start); -#endif - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs -} - -static void c2op_epilogue(u_int op,u_int reglist) -{ -#ifdef PCNT - emit_movimm(op,0); - emit_call((int)pcnt_gte_end); -#endif - restore_regs_all(reglist); -} - -static void c2op_call_MACtoIR(int lm,int need_flags) -{ - if(need_flags) - emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); - else - emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); -} - -static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) -{ - emit_call((int)func); - // func is C code and trashes r0 - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); - if(need_flags||need_ir) - c2op_call_MACtoIR(lm,need_flags); - emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); -} - -static void c2op_assemble(int i,struct regstat *i_regs) -{ - u_int c2op=source[i]&0x3f; - u_int hr,reglist_full=0,reglist; - int need_flags,need_ir; - for(hr=0;hrregmap[hr]>=0) reglist_full|=1<>63); // +1 because of how liveness detection works - need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; - assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n", - source[i],gte_unneeded[i+1],need_flags,need_ir); - if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) - need_flags=0; - int shift = (source[i] >> 19) & 1; - int lm = (source[i] >> 10) & 1; - switch(c2op) { -#ifndef DRC_DBG - case GTE_MVMVA: { -#ifdef HAVE_ARMV5 - int v = (source[i] >> 15) & 3; - int cv = (source[i] >> 13) & 3; - int mx = (source[i] >> 17) & 3; - reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7} - c2op_prologue(c2op,reglist); - /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ - if(v<3) - emit_ldrd(v*8,0,4); - else { - emit_movzwl_indexed(9*4,0,4); // gteIR - emit_movzwl_indexed(10*4,0,6); - emit_movzwl_indexed(11*4,0,5); - emit_orrshl_imm(6,16,4); - } - if(mx<3) - emit_addimm(0,32*4+mx*8*4,6); - else - emit_readword((int)&zeromem_ptr,6); - if(cv<3) - emit_addimm(0,32*4+(cv*8+5)*4,7); - else - emit_readword((int)&zeromem_ptr,7); -#ifdef __ARM_NEON__ - emit_movimm(source[i],1); // opcode - emit_call((int)gteMVMVA_part_neon); - if(need_flags) { - emit_movimm(lm,1); - emit_call((int)gteMACtoIR_flags_neon); - } -#else - if(cv==3&&shift) - emit_call((int)gteMVMVA_part_cv3sh12_arm); - else { - emit_movimm(shift,1); - emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); - } - if(need_flags||need_ir) - c2op_call_MACtoIR(lm,need_flags); -#endif -#else /* if not HAVE_ARMV5 */ - c2op_prologue(c2op,reglist); - emit_movimm(source[i],1); // opcode - emit_writeword(1,(int)&psxRegs.code); - emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); -#endif - break; - } - case GTE_OP: - c2op_prologue(c2op,reglist); - emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); - if(need_flags||need_ir) { - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); - c2op_call_MACtoIR(lm,need_flags); - } - break; - case GTE_DPCS: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); - break; - case GTE_INTPL: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); - break; - case GTE_SQR: - c2op_prologue(c2op,reglist); - emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); - if(need_flags||need_ir) { - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); - c2op_call_MACtoIR(lm,need_flags); - } - break; - case GTE_DCPL: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); - break; - case GTE_GPF: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); - break; - case GTE_GPL: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); - break; -#endif - default: - c2op_prologue(c2op,reglist); -#ifdef DRC_DBG - emit_movimm(source[i],1); // opcode - emit_writeword(1,(int)&psxRegs.code); -#endif - emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); - break; - } - c2op_epilogue(c2op,reglist); - } -} - -static void cop1_unusable(int i,struct regstat *i_regs) -{ - // XXX: should just just do the exception instead - if(!cop1_usable) { - int jaddr=(int)out; - emit_jmp(0); - add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } -} - -static void cop1_assemble(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - -static void fconv_assemble_arm(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} -#define fconv_assemble fconv_assemble_arm - -static void fcomp_assemble(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - -static void float_assemble(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - -static void multdiv_assemble_arm(int i,struct regstat *i_regs) -{ - // case 0x18: MULT - // case 0x19: MULTU - // case 0x1A: DIV - // case 0x1B: DIVU - // case 0x1C: DMULT - // case 0x1D: DMULTU - // case 0x1E: DDIV - // case 0x1F: DDIVU - if(rs1[i]&&rs2[i]) - { - if((opcode2[i]&4)==0) // 32-bit - { - if(opcode2[i]==0x18) // MULT - { - signed char m1=get_reg(i_regs->regmap,rs1[i]); - signed char m2=get_reg(i_regs->regmap,rs2[i]); - signed char hi=get_reg(i_regs->regmap,HIREG); - signed char lo=get_reg(i_regs->regmap,LOREG); - assert(m1>=0); - assert(m2>=0); - assert(hi>=0); - assert(lo>=0); - emit_smull(m1,m2,hi,lo); - } - if(opcode2[i]==0x19) // MULTU - { - signed char m1=get_reg(i_regs->regmap,rs1[i]); - signed char m2=get_reg(i_regs->regmap,rs2[i]); - signed char hi=get_reg(i_regs->regmap,HIREG); - signed char lo=get_reg(i_regs->regmap,LOREG); - assert(m1>=0); - assert(m2>=0); - assert(hi>=0); - assert(lo>=0); - emit_umull(m1,m2,hi,lo); - } - if(opcode2[i]==0x1A) // DIV - { - signed char d1=get_reg(i_regs->regmap,rs1[i]); - signed char d2=get_reg(i_regs->regmap,rs2[i]); - assert(d1>=0); - assert(d2>=0); - signed char quotient=get_reg(i_regs->regmap,LOREG); - signed char remainder=get_reg(i_regs->regmap,HIREG); - assert(quotient>=0); - assert(remainder>=0); - emit_movs(d1,remainder); - emit_movimm(0xffffffff,quotient); - emit_negmi(quotient,quotient); // .. quotient and .. - emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump) - emit_movs(d2,HOST_TEMPREG); - emit_jeq((int)out+52); // Division by zero - emit_negsmi(HOST_TEMPREG,HOST_TEMPREG); -#ifdef HAVE_ARMV5 - emit_clz(HOST_TEMPREG,quotient); - emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); -#else - emit_movimm(0,quotient); - emit_addpl_imm(quotient,1,quotient); - emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_jns((int)out-2*4); -#endif - emit_orimm(quotient,1<<31,quotient); - emit_shr(quotient,quotient,quotient); - emit_cmp(remainder,HOST_TEMPREG); - emit_subcs(remainder,HOST_TEMPREG,remainder); - emit_adcs(quotient,quotient,quotient); - emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_jcc((int)out-16); // -4 - emit_teq(d1,d2); - emit_negmi(quotient,quotient); - emit_test(d1,d1); - emit_negmi(remainder,remainder); - } - if(opcode2[i]==0x1B) // DIVU - { - signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend - signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor - assert(d1>=0); - assert(d2>=0); - signed char quotient=get_reg(i_regs->regmap,LOREG); - signed char remainder=get_reg(i_regs->regmap,HIREG); - assert(quotient>=0); - assert(remainder>=0); - emit_mov(d1,remainder); - emit_movimm(0xffffffff,quotient); // div0 case - emit_test(d2,d2); - emit_jeq((int)out+40); // Division by zero -#ifdef HAVE_ARMV5 - emit_clz(d2,HOST_TEMPREG); - emit_movimm(1<<31,quotient); - emit_shl(d2,HOST_TEMPREG,d2); -#else - emit_movimm(0,HOST_TEMPREG); - emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_lslpls_imm(d2,1,d2); - emit_jns((int)out-2*4); - emit_movimm(1<<31,quotient); -#endif - emit_shr(quotient,HOST_TEMPREG,quotient); - emit_cmp(remainder,d2); - emit_subcs(remainder,d2,remainder); - emit_adcs(quotient,quotient,quotient); - emit_shrcc_imm(d2,1,d2); - emit_jcc((int)out-16); // -4 - } - } - else // 64-bit - assert(0); - } - else - { - // Multiply by zero is zero. - // MIPS does not have a divide by zero exception. - // The result is undefined, we return zero. - signed char hr=get_reg(i_regs->regmap,HIREG); - signed char lr=get_reg(i_regs->regmap,LOREG); - if(hr>=0) emit_zeroreg(hr); - if(lr>=0) emit_zeroreg(lr); - } -} -#define multdiv_assemble multdiv_assemble_arm - -static void do_preload_rhash(int r) { - // Don't need this for ARM. On x86, this puts the value 0xf8 into the - // register. On ARM the hash can be done with a single instruction (below) -} - -static void do_preload_rhtbl(int ht) { - emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht); -} - -static void do_rhash(int rs,int rh) { - emit_andimm(rs,0xf8,rh); -} - -static void do_miniht_load(int ht,int rh) { - assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]); - output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh)); -} - -static void do_miniht_jump(int rs,int rh,int ht) { - emit_cmp(rh,rs); - emit_ldreq_indexed(ht,4,15); - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - emit_mov(rs,7); - emit_jmp(jump_vaddr_reg[7]); - #else - emit_jmp(jump_vaddr_reg[rs]); - #endif -} - -static void do_miniht_insert(u_int return_address,int rt,int temp) { - #ifndef HAVE_ARMV7 - emit_movimm(return_address,rt); // PC into link register - add_to_linker((int)out,return_address,1); - emit_pcreladdr(temp); - emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); - emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); - #else - emit_movw(return_address&0x0000FFFF,rt); - add_to_linker((int)out,return_address,1); - emit_pcreladdr(temp); - emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); - emit_movt(return_address&0xFFFF0000,rt); - emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); - #endif -} - -static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) -{ - //if(dirty_pre==dirty) return; - int hr,reg; - for(hr=0;hr>(reg&63))&1) { - if(reg>0) { - if(((dirty_pre&~dirty)>>hr)&1) { - if(reg>0&®<34) { - emit_storereg(reg,hr); - if( ((is32_pre&~uu)>>reg)&1 ) { - emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(reg|64,HOST_TEMPREG); - } - } - else if(reg>=64) { - emit_storereg(reg,hr); - } - } - } - } - } - } -} - - -/* using strd could possibly help but you'd have to allocate registers in pairs -static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu) -{ - int hr; - int wrote=-1; - for(hr=HOST_REGS-1;hr>=0;hr--) { - if(hr!=EXCLUDE_REG) { - if(pre[hr]!=entry[hr]) { - if(pre[hr]>=0) { - if((dirty>>hr)&1) { - if(get_reg(entry,pre[hr])<0) { - if(pre[hr]<64) { - if(!((u>>pre[hr])&1)) { - if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) { - if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { - emit_sarimm(hr,31,hr+1); - emit_strdreg(pre[hr],hr); - } - else - emit_storereg(pre[hr],hr); - }else{ - emit_storereg(pre[hr],hr); - if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { - emit_sarimm(hr,31,hr); - emit_storereg(pre[hr]|64,hr); - } - } - } - }else{ - if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) { - emit_storereg(pre[hr],hr); - } - } - wrote=hr; - } - } - } - } - } - } - for(hr=0;hr=0) { - int nr; - if((nr=get_reg(entry,pre[hr]))>=0) { - emit_mov(hr,nr); - } - } - } - } - } -} -#define wb_invalidate wb_invalidate_arm -*/ - -static void mark_clear_cache(void *target) -{ - u_long offset = (char *)target - (char *)BASE_ADDR; - u_int mask = 1u << ((offset >> 12) & 31); - if (!(needs_clear_cache[offset >> 17] & mask)) { - char *start = (char *)((u_long)target & ~4095ul); - start_tcache_write(start, start + 4096); - needs_clear_cache[offset >> 17] |= mask; - } -} - -// Clearing the cache is rather slow on ARM Linux, so mark the areas -// that need to be cleared, and then only clear these areas once. -static void do_clear_cache() -{ - int i,j; - for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) - { - u_int bitmap=needs_clear_cache[i]; - if(bitmap) { - u_int start,end; - for(j=0;j<32;j++) - { - if(bitmap&(1< + +#include "emu_if.h" +#include "pcsxmem.h" +#include "../../../psxhle.h" +#include "../../../r3000a.h" +#include "../../../cdrom.h" +#include "../../../psxdma.h" +#include "../../../mdec.h" +#include "../../../gte_arm.h" +#include "../../../gte_neon.h" +#define FLAGLESS +#include "../../../gte.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +//#define evprintf printf +#define evprintf(...) + +char invalid_code[0x100000]; +static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); +u32 event_cycles[PSXINT_COUNT]; + +static void schedule_timeslice(void) +{ + u32 i, c = psxRegs.cycle; + u32 irqs = psxRegs.interrupt; + s32 min, dif; + + min = PSXCLK; + for (i = 0; irqs != 0; i++, irqs >>= 1) { + if (!(irqs & 1)) + continue; + dif = event_cycles[i] - c; + //evprintf(" ev %d\n", dif); + if (0 < dif && dif < min) + min = dif; + } + next_interupt = c + min; +} + +typedef void (irq_func)(); + +static irq_func * const irq_funcs[] = { + [PSXINT_SIO] = sioInterrupt, + [PSXINT_CDR] = cdrInterrupt, + [PSXINT_CDREAD] = cdrReadInterrupt, + [PSXINT_GPUDMA] = gpuInterrupt, + [PSXINT_MDECOUTDMA] = mdec1Interrupt, + [PSXINT_SPUDMA] = spuInterrupt, + [PSXINT_MDECINDMA] = mdec0Interrupt, + [PSXINT_GPUOTCDMA] = gpuotcInterrupt, + [PSXINT_CDRDMA] = cdrDmaInterrupt, + [PSXINT_CDRLID] = cdrLidSeekInterrupt, + [PSXINT_CDRPLAY] = cdrPlayInterrupt, + [PSXINT_SPU_UPDATE] = spuUpdate, + [PSXINT_RCNT] = psxRcntUpdate, +}; + +/* local dupe of psxBranchTest, using event_cycles */ +static void irq_test(void) +{ + u32 irqs = psxRegs.interrupt; + u32 cycle = psxRegs.cycle; + u32 irq, irq_bits; + + // irq_funcs() may queue more irqs + psxRegs.interrupt = 0; + + for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) { + if (!(irq_bits & 1)) + continue; + if ((s32)(cycle - event_cycles[irq]) >= 0) { + irqs &= ~(1 << irq); + irq_funcs[irq](); + } + } + psxRegs.interrupt |= irqs; + + if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) { + psxException(0x400, 0); + pending_exception = 1; + } +} + +void gen_interupt() +{ + evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt); + + irq_test(); + //psxBranchTest(); + //pending_exception = 1; + + schedule_timeslice(); + + evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, + next_interupt, next_interupt - psxRegs.cycle); +} + +// from interpreter +extern void MTC0(int reg, u32 val); + +void pcsx_mtc0(u32 reg, u32 val) +{ + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); + MTC0(reg, val); + gen_interupt(); + if (Cause & Status & 0x0300) // possible sw irq + pending_exception = 1; +} + +void pcsx_mtc0_ds(u32 reg, u32 val) +{ + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); + MTC0(reg, val); +} + +void new_dyna_before_save(void) +{ + psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat + + // psxRegs.intCycle is always maintained, no need to convert +} + +void new_dyna_after_save(void) +{ + psxRegs.interrupt |= 1 << PSXINT_RCNT; +} + +static void new_dyna_restore(void) +{ + int i; + for (i = 0; i < PSXINT_COUNT; i++) + event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle; + + event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter; + psxRegs.interrupt |= 1 << PSXINT_RCNT; + psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1; + + new_dyna_pcsx_mem_load_state(); +} + +void new_dyna_freeze(void *f, int mode) +{ + const char header_save[8] = "ariblks"; + uint32_t addrs[1024 * 4]; + int32_t size = 0; + int bytes; + char header[8]; + + if (mode != 0) { // save + size = new_dynarec_save_blocks(addrs, sizeof(addrs)); + if (size == 0) + return; + + SaveFuncs.write(f, header_save, sizeof(header_save)); + SaveFuncs.write(f, &size, sizeof(size)); + SaveFuncs.write(f, addrs, size); + } + else { + new_dyna_restore(); + + bytes = SaveFuncs.read(f, header, sizeof(header)); + if (bytes != sizeof(header) || strcmp(header, header_save)) { + if (bytes > 0) + SaveFuncs.seek(f, -bytes, SEEK_CUR); + return; + } + SaveFuncs.read(f, &size, sizeof(size)); + if (size <= 0) + return; + if (size > sizeof(addrs)) { + bytes = size - sizeof(addrs); + SaveFuncs.seek(f, bytes, SEEK_CUR); + size = sizeof(addrs); + } + bytes = SaveFuncs.read(f, addrs, size); + if (bytes != size) + return; + + new_dynarec_load_blocks(addrs, size); + } + + //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded"); +} + +/* GTE stuff */ +void *gte_handlers[64]; + +void *gte_handlers_nf[64] = { + NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00 + NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08 + gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10 + NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18 + gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 + gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28 + gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 + NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38 +}; + +const char *gte_regnames[64] = { + NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00 + NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08 + "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10 + NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18 + "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 + "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28 + "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 + NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38 +}; + +/* from gte.txt.. not sure if this is any good. */ +const char gte_cycletab[64] = { + /* 1 2 3 4 5 6 7 8 9 a b c d e f */ + 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0, + 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0, + 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0, + 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39, +}; + +#define GCBIT(x) \ + (1ll << (32+x)) +#define GDBIT(x) \ + (1ll << (x)) +#define GCBITS3(b0,b1,b2) \ + (GCBIT(b0) | GCBIT(b1) | GCBIT(b2)) +#define GDBITS2(b0,b1) \ + (GDBIT(b0) | GDBIT(b1)) +#define GDBITS3(b0,b1,b2) \ + (GDBITS2(b0,b1) | GDBIT(b2)) +#define GDBITS4(b0,b1,b2,b3) \ + (GDBITS3(b0,b1,b2) | GDBIT(b3)) +#define GDBITS5(b0,b1,b2,b3,b4) \ + (GDBITS4(b0,b1,b2,b3) | GDBIT(b4)) +#define GDBITS6(b0,b1,b2,b3,b4,b5) \ + (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5)) +#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \ + (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6)) +#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \ + (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7)) +#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \ + (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8)) +#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \ + (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9)) + +const uint64_t gte_reg_reads[64] = { + [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19), + [GTE_NCLIP] = GDBITS3(12,13,14), + [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11), + [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22), + [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), + [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further? + [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22), + [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22), + [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8), + [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), + [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22), + [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), + [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), + [GTE_SQR] = GDBITS3(9,10,11), + [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), + [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22), + [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19), + [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19), + [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19), + [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22), + [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27), + [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), +}; + +// note: this excludes gteFLAG that is always written to +const uint64_t gte_reg_writes[64] = { + [GTE_RTPS] = 0x0f0f7f00ll, + [GTE_NCLIP] = GDBIT(24), + [GTE_OP] = GDBITS6(9,10,11,25,26,27), + [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27), + [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_SQR] = GDBITS6(9,10,11,25,26,27), + [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_AVSZ3] = GDBITS2(7,24), + [GTE_AVSZ4] = GDBITS2(7,24), + [GTE_RTPT] = 0x0f0f7f00ll, + [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27), +}; + +static int ari64_init() +{ + extern void (*psxCP2[64])(); + extern void psxNULL(); + extern unsigned char *out; + size_t i; + + new_dynarec_init(); + new_dyna_pcsx_mem_init(); + + for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) + if (psxCP2[i] != psxNULL) + gte_handlers[i] = psxCP2[i]; + +#if defined(__arm__) && !defined(DRC_DBG) + gte_handlers[0x06] = gteNCLIP_arm; +#ifdef HAVE_ARMV5 + gte_handlers_nf[0x01] = gteRTPS_nf_arm; + gte_handlers_nf[0x30] = gteRTPT_nf_arm; +#endif +#ifdef __ARM_NEON__ + // compiler's _nf version is still a lot slower than neon + // _nf_arm RTPS is roughly the same, RTPT slower + gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; + gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; +#endif +#endif +#ifdef DRC_DBG + memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); +#endif + psxH_ptr = psxH; + zeromem_ptr = zero_mem; + scratch_buf_ptr = scratch_buf; + + SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n"); + SysPrintf("%08x/%08x/%08x/%08x/%08x\n", + psxM, psxH, psxR, mem_rtab, out); + + return 0; +} + +static void ari64_reset() +{ + printf("ari64_reset\n"); + new_dyna_pcsx_mem_reset(); + invalidate_all_pages(); + new_dyna_restore(); + pending_exception = 1; +} + +// execute until predefined leave points +// (HLE softcall exit and BIOS fastboot end) +static void ari64_execute_until() +{ + schedule_timeslice(); + + evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc, + psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); + + new_dyna_start(); + + evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc, + psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); +} + +static void ari64_execute() +{ + while (!stop) { + ari64_execute_until(); + evprintf("drc left @%08x\n", psxRegs.pc); + } +} + +static void ari64_clear(u32 addr, u32 size) +{ + u32 start, end, main_ram; + + size *= 4; /* PCSX uses DMA units (words) */ + + evprintf("ari64_clear %08x %04x\n", addr, size); + + /* check for RAM mirrors */ + main_ram = (addr & 0xffe00000) == 0x80000000; + + start = addr >> 12; + end = (addr + size) >> 12; + + for (; start <= end; start++) + if (!main_ram || !invalid_code[start]) + invalidate_block(start); +} + +static void ari64_shutdown() +{ + new_dynarec_cleanup(); + new_dyna_pcsx_mem_shutdown(); +} + +extern void intExecute(); +extern void intExecuteT(); +extern void intExecuteBlock(); +extern void intExecuteBlockT(); +#ifndef DRC_DBG +#define intExecuteT intExecute +#define intExecuteBlockT intExecuteBlock +#endif + +R3000Acpu psxRec = { + ari64_init, + ari64_reset, +#ifndef DRC_DISABLE + ari64_execute, + ari64_execute_until, +#else + intExecuteT, + intExecuteBlockT, +#endif + ari64_clear, + ari64_shutdown +}; + +// TODO: rm +#ifndef DRC_DBG +void do_insn_trace() {} +void do_insn_cmp() {} +#endif + +#ifdef DRC_DISABLE +unsigned int address; +int pending_exception, stop; +u32 next_interupt; +int new_dynarec_did_compile; +int cycle_multiplier; +int new_dynarec_hacks; +void *psxH_ptr; +void *zeromem_ptr; +u8 zero_mem[0x1000]; +unsigned char *out; +void *mem_rtab; +void *scratch_buf_ptr; +void new_dynarec_init() { (void)ari64_execute; } +void new_dyna_start() {} +void new_dynarec_cleanup() {} +void new_dynarec_clear_full() {} +void invalidate_all_pages() {} +void invalidate_block(unsigned int block) {} +void new_dyna_pcsx_mem_init(void) {} +void new_dyna_pcsx_mem_reset(void) {} +void new_dyna_pcsx_mem_load_state(void) {} +void new_dyna_pcsx_mem_shutdown(void) {} +int new_dynarec_save_blocks(void *save, int size) { return 0; } +void new_dynarec_load_blocks(const void *save, int size) {} +#endif + +#ifdef DRC_DBG + +#include +static FILE *f; +extern u32 last_io_addr; + +static void dump_mem(const char *fname, void *mem, size_t size) +{ + FILE *f1 = fopen(fname, "wb"); + if (f1 == NULL) + f1 = fopen(strrchr(fname, '/') + 1, "wb"); + fwrite(mem, 1, size, f1); + fclose(f1); +} + +static u32 memcheck_read(u32 a) +{ + if ((a >> 16) == 0x1f80) + // scratchpad/IO + return *(u32 *)(psxH + (a & 0xfffc)); + + if ((a >> 16) == 0x1f00) + // parallel + return *(u32 *)(psxP + (a & 0xfffc)); + +// if ((a & ~0xe0600000) < 0x200000) + // RAM + return *(u32 *)(psxM + (a & 0x1ffffc)); +} + +void do_insn_trace(void) +{ + static psxRegisters oldregs; + static u32 old_io_addr = (u32)-1; + static u32 old_io_data = 0xbad0c0de; + static u32 event_cycles_o[PSXINT_COUNT]; + u32 *allregs_p = (void *)&psxRegs; + u32 *allregs_o = (void *)&oldregs; + u32 io_data; + int i; + u8 byte; + + //last_io_addr = 0x5e2c8; + if (f == NULL) + f = fopen("tracelog", "wb"); + + // log reg changes + oldregs.code = psxRegs.code; // don't care + for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { + if (allregs_p[i] != allregs_o[i]) { + fwrite(&i, 1, 1, f); + fwrite(&allregs_p[i], 1, 4, f); + allregs_o[i] = allregs_p[i]; + } + } + // log event changes + for (i = 0; i < PSXINT_COUNT; i++) { + if (event_cycles[i] != event_cycles_o[i]) { + byte = 0xfc; + fwrite(&byte, 1, 1, f); + fwrite(&i, 1, 1, f); + fwrite(&event_cycles[i], 1, 4, f); + event_cycles_o[i] = event_cycles[i]; + } + } + // log last io + if (old_io_addr != last_io_addr) { + byte = 0xfd; + fwrite(&byte, 1, 1, f); + fwrite(&last_io_addr, 1, 4, f); + old_io_addr = last_io_addr; + } + io_data = memcheck_read(last_io_addr); + if (old_io_data != io_data) { + byte = 0xfe; + fwrite(&byte, 1, 1, f); + fwrite(&io_data, 1, 4, f); + old_io_data = io_data; + } + byte = 0xff; + fwrite(&byte, 1, 1, f); + +#if 0 + if (psxRegs.cycle == 190230) { + dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000); + dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000); + printf("dumped\n"); + exit(1); + } +#endif +} + +static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", + "lo", "hi", + "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7", + "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15", + "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23", + "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31", + + "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7", + "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15", + "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23", + "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31", + + "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7", + "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15", + "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23", + "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31", + + "PC", "code", "cycle", "interrupt", +}; + +static struct { + int reg; + u32 val, val_expect; + u32 pc, cycle; +} miss_log[64]; +static int miss_log_i; +#define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0])) +#define miss_log_mask (miss_log_len-1) + +static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle) +{ + miss_log[miss_log_i].reg = reg; + miss_log[miss_log_i].val = val; + miss_log[miss_log_i].val_expect = val_expect; + miss_log[miss_log_i].pc = pc; + miss_log[miss_log_i].cycle = cycle; + miss_log_i = (miss_log_i + 1) & miss_log_mask; +} + +void breakme() {} + +void do_insn_cmp(void) +{ + static psxRegisters rregs; + static u32 mem_addr, mem_val; + u32 *allregs_p = (void *)&psxRegs; + u32 *allregs_e = (void *)&rregs; + static u32 ppc, failcount; + int i, ret, bad = 0, which_event = -1; + u32 ev_cycles = 0; + u8 code; + + if (f == NULL) + f = fopen("tracelog", "rb"); + + while (1) { + if ((ret = fread(&code, 1, 1, f)) <= 0) + break; + if (ret <= 0) + break; + if (code == 0xff) + break; + switch (code) { + case 0xfc: + which_event = 0; + fread(&which_event, 1, 1, f); + fread(&ev_cycles, 1, 4, f); + continue; + case 0xfd: + fread(&mem_addr, 1, 4, f); + continue; + case 0xfe: + fread(&mem_val, 1, 4, f); + continue; + } + fread(&allregs_e[code], 1, 4, f); + } + + if (ret <= 0) { + printf("EOF?\n"); + goto end; + } + + psxRegs.code = rregs.code; // don't care + psxRegs.cycle = rregs.cycle; + psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count + + //if (psxRegs.cycle == 166172) breakme(); + + if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 && + mem_val == memcheck_read(mem_addr) + ) { + failcount = 0; + goto ok; + } + + for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { + if (allregs_p[i] != allregs_e[i]) { + miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle); + bad++; + } + } + + if (mem_val != memcheck_read(mem_addr)) { + printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val); + goto end; + } + + if (which_event >= 0 && event_cycles[which_event] != ev_cycles) { + printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles); + goto end; + } + + if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) { + static int last_mcycle; + if (last_mcycle != psxRegs.cycle >> 20) { + printf("%u\n", psxRegs.cycle); + last_mcycle = psxRegs.cycle >> 20; + } + failcount++; + goto ok; + } + +end: + for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask) + printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n", + regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val, + miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle); + printf("-- %d\n", bad); + for (i = 0; i < 8; i++) + printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i], + i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]); + printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle); + dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000); + dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); + exit(1); +ok: + psxRegs.cycle = rregs.cycle + 2; // sync timing + ppc = psxRegs.pc; +} + +#endif diff --git a/libpcsxcore/new_dynarec/backends/psx/emu_if.h b/libpcsxcore/new_dynarec/backends/psx/emu_if.h new file mode 100644 index 0000000..d8c7990 --- /dev/null +++ b/libpcsxcore/new_dynarec/backends/psx/emu_if.h @@ -0,0 +1,108 @@ +#include "../../new_dynarec.h" +#include "../../../r3000a.h" + +extern char invalid_code[0x100000]; + +/* weird stuff */ +#define EAX 0 +#define ECX 1 + +/* same as psxRegs */ +extern int reg[]; + +/* same as psxRegs.GPR.n.* */ +extern int hi, lo; + +/* same as psxRegs.CP0.n.* */ +extern int reg_cop0[]; +#define Status psxRegs.CP0.n.Status +#define Cause psxRegs.CP0.n.Cause +#define EPC psxRegs.CP0.n.EPC +#define BadVAddr psxRegs.CP0.n.BadVAddr +#define Context psxRegs.CP0.n.Context +#define EntryHi psxRegs.CP0.n.EntryHi +#define Count psxRegs.cycle // psxRegs.CP0.n.Count + +/* COP2/GTE */ +enum gte_opcodes { + GTE_RTPS = 0x01, + GTE_NCLIP = 0x06, + GTE_OP = 0x0c, + GTE_DPCS = 0x10, + GTE_INTPL = 0x11, + GTE_MVMVA = 0x12, + GTE_NCDS = 0x13, + GTE_CDP = 0x14, + GTE_NCDT = 0x16, + GTE_NCCS = 0x1b, + GTE_CC = 0x1c, + GTE_NCS = 0x1e, + GTE_NCT = 0x20, + GTE_SQR = 0x28, + GTE_DCPL = 0x29, + GTE_DPCT = 0x2a, + GTE_AVSZ3 = 0x2d, + GTE_AVSZ4 = 0x2e, + GTE_RTPT = 0x30, + GTE_GPF = 0x3d, + GTE_GPL = 0x3e, + GTE_NCCT = 0x3f, +}; + +extern int reg_cop2d[], reg_cop2c[]; +extern void *gte_handlers[64]; +extern void *gte_handlers_nf[64]; +extern const char *gte_regnames[64]; +extern const char gte_cycletab[64]; +extern const uint64_t gte_reg_reads[64]; +extern const uint64_t gte_reg_writes[64]; + +/* dummy */ +extern int FCR0, FCR31; + +/* mem */ +extern void *mem_rtab; +extern void *mem_wtab; + +void jump_handler_read8(u32 addr, u32 *table, u32 cycles); +void jump_handler_read16(u32 addr, u32 *table, u32 cycles); +void jump_handler_read32(u32 addr, u32 *table, u32 cycles); +void jump_handler_write8(u32 addr, u32 data, u32 cycles, u32 *table); +void jump_handler_write16(u32 addr, u32 data, u32 cycles, u32 *table); +void jump_handler_write32(u32 addr, u32 data, u32 cycles, u32 *table); +void jump_handler_write_h(u32 addr, u32 data, u32 cycles, void *handler); +void jump_handle_swl(u32 addr, u32 data, u32 cycles); +void jump_handle_swr(u32 addr, u32 data, u32 cycles); +void rcnt0_read_count_m0(u32 addr, u32, u32 cycles); +void rcnt0_read_count_m1(u32 addr, u32, u32 cycles); +void rcnt1_read_count_m0(u32 addr, u32, u32 cycles); +void rcnt1_read_count_m1(u32 addr, u32, u32 cycles); +void rcnt2_read_count_m0(u32 addr, u32, u32 cycles); +void rcnt2_read_count_m1(u32 addr, u32, u32 cycles); + +extern unsigned int address; +extern void *psxH_ptr; +extern void *zeromem_ptr; +extern void *scratch_buf_ptr; + +// same as invalid_code, just a region for ram write checks (inclusive) +extern u32 inv_code_start, inv_code_end; + +/* cycles/irqs */ +extern u32 next_interupt; +extern int pending_exception; + +/* called by drc */ +void pcsx_mtc0(u32 reg, u32 val); +void pcsx_mtc0_ds(u32 reg, u32 val); + +/* misc */ +extern void (*psxHLEt[])(); + +extern void SysPrintf(const char *fmt, ...); + +#ifdef RAM_FIXED +#define rdram ((u_int)0x80000000) +#else +#define rdram ((u_int)psxM) +#endif diff --git a/libpcsxcore/new_dynarec/backends/psx/pcsxmem.c b/libpcsxcore/new_dynarec/backends/psx/pcsxmem.c new file mode 100644 index 0000000..647981e --- /dev/null +++ b/libpcsxcore/new_dynarec/backends/psx/pcsxmem.c @@ -0,0 +1,494 @@ +/* + * (C) Gražvydas "notaz" Ignotas, 2010-2011 + * + * This work is licensed under the terms of GNU GPL version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include "../../../psxhw.h" +#include "../../../cdrom.h" +#include "../../../mdec.h" +#include "../../../gpu.h" +#include "../../../psxmem_map.h" +#include "emu_if.h" +#include "pcsxmem.h" + +#ifdef __thumb__ +#error the dynarec is incompatible with Thumb functions, +#error please add -marm to compile flags +#endif + +//#define memprintf printf +#define memprintf(...) + +static u32 *mem_readtab; +static u32 *mem_writetab; +static u32 mem_iortab[(1+2+4) * 0x1000 / 4]; +static u32 mem_iowtab[(1+2+4) * 0x1000 / 4]; +static u32 mem_ffwtab[(1+2+4) * 0x1000 / 4]; +//static u32 mem_unmrtab[(1+2+4) * 0x1000 / 4]; +static u32 mem_unmwtab[(1+2+4) * 0x1000 / 4]; + +// When this is called in a loop, and 'h' is a function pointer, clang will crash. +#ifdef __clang__ +static __attribute__ ((noinline)) void map_item(u32 *out, const void *h, u32 flag) +#else +static void map_item(u32 *out, const void *h, u32 flag) +#endif +{ + u32 hv = (u32)h; + if (hv & 1) { + SysPrintf("FATAL: %p has LSB set\n", h); + abort(); + } + *out = (hv >> 1) | (flag << 31); +} + +// size must be power of 2, at least 4k +#define map_l1_mem(tab, i, addr, size, base) \ + map_item(&tab[((addr)>>12) + i], (u8 *)(base) - (u32)(addr) - ((i << 12) & ~(size - 1)), 0) + +#define IOMEM32(a) (((a) & 0xfff) / 4) +#define IOMEM16(a) (0x1000/4 + (((a) & 0xfff) / 2)) +#define IOMEM8(a) (0x1000/4 + 0x1000/2 + ((a) & 0xfff)) + +u8 zero_mem[0x1000]; + +u32 read_mem_dummy() +{ + return 0; +} + +static void write_mem_dummy(u32 data) +{ + memprintf("unmapped w %08x, %08x @%08x %u\n", address, data, psxRegs.pc, psxRegs.cycle); +} + +/* IO handlers */ +static u32 io_read_sio16() +{ + return sioRead8() | (sioRead8() << 8); +} + +static u32 io_read_sio32() +{ + return sioRead8() | (sioRead8() << 8) | (sioRead8() << 16) | (sioRead8() << 24); +} + +static void io_write_sio16(u32 value) +{ + sioWrite8((unsigned char)value); + sioWrite8((unsigned char)(value>>8)); +} + +static void io_write_sio32(u32 value) +{ + sioWrite8((unsigned char)value); + sioWrite8((unsigned char)(value >> 8)); + sioWrite8((unsigned char)(value >> 16)); + sioWrite8((unsigned char)(value >> 24)); +} + +#ifndef DRC_DBG + +static void map_rcnt_rcount0(u32 mode) +{ + if (mode & 0x100) { // pixel clock + map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m1, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m1, 1); + } + else { + map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m0, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m0, 1); + } +} + +static void map_rcnt_rcount1(u32 mode) +{ + if (mode & 0x100) { // hcnt + map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m1, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m1, 1); + } + else { + map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m0, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m0, 1); + } +} + +static void map_rcnt_rcount2(u32 mode) +{ + if (mode & 0x01) { // gate + map_item(&mem_iortab[IOMEM32(0x1120)], &psxH[0x1000], 0); + map_item(&mem_iortab[IOMEM16(0x1120)], &psxH[0x1000], 0); + } + else if (mode & 0x200) { // clk/8 + map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m1, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m1, 1); + } + else { + map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m0, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m0, 1); + } +} + +#else +#define map_rcnt_rcount0(mode) +#define map_rcnt_rcount1(mode) +#define map_rcnt_rcount2(mode) +#endif + +#define make_rcnt_funcs(i) \ +static u32 io_rcnt_read_count##i() { return psxRcntRcount(i); } \ +static u32 io_rcnt_read_mode##i() { return psxRcntRmode(i); } \ +static u32 io_rcnt_read_target##i() { return psxRcntRtarget(i); } \ +static void io_rcnt_write_count##i(u32 val) { psxRcntWcount(i, val & 0xffff); } \ +static void io_rcnt_write_mode##i(u32 val) { psxRcntWmode(i, val); map_rcnt_rcount##i(val); } \ +static void io_rcnt_write_target##i(u32 val) { psxRcntWtarget(i, val & 0xffff); } + +make_rcnt_funcs(0) +make_rcnt_funcs(1) +make_rcnt_funcs(2) + +static void io_write_ireg16(u32 value) +{ + //if (Config.Sio) psxHu16ref(0x1070) |= 0x80; + if (Config.SpuIrq) psxHu16ref(0x1070) |= 0x200; + psxHu16ref(0x1070) &= value; +} + +static void io_write_imask16(u32 value) +{ + psxHu16ref(0x1074) = value; + if (psxHu16ref(0x1070) & value) + new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); +} + +static void io_write_ireg32(u32 value) +{ + //if (Config.Sio) psxHu32ref(0x1070) |= 0x80; + if (Config.SpuIrq) psxHu32ref(0x1070) |= 0x200; + psxHu32ref(0x1070) &= value; +} + +static void io_write_imask32(u32 value) +{ + psxHu32ref(0x1074) = value; + if (psxHu32ref(0x1070) & value) + new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); +} + +static void io_write_dma_icr32(u32 value) +{ + u32 tmp = value & 0x00ff803f; + tmp |= (SWAPu32(HW_DMA_ICR) & ~value) & 0x7f000000; + if ((tmp & HW_DMA_ICR_GLOBAL_ENABLE && tmp & 0x7f000000) + || tmp & HW_DMA_ICR_BUS_ERROR) { + if (!(SWAPu32(HW_DMA_ICR) & HW_DMA_ICR_IRQ_SENT)) + psxHu32ref(0x1070) |= SWAP32(8); + tmp |= HW_DMA_ICR_IRQ_SENT; + } + HW_DMA_ICR = SWAPu32(tmp); +} + +#define make_dma_func(n) \ +static void io_write_chcr##n(u32 value) \ +{ \ + HW_DMA##n##_CHCR = value; \ + if (value & 0x01000000 && HW_DMA_PCR & (8 << (n * 4))) { \ + psxDma##n(HW_DMA##n##_MADR, HW_DMA##n##_BCR, value); \ + } \ +} + +make_dma_func(0) +make_dma_func(1) +make_dma_func(2) +make_dma_func(3) +make_dma_func(4) +make_dma_func(6) + +static void io_spu_write16(u32 value) +{ + // meh + SPU_writeRegister(address, value, psxRegs.cycle); +} + +static void io_spu_write32(u32 value) +{ + SPUwriteRegister wfunc = SPU_writeRegister; + u32 a = address; + + wfunc(a, value & 0xffff, psxRegs.cycle); + wfunc(a + 2, value >> 16, psxRegs.cycle); +} + +static u32 io_gpu_read_status(void) +{ + u32 v; + + // meh2, syncing for img bit, might want to avoid it.. + gpuSyncPluginSR(); + v = HW_GPU_STATUS; + + // XXX: because of large timeslices can't use hSyncCount, using rough + // approximization instead. Perhaps better use hcounter code here or something. + if (hSyncCount < 240 && (HW_GPU_STATUS & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) + v |= PSXGPU_LCF & (psxRegs.cycle << 20); + return v; +} + +static void io_gpu_write_status(u32 value) +{ + GPU_writeStatus(value); + gpuSyncPluginSR(); +} + +static void map_ram_write(void) +{ + int i; + + for (i = 0; i < (0x800000 >> 12); i++) { + map_l1_mem(mem_writetab, i, 0x80000000, 0x200000, psxM); + map_l1_mem(mem_writetab, i, 0x00000000, 0x200000, psxM); + map_l1_mem(mem_writetab, i, 0xa0000000, 0x200000, psxM); + } +} + +static void unmap_ram_write(void) +{ + int i; + + for (i = 0; i < (0x800000 >> 12); i++) { + map_item(&mem_writetab[0x80000|i], mem_unmwtab, 1); + map_item(&mem_writetab[0x00000|i], mem_unmwtab, 1); + map_item(&mem_writetab[0xa0000|i], mem_unmwtab, 1); + } +} + +static void write_biu(u32 value) +{ + memprintf("write_biu %08x, %08x @%08x %u\n", address, value, psxRegs.pc, psxRegs.cycle); + + if (address != 0xfffe0130) + return; + + switch (value) { + case 0x800: case 0x804: + unmap_ram_write(); + break; + case 0: case 0x1e988: + map_ram_write(); + break; + default: + printf("write_biu: unexpected val: %08x\n", value); + break; + } +} + +void new_dyna_pcsx_mem_load_state(void) +{ + map_rcnt_rcount0(rcnts[0].mode); + map_rcnt_rcount1(rcnts[1].mode); + map_rcnt_rcount2(rcnts[2].mode); +} + +int pcsxmem_is_handler_dynamic(unsigned int addr) +{ + if ((addr & 0xfffff000) != 0x1f801000) + return 0; + + addr &= 0xffff; + return addr == 0x1100 || addr == 0x1110 || addr == 0x1120; +} + +void new_dyna_pcsx_mem_init(void) +{ + int i; + + // have to map these further to keep tcache close to .text + mem_readtab = psxMap(0x08000000, 0x200000 * 4, 0, MAP_TAG_LUTS); + if (mem_readtab == NULL) { + SysPrintf("failed to map mem tables\n"); + exit(1); + } + mem_writetab = mem_readtab + 0x100000; + + // 1st level lookup: + // 0: direct mem + // 1: use 2nd lookup + // 2nd level lookup: + // 0: direct mem variable + // 1: memhandler + + // default/unmapped memhandlers + for (i = 0; i < 0x100000; i++) { + //map_item(&mem_readtab[i], mem_unmrtab, 1); + map_l1_mem(mem_readtab, i, 0, 0x1000, zero_mem); + map_item(&mem_writetab[i], mem_unmwtab, 1); + } + + // RAM and it's mirrors + for (i = 0; i < (0x800000 >> 12); i++) { + map_l1_mem(mem_readtab, i, 0x80000000, 0x200000, psxM); + map_l1_mem(mem_readtab, i, 0x00000000, 0x200000, psxM); + map_l1_mem(mem_readtab, i, 0xa0000000, 0x200000, psxM); + } + map_ram_write(); + + // BIOS and it's mirrors + for (i = 0; i < (0x80000 >> 12); i++) { + map_l1_mem(mem_readtab, i, 0x1fc00000, 0x80000, psxR); + map_l1_mem(mem_readtab, i, 0xbfc00000, 0x80000, psxR); + } + + // scratchpad + map_l1_mem(mem_readtab, 0, 0x1f800000, 0x1000, psxH); + map_l1_mem(mem_readtab, 0, 0x9f800000, 0x1000, psxH); + map_l1_mem(mem_writetab, 0, 0x1f800000, 0x1000, psxH); + map_l1_mem(mem_writetab, 0, 0x9f800000, 0x1000, psxH); + + // I/O + map_item(&mem_readtab[0x1f801000 >> 12], mem_iortab, 1); + map_item(&mem_readtab[0x9f801000 >> 12], mem_iortab, 1); + map_item(&mem_readtab[0xbf801000 >> 12], mem_iortab, 1); + map_item(&mem_writetab[0x1f801000 >> 12], mem_iowtab, 1); + map_item(&mem_writetab[0x9f801000 >> 12], mem_iowtab, 1); + map_item(&mem_writetab[0xbf801000 >> 12], mem_iowtab, 1); + + // L2 + // unmapped tables + for (i = 0; i < (1+2+4) * 0x1000 / 4; i++) + map_item(&mem_unmwtab[i], write_mem_dummy, 1); + + // fill IO tables + for (i = 0; i < 0x1000/4; i++) { + map_item(&mem_iortab[i], &psxH[0x1000], 0); + map_item(&mem_iowtab[i], &psxH[0x1000], 0); + } + for (; i < 0x1000/4 + 0x1000/2; i++) { + map_item(&mem_iortab[i], &psxH[0x1000], 0); + map_item(&mem_iowtab[i], &psxH[0x1000], 0); + } + for (; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) { + map_item(&mem_iortab[i], &psxH[0x1000], 0); + map_item(&mem_iowtab[i], &psxH[0x1000], 0); + } + + map_item(&mem_iortab[IOMEM32(0x1040)], io_read_sio32, 1); + map_item(&mem_iortab[IOMEM32(0x1100)], io_rcnt_read_count0, 1); + map_item(&mem_iortab[IOMEM32(0x1104)], io_rcnt_read_mode0, 1); + map_item(&mem_iortab[IOMEM32(0x1108)], io_rcnt_read_target0, 1); + map_item(&mem_iortab[IOMEM32(0x1110)], io_rcnt_read_count1, 1); + map_item(&mem_iortab[IOMEM32(0x1114)], io_rcnt_read_mode1, 1); + map_item(&mem_iortab[IOMEM32(0x1118)], io_rcnt_read_target1, 1); + map_item(&mem_iortab[IOMEM32(0x1120)], io_rcnt_read_count2, 1); + map_item(&mem_iortab[IOMEM32(0x1124)], io_rcnt_read_mode2, 1); + map_item(&mem_iortab[IOMEM32(0x1128)], io_rcnt_read_target2, 1); +// map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); + map_item(&mem_iortab[IOMEM32(0x1814)], io_gpu_read_status, 1); + map_item(&mem_iortab[IOMEM32(0x1820)], mdecRead0, 1); + map_item(&mem_iortab[IOMEM32(0x1824)], mdecRead1, 1); + + map_item(&mem_iortab[IOMEM16(0x1040)], io_read_sio16, 1); + map_item(&mem_iortab[IOMEM16(0x1044)], sioReadStat16, 1); + map_item(&mem_iortab[IOMEM16(0x1048)], sioReadMode16, 1); + map_item(&mem_iortab[IOMEM16(0x104a)], sioReadCtrl16, 1); + map_item(&mem_iortab[IOMEM16(0x104e)], sioReadBaud16, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], io_rcnt_read_count0, 1); + map_item(&mem_iortab[IOMEM16(0x1104)], io_rcnt_read_mode0, 1); + map_item(&mem_iortab[IOMEM16(0x1108)], io_rcnt_read_target0, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], io_rcnt_read_count1, 1); + map_item(&mem_iortab[IOMEM16(0x1114)], io_rcnt_read_mode1, 1); + map_item(&mem_iortab[IOMEM16(0x1118)], io_rcnt_read_target1, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], io_rcnt_read_count2, 1); + map_item(&mem_iortab[IOMEM16(0x1124)], io_rcnt_read_mode2, 1); + map_item(&mem_iortab[IOMEM16(0x1128)], io_rcnt_read_target2, 1); + + map_item(&mem_iortab[IOMEM8(0x1040)], sioRead8, 1); + map_item(&mem_iortab[IOMEM8(0x1800)], cdrRead0, 1); + map_item(&mem_iortab[IOMEM8(0x1801)], cdrRead1, 1); + map_item(&mem_iortab[IOMEM8(0x1802)], cdrRead2, 1); + map_item(&mem_iortab[IOMEM8(0x1803)], cdrRead3, 1); + + // write(u32 data) + map_item(&mem_iowtab[IOMEM32(0x1040)], io_write_sio32, 1); + map_item(&mem_iowtab[IOMEM32(0x1070)], io_write_ireg32, 1); + map_item(&mem_iowtab[IOMEM32(0x1074)], io_write_imask32, 1); + map_item(&mem_iowtab[IOMEM32(0x1088)], io_write_chcr0, 1); + map_item(&mem_iowtab[IOMEM32(0x1098)], io_write_chcr1, 1); + map_item(&mem_iowtab[IOMEM32(0x10a8)], io_write_chcr2, 1); + map_item(&mem_iowtab[IOMEM32(0x10b8)], io_write_chcr3, 1); + map_item(&mem_iowtab[IOMEM32(0x10c8)], io_write_chcr4, 1); + map_item(&mem_iowtab[IOMEM32(0x10e8)], io_write_chcr6, 1); + map_item(&mem_iowtab[IOMEM32(0x10f4)], io_write_dma_icr32, 1); + map_item(&mem_iowtab[IOMEM32(0x1100)], io_rcnt_write_count0, 1); + map_item(&mem_iowtab[IOMEM32(0x1104)], io_rcnt_write_mode0, 1); + map_item(&mem_iowtab[IOMEM32(0x1108)], io_rcnt_write_target0, 1); + map_item(&mem_iowtab[IOMEM32(0x1110)], io_rcnt_write_count1, 1); + map_item(&mem_iowtab[IOMEM32(0x1114)], io_rcnt_write_mode1, 1); + map_item(&mem_iowtab[IOMEM32(0x1118)], io_rcnt_write_target1, 1); + map_item(&mem_iowtab[IOMEM32(0x1120)], io_rcnt_write_count2, 1); + map_item(&mem_iowtab[IOMEM32(0x1124)], io_rcnt_write_mode2, 1); + map_item(&mem_iowtab[IOMEM32(0x1128)], io_rcnt_write_target2, 1); +// map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); + map_item(&mem_iowtab[IOMEM32(0x1814)], io_gpu_write_status, 1); + map_item(&mem_iowtab[IOMEM32(0x1820)], mdecWrite0, 1); + map_item(&mem_iowtab[IOMEM32(0x1824)], mdecWrite1, 1); + + map_item(&mem_iowtab[IOMEM16(0x1040)], io_write_sio16, 1); + map_item(&mem_iowtab[IOMEM16(0x1044)], sioWriteStat16, 1); + map_item(&mem_iowtab[IOMEM16(0x1048)], sioWriteMode16, 1); + map_item(&mem_iowtab[IOMEM16(0x104a)], sioWriteCtrl16, 1); + map_item(&mem_iowtab[IOMEM16(0x104e)], sioWriteBaud16, 1); + map_item(&mem_iowtab[IOMEM16(0x1070)], io_write_ireg16, 1); + map_item(&mem_iowtab[IOMEM16(0x1074)], io_write_imask16, 1); + map_item(&mem_iowtab[IOMEM16(0x1100)], io_rcnt_write_count0, 1); + map_item(&mem_iowtab[IOMEM16(0x1104)], io_rcnt_write_mode0, 1); + map_item(&mem_iowtab[IOMEM16(0x1108)], io_rcnt_write_target0, 1); + map_item(&mem_iowtab[IOMEM16(0x1110)], io_rcnt_write_count1, 1); + map_item(&mem_iowtab[IOMEM16(0x1114)], io_rcnt_write_mode1, 1); + map_item(&mem_iowtab[IOMEM16(0x1118)], io_rcnt_write_target1, 1); + map_item(&mem_iowtab[IOMEM16(0x1120)], io_rcnt_write_count2, 1); + map_item(&mem_iowtab[IOMEM16(0x1124)], io_rcnt_write_mode2, 1); + map_item(&mem_iowtab[IOMEM16(0x1128)], io_rcnt_write_target2, 1); + + map_item(&mem_iowtab[IOMEM8(0x1040)], sioWrite8, 1); + map_item(&mem_iowtab[IOMEM8(0x1800)], cdrWrite0, 1); + map_item(&mem_iowtab[IOMEM8(0x1801)], cdrWrite1, 1); + map_item(&mem_iowtab[IOMEM8(0x1802)], cdrWrite2, 1); + map_item(&mem_iowtab[IOMEM8(0x1803)], cdrWrite3, 1); + + for (i = 0x1c00; i < 0x1e00; i += 2) { + map_item(&mem_iowtab[IOMEM16(i)], io_spu_write16, 1); + map_item(&mem_iowtab[IOMEM32(i)], io_spu_write32, 1); + } + + // misc + map_item(&mem_writetab[0xfffe0130 >> 12], mem_ffwtab, 1); + for (i = 0; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) + map_item(&mem_ffwtab[i], write_biu, 1); + + mem_rtab = mem_readtab; + mem_wtab = mem_writetab; + + new_dyna_pcsx_mem_load_state(); +} + +void new_dyna_pcsx_mem_reset(void) +{ + int i; + + // plugins might change so update the pointers + map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); + + for (i = 0x1c00; i < 0x1e00; i += 2) + map_item(&mem_iortab[IOMEM16(i)], SPU_readRegister, 1); + + map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); +} + +void new_dyna_pcsx_mem_shutdown(void) +{ + psxUnmap(mem_readtab, 0x200000 * 4, MAP_TAG_LUTS); + mem_writetab = mem_readtab = NULL; +} diff --git a/libpcsxcore/new_dynarec/backends/psx/pcsxmem.h b/libpcsxcore/new_dynarec/backends/psx/pcsxmem.h new file mode 100644 index 0000000..72892a8 --- /dev/null +++ b/libpcsxcore/new_dynarec/backends/psx/pcsxmem.h @@ -0,0 +1,9 @@ + +extern u8 zero_mem[0x1000]; + +void new_dyna_pcsx_mem_init(void); +void new_dyna_pcsx_mem_reset(void); +void new_dyna_pcsx_mem_load_state(void); +void new_dyna_pcsx_mem_shutdown(void); + +int pcsxmem_is_handler_dynamic(unsigned int addr); diff --git a/libpcsxcore/new_dynarec/backends/psx/pcsxmem_inline.c b/libpcsxcore/new_dynarec/backends/psx/pcsxmem_inline.c new file mode 100644 index 0000000..305931a --- /dev/null +++ b/libpcsxcore/new_dynarec/backends/psx/pcsxmem_inline.c @@ -0,0 +1,66 @@ +/* + * (C) Gražvydas "notaz" Ignotas, 2011 + * + * This work is licensed under the terms of GNU GPL version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef DRC_DBG + +static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, int rt) +{ + if ((addr & 0xfffff000) == 0x1f801000) { + u_int t; + switch (addr & 0xffff) { + case 0x1120: // rcnt2 count + if (rt < 0) goto dont_care; + if (cc < 0) return 0; + emit_readword((int)&rcnts[2].mode, HOST_TEMPREG); + emit_readword((int)&rcnts[2].cycleStart, rt); + emit_testimm(HOST_TEMPREG, 0x200); + emit_readword((int)&last_count, HOST_TEMPREG); + emit_sub(HOST_TEMPREG, rt, HOST_TEMPREG); + emit_add(HOST_TEMPREG, cc, HOST_TEMPREG); + if (cc_adj) + emit_addimm(HOST_TEMPREG, cc_adj, rt); + emit_shrne_imm(rt, 3, rt); + mov_loadtype_adj(type!=LOADW_STUB?type:LOADH_STUB, rt, rt); + goto hit; + case 0x1104: + case 0x1114: + case 0x1124: // rcnt mode + if (rt < 0) return 0; + t = (addr >> 4) & 3; + emit_readword((int)&rcnts[t].mode, rt); + emit_andimm(rt, ~0x1800, HOST_TEMPREG); + emit_writeword(HOST_TEMPREG, (int)&rcnts[t].mode); + mov_loadtype_adj(type, rt, rt); + goto hit; + } + } + else { + if (rt < 0) + goto dont_care; + } + + return 0; + +hit: + assem_debug("pcsx_direct_read %08x end\n", addr); + return 1; + +dont_care: + assem_debug("pcsx_direct_read %08x dummy\n", addr); + return 1; +} + +#else + +static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, int rt) +{ + return 0; +} + +#endif + +// vim:shiftwidth=2:expandtab diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c deleted file mode 100644 index 8aebd64..0000000 --- a/libpcsxcore/new_dynarec/emu_if.c +++ /dev/null @@ -1,695 +0,0 @@ -/* - * (C) Gražvydas "notaz" Ignotas, 2010-2011 - * - * This work is licensed under the terms of GNU GPL version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include - -#include "emu_if.h" -#include "pcsxmem.h" -#include "../psxhle.h" -#include "../r3000a.h" -#include "../cdrom.h" -#include "../psxdma.h" -#include "../mdec.h" -#include "../gte_arm.h" -#include "../gte_neon.h" -#define FLAGLESS -#include "../gte.h" - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - -//#define evprintf printf -#define evprintf(...) - -char invalid_code[0x100000]; -static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); -u32 event_cycles[PSXINT_COUNT]; - -static void schedule_timeslice(void) -{ - u32 i, c = psxRegs.cycle; - u32 irqs = psxRegs.interrupt; - s32 min, dif; - - min = PSXCLK; - for (i = 0; irqs != 0; i++, irqs >>= 1) { - if (!(irqs & 1)) - continue; - dif = event_cycles[i] - c; - //evprintf(" ev %d\n", dif); - if (0 < dif && dif < min) - min = dif; - } - next_interupt = c + min; -} - -typedef void (irq_func)(); - -static irq_func * const irq_funcs[] = { - [PSXINT_SIO] = sioInterrupt, - [PSXINT_CDR] = cdrInterrupt, - [PSXINT_CDREAD] = cdrReadInterrupt, - [PSXINT_GPUDMA] = gpuInterrupt, - [PSXINT_MDECOUTDMA] = mdec1Interrupt, - [PSXINT_SPUDMA] = spuInterrupt, - [PSXINT_MDECINDMA] = mdec0Interrupt, - [PSXINT_GPUOTCDMA] = gpuotcInterrupt, - [PSXINT_CDRDMA] = cdrDmaInterrupt, - [PSXINT_CDRLID] = cdrLidSeekInterrupt, - [PSXINT_CDRPLAY] = cdrPlayInterrupt, - [PSXINT_SPU_UPDATE] = spuUpdate, - [PSXINT_RCNT] = psxRcntUpdate, -}; - -/* local dupe of psxBranchTest, using event_cycles */ -static void irq_test(void) -{ - u32 irqs = psxRegs.interrupt; - u32 cycle = psxRegs.cycle; - u32 irq, irq_bits; - - // irq_funcs() may queue more irqs - psxRegs.interrupt = 0; - - for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) { - if (!(irq_bits & 1)) - continue; - if ((s32)(cycle - event_cycles[irq]) >= 0) { - irqs &= ~(1 << irq); - irq_funcs[irq](); - } - } - psxRegs.interrupt |= irqs; - - if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) { - psxException(0x400, 0); - pending_exception = 1; - } -} - -void gen_interupt() -{ - evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt); - - irq_test(); - //psxBranchTest(); - //pending_exception = 1; - - schedule_timeslice(); - - evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, - next_interupt, next_interupt - psxRegs.cycle); -} - -// from interpreter -extern void MTC0(int reg, u32 val); - -void pcsx_mtc0(u32 reg, u32 val) -{ - evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); - gen_interupt(); - if (Cause & Status & 0x0300) // possible sw irq - pending_exception = 1; -} - -void pcsx_mtc0_ds(u32 reg, u32 val) -{ - evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); -} - -void new_dyna_before_save(void) -{ - psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat - - // psxRegs.intCycle is always maintained, no need to convert -} - -void new_dyna_after_save(void) -{ - psxRegs.interrupt |= 1 << PSXINT_RCNT; -} - -static void new_dyna_restore(void) -{ - int i; - for (i = 0; i < PSXINT_COUNT; i++) - event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle; - - event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter; - psxRegs.interrupt |= 1 << PSXINT_RCNT; - psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1; - - new_dyna_pcsx_mem_load_state(); -} - -void new_dyna_freeze(void *f, int mode) -{ - const char header_save[8] = "ariblks"; - uint32_t addrs[1024 * 4]; - int32_t size = 0; - int bytes; - char header[8]; - - if (mode != 0) { // save - size = new_dynarec_save_blocks(addrs, sizeof(addrs)); - if (size == 0) - return; - - SaveFuncs.write(f, header_save, sizeof(header_save)); - SaveFuncs.write(f, &size, sizeof(size)); - SaveFuncs.write(f, addrs, size); - } - else { - new_dyna_restore(); - - bytes = SaveFuncs.read(f, header, sizeof(header)); - if (bytes != sizeof(header) || strcmp(header, header_save)) { - if (bytes > 0) - SaveFuncs.seek(f, -bytes, SEEK_CUR); - return; - } - SaveFuncs.read(f, &size, sizeof(size)); - if (size <= 0) - return; - if (size > sizeof(addrs)) { - bytes = size - sizeof(addrs); - SaveFuncs.seek(f, bytes, SEEK_CUR); - size = sizeof(addrs); - } - bytes = SaveFuncs.read(f, addrs, size); - if (bytes != size) - return; - - new_dynarec_load_blocks(addrs, size); - } - - //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded"); -} - -/* GTE stuff */ -void *gte_handlers[64]; - -void *gte_handlers_nf[64] = { - NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00 - NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08 - gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10 - NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18 - gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 - gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28 - gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 - NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38 -}; - -const char *gte_regnames[64] = { - NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00 - NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08 - "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10 - NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18 - "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 - "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28 - "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 - NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38 -}; - -/* from gte.txt.. not sure if this is any good. */ -const char gte_cycletab[64] = { - /* 1 2 3 4 5 6 7 8 9 a b c d e f */ - 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0, - 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0, - 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0, - 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39, -}; - -#define GCBIT(x) \ - (1ll << (32+x)) -#define GDBIT(x) \ - (1ll << (x)) -#define GCBITS3(b0,b1,b2) \ - (GCBIT(b0) | GCBIT(b1) | GCBIT(b2)) -#define GDBITS2(b0,b1) \ - (GDBIT(b0) | GDBIT(b1)) -#define GDBITS3(b0,b1,b2) \ - (GDBITS2(b0,b1) | GDBIT(b2)) -#define GDBITS4(b0,b1,b2,b3) \ - (GDBITS3(b0,b1,b2) | GDBIT(b3)) -#define GDBITS5(b0,b1,b2,b3,b4) \ - (GDBITS4(b0,b1,b2,b3) | GDBIT(b4)) -#define GDBITS6(b0,b1,b2,b3,b4,b5) \ - (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5)) -#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \ - (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6)) -#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \ - (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7)) -#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \ - (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8)) -#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \ - (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9)) - -const uint64_t gte_reg_reads[64] = { - [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19), - [GTE_NCLIP] = GDBITS3(12,13,14), - [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11), - [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22), - [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), - [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further? - [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22), - [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22), - [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8), - [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), - [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22), - [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), - [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), - [GTE_SQR] = GDBITS3(9,10,11), - [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), - [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22), - [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19), - [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19), - [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19), - [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22), - [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27), - [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), -}; - -// note: this excludes gteFLAG that is always written to -const uint64_t gte_reg_writes[64] = { - [GTE_RTPS] = 0x0f0f7f00ll, - [GTE_NCLIP] = GDBIT(24), - [GTE_OP] = GDBITS6(9,10,11,25,26,27), - [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27), - [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_SQR] = GDBITS6(9,10,11,25,26,27), - [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_AVSZ3] = GDBITS2(7,24), - [GTE_AVSZ4] = GDBITS2(7,24), - [GTE_RTPT] = 0x0f0f7f00ll, - [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27), -}; - -static int ari64_init() -{ - extern void (*psxCP2[64])(); - extern void psxNULL(); - extern unsigned char *out; - size_t i; - - new_dynarec_init(); - new_dyna_pcsx_mem_init(); - - for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) - if (psxCP2[i] != psxNULL) - gte_handlers[i] = psxCP2[i]; - -#if defined(__arm__) && !defined(DRC_DBG) - gte_handlers[0x06] = gteNCLIP_arm; -#ifdef HAVE_ARMV5 - gte_handlers_nf[0x01] = gteRTPS_nf_arm; - gte_handlers_nf[0x30] = gteRTPT_nf_arm; -#endif -#ifdef __ARM_NEON__ - // compiler's _nf version is still a lot slower than neon - // _nf_arm RTPS is roughly the same, RTPT slower - gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; - gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; -#endif -#endif -#ifdef DRC_DBG - memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); -#endif - psxH_ptr = psxH; - zeromem_ptr = zero_mem; - scratch_buf_ptr = scratch_buf; - - SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n"); - SysPrintf("%08x/%08x/%08x/%08x/%08x\n", - psxM, psxH, psxR, mem_rtab, out); - - return 0; -} - -static void ari64_reset() -{ - printf("ari64_reset\n"); - new_dyna_pcsx_mem_reset(); - invalidate_all_pages(); - new_dyna_restore(); - pending_exception = 1; -} - -// execute until predefined leave points -// (HLE softcall exit and BIOS fastboot end) -static void ari64_execute_until() -{ - schedule_timeslice(); - - evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc, - psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); - - new_dyna_start(); - - evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc, - psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); -} - -static void ari64_execute() -{ - while (!stop) { - ari64_execute_until(); - evprintf("drc left @%08x\n", psxRegs.pc); - } -} - -static void ari64_clear(u32 addr, u32 size) -{ - u32 start, end, main_ram; - - size *= 4; /* PCSX uses DMA units (words) */ - - evprintf("ari64_clear %08x %04x\n", addr, size); - - /* check for RAM mirrors */ - main_ram = (addr & 0xffe00000) == 0x80000000; - - start = addr >> 12; - end = (addr + size) >> 12; - - for (; start <= end; start++) - if (!main_ram || !invalid_code[start]) - invalidate_block(start); -} - -static void ari64_shutdown() -{ - new_dynarec_cleanup(); - new_dyna_pcsx_mem_shutdown(); -} - -extern void intExecute(); -extern void intExecuteT(); -extern void intExecuteBlock(); -extern void intExecuteBlockT(); -#ifndef DRC_DBG -#define intExecuteT intExecute -#define intExecuteBlockT intExecuteBlock -#endif - -R3000Acpu psxRec = { - ari64_init, - ari64_reset, -#ifndef DRC_DISABLE - ari64_execute, - ari64_execute_until, -#else - intExecuteT, - intExecuteBlockT, -#endif - ari64_clear, - ari64_shutdown -}; - -// TODO: rm -#ifndef DRC_DBG -void do_insn_trace() {} -void do_insn_cmp() {} -#endif - -#ifdef DRC_DISABLE -unsigned int address; -int pending_exception, stop; -u32 next_interupt; -int new_dynarec_did_compile; -int cycle_multiplier; -int new_dynarec_hacks; -void *psxH_ptr; -void *zeromem_ptr; -u8 zero_mem[0x1000]; -unsigned char *out; -void *mem_rtab; -void *scratch_buf_ptr; -void new_dynarec_init() { (void)ari64_execute; } -void new_dyna_start() {} -void new_dynarec_cleanup() {} -void new_dynarec_clear_full() {} -void invalidate_all_pages() {} -void invalidate_block(unsigned int block) {} -void new_dyna_pcsx_mem_init(void) {} -void new_dyna_pcsx_mem_reset(void) {} -void new_dyna_pcsx_mem_load_state(void) {} -void new_dyna_pcsx_mem_shutdown(void) {} -int new_dynarec_save_blocks(void *save, int size) { return 0; } -void new_dynarec_load_blocks(const void *save, int size) {} -#endif - -#ifdef DRC_DBG - -#include -static FILE *f; -extern u32 last_io_addr; - -static void dump_mem(const char *fname, void *mem, size_t size) -{ - FILE *f1 = fopen(fname, "wb"); - if (f1 == NULL) - f1 = fopen(strrchr(fname, '/') + 1, "wb"); - fwrite(mem, 1, size, f1); - fclose(f1); -} - -static u32 memcheck_read(u32 a) -{ - if ((a >> 16) == 0x1f80) - // scratchpad/IO - return *(u32 *)(psxH + (a & 0xfffc)); - - if ((a >> 16) == 0x1f00) - // parallel - return *(u32 *)(psxP + (a & 0xfffc)); - -// if ((a & ~0xe0600000) < 0x200000) - // RAM - return *(u32 *)(psxM + (a & 0x1ffffc)); -} - -void do_insn_trace(void) -{ - static psxRegisters oldregs; - static u32 old_io_addr = (u32)-1; - static u32 old_io_data = 0xbad0c0de; - static u32 event_cycles_o[PSXINT_COUNT]; - u32 *allregs_p = (void *)&psxRegs; - u32 *allregs_o = (void *)&oldregs; - u32 io_data; - int i; - u8 byte; - - //last_io_addr = 0x5e2c8; - if (f == NULL) - f = fopen("tracelog", "wb"); - - // log reg changes - oldregs.code = psxRegs.code; // don't care - for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { - if (allregs_p[i] != allregs_o[i]) { - fwrite(&i, 1, 1, f); - fwrite(&allregs_p[i], 1, 4, f); - allregs_o[i] = allregs_p[i]; - } - } - // log event changes - for (i = 0; i < PSXINT_COUNT; i++) { - if (event_cycles[i] != event_cycles_o[i]) { - byte = 0xfc; - fwrite(&byte, 1, 1, f); - fwrite(&i, 1, 1, f); - fwrite(&event_cycles[i], 1, 4, f); - event_cycles_o[i] = event_cycles[i]; - } - } - // log last io - if (old_io_addr != last_io_addr) { - byte = 0xfd; - fwrite(&byte, 1, 1, f); - fwrite(&last_io_addr, 1, 4, f); - old_io_addr = last_io_addr; - } - io_data = memcheck_read(last_io_addr); - if (old_io_data != io_data) { - byte = 0xfe; - fwrite(&byte, 1, 1, f); - fwrite(&io_data, 1, 4, f); - old_io_data = io_data; - } - byte = 0xff; - fwrite(&byte, 1, 1, f); - -#if 0 - if (psxRegs.cycle == 190230) { - dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000); - dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000); - printf("dumped\n"); - exit(1); - } -#endif -} - -static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", - "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", - "lo", "hi", - "C0_0", "C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7", - "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15", - "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23", - "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31", - - "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7", - "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15", - "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23", - "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31", - - "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7", - "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15", - "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23", - "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31", - - "PC", "code", "cycle", "interrupt", -}; - -static struct { - int reg; - u32 val, val_expect; - u32 pc, cycle; -} miss_log[64]; -static int miss_log_i; -#define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0])) -#define miss_log_mask (miss_log_len-1) - -static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle) -{ - miss_log[miss_log_i].reg = reg; - miss_log[miss_log_i].val = val; - miss_log[miss_log_i].val_expect = val_expect; - miss_log[miss_log_i].pc = pc; - miss_log[miss_log_i].cycle = cycle; - miss_log_i = (miss_log_i + 1) & miss_log_mask; -} - -void breakme() {} - -void do_insn_cmp(void) -{ - static psxRegisters rregs; - static u32 mem_addr, mem_val; - u32 *allregs_p = (void *)&psxRegs; - u32 *allregs_e = (void *)&rregs; - static u32 ppc, failcount; - int i, ret, bad = 0, which_event = -1; - u32 ev_cycles = 0; - u8 code; - - if (f == NULL) - f = fopen("tracelog", "rb"); - - while (1) { - if ((ret = fread(&code, 1, 1, f)) <= 0) - break; - if (ret <= 0) - break; - if (code == 0xff) - break; - switch (code) { - case 0xfc: - which_event = 0; - fread(&which_event, 1, 1, f); - fread(&ev_cycles, 1, 4, f); - continue; - case 0xfd: - fread(&mem_addr, 1, 4, f); - continue; - case 0xfe: - fread(&mem_val, 1, 4, f); - continue; - } - fread(&allregs_e[code], 1, 4, f); - } - - if (ret <= 0) { - printf("EOF?\n"); - goto end; - } - - psxRegs.code = rregs.code; // don't care - psxRegs.cycle = rregs.cycle; - psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count - - //if (psxRegs.cycle == 166172) breakme(); - - if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 && - mem_val == memcheck_read(mem_addr) - ) { - failcount = 0; - goto ok; - } - - for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { - if (allregs_p[i] != allregs_e[i]) { - miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle); - bad++; - } - } - - if (mem_val != memcheck_read(mem_addr)) { - printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val); - goto end; - } - - if (which_event >= 0 && event_cycles[which_event] != ev_cycles) { - printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles); - goto end; - } - - if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) { - static int last_mcycle; - if (last_mcycle != psxRegs.cycle >> 20) { - printf("%u\n", psxRegs.cycle); - last_mcycle = psxRegs.cycle >> 20; - } - failcount++; - goto ok; - } - -end: - for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask) - printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n", - regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val, - miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle); - printf("-- %d\n", bad); - for (i = 0; i < 8; i++) - printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i], - i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]); - printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle); - dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000); - dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); - exit(1); -ok: - psxRegs.cycle = rregs.cycle + 2; // sync timing - ppc = psxRegs.pc; -} - -#endif diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h deleted file mode 100644 index 73f842b..0000000 --- a/libpcsxcore/new_dynarec/emu_if.h +++ /dev/null @@ -1,108 +0,0 @@ -#include "new_dynarec.h" -#include "../r3000a.h" - -extern char invalid_code[0x100000]; - -/* weird stuff */ -#define EAX 0 -#define ECX 1 - -/* same as psxRegs */ -extern int reg[]; - -/* same as psxRegs.GPR.n.* */ -extern int hi, lo; - -/* same as psxRegs.CP0.n.* */ -extern int reg_cop0[]; -#define Status psxRegs.CP0.n.Status -#define Cause psxRegs.CP0.n.Cause -#define EPC psxRegs.CP0.n.EPC -#define BadVAddr psxRegs.CP0.n.BadVAddr -#define Context psxRegs.CP0.n.Context -#define EntryHi psxRegs.CP0.n.EntryHi -#define Count psxRegs.cycle // psxRegs.CP0.n.Count - -/* COP2/GTE */ -enum gte_opcodes { - GTE_RTPS = 0x01, - GTE_NCLIP = 0x06, - GTE_OP = 0x0c, - GTE_DPCS = 0x10, - GTE_INTPL = 0x11, - GTE_MVMVA = 0x12, - GTE_NCDS = 0x13, - GTE_CDP = 0x14, - GTE_NCDT = 0x16, - GTE_NCCS = 0x1b, - GTE_CC = 0x1c, - GTE_NCS = 0x1e, - GTE_NCT = 0x20, - GTE_SQR = 0x28, - GTE_DCPL = 0x29, - GTE_DPCT = 0x2a, - GTE_AVSZ3 = 0x2d, - GTE_AVSZ4 = 0x2e, - GTE_RTPT = 0x30, - GTE_GPF = 0x3d, - GTE_GPL = 0x3e, - GTE_NCCT = 0x3f, -}; - -extern int reg_cop2d[], reg_cop2c[]; -extern void *gte_handlers[64]; -extern void *gte_handlers_nf[64]; -extern const char *gte_regnames[64]; -extern const char gte_cycletab[64]; -extern const uint64_t gte_reg_reads[64]; -extern const uint64_t gte_reg_writes[64]; - -/* dummy */ -extern int FCR0, FCR31; - -/* mem */ -extern void *mem_rtab; -extern void *mem_wtab; - -void jump_handler_read8(u32 addr, u32 *table, u32 cycles); -void jump_handler_read16(u32 addr, u32 *table, u32 cycles); -void jump_handler_read32(u32 addr, u32 *table, u32 cycles); -void jump_handler_write8(u32 addr, u32 data, u32 cycles, u32 *table); -void jump_handler_write16(u32 addr, u32 data, u32 cycles, u32 *table); -void jump_handler_write32(u32 addr, u32 data, u32 cycles, u32 *table); -void jump_handler_write_h(u32 addr, u32 data, u32 cycles, void *handler); -void jump_handle_swl(u32 addr, u32 data, u32 cycles); -void jump_handle_swr(u32 addr, u32 data, u32 cycles); -void rcnt0_read_count_m0(u32 addr, u32, u32 cycles); -void rcnt0_read_count_m1(u32 addr, u32, u32 cycles); -void rcnt1_read_count_m0(u32 addr, u32, u32 cycles); -void rcnt1_read_count_m1(u32 addr, u32, u32 cycles); -void rcnt2_read_count_m0(u32 addr, u32, u32 cycles); -void rcnt2_read_count_m1(u32 addr, u32, u32 cycles); - -extern unsigned int address; -extern void *psxH_ptr; -extern void *zeromem_ptr; -extern void *scratch_buf_ptr; - -// same as invalid_code, just a region for ram write checks (inclusive) -extern u32 inv_code_start, inv_code_end; - -/* cycles/irqs */ -extern u32 next_interupt; -extern int pending_exception; - -/* called by drc */ -void pcsx_mtc0(u32 reg, u32 val); -void pcsx_mtc0_ds(u32 reg, u32 val); - -/* misc */ -extern void (*psxHLEt[])(); - -extern void SysPrintf(const char *fmt, ...); - -#ifdef RAM_FIXED -#define rdram ((u_int)0x80000000) -#else -#define rdram ((u_int)psxM) -#endif diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S deleted file mode 100644 index 147b0df..0000000 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ /dev/null @@ -1,866 +0,0 @@ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * linkage_arm.s for PCSX * - * Copyright (C) 2009-2011 Ari64 * - * Copyright (C) 2010-2013 Gražvydas "notaz" Ignotas * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#include "arm_features.h" -#include "new_dynarec_config.h" -#include "linkage_offsets.h" - - -#ifdef __MACH__ -#define dynarec_local ESYM(dynarec_local) -#define add_link ESYM(add_link) -#define new_recompile_block ESYM(new_recompile_block) -#define get_addr ESYM(get_addr) -#define get_addr_ht ESYM(get_addr_ht) -#define clean_blocks ESYM(clean_blocks) -#define gen_interupt ESYM(gen_interupt) -#define psxException ESYM(psxException) -#define execI ESYM(execI) -#define invalidate_addr ESYM(invalidate_addr) -#endif - - .bss - .align 4 - .global dynarec_local - .type dynarec_local, %object - .size dynarec_local, LO_dynarec_local_size -dynarec_local: - .space LO_dynarec_local_size - -#define DRC_VAR_(name, vname, size_) \ - vname = dynarec_local + LO_##name; \ - .global vname; \ - .type vname, %object; \ - .size vname, size_ - -#define DRC_VAR(name, size_) \ - DRC_VAR_(name, ESYM(name), size_) - -DRC_VAR(next_interupt, 4) -DRC_VAR(cycle_count, 4) -DRC_VAR(last_count, 4) -DRC_VAR(pending_exception, 4) -DRC_VAR(stop, 4) -DRC_VAR(invc_ptr, 4) -DRC_VAR(address, 4) -DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) - -/* psxRegs */ -DRC_VAR(reg, 128) -DRC_VAR(lo, 4) -DRC_VAR(hi, 4) -DRC_VAR(reg_cop0, 128) -DRC_VAR(reg_cop2d, 128) -DRC_VAR(reg_cop2c, 128) -DRC_VAR(pcaddr, 4) -@DRC_VAR(code, 4) -@DRC_VAR(cycle, 4) -@DRC_VAR(interrupt, 4) -@DRC_VAR(intCycle, 256) - -DRC_VAR(rcnts, 7*4*4) -DRC_VAR(mem_rtab, 4) -DRC_VAR(mem_wtab, 4) -DRC_VAR(psxH_ptr, 4) -DRC_VAR(zeromem_ptr, 4) -DRC_VAR(inv_code_start, 4) -DRC_VAR(inv_code_end, 4) -DRC_VAR(branch_target, 4) -DRC_VAR(scratch_buf_ptr, 4) -@DRC_VAR(align0, 12) /* unused/alignment */ -DRC_VAR(mini_ht, 256) -DRC_VAR(restore_candidate, 512) - -/* unused */ -DRC_VAR(FCR0, 4) -DRC_VAR(FCR31, 4) - -#ifdef __MACH__ - .data - .align 2 -ptr_jump_in: - .word ESYM(jump_in) -ptr_jump_dirty: - .word ESYM(jump_dirty) -ptr_hash_table: - .word ESYM(hash_table) -#endif - - - .syntax unified - .text - .align 2 - -#ifndef HAVE_ARMV5 -.macro blx rd - mov lr, pc - bx \rd -.endm -#endif - -.macro load_varadr reg var -#if defined(HAVE_ARMV7) && !defined(__PIC__) - movw \reg, #:lower16:\var - movt \reg, #:upper16:\var -#elif defined(HAVE_ARMV7) && defined(__MACH__) - movw \reg, #:lower16:(\var-(1678f+8)) - movt \reg, #:upper16:(\var-(1678f+8)) -1678: - add \reg, pc -#else - ldr \reg, =\var -#endif -.endm - -.macro load_varadr_ext reg var -#if defined(HAVE_ARMV7) && defined(__MACH__) && defined(__PIC__) - movw \reg, #:lower16:(ptr_\var-(1678f+8)) - movt \reg, #:upper16:(ptr_\var-(1678f+8)) -1678: - ldr \reg, [pc, \reg] -#else - load_varadr \reg \var -#endif -.endm - -.macro mov_16 reg imm -#ifdef HAVE_ARMV7 - movw \reg, #\imm -#else - mov \reg, #(\imm & 0x00ff) - orr \reg, #(\imm & 0xff00) -#endif -.endm - -.macro mov_24 reg imm -#ifdef HAVE_ARMV7 - movw \reg, #(\imm & 0xffff) - movt \reg, #(\imm >> 16) -#else - mov \reg, #(\imm & 0x0000ff) - orr \reg, #(\imm & 0x00ff00) - orr \reg, #(\imm & 0xff0000) -#endif -.endm - -/* r0 = virtual target address */ -/* r1 = instruction to patch */ -.macro dyna_linker_main -#ifndef NO_WRITE_EXEC - load_varadr_ext r3, jump_in - /* get_page */ - lsr r2, r0, #12 - mov r6, #4096 - bic r2, r2, #0xe0000 - sub r6, r6, #1 - cmp r2, #0x1000 - ldr r7, [r1] - biclt r2, #0x0e00 - and r6, r6, r2 - cmp r2, #2048 - add r12, r7, #2 - orrcs r2, r6, #2048 - ldr r5, [r3, r2, lsl #2] - lsl r12, r12, #8 - add r6, r1, r12, asr #6 - mov r8, #0 - /* jump_in lookup */ -1: - movs r4, r5 - beq 2f - ldr r3, [r5] /* ll_entry .vaddr */ - ldrd r4, r5, [r4, #8] /* ll_entry .next, .addr */ - teq r3, r0 - bne 1b - teq r4, r6 - moveq pc, r4 /* Stale i-cache */ - mov r8, r4 - b 1b /* jump_in may have dupes, continue search */ -2: - tst r8, r8 - beq 3f /* r0 not in jump_in */ - - mov r5, r1 - mov r1, r6 - bl add_link - sub r2, r8, r5 - and r1, r7, #0xff000000 - lsl r2, r2, #6 - sub r1, r1, #2 - add r1, r1, r2, lsr #8 - str r1, [r5] - mov pc, r8 -3: - /* hash_table lookup */ - cmp r2, #2048 - load_varadr_ext r3, jump_dirty - eor r4, r0, r0, lsl #16 - lslcc r2, r0, #9 - load_varadr_ext r6, hash_table - lsr r4, r4, #12 - lsrcc r2, r2, #21 - bic r4, r4, #15 - ldr r5, [r3, r2, lsl #2] - ldr r7, [r6, r4]! - teq r7, r0 - ldreq pc, [r6, #4] - ldr r7, [r6, #8] - teq r7, r0 - ldreq pc, [r6, #12] - /* jump_dirty lookup */ -6: - movs r4, r5 - beq 8f - ldr r3, [r5] - ldr r5, [r4, #12] - teq r3, r0 - bne 6b -7: - ldr r1, [r4, #8] - /* hash_table insert */ - ldr r2, [r6] - ldr r3, [r6, #4] - str r0, [r6] - str r1, [r6, #4] - str r2, [r6, #8] - str r3, [r6, #12] - mov pc, r1 -8: -#else - /* XXX: should be able to do better than this... */ - bl get_addr_ht - mov pc, r0 -#endif -.endm - - -FUNCTION(dyna_linker): - /* r0 = virtual target address */ - /* r1 = instruction to patch */ - dyna_linker_main - - mov r4, r0 - mov r5, r1 - bl new_recompile_block - tst r0, r0 - mov r0, r4 - mov r1, r5 - beq dyna_linker - /* pagefault */ - mov r1, r0 - mov r2, #8 - .size dyna_linker, .-dyna_linker - -FUNCTION(exec_pagefault): - /* r0 = instruction pointer */ - /* r1 = fault address */ - /* r2 = cause */ - ldr r3, [fp, #LO_reg_cop0+48] /* Status */ - mvn r6, #0xF000000F - ldr r4, [fp, #LO_reg_cop0+16] /* Context */ - bic r6, r6, #0x0F800000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r3, r3, #2 - str r1, [fp, #LO_reg_cop0+32] /* BadVAddr */ - bic r4, r4, r6 - str r3, [fp, #LO_reg_cop0+48] /* Status */ - and r5, r6, r1, lsr #9 - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - and r1, r1, r6, lsl #9 - str r1, [fp, #LO_reg_cop0+40] /* EntryHi */ - orr r4, r4, r5 - str r4, [fp, #LO_reg_cop0+16] /* Context */ - mov r0, #0x80000000 - bl get_addr_ht - mov pc, r0 - .size exec_pagefault, .-exec_pagefault - -/* Special dynamic linker for the case where a page fault - may occur in a branch delay slot */ -FUNCTION(dyna_linker_ds): - /* r0 = virtual target address */ - /* r1 = instruction to patch */ - dyna_linker_main - - mov r4, r0 - bic r0, r0, #7 - mov r5, r1 - orr r0, r0, #1 - bl new_recompile_block - tst r0, r0 - mov r0, r4 - mov r1, r5 - beq dyna_linker_ds - /* pagefault */ - bic r1, r0, #7 - mov r2, #0x80000008 /* High bit set indicates pagefault in delay slot */ - sub r0, r1, #4 - b exec_pagefault - .size dyna_linker_ds, .-dyna_linker_ds - - .align 2 - -FUNCTION(jump_vaddr_r0): - eor r2, r0, r0, lsl #16 - b jump_vaddr - .size jump_vaddr_r0, .-jump_vaddr_r0 -FUNCTION(jump_vaddr_r1): - eor r2, r1, r1, lsl #16 - mov r0, r1 - b jump_vaddr - .size jump_vaddr_r1, .-jump_vaddr_r1 -FUNCTION(jump_vaddr_r2): - mov r0, r2 - eor r2, r2, r2, lsl #16 - b jump_vaddr - .size jump_vaddr_r2, .-jump_vaddr_r2 -FUNCTION(jump_vaddr_r3): - eor r2, r3, r3, lsl #16 - mov r0, r3 - b jump_vaddr - .size jump_vaddr_r3, .-jump_vaddr_r3 -FUNCTION(jump_vaddr_r4): - eor r2, r4, r4, lsl #16 - mov r0, r4 - b jump_vaddr - .size jump_vaddr_r4, .-jump_vaddr_r4 -FUNCTION(jump_vaddr_r5): - eor r2, r5, r5, lsl #16 - mov r0, r5 - b jump_vaddr - .size jump_vaddr_r5, .-jump_vaddr_r5 -FUNCTION(jump_vaddr_r6): - eor r2, r6, r6, lsl #16 - mov r0, r6 - b jump_vaddr - .size jump_vaddr_r6, .-jump_vaddr_r6 -FUNCTION(jump_vaddr_r8): - eor r2, r8, r8, lsl #16 - mov r0, r8 - b jump_vaddr - .size jump_vaddr_r8, .-jump_vaddr_r8 -FUNCTION(jump_vaddr_r9): - eor r2, r9, r9, lsl #16 - mov r0, r9 - b jump_vaddr - .size jump_vaddr_r9, .-jump_vaddr_r9 -FUNCTION(jump_vaddr_r10): - eor r2, r10, r10, lsl #16 - mov r0, r10 - b jump_vaddr - .size jump_vaddr_r10, .-jump_vaddr_r10 -FUNCTION(jump_vaddr_r12): - eor r2, r12, r12, lsl #16 - mov r0, r12 - b jump_vaddr - .size jump_vaddr_r12, .-jump_vaddr_r12 -FUNCTION(jump_vaddr_r7): - eor r2, r7, r7, lsl #16 - add r0, r7, #0 - .size jump_vaddr_r7, .-jump_vaddr_r7 -FUNCTION(jump_vaddr): - load_varadr_ext r1, hash_table - mvn r3, #15 - and r2, r3, r2, lsr #12 - ldr r2, [r1, r2]! - teq r2, r0 - ldreq pc, [r1, #4] - ldr r2, [r1, #8] - teq r2, r0 - ldreq pc, [r1, #12] - str r10, [fp, #LO_cycle_count] - bl get_addr - ldr r10, [fp, #LO_cycle_count] - mov pc, r0 - .size jump_vaddr, .-jump_vaddr - - .align 2 - -FUNCTION(verify_code_ds): - str r8, [fp, #LO_branch_target] -FUNCTION(verify_code_vm): -FUNCTION(verify_code): - /* r1 = source */ - /* r2 = target */ - /* r3 = length */ - tst r3, #4 - mov r4, #0 - add r3, r1, r3 - mov r5, #0 - ldrne r4, [r1], #4 - mov r12, #0 - ldrne r5, [r2], #4 - teq r1, r3 - beq .D3 -.D2: - ldr r7, [r1], #4 - eor r9, r4, r5 - ldr r8, [r2], #4 - orrs r9, r9, r12 - bne .D4 - ldr r4, [r1], #4 - eor r12, r7, r8 - ldr r5, [r2], #4 - cmp r1, r3 - bcc .D2 - teq r7, r8 -.D3: - teqeq r4, r5 -.D4: - ldr r8, [fp, #LO_branch_target] - moveq pc, lr -.D5: - bl get_addr - mov pc, r0 - .size verify_code, .-verify_code - .size verify_code_vm, .-verify_code_vm - - .align 2 -FUNCTION(cc_interrupt): - ldr r0, [fp, #LO_last_count] - mov r1, #0 - mov r2, #0x1fc - add r10, r0, r10 - str r1, [fp, #LO_pending_exception] - and r2, r2, r10, lsr #17 - add r3, fp, #LO_restore_candidate - str r10, [fp, #LO_cycle] /* PCSX cycles */ -@@ str r10, [fp, #LO_reg_cop0+36] /* Count */ - ldr r4, [r2, r3] - mov r10, lr - tst r4, r4 - bne .E4 -.E1: - bl gen_interupt - mov lr, r10 - ldr r10, [fp, #LO_cycle] - ldr r0, [fp, #LO_next_interupt] - ldr r1, [fp, #LO_pending_exception] - ldr r2, [fp, #LO_stop] - str r0, [fp, #LO_last_count] - sub r10, r10, r0 - tst r2, r2 - ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} - tst r1, r1 - moveq pc, lr -.E2: - ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht - mov pc, r0 -.E4: - /* Move 'dirty' blocks to the 'clean' list */ - lsl r5, r2, #3 - str r1, [r2, r3] -.E5: - lsrs r4, r4, #1 - mov r0, r5 - add r5, r5, #1 - blcs clean_blocks - tst r5, #31 - bne .E5 - b .E1 - .size cc_interrupt, .-cc_interrupt - - .align 2 -FUNCTION(do_interrupt): - ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht - add r10, r10, #2 - mov pc, r0 - .size do_interrupt, .-do_interrupt - - .align 2 -FUNCTION(fp_exception): - mov r2, #0x10000000 -.E7: - ldr r1, [fp, #LO_reg_cop0+48] /* Status */ - mov r3, #0x80000000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r1, #2 - add r2, r2, #0x2c - str r1, [fp, #LO_reg_cop0+48] /* Status */ - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - add r0, r3, #0x80 - bl get_addr_ht - mov pc, r0 - .size fp_exception, .-fp_exception - .align 2 -FUNCTION(fp_exception_ds): - mov r2, #0x90000000 /* Set high bit if delay slot */ - b .E7 - .size fp_exception_ds, .-fp_exception_ds - - .align 2 -FUNCTION(jump_syscall): - ldr r1, [fp, #LO_reg_cop0+48] /* Status */ - mov r3, #0x80000000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r1, #2 - mov r2, #0x20 - str r1, [fp, #LO_reg_cop0+48] /* Status */ - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - add r0, r3, #0x80 - bl get_addr_ht - mov pc, r0 - .size jump_syscall, .-jump_syscall - .align 2 - - .align 2 -FUNCTION(jump_syscall_hle): - str r0, [fp, #LO_pcaddr] /* PC must be set to EPC for psxException */ - ldr r2, [fp, #LO_last_count] - mov r1, #0 /* in delay slot */ - add r2, r2, r10 - mov r0, #0x20 /* cause */ - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - bl psxException - - /* note: psxException might do recursive recompiler call from it's HLE code, - * so be ready for this */ -pcsx_return: - ldr r1, [fp, #LO_next_interupt] - ldr r10, [fp, #LO_cycle] - ldr r0, [fp, #LO_pcaddr] - sub r10, r10, r1 - str r1, [fp, #LO_last_count] - bl get_addr_ht - mov pc, r0 - .size jump_syscall_hle, .-jump_syscall_hle - - .align 2 -FUNCTION(jump_hlecall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - bx r1 - .size jump_hlecall, .-jump_hlecall - - .align 2 -FUNCTION(jump_intcall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - b execI - .size jump_hlecall, .-jump_hlecall - - .align 2 -FUNCTION(new_dyna_leave): - ldr r0, [fp, #LO_last_count] - add r12, fp, #28 - add r10, r0, r10 - str r10, [fp, #LO_cycle] - ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} - .size new_dyna_leave, .-new_dyna_leave - - .align 2 -FUNCTION(invalidate_addr_r0): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - b invalidate_addr_call - .size invalidate_addr_r0, .-invalidate_addr_r0 - .align 2 -FUNCTION(invalidate_addr_r1): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r1 - b invalidate_addr_call - .size invalidate_addr_r1, .-invalidate_addr_r1 - .align 2 -FUNCTION(invalidate_addr_r2): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r2 - b invalidate_addr_call - .size invalidate_addr_r2, .-invalidate_addr_r2 - .align 2 -FUNCTION(invalidate_addr_r3): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r3 - b invalidate_addr_call - .size invalidate_addr_r3, .-invalidate_addr_r3 - .align 2 -FUNCTION(invalidate_addr_r4): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r4 - b invalidate_addr_call - .size invalidate_addr_r4, .-invalidate_addr_r4 - .align 2 -FUNCTION(invalidate_addr_r5): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r5 - b invalidate_addr_call - .size invalidate_addr_r5, .-invalidate_addr_r5 - .align 2 -FUNCTION(invalidate_addr_r6): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r6 - b invalidate_addr_call - .size invalidate_addr_r6, .-invalidate_addr_r6 - .align 2 -FUNCTION(invalidate_addr_r7): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r7 - b invalidate_addr_call - .size invalidate_addr_r7, .-invalidate_addr_r7 - .align 2 -FUNCTION(invalidate_addr_r8): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r8 - b invalidate_addr_call - .size invalidate_addr_r8, .-invalidate_addr_r8 - .align 2 -FUNCTION(invalidate_addr_r9): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r9 - b invalidate_addr_call - .size invalidate_addr_r9, .-invalidate_addr_r9 - .align 2 -FUNCTION(invalidate_addr_r10): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r10 - b invalidate_addr_call - .size invalidate_addr_r10, .-invalidate_addr_r10 - .align 2 -FUNCTION(invalidate_addr_r12): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r12 - .size invalidate_addr_r12, .-invalidate_addr_r12 - .align 2 -invalidate_addr_call: - ldr r12, [fp, #LO_inv_code_start] - ldr lr, [fp, #LO_inv_code_end] - cmp r0, r12 - cmpcs lr, r0 - blcc invalidate_addr - ldmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, pc} - .size invalidate_addr_call, .-invalidate_addr_call - - .align 2 -FUNCTION(new_dyna_start): - /* ip is stored to conform EABI alignment */ - stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} - load_varadr fp, dynarec_local - ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht - ldr r1, [fp, #LO_next_interupt] - ldr r10, [fp, #LO_cycle] - str r1, [fp, #LO_last_count] - sub r10, r10, r1 - mov pc, r0 - .size new_dyna_start, .-new_dyna_start - -/* --------------------------------------- */ - -.align 2 - -.macro pcsx_read_mem readop tab_shift - /* r0 = address, r1 = handler_tab, r2 = cycles */ - lsl r3, r0, #20 - lsr r3, #(20+\tab_shift) - ldr r12, [fp, #LO_last_count] - ldr r1, [r1, r3, lsl #2] - add r2, r2, r12 - lsls r1, #1 -.if \tab_shift == 1 - lsl r3, #1 - \readop r0, [r1, r3] -.else - \readop r0, [r1, r3, lsl #\tab_shift] -.endif - movcc pc, lr - str r2, [fp, #LO_cycle] - bx r1 -.endm - -FUNCTION(jump_handler_read8): - add r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part - pcsx_read_mem ldrbcc, 0 - -FUNCTION(jump_handler_read16): - add r1, #0x1000/4*4 @ shift to r16 part - pcsx_read_mem ldrhcc, 1 - -FUNCTION(jump_handler_read32): - pcsx_read_mem ldrcc, 2 - - -.macro pcsx_write_mem wrtop tab_shift - /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */ - lsl r12,r0, #20 - lsr r12, #(20+\tab_shift) - ldr r3, [r3, r12, lsl #2] - str r0, [fp, #LO_address] @ some handlers still need it.. - lsls r3, #1 - mov r0, r2 @ cycle return in case of direct store -.if \tab_shift == 1 - lsl r12, #1 - \wrtop r1, [r3, r12] -.else - \wrtop r1, [r3, r12, lsl #\tab_shift] -.endif - movcc pc, lr - ldr r12, [fp, #LO_last_count] - mov r0, r1 - add r2, r2, r12 - push {r2, lr} - str r2, [fp, #LO_cycle] - blx r3 - - ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} - str r0, [fp, #LO_last_count] - sub r0, r2, r0 - bx r3 -.endm - -FUNCTION(jump_handler_write8): - add r3, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part - pcsx_write_mem strbcc, 0 - -FUNCTION(jump_handler_write16): - add r3, #0x1000/4*4 @ shift to r16 part - pcsx_write_mem strhcc, 1 - -FUNCTION(jump_handler_write32): - pcsx_write_mem strcc, 2 - -FUNCTION(jump_handler_write_h): - /* r0 = address, r1 = data, r2 = cycles, r3 = handler */ - ldr r12, [fp, #LO_last_count] - str r0, [fp, #LO_address] @ some handlers still need it.. - add r2, r2, r12 - mov r0, r1 - push {r2, lr} - str r2, [fp, #LO_cycle] - blx r3 - - ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} - str r0, [fp, #LO_last_count] - sub r0, r2, r0 - bx r3 - -FUNCTION(jump_handle_swl): - /* r0 = address, r1 = data, r2 = cycles */ - ldr r3, [fp, #LO_mem_wtab] - mov r12,r0,lsr #12 - ldr r3, [r3, r12, lsl #2] - lsls r3, #1 - bcs 4f - add r3, r0, r3 - mov r0, r2 - tst r3, #2 - beq 101f - tst r3, #1 - beq 2f -3: - str r1, [r3, #-3] - bx lr -2: - lsr r2, r1, #8 - lsr r1, #24 - strh r2, [r3, #-2] - strb r1, [r3] - bx lr -101: - tst r3, #1 - lsrne r1, #16 @ 1 - lsreq r12, r1, #24 @ 0 - strhne r1, [r3, #-1] - strbeq r12, [r3] - bx lr -4: - mov r0, r2 -@ b abort - bx lr @ TODO? - - -FUNCTION(jump_handle_swr): - /* r0 = address, r1 = data, r2 = cycles */ - ldr r3, [fp, #LO_mem_wtab] - mov r12,r0,lsr #12 - ldr r3, [r3, r12, lsl #2] - lsls r3, #1 - bcs 4f - add r3, r0, r3 - and r12,r3, #3 - mov r0, r2 - cmp r12,#2 - strbgt r1, [r3] @ 3 - strheq r1, [r3] @ 2 - cmp r12,#1 - strlt r1, [r3] @ 0 - bxne lr - lsr r2, r1, #8 @ 1 - strb r1, [r3] - strh r2, [r3, #1] - bx lr -4: - mov r0, r2 -@ b abort - bx lr @ TODO? - - -.macro rcntx_read_mode0 num - /* r0 = address, r2 = cycles */ - ldr r3, [fp, #LO_rcnts+6*4+7*4*\num] @ cycleStart - mov r0, r2, lsl #16 - sub r0, r0, r3, lsl #16 - lsr r0, #16 - bx lr -.endm - -FUNCTION(rcnt0_read_count_m0): - rcntx_read_mode0 0 - -FUNCTION(rcnt1_read_count_m0): - rcntx_read_mode0 1 - -FUNCTION(rcnt2_read_count_m0): - rcntx_read_mode0 2 - -FUNCTION(rcnt0_read_count_m1): - /* r0 = address, r2 = cycles */ - ldr r3, [fp, #LO_rcnts+6*4+7*4*0] @ cycleStart - mov_16 r1, 0x3334 - sub r2, r2, r3 - mul r0, r1, r2 @ /= 5 - lsr r0, #16 - bx lr - -FUNCTION(rcnt1_read_count_m1): - /* r0 = address, r2 = cycles */ - ldr r3, [fp, #LO_rcnts+6*4+7*4*1] - mov_24 r1, 0x1e6cde - sub r2, r2, r3 - umull r3, r0, r1, r2 @ ~ /= hsync_cycles, max ~0x1e6cdd - bx lr - -FUNCTION(rcnt2_read_count_m1): - /* r0 = address, r2 = cycles */ - ldr r3, [fp, #LO_rcnts+6*4+7*4*2] - mov r0, r2, lsl #16-3 - sub r0, r0, r3, lsl #16-3 - lsr r0, #16 @ /= 8 - bx lr - -@ vim:filetype=armasm diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h deleted file mode 100644 index f7e1911..0000000 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ /dev/null @@ -1,41 +0,0 @@ - -#define LO_next_interupt 64 -#define LO_cycle_count (LO_next_interupt + 4) -#define LO_last_count (LO_cycle_count + 4) -#define LO_pending_exception (LO_last_count + 4) -#define LO_stop (LO_pending_exception + 4) -#define LO_invc_ptr (LO_stop + 4) -#define LO_address (LO_invc_ptr + 4) -#define LO_psxRegs (LO_address + 4) -#define LO_reg (LO_psxRegs) -#define LO_lo (LO_reg + 128) -#define LO_hi (LO_lo + 4) -#define LO_reg_cop0 (LO_hi + 4) -#define LO_reg_cop2d (LO_reg_cop0 + 128) -#define LO_reg_cop2c (LO_reg_cop2d + 128) -#define LO_PC (LO_reg_cop2c + 128) -#define LO_pcaddr (LO_PC) -#define LO_code (LO_PC + 4) -#define LO_cycle (LO_code + 4) -#define LO_interrupt (LO_cycle + 4) -#define LO_intCycle (LO_interrupt + 4) -#define LO_psxRegs_end (LO_intCycle + 256) -#define LO_rcnts (LO_psxRegs_end) -#define LO_rcnts_end (LO_rcnts + 7*4*4) -#define LO_mem_rtab (LO_rcnts_end) -#define LO_mem_wtab (LO_mem_rtab + 4) -#define LO_psxH_ptr (LO_mem_wtab + 4) -#define LO_zeromem_ptr (LO_psxH_ptr + 4) -#define LO_inv_code_start (LO_zeromem_ptr + 4) -#define LO_inv_code_end (LO_inv_code_start + 4) -#define LO_branch_target (LO_inv_code_end + 4) -#define LO_scratch_buf_ptr (LO_branch_target + 4) -#define LO_align0 (LO_scratch_buf_ptr + 4) -#define LO_mini_ht (LO_align0 + 12) -#define LO_restore_candidate (LO_mini_ht + 256) -#define LO_dynarec_local_size (LO_restore_candidate + 512) - -#define LO_FCR0 (LO_align0) -#define LO_FCR31 (LO_align0) - -#define LO_cop2_to_scratch_buf (LO_scratch_buf_ptr - LO_reg_cop2d) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index d8c2372..02f335c 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -36,7 +36,7 @@ int getVMBlock(); #endif #include "new_dynarec_config.h" -#include "emu_if.h" //emulator interface +#include "backends/psx/emu_if.h" //emulator interface //#define DISASM //#define assem_debug printf @@ -51,7 +51,7 @@ int getVMBlock(); #include "assem_x64.h" #endif #ifdef __arm__ -#include "assem_arm.h" +#include "arm/assem_arm.h" #endif #ifdef VITA @@ -774,7 +774,7 @@ void alloc_all(struct regstat *cur,int i) #include "assem_x64.c" #endif #ifdef __arm__ -#include "assem_arm.c" +#include "arm/assem_arm.c" #endif // Add virtual address mapping to linked list diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c deleted file mode 100644 index 9376ff4..0000000 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ /dev/null @@ -1,494 +0,0 @@ -/* - * (C) Gražvydas "notaz" Ignotas, 2010-2011 - * - * This work is licensed under the terms of GNU GPL version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include -#include "../psxhw.h" -#include "../cdrom.h" -#include "../mdec.h" -#include "../gpu.h" -#include "../psxmem_map.h" -#include "emu_if.h" -#include "pcsxmem.h" - -#ifdef __thumb__ -#error the dynarec is incompatible with Thumb functions, -#error please add -marm to compile flags -#endif - -//#define memprintf printf -#define memprintf(...) - -static u32 *mem_readtab; -static u32 *mem_writetab; -static u32 mem_iortab[(1+2+4) * 0x1000 / 4]; -static u32 mem_iowtab[(1+2+4) * 0x1000 / 4]; -static u32 mem_ffwtab[(1+2+4) * 0x1000 / 4]; -//static u32 mem_unmrtab[(1+2+4) * 0x1000 / 4]; -static u32 mem_unmwtab[(1+2+4) * 0x1000 / 4]; - -// When this is called in a loop, and 'h' is a function pointer, clang will crash. -#ifdef __clang__ -static __attribute__ ((noinline)) void map_item(u32 *out, const void *h, u32 flag) -#else -static void map_item(u32 *out, const void *h, u32 flag) -#endif -{ - u32 hv = (u32)h; - if (hv & 1) { - SysPrintf("FATAL: %p has LSB set\n", h); - abort(); - } - *out = (hv >> 1) | (flag << 31); -} - -// size must be power of 2, at least 4k -#define map_l1_mem(tab, i, addr, size, base) \ - map_item(&tab[((addr)>>12) + i], (u8 *)(base) - (u32)(addr) - ((i << 12) & ~(size - 1)), 0) - -#define IOMEM32(a) (((a) & 0xfff) / 4) -#define IOMEM16(a) (0x1000/4 + (((a) & 0xfff) / 2)) -#define IOMEM8(a) (0x1000/4 + 0x1000/2 + ((a) & 0xfff)) - -u8 zero_mem[0x1000]; - -u32 read_mem_dummy() -{ - return 0; -} - -static void write_mem_dummy(u32 data) -{ - memprintf("unmapped w %08x, %08x @%08x %u\n", address, data, psxRegs.pc, psxRegs.cycle); -} - -/* IO handlers */ -static u32 io_read_sio16() -{ - return sioRead8() | (sioRead8() << 8); -} - -static u32 io_read_sio32() -{ - return sioRead8() | (sioRead8() << 8) | (sioRead8() << 16) | (sioRead8() << 24); -} - -static void io_write_sio16(u32 value) -{ - sioWrite8((unsigned char)value); - sioWrite8((unsigned char)(value>>8)); -} - -static void io_write_sio32(u32 value) -{ - sioWrite8((unsigned char)value); - sioWrite8((unsigned char)(value >> 8)); - sioWrite8((unsigned char)(value >> 16)); - sioWrite8((unsigned char)(value >> 24)); -} - -#ifndef DRC_DBG - -static void map_rcnt_rcount0(u32 mode) -{ - if (mode & 0x100) { // pixel clock - map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m1, 1); - map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m1, 1); - } - else { - map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m0, 1); - map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m0, 1); - } -} - -static void map_rcnt_rcount1(u32 mode) -{ - if (mode & 0x100) { // hcnt - map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m1, 1); - map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m1, 1); - } - else { - map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m0, 1); - map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m0, 1); - } -} - -static void map_rcnt_rcount2(u32 mode) -{ - if (mode & 0x01) { // gate - map_item(&mem_iortab[IOMEM32(0x1120)], &psxH[0x1000], 0); - map_item(&mem_iortab[IOMEM16(0x1120)], &psxH[0x1000], 0); - } - else if (mode & 0x200) { // clk/8 - map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m1, 1); - map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m1, 1); - } - else { - map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m0, 1); - map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m0, 1); - } -} - -#else -#define map_rcnt_rcount0(mode) -#define map_rcnt_rcount1(mode) -#define map_rcnt_rcount2(mode) -#endif - -#define make_rcnt_funcs(i) \ -static u32 io_rcnt_read_count##i() { return psxRcntRcount(i); } \ -static u32 io_rcnt_read_mode##i() { return psxRcntRmode(i); } \ -static u32 io_rcnt_read_target##i() { return psxRcntRtarget(i); } \ -static void io_rcnt_write_count##i(u32 val) { psxRcntWcount(i, val & 0xffff); } \ -static void io_rcnt_write_mode##i(u32 val) { psxRcntWmode(i, val); map_rcnt_rcount##i(val); } \ -static void io_rcnt_write_target##i(u32 val) { psxRcntWtarget(i, val & 0xffff); } - -make_rcnt_funcs(0) -make_rcnt_funcs(1) -make_rcnt_funcs(2) - -static void io_write_ireg16(u32 value) -{ - //if (Config.Sio) psxHu16ref(0x1070) |= 0x80; - if (Config.SpuIrq) psxHu16ref(0x1070) |= 0x200; - psxHu16ref(0x1070) &= value; -} - -static void io_write_imask16(u32 value) -{ - psxHu16ref(0x1074) = value; - if (psxHu16ref(0x1070) & value) - new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); -} - -static void io_write_ireg32(u32 value) -{ - //if (Config.Sio) psxHu32ref(0x1070) |= 0x80; - if (Config.SpuIrq) psxHu32ref(0x1070) |= 0x200; - psxHu32ref(0x1070) &= value; -} - -static void io_write_imask32(u32 value) -{ - psxHu32ref(0x1074) = value; - if (psxHu32ref(0x1070) & value) - new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); -} - -static void io_write_dma_icr32(u32 value) -{ - u32 tmp = value & 0x00ff803f; - tmp |= (SWAPu32(HW_DMA_ICR) & ~value) & 0x7f000000; - if ((tmp & HW_DMA_ICR_GLOBAL_ENABLE && tmp & 0x7f000000) - || tmp & HW_DMA_ICR_BUS_ERROR) { - if (!(SWAPu32(HW_DMA_ICR) & HW_DMA_ICR_IRQ_SENT)) - psxHu32ref(0x1070) |= SWAP32(8); - tmp |= HW_DMA_ICR_IRQ_SENT; - } - HW_DMA_ICR = SWAPu32(tmp); -} - -#define make_dma_func(n) \ -static void io_write_chcr##n(u32 value) \ -{ \ - HW_DMA##n##_CHCR = value; \ - if (value & 0x01000000 && HW_DMA_PCR & (8 << (n * 4))) { \ - psxDma##n(HW_DMA##n##_MADR, HW_DMA##n##_BCR, value); \ - } \ -} - -make_dma_func(0) -make_dma_func(1) -make_dma_func(2) -make_dma_func(3) -make_dma_func(4) -make_dma_func(6) - -static void io_spu_write16(u32 value) -{ - // meh - SPU_writeRegister(address, value, psxRegs.cycle); -} - -static void io_spu_write32(u32 value) -{ - SPUwriteRegister wfunc = SPU_writeRegister; - u32 a = address; - - wfunc(a, value & 0xffff, psxRegs.cycle); - wfunc(a + 2, value >> 16, psxRegs.cycle); -} - -static u32 io_gpu_read_status(void) -{ - u32 v; - - // meh2, syncing for img bit, might want to avoid it.. - gpuSyncPluginSR(); - v = HW_GPU_STATUS; - - // XXX: because of large timeslices can't use hSyncCount, using rough - // approximization instead. Perhaps better use hcounter code here or something. - if (hSyncCount < 240 && (HW_GPU_STATUS & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) - v |= PSXGPU_LCF & (psxRegs.cycle << 20); - return v; -} - -static void io_gpu_write_status(u32 value) -{ - GPU_writeStatus(value); - gpuSyncPluginSR(); -} - -static void map_ram_write(void) -{ - int i; - - for (i = 0; i < (0x800000 >> 12); i++) { - map_l1_mem(mem_writetab, i, 0x80000000, 0x200000, psxM); - map_l1_mem(mem_writetab, i, 0x00000000, 0x200000, psxM); - map_l1_mem(mem_writetab, i, 0xa0000000, 0x200000, psxM); - } -} - -static void unmap_ram_write(void) -{ - int i; - - for (i = 0; i < (0x800000 >> 12); i++) { - map_item(&mem_writetab[0x80000|i], mem_unmwtab, 1); - map_item(&mem_writetab[0x00000|i], mem_unmwtab, 1); - map_item(&mem_writetab[0xa0000|i], mem_unmwtab, 1); - } -} - -static void write_biu(u32 value) -{ - memprintf("write_biu %08x, %08x @%08x %u\n", address, value, psxRegs.pc, psxRegs.cycle); - - if (address != 0xfffe0130) - return; - - switch (value) { - case 0x800: case 0x804: - unmap_ram_write(); - break; - case 0: case 0x1e988: - map_ram_write(); - break; - default: - printf("write_biu: unexpected val: %08x\n", value); - break; - } -} - -void new_dyna_pcsx_mem_load_state(void) -{ - map_rcnt_rcount0(rcnts[0].mode); - map_rcnt_rcount1(rcnts[1].mode); - map_rcnt_rcount2(rcnts[2].mode); -} - -int pcsxmem_is_handler_dynamic(unsigned int addr) -{ - if ((addr & 0xfffff000) != 0x1f801000) - return 0; - - addr &= 0xffff; - return addr == 0x1100 || addr == 0x1110 || addr == 0x1120; -} - -void new_dyna_pcsx_mem_init(void) -{ - int i; - - // have to map these further to keep tcache close to .text - mem_readtab = psxMap(0x08000000, 0x200000 * 4, 0, MAP_TAG_LUTS); - if (mem_readtab == NULL) { - SysPrintf("failed to map mem tables\n"); - exit(1); - } - mem_writetab = mem_readtab + 0x100000; - - // 1st level lookup: - // 0: direct mem - // 1: use 2nd lookup - // 2nd level lookup: - // 0: direct mem variable - // 1: memhandler - - // default/unmapped memhandlers - for (i = 0; i < 0x100000; i++) { - //map_item(&mem_readtab[i], mem_unmrtab, 1); - map_l1_mem(mem_readtab, i, 0, 0x1000, zero_mem); - map_item(&mem_writetab[i], mem_unmwtab, 1); - } - - // RAM and it's mirrors - for (i = 0; i < (0x800000 >> 12); i++) { - map_l1_mem(mem_readtab, i, 0x80000000, 0x200000, psxM); - map_l1_mem(mem_readtab, i, 0x00000000, 0x200000, psxM); - map_l1_mem(mem_readtab, i, 0xa0000000, 0x200000, psxM); - } - map_ram_write(); - - // BIOS and it's mirrors - for (i = 0; i < (0x80000 >> 12); i++) { - map_l1_mem(mem_readtab, i, 0x1fc00000, 0x80000, psxR); - map_l1_mem(mem_readtab, i, 0xbfc00000, 0x80000, psxR); - } - - // scratchpad - map_l1_mem(mem_readtab, 0, 0x1f800000, 0x1000, psxH); - map_l1_mem(mem_readtab, 0, 0x9f800000, 0x1000, psxH); - map_l1_mem(mem_writetab, 0, 0x1f800000, 0x1000, psxH); - map_l1_mem(mem_writetab, 0, 0x9f800000, 0x1000, psxH); - - // I/O - map_item(&mem_readtab[0x1f801000 >> 12], mem_iortab, 1); - map_item(&mem_readtab[0x9f801000 >> 12], mem_iortab, 1); - map_item(&mem_readtab[0xbf801000 >> 12], mem_iortab, 1); - map_item(&mem_writetab[0x1f801000 >> 12], mem_iowtab, 1); - map_item(&mem_writetab[0x9f801000 >> 12], mem_iowtab, 1); - map_item(&mem_writetab[0xbf801000 >> 12], mem_iowtab, 1); - - // L2 - // unmapped tables - for (i = 0; i < (1+2+4) * 0x1000 / 4; i++) - map_item(&mem_unmwtab[i], write_mem_dummy, 1); - - // fill IO tables - for (i = 0; i < 0x1000/4; i++) { - map_item(&mem_iortab[i], &psxH[0x1000], 0); - map_item(&mem_iowtab[i], &psxH[0x1000], 0); - } - for (; i < 0x1000/4 + 0x1000/2; i++) { - map_item(&mem_iortab[i], &psxH[0x1000], 0); - map_item(&mem_iowtab[i], &psxH[0x1000], 0); - } - for (; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) { - map_item(&mem_iortab[i], &psxH[0x1000], 0); - map_item(&mem_iowtab[i], &psxH[0x1000], 0); - } - - map_item(&mem_iortab[IOMEM32(0x1040)], io_read_sio32, 1); - map_item(&mem_iortab[IOMEM32(0x1100)], io_rcnt_read_count0, 1); - map_item(&mem_iortab[IOMEM32(0x1104)], io_rcnt_read_mode0, 1); - map_item(&mem_iortab[IOMEM32(0x1108)], io_rcnt_read_target0, 1); - map_item(&mem_iortab[IOMEM32(0x1110)], io_rcnt_read_count1, 1); - map_item(&mem_iortab[IOMEM32(0x1114)], io_rcnt_read_mode1, 1); - map_item(&mem_iortab[IOMEM32(0x1118)], io_rcnt_read_target1, 1); - map_item(&mem_iortab[IOMEM32(0x1120)], io_rcnt_read_count2, 1); - map_item(&mem_iortab[IOMEM32(0x1124)], io_rcnt_read_mode2, 1); - map_item(&mem_iortab[IOMEM32(0x1128)], io_rcnt_read_target2, 1); -// map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); - map_item(&mem_iortab[IOMEM32(0x1814)], io_gpu_read_status, 1); - map_item(&mem_iortab[IOMEM32(0x1820)], mdecRead0, 1); - map_item(&mem_iortab[IOMEM32(0x1824)], mdecRead1, 1); - - map_item(&mem_iortab[IOMEM16(0x1040)], io_read_sio16, 1); - map_item(&mem_iortab[IOMEM16(0x1044)], sioReadStat16, 1); - map_item(&mem_iortab[IOMEM16(0x1048)], sioReadMode16, 1); - map_item(&mem_iortab[IOMEM16(0x104a)], sioReadCtrl16, 1); - map_item(&mem_iortab[IOMEM16(0x104e)], sioReadBaud16, 1); - map_item(&mem_iortab[IOMEM16(0x1100)], io_rcnt_read_count0, 1); - map_item(&mem_iortab[IOMEM16(0x1104)], io_rcnt_read_mode0, 1); - map_item(&mem_iortab[IOMEM16(0x1108)], io_rcnt_read_target0, 1); - map_item(&mem_iortab[IOMEM16(0x1110)], io_rcnt_read_count1, 1); - map_item(&mem_iortab[IOMEM16(0x1114)], io_rcnt_read_mode1, 1); - map_item(&mem_iortab[IOMEM16(0x1118)], io_rcnt_read_target1, 1); - map_item(&mem_iortab[IOMEM16(0x1120)], io_rcnt_read_count2, 1); - map_item(&mem_iortab[IOMEM16(0x1124)], io_rcnt_read_mode2, 1); - map_item(&mem_iortab[IOMEM16(0x1128)], io_rcnt_read_target2, 1); - - map_item(&mem_iortab[IOMEM8(0x1040)], sioRead8, 1); - map_item(&mem_iortab[IOMEM8(0x1800)], cdrRead0, 1); - map_item(&mem_iortab[IOMEM8(0x1801)], cdrRead1, 1); - map_item(&mem_iortab[IOMEM8(0x1802)], cdrRead2, 1); - map_item(&mem_iortab[IOMEM8(0x1803)], cdrRead3, 1); - - // write(u32 data) - map_item(&mem_iowtab[IOMEM32(0x1040)], io_write_sio32, 1); - map_item(&mem_iowtab[IOMEM32(0x1070)], io_write_ireg32, 1); - map_item(&mem_iowtab[IOMEM32(0x1074)], io_write_imask32, 1); - map_item(&mem_iowtab[IOMEM32(0x1088)], io_write_chcr0, 1); - map_item(&mem_iowtab[IOMEM32(0x1098)], io_write_chcr1, 1); - map_item(&mem_iowtab[IOMEM32(0x10a8)], io_write_chcr2, 1); - map_item(&mem_iowtab[IOMEM32(0x10b8)], io_write_chcr3, 1); - map_item(&mem_iowtab[IOMEM32(0x10c8)], io_write_chcr4, 1); - map_item(&mem_iowtab[IOMEM32(0x10e8)], io_write_chcr6, 1); - map_item(&mem_iowtab[IOMEM32(0x10f4)], io_write_dma_icr32, 1); - map_item(&mem_iowtab[IOMEM32(0x1100)], io_rcnt_write_count0, 1); - map_item(&mem_iowtab[IOMEM32(0x1104)], io_rcnt_write_mode0, 1); - map_item(&mem_iowtab[IOMEM32(0x1108)], io_rcnt_write_target0, 1); - map_item(&mem_iowtab[IOMEM32(0x1110)], io_rcnt_write_count1, 1); - map_item(&mem_iowtab[IOMEM32(0x1114)], io_rcnt_write_mode1, 1); - map_item(&mem_iowtab[IOMEM32(0x1118)], io_rcnt_write_target1, 1); - map_item(&mem_iowtab[IOMEM32(0x1120)], io_rcnt_write_count2, 1); - map_item(&mem_iowtab[IOMEM32(0x1124)], io_rcnt_write_mode2, 1); - map_item(&mem_iowtab[IOMEM32(0x1128)], io_rcnt_write_target2, 1); -// map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); - map_item(&mem_iowtab[IOMEM32(0x1814)], io_gpu_write_status, 1); - map_item(&mem_iowtab[IOMEM32(0x1820)], mdecWrite0, 1); - map_item(&mem_iowtab[IOMEM32(0x1824)], mdecWrite1, 1); - - map_item(&mem_iowtab[IOMEM16(0x1040)], io_write_sio16, 1); - map_item(&mem_iowtab[IOMEM16(0x1044)], sioWriteStat16, 1); - map_item(&mem_iowtab[IOMEM16(0x1048)], sioWriteMode16, 1); - map_item(&mem_iowtab[IOMEM16(0x104a)], sioWriteCtrl16, 1); - map_item(&mem_iowtab[IOMEM16(0x104e)], sioWriteBaud16, 1); - map_item(&mem_iowtab[IOMEM16(0x1070)], io_write_ireg16, 1); - map_item(&mem_iowtab[IOMEM16(0x1074)], io_write_imask16, 1); - map_item(&mem_iowtab[IOMEM16(0x1100)], io_rcnt_write_count0, 1); - map_item(&mem_iowtab[IOMEM16(0x1104)], io_rcnt_write_mode0, 1); - map_item(&mem_iowtab[IOMEM16(0x1108)], io_rcnt_write_target0, 1); - map_item(&mem_iowtab[IOMEM16(0x1110)], io_rcnt_write_count1, 1); - map_item(&mem_iowtab[IOMEM16(0x1114)], io_rcnt_write_mode1, 1); - map_item(&mem_iowtab[IOMEM16(0x1118)], io_rcnt_write_target1, 1); - map_item(&mem_iowtab[IOMEM16(0x1120)], io_rcnt_write_count2, 1); - map_item(&mem_iowtab[IOMEM16(0x1124)], io_rcnt_write_mode2, 1); - map_item(&mem_iowtab[IOMEM16(0x1128)], io_rcnt_write_target2, 1); - - map_item(&mem_iowtab[IOMEM8(0x1040)], sioWrite8, 1); - map_item(&mem_iowtab[IOMEM8(0x1800)], cdrWrite0, 1); - map_item(&mem_iowtab[IOMEM8(0x1801)], cdrWrite1, 1); - map_item(&mem_iowtab[IOMEM8(0x1802)], cdrWrite2, 1); - map_item(&mem_iowtab[IOMEM8(0x1803)], cdrWrite3, 1); - - for (i = 0x1c00; i < 0x1e00; i += 2) { - map_item(&mem_iowtab[IOMEM16(i)], io_spu_write16, 1); - map_item(&mem_iowtab[IOMEM32(i)], io_spu_write32, 1); - } - - // misc - map_item(&mem_writetab[0xfffe0130 >> 12], mem_ffwtab, 1); - for (i = 0; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) - map_item(&mem_ffwtab[i], write_biu, 1); - - mem_rtab = mem_readtab; - mem_wtab = mem_writetab; - - new_dyna_pcsx_mem_load_state(); -} - -void new_dyna_pcsx_mem_reset(void) -{ - int i; - - // plugins might change so update the pointers - map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); - - for (i = 0x1c00; i < 0x1e00; i += 2) - map_item(&mem_iortab[IOMEM16(i)], SPU_readRegister, 1); - - map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); -} - -void new_dyna_pcsx_mem_shutdown(void) -{ - psxUnmap(mem_readtab, 0x200000 * 4, MAP_TAG_LUTS); - mem_writetab = mem_readtab = NULL; -} diff --git a/libpcsxcore/new_dynarec/pcsxmem.h b/libpcsxcore/new_dynarec/pcsxmem.h deleted file mode 100644 index 72892a8..0000000 --- a/libpcsxcore/new_dynarec/pcsxmem.h +++ /dev/null @@ -1,9 +0,0 @@ - -extern u8 zero_mem[0x1000]; - -void new_dyna_pcsx_mem_init(void); -void new_dyna_pcsx_mem_reset(void); -void new_dyna_pcsx_mem_load_state(void); -void new_dyna_pcsx_mem_shutdown(void); - -int pcsxmem_is_handler_dynamic(unsigned int addr); diff --git a/libpcsxcore/new_dynarec/pcsxmem_inline.c b/libpcsxcore/new_dynarec/pcsxmem_inline.c deleted file mode 100644 index 305931a..0000000 --- a/libpcsxcore/new_dynarec/pcsxmem_inline.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * (C) Gražvydas "notaz" Ignotas, 2011 - * - * This work is licensed under the terms of GNU GPL version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#ifndef DRC_DBG - -static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, int rt) -{ - if ((addr & 0xfffff000) == 0x1f801000) { - u_int t; - switch (addr & 0xffff) { - case 0x1120: // rcnt2 count - if (rt < 0) goto dont_care; - if (cc < 0) return 0; - emit_readword((int)&rcnts[2].mode, HOST_TEMPREG); - emit_readword((int)&rcnts[2].cycleStart, rt); - emit_testimm(HOST_TEMPREG, 0x200); - emit_readword((int)&last_count, HOST_TEMPREG); - emit_sub(HOST_TEMPREG, rt, HOST_TEMPREG); - emit_add(HOST_TEMPREG, cc, HOST_TEMPREG); - if (cc_adj) - emit_addimm(HOST_TEMPREG, cc_adj, rt); - emit_shrne_imm(rt, 3, rt); - mov_loadtype_adj(type!=LOADW_STUB?type:LOADH_STUB, rt, rt); - goto hit; - case 0x1104: - case 0x1114: - case 0x1124: // rcnt mode - if (rt < 0) return 0; - t = (addr >> 4) & 3; - emit_readword((int)&rcnts[t].mode, rt); - emit_andimm(rt, ~0x1800, HOST_TEMPREG); - emit_writeword(HOST_TEMPREG, (int)&rcnts[t].mode); - mov_loadtype_adj(type, rt, rt); - goto hit; - } - } - else { - if (rt < 0) - goto dont_care; - } - - return 0; - -hit: - assem_debug("pcsx_direct_read %08x end\n", addr); - return 1; - -dont_care: - assem_debug("pcsx_direct_read %08x dummy\n", addr); - return 1; -} - -#else - -static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, int rt) -{ - return 0; -} - -#endif - -// vim:shiftwidth=2:expandtab -- cgit v1.2.3 From d404093f31d5cc0a60aa8d32389e4d10be303204 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Tue, 27 Sep 2016 22:54:15 +0200 Subject: Update new_dynarec - working towards a common shared ari64 codebase for both mupen64plus and pcsx rearmed - see libretro/ari64 --- libpcsxcore/new_dynarec/new_dynarec.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 02f335c..588eaaa 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -45,10 +45,10 @@ int getVMBlock(); #define inv_debug(...) #ifdef __i386__ -#include "assem_x86.h" +#include "x86/assem_x86.h" #endif #ifdef __x86_64__ -#include "assem_x64.h" +#include "x64/assem_x64.h" #endif #ifdef __arm__ #include "arm/assem_arm.h" @@ -768,10 +768,10 @@ void alloc_all(struct regstat *cur,int i) } #ifdef __i386__ -#include "assem_x86.c" +#include "x86/assem_x86.c" #endif #ifdef __x86_64__ -#include "assem_x64.c" +#include "x64/assem_x64.c" #endif #ifdef __arm__ #include "arm/assem_arm.c" @@ -1700,7 +1700,8 @@ void syscall_alloc(struct regstat *current,int i) void delayslot_alloc(struct regstat *current,int i) { - switch(itype[i]) { + switch(itype[i]) + { case UJUMP: case CJUMP: case SJUMP: @@ -1850,7 +1851,8 @@ void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32 } } -int mchecksum() +#if 0 +static int mchecksum(void) { //if(!tracedebug) return 0; int i; @@ -1863,7 +1865,8 @@ int mchecksum() } return sum; } -int rchecksum() + +static int rchecksum(void) { int i; int sum=0; @@ -1871,7 +1874,8 @@ int rchecksum() sum^=((u_int *)reg)[i]; return sum; } -void rlist() + +static void rlist(void) { int i; printf("TRACE: "); @@ -1880,12 +1884,12 @@ void rlist() printf("\n"); } -void enabletrace() +static void enabletrace(void) { tracedebug=1; } -void memdebug(int i) +static void memdebug(int i) { //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]); //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum()); @@ -1910,6 +1914,7 @@ void memdebug(int i) } //printf("TRACE: %x\n",(&i)[-1]); } +#endif void alu_assemble(int i,struct regstat *i_regs) { -- cgit v1.2.3 From 92d7982639bc07d3d6e9467427eb2db8c3fee29d Mon Sep 17 00:00:00 2001 From: twinaphex Date: Wed, 28 Sep 2016 02:15:37 +0200 Subject: (new_dynarec) Update --- libpcsxcore/new_dynarec/new_dynarec.c | 18 +++++++++++------- libpcsxcore/new_dynarec/new_dynarec.h | 10 +++++----- 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 588eaaa..3083e83 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1055,19 +1055,23 @@ void invalidate_addr(u_int addr) // This is called when loading a save state. // Anything could have changed, so invalidate everything. -void invalidate_all_pages() +void invalidate_all_pages(void) { u_int page; for(page=0;page<4096;page++) invalidate_page(page); for(page=0;page<1048576;page++) - if(!invalid_code[page]) { + { + if(!invalid_code[page]) + { restore_candidate[(page&2047)>>3]|=1<<(page&7); restore_candidate[((page&2047)>>3)+256]|=1<<(page&7); } - #ifdef USE_MINI_HT + } + +#ifdef USE_MINI_HT memset(mini_ht,-1,sizeof(mini_ht)); - #endif +#endif } // Add an entry to jump_out after making a link @@ -7026,7 +7030,7 @@ static int new_dynarec_test(void) // clear the state completely, instead of just marking // things invalid like invalidate_all_pages() does -void new_dynarec_clear_full() +void new_dynarec_clear_full(void) { int n; out=(u_char *)BASE_ADDR; @@ -7047,7 +7051,7 @@ void new_dynarec_clear_full() for(n=0;n<4096;n++) ll_clear(jump_dirty+n); } -void new_dynarec_init() +void new_dynarec_init(void) { SysPrintf("Init new dynarec\n"); @@ -7103,7 +7107,7 @@ void new_dynarec_init() SysPrintf("warning: RAM is not directly mapped, performance will suffer\n"); } -void new_dynarec_cleanup() +void new_dynarec_cleanup(void) { int n; #if defined(BASE_ADDR_FIXED) || defined(BASE_ADDR_DYNAMIC) diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index ddc84a5..e7eb247 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -11,12 +11,12 @@ extern int cycle_multiplier; // 100 for 1.0 #define NDHACK_GTE_NO_FLAGS (1<<2) extern int new_dynarec_hacks; -void new_dynarec_init(); -void new_dynarec_cleanup(); -void new_dynarec_clear_full(); -void new_dyna_start(); +void new_dynarec_init(void); +void new_dynarec_cleanup(void); +void new_dynarec_clear_full(void); +void new_dyna_start(void); int new_dynarec_save_blocks(void *save, int size); void new_dynarec_load_blocks(const void *save, int size); -void invalidate_all_pages(); +void invalidate_all_pages(void); void invalidate_block(unsigned int block); -- cgit v1.2.3 From 0bfdd1aadcf7674776186d7cb81d802296a4d96a Mon Sep 17 00:00:00 2001 From: twinaphex Date: Wed, 28 Sep 2016 03:03:19 +0200 Subject: (new_dynarec) Cleanups --- libpcsxcore/new_dynarec/new_dynarec.c | 191 ++++++++++++++++++++-------------- 1 file changed, 115 insertions(+), 76 deletions(-) (limited to 'libpcsxcore/new_dynarec') diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 3083e83..059730a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -366,14 +366,16 @@ static u_int get_vpage(u_int vaddr) // This is called from the recompiled JR/JALR instructions void *get_addr(u_int vaddr) { - u_int page=get_page(vaddr); - u_int vpage=get_vpage(vaddr); - struct ll_entry *head; + struct ll_entry *head = NULL; + u_int page = get_page(vaddr); + u_int vpage = get_vpage(vaddr); //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page); head=jump_in[page]; - while(head!=NULL) { - if(head->vaddr==vaddr) { - //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); + while(head!=NULL) + { + if(head->vaddr==vaddr) + { + //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; ht_bin[3]=ht_bin[1]; ht_bin[2]=ht_bin[0]; @@ -384,39 +386,47 @@ void *get_addr(u_int vaddr) head=head->next; } head=jump_dirty[vpage]; - while(head!=NULL) { - if(head->vaddr==vaddr) { + while(head!=NULL) + { + if(head->vaddr==vaddr) + { //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); // Don't restore blocks which are about to expire from the cache if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) - if(verify_dirty(head->addr)) { - //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); - invalid_code[vaddr>>12]=0; - inv_code_start=inv_code_end=~0; - if(vpage<2048) { - restore_candidate[vpage>>3]|=1<<(vpage&7); - } - else restore_candidate[page>>3]|=1<<(page&7); - u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; - if(ht_bin[0]==vaddr) { - ht_bin[1]=(u_int)head->addr; // Replace existing entry - } - else + if(verify_dirty(head->addr)) { - ht_bin[3]=ht_bin[1]; - ht_bin[2]=ht_bin[0]; - ht_bin[1]=(int)head->addr; - ht_bin[0]=vaddr; + //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); + invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; + if(vpage<2048) + { + restore_candidate[vpage>>3]|=1<<(vpage&7); + } + else + { + restore_candidate[page>>3]|=1<<(page&7); + } + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + + if(ht_bin[0]==vaddr) + ht_bin[1]=(u_int)head->addr; // Replace existing entry + else + { + ht_bin[3]=ht_bin[1]; + ht_bin[2]=ht_bin[0]; + ht_bin[1]=(int)head->addr; + ht_bin[0]=vaddr; + } + return head->addr; } - return head->addr; - } } head=head->next; } //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr); int r=new_recompile_block(vaddr); - if(r==0) return get_addr(vaddr); - // Execute in unmapped page, generate pagefault execption + if(r==0) + return get_addr(vaddr); + // Execute in unmapped page, generate pagefault exception Status|=2; Cause=(vaddr<<31)|0x8; EPC=(vaddr&1)?vaddr-5:vaddr; @@ -425,6 +435,7 @@ void *get_addr(u_int vaddr) EntryHi=BadVAddr&0xFFFFE000; return get_addr_ht(0x80000000); } + // Look up address in hash table first void *get_addr_ht(u_int vaddr) { @@ -948,23 +959,26 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages) assert(last2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision + if(vpage>2047||(head->vaddr>>12)==block) + { // Ignore vaddr hash collision get_bounds((int)head->addr,&start,&end); //printf("start: %x end: %x\n",start,end); - if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) { - if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { + if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) + { + if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) + { if((((start-(u_int)rdram)>>12)&2047)>12)&2047; if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; } @@ -1096,37 +1113,48 @@ void clean_blocks(u_int page) struct ll_entry *head; inv_debug("INV: clean_blocks page=%d\n",page); head=jump_dirty[page]; - while(head!=NULL) { - if(!invalid_code[head->vaddr>>12]) { + while(head!=NULL) + { + if(!invalid_code[head->vaddr>>12]) + { // Don't restore blocks which are about to expire from the cache - if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { + if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) + { u_int start,end; - if(verify_dirty(head->addr)) { + if(verify_dirty(head->addr)) + { //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr); u_int i; u_int inv=0; get_bounds((int)head->addr,&start,&end); - if(start-(u_int)rdram>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) { + if(start-(u_int)rdram>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) + { inv|=invalid_code[i]; } } - else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) { + else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) + { inv=1; } - if(!inv) { + if(!inv) + { void * clean_addr=(void *)get_clean_addr((int)head->addr); - if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { + if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) + { u_int ppage=page; inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr); //printf("page=%x, addr=%x\n",page,head->vaddr); //assert(head->vaddr>>12==(page|0x80000)); ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr); u_int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF]; - if(ht_bin[0]==head->vaddr) { + if(ht_bin[0]==head->vaddr) + { ht_bin[1]=(u_int)clean_addr; // Replace existing entry } - if(ht_bin[2]==head->vaddr) { + if(ht_bin[2]==head->vaddr) + { ht_bin[3]=(u_int)clean_addr; // Replace existing entry } } @@ -1138,15 +1166,17 @@ void clean_blocks(u_int page) } } - -void mov_alloc(struct regstat *current,int i) +static void mov_alloc(struct regstat *current,int i) { // Note: Don't need to actually alloc the source registers - if((~current->is32>>rs1[i])&1) { + if((~current->is32>>rs1[i])&1) + { //alloc_reg64(current,i,rs1[i]); alloc_reg64(current,i,rt1[i]); current->is32&=~(1LL<is32|=(1LL<