From e230dbf6ca642eb6cc660e5d581fa4d16a85934b Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 10 Jul 2016 02:09:08 +0300 Subject: libretro: try to prevent bad builds --- Makefile.libretro | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Makefile.libretro b/Makefile.libretro index d566e23..cbebc5d 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -182,6 +182,17 @@ MAIN_LDFLAGS += -shared MAIN_LDLIBS += -lm -lz EXTRA_LDFLAGS = +# try to autodetect stuff for the lazy +ifndef ARCH +ARCH = $(shell $(CC) -dumpmachine | awk -F- '{print $$1}') +endif +ifndef HAVE_NEON +HAVE_NEON = $(shell $(CC) -E -dD - < /dev/null 2> /dev/null | grep -q __ARM_NEON__ && echo 1 || echo 0) +endif +ifeq ($(shell ld -v 2> /dev/null | awk '{print $$1}'),GNU) +MAIN_LDFLAGS += -Wl,--no-undefined +endif + TARGET ?= libretro.so PLATFORM = libretro BUILTIN_GPU ?= peops -- cgit v1.2.3 From 6cdf212f2cbb240dfbe3a8af23bebf0960ba5574 Mon Sep 17 00:00:00 2001 From: vanfanel Date: Wed, 3 Aug 2016 02:27:58 +0200 Subject: Please, don't use for loop initial declarations because it forces us to use higher C std --- frontend/libretro.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index b13f9c7..5bf737f 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -473,7 +473,7 @@ static void update_controller_port_device(unsigned port, unsigned device) static void update_multitap() { struct retro_variable var; - int auto_case; + int auto_case, port; var.value = NULL; var.key = "pcsx_rearmed_multitap1"; @@ -494,7 +494,7 @@ static void update_multitap() { // If a gamepad is plugged after port 2, we need a first multitap. multitap1 = 0; - for (int port = 2; port < PORTS_NUMBER; port++) + for (port = 2; port < PORTS_NUMBER; port++) multitap1 |= in_type[port] != PSE_PAD_TYPE_NONE; } @@ -517,7 +517,7 @@ static void update_multitap() { // If a gamepad is plugged after port 4, we need a second multitap. 
multitap2 = 0; - for (int port = 4; port < PORTS_NUMBER; port++) + for (port = 4; port < PORTS_NUMBER; port++) multitap2 |= in_type[port] != PSE_PAD_TYPE_NONE; } } @@ -1204,6 +1204,7 @@ static const unsigned short retro_psx_map[] = { static void update_variables(bool in_flight) { struct retro_variable var; + int i; var.value = NULL; var.key = "pcsx_rearmed_frameskip"; @@ -1223,7 +1224,7 @@ static void update_variables(bool in_flight) Config.PsxType = 1; } - for (int i = 0; i < PORTS_NUMBER; i++) + for (i = 0; i < PORTS_NUMBER; i++) update_controller_port_variable(i); update_multitap(); -- cgit v1.2.3 From 0702a1f36ab283a6f6de9f3e2e473cd03a78bdf5 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Sat, 6 Aug 2016 21:04:49 +0200 Subject: (Vita) Try to enable dynarec now --- Makefile.libretro | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.libretro b/Makefile.libretro index 1cb1df9..b019cb1 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -124,8 +124,8 @@ else ifeq ($(platform), vita) HAVE_NEON = 1 BUILTIN_GPU = neon -# USE_DYNAREC = 1 -# DRC_CACHE_BASE = 0 + USE_DYNAREC = 1 + DRC_CACHE_BASE = 1 ARCH = arm STATIC_LINKING = 1 -- cgit v1.2.3 From 1cc8c854f8161cd0251a9b92929ad01584031ed3 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Sat, 6 Aug 2016 21:06:10 +0200 Subject: (Vita) Have to set DRC_CACHE_BASE to 0 for now --- Makefile.libretro | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.libretro b/Makefile.libretro index b019cb1..79d71c5 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -125,7 +125,7 @@ else ifeq ($(platform), vita) BUILTIN_GPU = neon USE_DYNAREC = 1 - DRC_CACHE_BASE = 1 + DRC_CACHE_BASE = 0 ARCH = arm STATIC_LINKING = 1 -- cgit v1.2.3 From 1a5fd79401ac52789fad34c6b852b947200a6334 Mon Sep 17 00:00:00 2001 From: frangarcj Date: Tue, 6 Sep 2016 13:49:55 +0200 Subject: (VITA) Some dynarec --- frontend/libretro.c | 83 +++++++++++++ frontend/vita/pthread.h | 156 +++++++++++++++++++++---- 
frontend/vita/sys/mman.h | 29 +++-- libpcsxcore/new_dynarec/new_dynarec.c | 214 ++++++++++++++++++---------------- 4 files changed, 352 insertions(+), 130 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 5bf737f..75f9b98 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -256,6 +256,85 @@ void pl_3ds_munmap(void *ptr, size_t size, enum psxMapTag tag) } #endif +#ifdef VITA +typedef struct +{ + void* buffer; + uint32_t target_map; + size_t size; + enum psxMapTag tag; +}psx_map_t; + +psx_map_t custom_psx_maps[] = { + {NULL, NULL, 0x210000, MAP_TAG_RAM}, // 0x80000000 + {NULL, NULL, 0x010000, MAP_TAG_OTHER}, // 0x1f800000 + {NULL, NULL, 0x080000, MAP_TAG_OTHER}, // 0x1fc00000 + {NULL, NULL, 0x800000, MAP_TAG_LUTS}, // 0x08000000 + {NULL, NULL, 0x200000, MAP_TAG_VRAM}, // 0x00000000 +}; + +void* pl_vita_mmap(unsigned long addr, size_t size, int is_fixed, + enum psxMapTag tag) +{ + (void)is_fixed; + (void)addr; + + + psx_map_t* custom_map = custom_psx_maps; + + for (; custom_map->size; custom_map++) + { + if ((custom_map->size == size) && (custom_map->tag == tag)) + { + int block, ret; + char blockname[32]; + sprintf(blockname, "CODE 0x%08X",tag); + + block = sceKernelAllocMemBlockForVM(blockname, size); + if(block<=0){ + sceClibPrintf("could not alloc mem block @0x%08X 0x%08X \n", block, tag); + exit(1); + } + + // get base address + ret = sceKernelGetMemBlockBase(block, &custom_map->buffer); + if (ret < 0) + { + sceClibPrintf("could get address @0x%08X 0x%08X 0x%08X \n", block, ret, tag); + exit(1); + } + + custom_map->target_map = block; + + return custom_map->buffer; + } + } + + + return malloc(size); +} + +void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag) +{ + (void)tag; + + psx_map_t* custom_map = custom_psx_maps; + + for (; custom_map->size; custom_map++) + { + if ((custom_map->buffer == ptr)) + { + sceKernelFreeMemBlock(custom_map->target_map); + custom_map->buffer = NULL; + custom_map->target_map = NULL; + 
return; + } + } + + free(ptr); +} +#endif + static void *pl_mmap(unsigned int size) { return psxMap(0, size, 0, MAP_TAG_VRAM); @@ -1474,6 +1553,10 @@ void retro_init(void) #ifdef _3DS psxMapHook = pl_3ds_mmap; psxUnmapHook = pl_3ds_munmap; +#endif +#ifdef VITA + psxMapHook = pl_vita_mmap; + psxUnmapHook = pl_vita_munmap; #endif ret = emu_core_preinit(); #ifdef _3DS diff --git a/frontend/vita/pthread.h b/frontend/vita/pthread.h index c18b20b..e1afdc5 100644 --- a/frontend/vita/pthread.h +++ b/frontend/vita/pthread.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2015 The RetroArch team +/* Copyright (C) 2010-2016 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this file (psp_pthread.h). @@ -26,6 +26,7 @@ #ifdef VITA #include +#include #else #include #include @@ -34,13 +35,20 @@ #include #include -#define STACKSIZE (64 * 1024) +#define STACKSIZE (8 * 1024) typedef SceUID pthread_t; typedef SceUID pthread_mutex_t; typedef void* pthread_mutexattr_t; typedef int pthread_attr_t; -typedef SceUID pthread_cond_t; + +typedef struct +{ + SceUID mutex; + SceUID sema; + int waiting; +} pthread_cond_t; + typedef SceUID pthread_condattr_t; /* Use pointer values to create unique names for threads/mutexes */ @@ -65,14 +73,15 @@ static int psp_thread_wrap(SceSize args, void *argp) static INLINE int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void*), void *arg) { - sprintf(name_buffer, "0x%08X", (uint32_t) thread); + sprintf(name_buffer, "0x%08X", (unsigned int) thread); - *thread = sceKernelCreateThread(name_buffer, - psp_thread_wrap, 0x20, STACKSIZE, 0, #ifdef VITA - 0, + *thread = sceKernelCreateThread(name_buffer, psp_thread_wrap, + 0x10000100, 0x10000, 0, 0, NULL); +#else + *thread = sceKernelCreateThread(name_buffer, + psp_thread_wrap, 0x20, STACKSIZE, 0, NULL); #endif - NULL); sthread_args_struct sthread_args; sthread_args.arg = arg; 
@@ -84,10 +93,13 @@ static INLINE int pthread_create(pthread_t *thread, static INLINE int pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr) { - sprintf(name_buffer, "0x%08X", (uint32_t) mutex); + sprintf(name_buffer, "0x%08X", (unsigned int) mutex); #ifdef VITA - return *mutex = sceKernelCreateMutex(name_buffer, 0, 0, 0); + *mutex = sceKernelCreateMutex(name_buffer, 0, 0, 0); + if(*mutex<0) + return *mutex; + return 0; #else return *mutex = sceKernelCreateSema(name_buffer, 0, 1, 1, NULL); #endif @@ -105,7 +117,9 @@ static INLINE int pthread_mutex_destroy(pthread_mutex_t *mutex) static INLINE int pthread_mutex_lock(pthread_mutex_t *mutex) { #ifdef VITA - return sceKernelLockMutex(*mutex, 1, 0); + int ret = sceKernelLockMutex(*mutex, 1, 0); + return ret; + #else /* FIXME: stub */ return 1; @@ -115,7 +129,8 @@ static INLINE int pthread_mutex_lock(pthread_mutex_t *mutex) static INLINE int pthread_mutex_unlock(pthread_mutex_t *mutex) { #ifdef VITA - return sceKernelUnlockMutex(*mutex, 1); + int ret = sceKernelUnlockMutex(*mutex, 1); + return ret; #else /* FIXME: stub */ return 1; @@ -125,16 +140,18 @@ static INLINE int pthread_mutex_unlock(pthread_mutex_t *mutex) static INLINE int pthread_join(pthread_t thread, void **retval) { - int exit_status; - SceUInt timeout = (SceUInt)-1; #ifdef VITA - sceKernelWaitThreadEnd(thread, &exit_status, &timeout); + int res = sceKernelWaitThreadEnd(thread, 0, 0); + if (res < 0) + return res; + return sceKernelDeleteThread(thread); #else + SceUInt timeout = (SceUInt)-1; sceKernelWaitThreadEnd(thread, &timeout); exit_status = sceKernelGetThreadExitStatus(thread); -#endif sceKernelDeleteThread(thread); return exit_status; +#endif } static INLINE int pthread_mutex_trylock(pthread_mutex_t *mutex) @@ -150,51 +167,142 @@ static INLINE int pthread_mutex_trylock(pthread_mutex_t *mutex) static INLINE int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { +#ifdef VITA + int ret = 
pthread_mutex_lock(&cond->mutex); + if (ret < 0) + return ret; + ++cond->waiting; + pthread_mutex_unlock(mutex); + pthread_mutex_unlock(&cond->mutex); + + ret = sceKernelWaitSema(cond->sema, 1, 0); + if (ret < 0) + sceClibPrintf("Premature wakeup: %08X", ret); + pthread_mutex_lock(mutex); + return ret; +#else + /* FIXME: stub */ sceKernelDelayThread(10000); return 1; +#endif } static INLINE int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime) { - //FIXME: stub +#ifdef VITA + int ret = pthread_mutex_lock(&cond->mutex); + if (ret < 0) + return ret; + ++cond->waiting; + pthread_mutex_unlock(mutex); + pthread_mutex_unlock(&cond->mutex); + + SceUInt timeout = 0; + + timeout = abstime->tv_sec; + timeout += abstime->tv_nsec / 1.0e6; + + ret = sceKernelWaitSema(cond->sema, 1, &timeout); + if (ret < 0) + sceClibPrintf("Premature wakeup: %08X", ret); + pthread_mutex_lock(mutex); + return ret; + +#else + /* FIXME: stub */ return 1; +#endif } static INLINE int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) { - //FIXME: stub +#ifdef VITA + + pthread_mutex_init(&cond->mutex,NULL); + if(cond->mutex<0){ + return cond->mutex; + } + sprintf(name_buffer, "0x%08X", (unsigned int) cond); + //cond->sema = sceKernelCreateCond(name_buffer, 0, cond->mutex, 0); + cond->sema = sceKernelCreateSema(name_buffer, 0, 0, 1, 0); + if(cond->sema<0){ + pthread_mutex_destroy(&cond->mutex); + return cond->sema; + } + + cond->waiting = 0; + + + return 0; + + +#else + /* FIXME: stub */ return 1; +#endif } static INLINE int pthread_cond_signal(pthread_cond_t *cond) { - //FIXME: stub +#ifdef VITA + pthread_mutex_lock(&cond->mutex); + if (cond->waiting) + { + --cond->waiting; + sceKernelSignalSema(cond->sema, 1); + } + pthread_mutex_unlock(&cond->mutex); + return 0; +#else + /* FIXME: stub */ return 1; +#endif } static INLINE int pthread_cond_broadcast(pthread_cond_t *cond) { - //FIXME: stub + /* FIXME: stub */ return 1; } static 
INLINE int pthread_cond_destroy(pthread_cond_t *cond) { - //FIXME: stub - return 1; +#ifdef VITA + int ret = sceKernelDeleteSema(cond->sema); + if(ret < 0) + return ret; + + return sceKernelDeleteMutex(cond->mutex); +#else + /* FIXME: stub */ + return 1; +#endif } static INLINE int pthread_detach(pthread_t thread) { - return 1; + return 0; } static INLINE void pthread_exit(void *retval) { - (void)retval; +#ifdef VITA + sceKernelExitDeleteThread(sceKernelGetThreadId()); +#endif +} + +static INLINE pthread_t pthread_self(void) +{ + /* zero 20-mar-2016: untested */ + return sceKernelGetThreadId(); +} + +static INLINE int pthread_equal(pthread_t t1, pthread_t t2) +{ + return t1 == t2; } #endif //_PSP_PTHREAD_WRAP__ diff --git a/frontend/vita/sys/mman.h b/frontend/vita/sys/mman.h index 66467f4..89da513 100644 --- a/frontend/vita/sys/mman.h +++ b/frontend/vita/sys/mman.h @@ -18,19 +18,32 @@ extern "C" { static inline void* mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) { - (void)addr; (void)prot; (void)flags; (void)fd; (void)offset; - void* addr_out; + int block, ret; + + block = sceKernelAllocMemBlockForVM("code", len); + if(block<=0){ + sceClibPrintf("could not alloc mem block @0x%08X 0x%08X \n", block, len); + exit(1); + } + + // get base address + ret = sceKernelGetMemBlockBase(block, &addr); + if (ret < 0) + { + sceClibPrintf("could get address @0x%08X 0x%08X \n", block, addr); + exit(1); + } + - addr_out = malloc(len); - if(!addr_out) + if(!addr) return MAP_FAILED; - return addr_out; + return addr; } static inline int mprotect(void *addr, size_t len, int prot) @@ -43,8 +56,9 @@ static inline int mprotect(void *addr, size_t len, int prot) static inline int munmap(void *addr, size_t len) { - free(addr); - return 0; + int uid = sceKernelFindMemBlockByAddr(addr, len); + + return sceKernelFreeMemBlock(uid); } @@ -53,4 +67,3 @@ static inline int munmap(void *addr, size_t len) #endif #endif // MMAN_H - diff --git 
a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 65c8f03..1c0ab56 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -56,6 +56,13 @@ static void __clear_cache(void *start, void *end) { #elif defined(_3DS) #include "3ds_utils.h" #define __clear_cache(start,end) svcFlushProcessDataCache(0xFFFF8001, start, (u32)(end)-(u32)(start)) +#elif defined(VITA) +#define __clear_cache vita_clear_cache +static void __clear_cache(void *start, void *end) { + size_t len = (char *)end - (char *)start; + int block = sceKernelFindMemBlockByAddr(start,len); + sceKernelSyncVMDomain(block, start, len); +} #endif #define MAXBLOCK 4096 @@ -196,7 +203,7 @@ struct ll_entry #define STORE 2 // Store #define LOADLR 3 // Unaligned load #define STORELR 4 // Unaligned store -#define MOV 5 // Move +#define MOV 5 // Move #define ALU 6 // Arithmetic/logic #define MULTDIV 7 // Multiply/divide #define SHIFT 8 // Shift by register @@ -314,18 +321,18 @@ static void tlb_hacks() { u_int addr; int n; - switch (ROM_HEADER->Country_code&0xFF) + switch (ROM_HEADER->Country_code&0xFF) { case 0x45: // U addr=0x34b30; - break; - case 0x4A: // J - addr=0x34b70; - break; - case 0x50: // E + break; + case 0x4A: // J + addr=0x34b70; + break; + case 0x50: // E addr=0x329f0; - break; - default: + break; + default: // Unknown country code addr=0; break; @@ -526,7 +533,7 @@ static void flush_dirty_uppers(struct regstat *cur) for (hr=0;hrdirty>>hr)&1) { reg=cur->regmap[hr]; - if(reg>=64) + if(reg>=64) if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1; } } @@ -683,7 +690,7 @@ int needed_again(int r, int i) int j; int b=-1; int rn=10; - + if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(ba[i-1]start+slen*4-4) @@ -786,7 +793,7 @@ int loop_reg(int i, int r, int hr) void alloc_all(struct regstat *cur,int i) { int hr; - + for(hr=0;hrregmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&& @@ -827,7 +834,7 @@ void 
mult64(uint64_t m1,uint64_t m2) unsigned long long int result1, result2, result3, result4; unsigned long long int temp1, temp2, temp3, temp4; int sign = 0; - + if (m1 < 0) { op2 = -m1; @@ -840,22 +847,22 @@ void mult64(uint64_t m1,uint64_t m2) sign = 1 - sign; } else op4 = m2; - + op1 = op2 & 0xFFFFFFFF; op2 = (op2 >> 32) & 0xFFFFFFFF; op3 = op4 & 0xFFFFFFFF; op4 = (op4 >> 32) & 0xFFFFFFFF; - + temp1 = op1 * op3; temp2 = (temp1 >> 32) + op1 * op4; temp3 = op2 * op3; temp4 = (temp3 >> 32) + op2 * op4; - + result1 = temp1 & 0xFFFFFFFF; result2 = temp2 + (temp3 & 0xFFFFFFFF); result3 = (result2 >> 32) + temp4; result4 = (result3 >> 32); - + lo = result1 | (result2 << 32); hi = (result3 & 0xFFFFFFFF) | (result4 << 32); if (sign) @@ -871,25 +878,25 @@ void multu64(uint64_t m1,uint64_t m2) unsigned long long int op1, op2, op3, op4; unsigned long long int result1, result2, result3, result4; unsigned long long int temp1, temp2, temp3, temp4; - + op1 = m1 & 0xFFFFFFFF; op2 = (m1 >> 32) & 0xFFFFFFFF; op3 = m2 & 0xFFFFFFFF; op4 = (m2 >> 32) & 0xFFFFFFFF; - + temp1 = op1 * op3; temp2 = (temp1 >> 32) + op1 * op4; temp3 = op2 * op3; temp4 = (temp3 >> 32) + op2 * op4; - + result1 = temp1 & 0xFFFFFFFF; result2 = temp2 + (temp3 & 0xFFFFFFFF); result3 = (result2 >> 32) + temp4; result4 = (result3 >> 32); - + lo = result1 | (result2 << 32); hi = (result3 & 0xFFFFFFFF) | (result4 << 32); - + //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32) // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); } @@ -1011,7 +1018,7 @@ void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift) { struct ll_entry *next; while(*head) { - if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || + if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) { inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr); @@ -1107,7 +1114,7 @@ static void invalidate_block_range(u_int block, u_int 
first, u_int last) #ifdef __arm__ do_clear_cache(); #endif - + // Don't trap writes invalid_code[block]=1; #ifndef DISABLE_TLB @@ -3386,7 +3393,7 @@ void storelr_assemble(int i,struct regstat *i_regs) if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); gen_tlb_addr_w(temp,map); #else - if((u_int)rdram!=0x80000000) + if((u_int)rdram!=0x80000000) emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); #endif }else{ // using tlb @@ -4159,7 +4166,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) int mgr=MGEN1+(i&1); if(itype[i]==LOAD) { ra=get_reg(i_regs->regmap,rt1[i]); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg(i_regs->regmap,-1); assert(ra>=0); } if(itype[i]==LOADLR) { @@ -4757,7 +4764,7 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) { return 0; } - else + else if((i_dirty>>hr)&1) { if(i_regmap[hr]>s1l)&(branch_regs[i].is32>>rs1[i])&1) emit_loadreg(rs1[i],s1l); - } + } else { if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1) emit_loadreg(rs2[i],s1l); @@ -5194,7 +5201,7 @@ void do_ccstub(int n) load_all_regs(branch_regs[i].regmap); } emit_jmp(stubs[n][2]); // return address - + /* This works but uses a lot of memory... 
emit_readword((int)&last_count,ECX); emit_add(HOST_CCREG,ECX,EAX); @@ -5228,7 +5235,7 @@ add_to_linker(int addr,int target,int ext) { link_addr[linkcount][0]=addr; link_addr[linkcount][1]=target; - link_addr[linkcount][2]=ext; + link_addr[linkcount][2]=ext; linkcount++; } @@ -5254,7 +5261,7 @@ static void ujump_assemble_write_ra(int i) #endif { #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -5275,10 +5282,10 @@ void ujump_assemble(int i,struct regstat *i_regs) address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); - if(rt1[i]==31&&temp>=0) + if(rt1[i]==31&&temp>=0) { int return_address=start+i*4+8; - if(get_reg(branch_regs[i].regmap,31)>0) + if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } #endif @@ -5329,7 +5336,7 @@ static void rjump_assemble_write_ra(int i) assert(rt>=0); return_address=start+i*4+8; #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -5358,7 +5365,7 @@ void rjump_assemble(int i,struct regstat *i_regs) } address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH - if(rt1[i]==31) + if(rt1[i]==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { int return_address=start+i*4+8; @@ -5492,7 +5499,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); @@ -5547,7 +5554,7 @@ void cjump_assemble(int i,struct regstat *i_regs) load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - 
if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); //assem_debug("cycle count (adj)\n"); @@ -5619,7 +5626,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(0); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if(opcode[i]==4) // BEQ @@ -5749,7 +5756,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(1); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if((opcode[i]&0x2f)==4) // BEQ @@ -5932,7 +5939,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); assem_debug("cycle count (adj)\n"); @@ -6019,7 +6026,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } } // if(!only32) - + if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { @@ -6267,7 +6274,7 @@ void fjump_assemble(int i,struct regstat *i_regs) { } } // if(!only32) - + if(invert) { if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -6775,14 +6782,14 @@ void unneeded_registers(int istart,int iend,int r) { // If subroutine call, flag return address as a possible branch target if(rt1[i]==31 && 
i=(start+slen*4)) { // Branch out of this block, flush all regs u=1; uu=1; gte_u=gte_u_unknown; - /* Hexagon hack + /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { uu=u=0x300C00F; // Discard at, v0-v1, t6-t9 @@ -7045,7 +7052,7 @@ static void provisional_32bit() int i,j; uint64_t is32=1; uint64_t lastbranch=1; - + for(i=0;i0) { @@ -7082,13 +7089,13 @@ static void provisional_32bit() uint64_t temp_is32=is32; for(j=i-1;j>=0;j--) { - if(ba[j]==start+i*4) + if(ba[j]==start+i*4) //temp_is32&=branch_regs[j].is32; temp_is32&=p32[j]; } for(j=i;j=0;i--) { int hr; @@ -7373,7 +7380,7 @@ static void provisional_r32() } //requires_32bit[i]=r32; pr32[i]=r32; - + // Dirty registers which are 32-bit, require 32-bit input // as they will be written as 32-bit values for(hr=0;hristart) { - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) + if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) { // Don't store a register immediately after writing it, // may prevent dual-issue. 
@@ -7974,17 +7981,28 @@ void new_dynarec_init() { SysPrintf("Init new dynarec\n"); out=(u_char *)BASE_ADDR; -#if BASE_ADDR_FIXED +#if defined(VITA) + if (mmap (out, 1< %x\n", (int)addr, (int)out); //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); - //if(debug) + //if(debug) //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); /*if(Count>=312978186) { @@ -8246,7 +8264,7 @@ int new_recompile_block(int addr) unsigned int type,op,op2; //printf("addr = %x source = %x %x\n", addr,source,source[0]); - + /* Pass 1 disassembly */ for(i=0;!done;i++) { @@ -8906,7 +8924,7 @@ int new_recompile_block(int addr) /* Pass 2 - Register dependencies and branch targets */ unneeded_registers(0,slen-1,0); - + /* Pass 3 - Register allocation */ struct regstat current; // Current register allocations/status @@ -8936,7 +8954,7 @@ int new_recompile_block(int addr) unneeded_reg_upper[0]=1; current.regmap[HOST_BTREG]=BTREG; } - + for(i=0;i=0;j--) { - if(ba[j]==start+i*4) + if(ba[j]==start+i*4) temp_is32&=branch_regs[j].is32; } for(j=i;j=0;j--) { - if(ba[j]==start+i*4+4) + if(ba[j]==start+i*4+4) temp_is32&=branch_regs[j].is32; } for(j=i;j=0) + if(get_reg(current.regmap,r|64)>=0) current.regmap[get_reg(current.regmap,r|64)]=-1; } } @@ -9059,12 +9077,12 @@ int new_recompile_block(int addr) uint64_t temp_is32=current.is32; for(j=i-1;j>=0;j--) { - if(ba[j]==start+i*4+8) + if(ba[j]==start+i*4+8) temp_is32&=branch_regs[j].is32; } for(j=i;j=0) + if(get_reg(current.regmap,r|64)>=0) current.regmap[get_reg(current.regmap,r|64)]=-1; } } @@ -9172,7 +9190,7 @@ int new_recompile_block(int addr) } } else { // First instruction expects CCREG to be allocated - if(i==0&&hr==HOST_CCREG) + if(i==0&&hr==HOST_CCREG) regs[i].regmap_entry[hr]=CCREG; else regs[i].regmap_entry[hr]=-1; @@ -9507,7 +9525,7 @@ 
int new_recompile_block(int addr) pagespan_alloc(&current,i); break; } - + // Drop the upper half of registers that have become 32-bit current.uu|=current.is32&((1LL<=0;i--) { int hr; @@ -10065,7 +10083,7 @@ int new_recompile_block(int addr) } // Save it needed_reg[i]=nr; - + // Deallocate unneeded registers for(hr=0;hr=start && ba[i]<(start+i*4)) + if(ba[i]>=start && ba[i]<(start+i*4)) if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS @@ -10255,10 +10273,10 @@ int new_recompile_block(int addr) } } if(ooo[i]) { - if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; }else{ - if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; } // Avoid dirty->clean transition @@ -10428,10 +10446,10 @@ int new_recompile_block(int addr) if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { if(ooo[j]) { - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; }else{ - if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; } if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { @@ -10504,7 +10522,7 @@ int new_recompile_block(int addr) regs[k].isconst&=~(1<i&&f_regmap[HOST_CCREG]==CCREG) @@ -10546,7 +10564,7 @@ int new_recompile_block(int addr) } } } - + // Cache memory offset or tlb map pointer if a register is available #ifndef HOST_IMM_ADDR32 #ifndef RAM_OFFSET @@ -10726,7 +10744,7 @@ int new_recompile_block(int addr) } } #endif - + // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs.
for(i=0;i=0) { @@ -10901,7 +10919,7 @@ int new_recompile_block(int addr) } } if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) { - if(itype[i+1]==LOAD) + if(itype[i+1]==LOAD) hr=get_reg(regs[i+1].regmap,rt1[i+1]); if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); @@ -10925,16 +10943,16 @@ int new_recompile_block(int addr) } } } - + /* Pass 6 - Optimize clean/dirty state */ clean_registers(0,slen-1,1); - + /* Pass 7 - Identify 32-bit registers */ #ifndef FORCE32 provisional_r32(); u_int r32=0; - + for (i=slen-1;i>=0;i--) { int hr; @@ -11030,7 +11048,7 @@ int new_recompile_block(int addr) if((regs[i].was32>>dep2[i])&1) r32|=1LL<(u_int)BASE_ADDR+(1<>12;i<=(start+slen*4)>>12;i++) { invalid_code[i]=0; @@ -11618,9 +11636,9 @@ int new_recompile_block(int addr) invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; #endif - + /* Pass 10 - Free memory by expiring oldest blocks */ - + int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { @@ -11662,7 +11680,7 @@ int new_recompile_block(int addr) case 3: // Clear jump_out #ifdef __arm__ - if((expirep&2047)==0) + if((expirep&2047)==0) do_clear_cache(); #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); -- cgit v1.2.3 From d9e6b42f224f10d8b6f2454da1f5d80735f8e0d6 Mon Sep 17 00:00:00 2001 From: frangarcj Date: Tue, 6 Sep 2016 13:54:38 +0200 Subject: (VITA) Another dynarec fix --- libpcsxcore/new_dynarec/new_dynarec.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 1c0ab56..56c8a0b 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -7980,17 +7980,18 @@ void new_dynarec_clear_full() void new_dynarec_init() { 
SysPrintf("Init new dynarec\n"); - out=(u_char *)BASE_ADDR; -#if defined(VITA) - if (mmap (out, 1<>3)+256]|=1<<(page&7); } #ifdef __arm__ + #if defined(VITA) + sceKernelCloseVMDomain(); + #endif __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<buffer = (((u32)custom_map->buffer) + 0xFFF) & ~0xFFF; custom_map->target_map = block; return custom_map->buffer; -- cgit v1.2.3 From 9b400da1a7d44efda870462121b3e36ab42e134b Mon Sep 17 00:00:00 2001 From: orbea Date: Tue, 6 Sep 2016 17:47:31 -0700 Subject: Use DEBUG=1 properly for debug builds --- Makefile | 6 ++++-- Makefile.libretro | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 89788e6..627ccf4 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,10 @@ # default stuff goes here, so that config can override TARGET ?= pcsx -CFLAGS += -Wall -ggdb -Iinclude -ffast-math -ifndef DEBUG +CFLAGS += -Wall -Iinclude -ffast-math +ifeq ($(DEBUG), 1) +CFLAGS += -O0 -ggdb -DOPENGL_DEBUG +else CFLAGS += -O2 -DNDEBUG endif CXXFLAGS += $(CFLAGS) diff --git a/Makefile.libretro b/Makefile.libretro index 79d71c5..01c1fcf 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -1,5 +1,7 @@ # Makefile for PCSX ReARMed (libretro) +DEBUG=0 + ifeq ($(platform),) platform = unix ifeq ($(shell uname -a),) -- cgit v1.2.3 From 0f97d2d8b4a9ce8aba382a922f13d3f6e32dadcb Mon Sep 17 00:00:00 2001 From: orbea Date: Thu, 8 Sep 2016 09:39:34 -0700 Subject: OPENGL_DEBUG is not needed --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 627ccf4..dc4e07d 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ TARGET ?= pcsx CFLAGS += -Wall -Iinclude -ffast-math ifeq ($(DEBUG), 1) -CFLAGS += -O0 -ggdb -DOPENGL_DEBUG +CFLAGS += -O0 -ggdb else CFLAGS += -O2 -DNDEBUG endif -- cgit v1.2.3 From f82bcc681184536aa6d3ee410b938671c1ba2ead Mon Sep 17 00:00:00 2001 From: twinaphex Date: Fri, 9 Sep 2016 07:19:14 +0200 Subject: redports backport --- Makefile.libretro | 1 - 1 file 
changed, 1 deletion(-) diff --git a/Makefile.libretro b/Makefile.libretro index 01c1fcf..4875aaf 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -24,7 +24,6 @@ CFLAGS ?= TARGET_NAME := pcsx_rearmed LIBZ := -lz LIBPTHREAD := -lpthread -LIBDL := -ldl MMAP_WIN32=0 EXTRA_LDFLAGS = -- cgit v1.2.3 From 71e490c5930e6e5f71d1f2d5165c3a801ac46be1 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 18 Sep 2016 19:22:58 +0300 Subject: drc: drop heaps of dead code I've kept it around to keep the code similar to Ari64's version, so that it would be easier to merge back his fixes. However Mupen64plus has long reformatted the code and it kind of went different direction anyway, so there is no point to keep all this code now. --- libpcsxcore/new_dynarec/assem_arm.c | 1339 +-------------------------------- libpcsxcore/new_dynarec/assem_arm.h | 9 - libpcsxcore/new_dynarec/fpu.c | 394 ---------- libpcsxcore/new_dynarec/fpu.h | 74 -- libpcsxcore/new_dynarec/new_dynarec.c | 1303 +------------------------------- 5 files changed, 35 insertions(+), 3084 deletions(-) delete mode 100644 libpcsxcore/new_dynarec/fpu.c delete mode 100644 libpcsxcore/new_dynarec/fpu.h diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 708c8ae..c2f65ee 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -19,7 +19,6 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#ifdef PCSX #include "../gte.h" #define FLAGLESS #include "../gte.h" @@ -27,7 +26,6 @@ #include "../gte_arm.h" #include "../gte_neon.h" #include "pcnt.h" -#endif #include "arm_features.h" #if !BASE_ADDR_FIXED @@ -46,9 +44,6 @@ extern int pcaddr; extern int pending_exception; extern int branch_target; extern uint64_t readmem_dword; -#ifdef MUPEN64 -extern precomp_instr fake_pc; -#endif extern void *dynarec_local; extern u_int memory_map[1048576]; extern u_int mini_ht[32][2]; @@ -119,8 +114,6 @@ const u_int invalidate_addr_reg[16] = { 0, 0}; -#include "fpu.h" - unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; /* Linker */ @@ -266,18 +259,6 @@ int verify_dirty(int addr) #endif if((*ptr&0xFF000000)!=0xeb000000) ptr++; assert((*ptr&0xFF000000)==0xeb000000); // bl instruction -#ifndef DISABLE_TLB - u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl - if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) { - unsigned int page=source>>12; - unsigned int map_value=memory_map[page]; - if(map_value>=0x80000000) return 0; - while(page<((source+len-1)>>12)) { - if((memory_map[++page]<<2)!=(map_value<<2)) return 0; - } - source = source+(map_value<<2); - } -#endif //printf("verify_dirty: %x %x %x\n",source,copy,len); return !memcmp((void *)source,(void *)copy,len); } @@ -322,13 +303,6 @@ void get_bounds(int addr,u_int *start,u_int *end) #endif if((*ptr&0xFF000000)!=0xeb000000) ptr++; assert((*ptr&0xFF000000)==0xeb000000); // bl instruction -#ifndef DISABLE_TLB - u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl - if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) { - if(memory_map[source>>12]>=0x80000000) source = 0; - else source = source+(memory_map[source>>12]<<2); - } -#endif *start=source; *end=source+len; } @@ -1037,13 +1011,11 @@ void emit_pcreladdr(u_int rt) void emit_loadreg(int r, int hr) { -#ifdef FORCE32 
if(r&64) { SysPrintf("64bit load in 32bit mode!\n"); assert(0); return; } -#endif if((r&63)==0) emit_zeroreg(hr); else { @@ -1062,13 +1034,11 @@ void emit_loadreg(int r, int hr) } void emit_storereg(int r, int hr) { -#ifdef FORCE32 if(r&64) { SysPrintf("64bit store in 32bit mode!\n"); assert(0); return; } -#endif int addr=((int)reg)+((r&63)<>4); if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); @@ -2671,12 +2641,6 @@ void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i) emit_movimm(value,HOST_TEMPREG); } emit_storereg(i_regmap[hr],HOST_TEMPREG); -#ifndef FORCE32 - if((i_is32>>i_regmap[hr])&1) { - if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG); - emit_storereg(i_regmap[hr]|64,HOST_TEMPREG); - } -#endif } } } @@ -2832,10 +2796,8 @@ static void mov_loadtype_adj(int type,int rs,int rt) } } -#ifdef PCSX #include "pcsxmem.h" #include "pcsxmem_inline.c" -#endif do_readstub(int n) { @@ -2859,7 +2821,6 @@ do_readstub(int n) rt=get_reg(i_regmap,rt1[i]); } assert(rs>=0); -#ifdef PCSX int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0; reglist|=(1<=0); - int ftable=0; - if(type==LOADB_STUB||type==LOADBU_STUB) - ftable=(int)readmemb; - if(type==LOADH_STUB||type==LOADHU_STUB) - ftable=(int)readmemh; - if(type==LOADW_STUB) - ftable=(int)readmem; -#ifndef FORCE32 - if(type==LOADD_STUB) - ftable=(int)readmemd; -#endif - assert(ftable!=0); - emit_writeword(rs,(int)&address); - //emit_pusha(); - save_regs(reglist); -#ifndef PCSX - ds=i_regs!=&regs[i]; - int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]); - u_int cmask=ds?-1:(0x100f|~i_regs->wasconst); - if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<>rs1[i])&1)<<1)+ds,3); -#endif - //emit_readword((int)&last_count,temp); - //emit_add(cc,temp,cc); - //emit_writeword(cc,(int)&Count); - //emit_mov(15,14); - emit_call((int)&indirect_jump_indexed); -
//emit_callreg(rs); - //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15); -#ifndef PCSX - // We really shouldn't need to update the count here, - // but not doing so causes random crashes... - emit_readword((int)&Count,HOST_TEMPREG); - emit_readword((int)&next_interupt,2); - emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG); - emit_writeword(2,(int)&last_count); - emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); - if(cc<0) { - emit_storereg(CCREG,HOST_TEMPREG); - } -#endif - //emit_popa(); - restore_regs(reglist); - //if((cc=get_reg(regmap,CCREG))>=0) { - // emit_loadreg(CCREG,cc); - //} - if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { - assert(rt>=0); - if(type==LOADB_STUB) - emit_movsbl((int)&readmem_dword,rt); - if(type==LOADBU_STUB) - emit_movzbl((int)&readmem_dword,rt); - if(type==LOADH_STUB) - emit_movswl((int)&readmem_dword,rt); - if(type==LOADHU_STUB) - emit_movzwl((int)&readmem_dword,rt); - if(type==LOADW_STUB) - emit_readword((int)&readmem_dword,rt); - if(type==LOADD_STUB) { - emit_readword((int)&readmem_dword,rt); - if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth); - } - } - emit_jmp(stubs[n][2]); // return address -#endif // !PCSX } -#ifdef PCSX // return memhandler, or get directly accessable address and return 0 u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) { @@ -3028,7 +2908,6 @@ u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) return l2<<1; } } -#endif inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { @@ -3037,7 +2916,6 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i int rt=get_reg(regmap,target); if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); -#ifdef PCSX u_int handler,host_addr=0,is_dynamic,far_call=0; int cc=get_reg(regmap,CCREG); if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) @@ -3111,87 +2989,6 @@ inline_readstub(int type, int i, u_int addr, signed char 
regmap[], int target, i } } restore_regs(reglist); -#else // if !PCSX - int ftable=0; - if(type==LOADB_STUB||type==LOADBU_STUB) - ftable=(int)readmemb; - if(type==LOADH_STUB||type==LOADHU_STUB) - ftable=(int)readmemh; - if(type==LOADW_STUB) - ftable=(int)readmem; -#ifndef FORCE32 - if(type==LOADD_STUB) - ftable=(int)readmemd; -#endif - assert(ftable!=0); - if(target==0) - emit_movimm(addr,rs); - emit_writeword(rs,(int)&address); - //emit_pusha(); - save_regs(reglist); -#ifndef PCSX - if((signed int)addr>=(signed int)0xC0000000) { - // Theoretically we can have a pagefault here, if the TLB has never - // been enabled and the address is outside the range 80000000..BFFFFFFF - // Write out the registers so the pagefault can be handled. This is - // a very rare case and likely represents a bug. - int ds=regmap!=regs[i].regmap; - if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); - if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); - else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty); - } -#endif - //emit_shrimm(rs,16,1); - int cc=get_reg(regmap,CCREG); - if(cc<0) { - emit_loadreg(CCREG,2); - } - //emit_movimm(ftable,0); - emit_movimm(((u_int *)ftable)[addr>>16],0); - //emit_readword((int)&last_count,12); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); -#ifndef PCSX - if((signed int)addr>=(signed int)0xC0000000) { - // Pagefault address - int ds=regmap!=regs[i].regmap; - emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); - } -#endif - //emit_add(12,2,2); - //emit_writeword(2,(int)&Count); - //emit_call(((u_int *)ftable)[addr>>16]); - emit_call((int)&indirect_jump); -#ifndef PCSX - // We really shouldn't need to update the count here, - // but not doing so causes random crashes... 
- emit_readword((int)&Count,HOST_TEMPREG); - emit_readword((int)&next_interupt,2); - emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG); - emit_writeword(2,(int)&last_count); - emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); - if(cc<0) { - emit_storereg(CCREG,HOST_TEMPREG); - } -#endif - //emit_popa(); - restore_regs(reglist); - if(rt>=0) { - if(type==LOADB_STUB) - emit_movsbl((int)&readmem_dword,rt); - if(type==LOADBU_STUB) - emit_movzbl((int)&readmem_dword,rt); - if(type==LOADH_STUB) - emit_movswl((int)&readmem_dword,rt); - if(type==LOADHU_STUB) - emit_movzwl((int)&readmem_dword,rt); - if(type==LOADW_STUB) - emit_readword((int)&readmem_dword,rt); - if(type==LOADD_STUB) { - emit_readword((int)&readmem_dword,rt); - if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth); - } - } -#endif // !PCSX } do_writestub(int n) @@ -3217,7 +3014,6 @@ do_writestub(int n) } assert(rs>=0); assert(rt>=0); -#ifdef PCSX int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra; int reglist2=reglist|(1<=0); - int ftable=0; - if(type==STOREB_STUB) - ftable=(int)writememb; - if(type==STOREH_STUB) - ftable=(int)writememh; - if(type==STOREW_STUB) - ftable=(int)writemem; -#ifndef FORCE32 - if(type==STORED_STUB) - ftable=(int)writememd; -#endif - assert(ftable!=0); - emit_writeword(rs,(int)&address); - //emit_shrimm(rs,16,rs); - //emit_movmem_indexedx4(ftable,rs,rs); - if(type==STOREB_STUB) - emit_writebyte(rt,(int)&byte); - if(type==STOREH_STUB) - emit_writehword(rt,(int)&hword); - if(type==STOREW_STUB) - emit_writeword(rt,(int)&word); - if(type==STORED_STUB) { -#ifndef FORCE32 - emit_writeword(rt,(int)&dword); - emit_writeword(r?rth:rt,(int)&dword+4); -#else - SysPrintf("STORED_STUB\n"); -#endif - } - //emit_pusha(); - save_regs(reglist); -#ifndef PCSX - ds=i_regs!=&regs[i]; - int real_rs=get_reg(i_regmap,rs1[i]); - u_int cmask=ds?-1:(0x100f|~i_regs->wasconst); - if(!ds)
load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<>rs1[i])&1)<<1)+ds,3); -#endif - //emit_readword((int)&last_count,temp); - //emit_addimm(cc,2*stubs[n][5]+2,cc); - //emit_add(cc,temp,cc); - //emit_writeword(cc,(int)&Count); - emit_call((int)&indirect_jump_indexed); - //emit_callreg(rs); - emit_readword((int)&Count,HOST_TEMPREG); - emit_readword((int)&next_interupt,2); - emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG); - emit_writeword(2,(int)&last_count); - emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); - if(cc<0) { - emit_storereg(CCREG,HOST_TEMPREG); - } - //emit_popa(); - restore_regs(reglist); - //if((cc=get_reg(regmap,CCREG))>=0) { - // emit_loadreg(CCREG,cc); - //} - emit_jmp(stubs[n][2]); // return address -#endif // !PCSX } inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) @@ -3359,7 +3082,6 @@ inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int rt=get_reg(regmap,target); assert(rs>=0); assert(rt>=0); -#ifdef PCSX u_int handler,host_addr=0; handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr); if (handler==0) { @@ -3388,82 +3110,6 @@ inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, if(cc<0) emit_storereg(CCREG,2); restore_regs(reglist); -#else // if !pcsx - int ftable=0; - if(type==STOREB_STUB) - ftable=(int)writememb; - if(type==STOREH_STUB) - ftable=(int)writememh; - if(type==STOREW_STUB) - ftable=(int)writemem; -#ifndef FORCE32 - if(type==STORED_STUB) - ftable=(int)writememd; -#endif - assert(ftable!=0); - emit_writeword(rs,(int)&address); - //emit_shrimm(rs,16,rs); - //emit_movmem_indexedx4(ftable,rs,rs); - if(type==STOREB_STUB) - emit_writebyte(rt,(int)&byte); - if(type==STOREH_STUB) - emit_writehword(rt,(int)&hword); - if(type==STOREW_STUB) - emit_writeword(rt,(int)&word); - if(type==STORED_STUB) { -#ifndef FORCE32 - 
emit_writeword(rt,(int)&dword); - emit_writeword(target?rth:rt,(int)&dword+4); -#else - SysPrintf("STORED_STUB\n"); -#endif - } - //emit_pusha(); - save_regs(reglist); -#ifndef PCSX - // rearmed note: load_all_consts prevents BIOS boot, some bug? - if((signed int)addr>=(signed int)0xC0000000) { - // Theoretically we can have a pagefault here, if the TLB has never - // been enabled and the address is outside the range 80000000..BFFFFFFF - // Write out the registers so the pagefault can be handled. This is - // a very rare case and likely represents a bug. - int ds=regmap!=regs[i].regmap; - if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); - if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); - else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty); - } -#endif - //emit_shrimm(rs,16,1); - int cc=get_reg(regmap,CCREG); - if(cc<0) { - emit_loadreg(CCREG,2); - } - //emit_movimm(ftable,0); - emit_movimm(((u_int *)ftable)[addr>>16],0); - //emit_readword((int)&last_count,12); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); -#ifndef PCSX - if((signed int)addr>=(signed int)0xC0000000) { - // Pagefault address - int ds=regmap!=regs[i].regmap; - emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); - } -#endif - //emit_add(12,2,2); - //emit_writeword(2,(int)&Count); - //emit_call(((u_int *)ftable)[addr>>16]); - emit_call((int)&indirect_jump); - emit_readword((int)&Count,HOST_TEMPREG); - emit_readword((int)&next_interupt,2); - emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG); - emit_writeword(2,(int)&last_count); - emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc); - if(cc<0) { - emit_storereg(CCREG,HOST_TEMPREG); - } - //emit_popa(); - restore_regs(reglist); -#endif } do_unalignedwritestub(int n) @@ -3506,14 +3152,6 @@ do_unalignedwritestub(int n) emit_writeword(temp2,(int)&address); save_regs(reglist); -#ifndef PCSX - ds=i_regs!=&regs[i]; - real_rs=get_reg(i_regmap,rs1[i]); -
u_int cmask=ds?-1:(0x100f|~i_regs->wasconst); - if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<>rs1[i])&1)<<1)+ds,3); -#endif emit_call((int)&indirect_jump_indexed); restore_regs(reglist); @@ -3587,10 +3221,7 @@ do_invstub(int n) int do_dirty_stub(int i) { assem_debug("do_dirty_stub %x\n",start+i*4); - u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start; - #ifdef PCSX - addr=(u_int)source; - #endif + u_int addr=(u_int)source; // Careful about the code output here, verify_dirty needs to parse it. #ifndef HAVE_ARMV7 emit_loadlp(addr,1); @@ -3651,93 +3282,6 @@ do_cop1stub(int n) emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception); } -#ifndef DISABLE_TLB - -/* TLB */ - -int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr) -{ - if(c) { - if((signed int)addr>=(signed int)0xC0000000) { - // address_generation already loaded the const - emit_readword_dualindexedx4(FP,map,map); - } - else - return -1; // No mapping - } - else { - assert(s!=map); - emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map); - emit_addsr12(map,s,map); - // Schedule this while we wait on the load - //if(x) emit_xorimm(s,x,ar); - if(shift>=0) emit_shlimm(s,3,shift); - if(~a) emit_andimm(s,a,ar); - emit_readword_dualindexedx4(FP,map,map); - } - return map; -} -int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr) -{ - if(!c||(signed int)addr>=(signed int)0xC0000000) { - emit_test(map,map); - *jaddr=(int)out; - emit_js(0); - } - return map; -} - -int gen_tlb_addr_r(int ar, int map) { - if(map>=0) { - assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]); - output_w32(0xe0800100|rd_rn_rm(ar,ar,map)); - } -} - -int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr) -{ - if(c) { - if(addr<0x80800000||addr>=0xC0000000) { - // address_generation already loaded the const - emit_readword_dualindexedx4(FP,map,map); - } - else - return -1; // No mapping - } - 
else { - assert(s!=map); - emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map); - emit_addsr12(map,s,map); - // Schedule this while we wait on the load - //if(x) emit_xorimm(s,x,ar); - emit_readword_dualindexedx4(FP,map,map); - } - return map; -} -int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr) -{ - if(!c||addr<0x80800000||addr>=0xC0000000) { - emit_testimm(map,0x40000000); - *jaddr=(int)out; - emit_jne(0); - } -} - -int gen_tlb_addr_w(int ar, int map) { - if(map>=0) { - assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]); - output_w32(0xe0800100|rd_rn_rm(ar,ar,map)); - } -} - -// Generate the address of the memory_map entry, relative to dynarec_local -generate_map_const(u_int addr,int reg) { - //printf("generate_map_const(%x,%s)\n",addr,regname[reg]); - emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg); -} - -#else - static int do_tlb_r(int a, ...) { return 0; } static int do_tlb_r_branch(int a, ...) { return 0; } static int gen_tlb_addr_r(int a, ...) { return 0; } @@ -3745,8 +3289,6 @@ static int do_tlb_w(int a, ...) { return 0; } static int do_tlb_w_branch(int a, ...) { return 0; } static int gen_tlb_addr_w(int a, ...) 
{ return 0; } -#endif // DISABLE_TLB - /* Special assem */ void shift_assemble_arm(int i,struct regstat *i_regs) @@ -3859,7 +3401,6 @@ void shift_assemble_arm(int i,struct regstat *i_regs) } } -#ifdef PCSX static void speculate_mov(int rs,int rt) { if(rt!=0) { @@ -3977,13 +3518,10 @@ static int get_ptr_mem_type(u_int a) return MTYPE_A000; return MTYPE_8000; } -#endif static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) { int jaddr,type=0; - -#ifdef PCSX int mr=rs1[i]; if(((smrv_strong|smrv_weak)>>mr)&1) { type=get_ptr_mem_type(smrv[mr]); @@ -4022,7 +3560,6 @@ static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) type=0; } } -#endif if(type==0) { @@ -4112,7 +3649,6 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) map=get_reg(i_regs->regmap,TLREG); assert(map>=0); reglist&=~(1<>11)&0x1f; //assert(t>=0); // Why does this happen? OOT is weird if(t>=0&&rt1[i]!=0) { -#ifdef MUPEN64 - emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0); - emit_movimm((source[i]>>11)&0x1f,1); - emit_writeword(0,(int)&PC); - emit_writebyte(1,(int)&(fake_pc.f.r.nrd)); - if(copr==9) { - emit_readword((int)&last_count,ECX); - emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - } - emit_call((int)MFC0); - emit_readword((int)&readmem_dword,t); -#else emit_readword((int)&reg_cop0+copr*4,t); -#endif } } else if(opcode2[i]==4) // MTC0 @@ -4231,16 +3750,7 @@ void cop0_assemble(int i,struct regstat *i_regs) signed char s=get_reg(i_regs->regmap,rs1[i]); char copr=(source[i]>>11)&0x1f; assert(s>=0); -#ifdef MUPEN64 - emit_writeword(s,(int)&readmem_dword); - wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); - emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0); - emit_movimm((source[i]>>11)&0x1f,1); - emit_writeword(0,(int)&PC); - emit_writebyte(1,(int)&(fake_pc.f.r.nrd)); -#else
wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); -#endif if(copr==9||copr==11||copr==12||copr==13) { emit_readword((int)&last_count,HOST_TEMPREG); emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc @@ -4253,7 +3763,6 @@ void cop0_assemble(int i,struct regstat *i_regs) // The interrupt must be taken immediately, because a subsequent // instruction might disable interrupts again. if(copr==12||copr==13) { -#ifdef PCSX if (is_delayslot) { // burn cycles to cause cc_interrupt, which will // reschedule next_interupt. Relies on CCREG from above. @@ -4267,7 +3776,6 @@ void cop0_assemble(int i,struct regstat *i_regs) emit_loadreg(rs1[i],s); return; } -#endif emit_movimm(start+i*4+4,HOST_TEMPREG); emit_writeword(HOST_TEMPREG,(int)&pcaddr); emit_movimm(0,HOST_TEMPREG); @@ -4275,16 +3783,12 @@ void cop0_assemble(int i,struct regstat *i_regs) } //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); //else -#ifdef PCSX if(s==HOST_CCREG) emit_loadreg(rs1[i],1); else if(s!=1) emit_mov(s,1); emit_movimm(copr,0); emit_call((int)pcsx_mtc0); -#else - emit_call((int)MTC0); -#endif if(copr==9||copr==11||copr==12||copr==13) { emit_readword((int)&Count,HOST_CCREG); emit_readword((int)&next_interupt,HOST_TEMPREG); @@ -4307,25 +3811,6 @@ void cop0_assemble(int i,struct regstat *i_regs) else { assert(opcode2[i]==0x10); -#ifndef DISABLE_TLB - if((source[i]&0x3f)==0x01) // TLBR - emit_call((int)TLBR); - if((source[i]&0x3f)==0x02) // TLBWI - emit_call((int)TLBWI_new); - if((source[i]&0x3f)==0x06) { // TLBWR - // The TLB entry written by TLBWR is dependent on the count, - // so update the cycle count - emit_readword((int)&last_count,ECX); - if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - emit_call((int)TLBWR_new); - } - if((source[i]&0x3f)==0x08) // TLBP - emit_call((int)TLBP); -#endif -#ifdef PCSX 
if((source[i]&0x3f)==0x10) // RFE { emit_readword((int)&Status,0); @@ -4334,15 +3819,6 @@ void cop0_assemble(int i,struct regstat *i_regs) emit_orrshr_imm(1,2,0); emit_writeword(0,(int)&Status); } -#else - if((source[i]&0x3f)==0x18) // ERET - { - int count=ccadj[i]; - if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here? - emit_jmp((int)jump_eret); - } -#endif } } @@ -4667,624 +4143,23 @@ void cop1_unusable(int i,struct regstat *i_regs) void cop1_assemble(int i,struct regstat *i_regs) { -#ifndef DISABLE_COP1 - // Check cop1 unusable - if(!cop1_usable) { - signed char rs=get_reg(i_regs->regmap,CSREG); - assert(rs>=0); - emit_testimm(rs,0x20000000); - int jaddr=(int)out; - emit_jeq(0); - add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - if (opcode2[i]==0) { // MFC1 - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0) { - emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl); - emit_readword_indexed(0,tl,tl); - } - } - else if (opcode2[i]==1) { // DMFC1 - signed char tl=get_reg(i_regs->regmap,rt1[i]); - signed char th=get_reg(i_regs->regmap,rt1[i]|64); - if(tl>=0) { - emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl); - if(th>=0) emit_readword_indexed(4,tl,th); - emit_readword_indexed(0,tl,tl); - } - } - else if (opcode2[i]==4) { // MTC1 - signed char sl=get_reg(i_regs->regmap,rs1[i]); - signed char temp=get_reg(i_regs->regmap,-1); - emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_writeword_indexed(sl,0,temp); - } - else if (opcode2[i]==5) { // DMTC1 - signed char sl=get_reg(i_regs->regmap,rs1[i]); - signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl; - signed char temp=get_reg(i_regs->regmap,-1); - emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp); - emit_writeword_indexed(sh,4,temp); - emit_writeword_indexed(sl,0,temp); - } - else if
(opcode2[i]==2) // CFC1 - { - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0) { - u_int copr=(source[i]>>11)&0x1f; - if(copr==0) emit_readword((int)&FCR0,tl); - if(copr==31) emit_readword((int)&FCR31,tl); - } - } - else if (opcode2[i]==6) // CTC1 - { - signed char sl=get_reg(i_regs->regmap,rs1[i]); - u_int copr=(source[i]>>11)&0x1f; - assert(sl>=0); - if(copr==31) - { - emit_writeword(sl,(int)&FCR31); - // Set the rounding mode - //FIXME - //char temp=get_reg(i_regs->regmap,-1); - //emit_andimm(sl,3,temp); - //emit_fldcw_indexed((int)&rounding_modes,temp); - } - } -#else cop1_unusable(i, i_regs); -#endif } void fconv_assemble_arm(int i,struct regstat *i_regs) { -#ifndef DISABLE_COP1 - signed char temp=get_reg(i_regs->regmap,-1); - assert(temp>=0); - // Check cop1 unusable - if(!cop1_usable) { - signed char rs=get_reg(i_regs->regmap,CSREG); - assert(rs>=0); - emit_testimm(rs,0x20000000); - int jaddr=(int)out; - emit_jeq(0); - add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - - #if(defined(__VFP_FP__) && !defined(__SOFTFP__)) - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s - emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,15); - emit_ftosizs(15,15); // float->int, truncate - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) - emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp); - emit_fsts(15,temp); - return; - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d - emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp); - emit_vldr(temp,7); - emit_ftosizd(7,13); // double->int, truncate - emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp); - emit_fsts(13,temp); - return; - } - - if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w - emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,13); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) - emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp); -
emit_fsitos(13,15); - emit_fsts(15,temp); - return; - } - if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,13); - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - emit_fsitod(13,7); - emit_vstr(7,temp); - return; - } - - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,13); - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - emit_fcvtds(13,7); - emit_vstr(7,temp); - return; - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_vldr(temp,7); - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - emit_fcvtsd(7,13); - emit_fsts(13,temp); - return; - } - #endif - - // C emulation code - - u_int hr,reglist=0; - for(hr=0;hrregmap[hr]>=0) reglist|=1<>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_s_w); - } - if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_d_w); - } - if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_s_l); - } - if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_d_l); - } - - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_d_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) { - 
emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_w_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_l_s); - } - - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_s_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_w_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)cvt_l_d); - } - - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)round_l_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)trunc_l_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)ceil_l_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)floor_l_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) { - 
emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)round_w_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)trunc_w_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)ceil_w_s); - } - if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)floor_w_s); - } - - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)round_l_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)trunc_l_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)ceil_l_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)floor_l_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)round_w_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { - 
emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)trunc_w_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)ceil_w_d); - } - if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - emit_call((int)floor_w_d); - } - - restore_regs(reglist); -#else cop1_unusable(i, i_regs); -#endif } #define fconv_assemble fconv_assemble_arm void fcomp_assemble(int i,struct regstat *i_regs) { -#ifndef DISABLE_COP1 - signed char fs=get_reg(i_regs->regmap,FSREG); - signed char temp=get_reg(i_regs->regmap,-1); - assert(temp>=0); - // Check cop1 unusable - if(!cop1_usable) { - signed char cs=get_reg(i_regs->regmap,CSREG); - assert(cs>=0); - emit_testimm(cs,0x20000000); - int jaddr=(int)out; - emit_jeq(0); - add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - - if((source[i]&0x3f)==0x30) { - emit_andimm(fs,~0x800000,fs); - return; - } - - if((source[i]&0x3e)==0x38) { - // sf/ngle - these should throw exceptions for NaNs - emit_andimm(fs,~0x800000,fs); - return; - } - - #if(defined(__VFP_FP__) && !defined(__SOFTFP__)) - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG); - emit_orimm(fs,0x800000,fs); - emit_flds(temp,14); - emit_flds(HOST_TEMPREG,15); - emit_fcmps(14,15); - emit_fmstat(); - if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s - if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s - if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s - if((source[i]&0x3f)==0x34) 
emit_biccs_imm(fs,0x800000,fs); // c_olt_s - if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s - if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s - if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s - if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s - if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s - if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s - if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s - if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s - if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s - return; - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG); - emit_orimm(fs,0x800000,fs); - emit_vldr(temp,6); - emit_vldr(HOST_TEMPREG,7); - emit_fcmpd(6,7); - emit_fmstat(); - if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d - if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d - if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d - if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d - if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d - if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d - if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d - if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d - if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d - if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d - if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d - if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // 
c_le_d - if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d - return; - } - #endif - - // C only - - u_int hr,reglist=0; - for(hr=0;hrregmap[hr]>=0) reglist|=1<>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG); - if((source[i]&0x3f)==0x30) emit_call((int)c_f_s); - if((source[i]&0x3f)==0x31) emit_call((int)c_un_s); - if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s); - if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s); - if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s); - if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s); - if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s); - if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s); - if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s); - if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s); - if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s); - if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s); - if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s); - if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s); - if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s); - if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s); - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],ARG2_REG); - if((source[i]&0x3f)==0x30) emit_call((int)c_f_d); - if((source[i]&0x3f)==0x31) emit_call((int)c_un_d); - if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d); - if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d); - if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d); - if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d); - if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d); - if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d); - if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d); - if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d); - if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d); - if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d); - if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d); - 
if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d); - if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d); - if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d); - } - restore_regs(reglist); - emit_loadreg(FSREG,fs); -#else cop1_unusable(i, i_regs); -#endif } void float_assemble(int i,struct regstat *i_regs) { -#ifndef DISABLE_COP1 - signed char temp=get_reg(i_regs->regmap,-1); - assert(temp>=0); - // Check cop1 unusable - if(!cop1_usable) { - signed char cs=get_reg(i_regs->regmap,CSREG); - assert(cs>=0); - emit_testimm(cs,0x20000000); - int jaddr=(int)out; - emit_jeq(0); - add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - - #if(defined(__VFP_FP__) && !defined(__SOFTFP__)) - if((source[i]&0x3f)==6) // mov - { - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG); - emit_readword_indexed(0,temp,temp); - emit_writeword_indexed(temp,0,HOST_TEMPREG); - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG); - emit_vldr(temp,7); - emit_vstr(7,HOST_TEMPREG); - } - } - return; - } - - if((source[i]&0x3f)>3) - { - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - emit_flds(temp,15); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - } - if((source[i]&0x3f)==4) // sqrt - emit_fsqrts(15,15); - if((source[i]&0x3f)==5) // abs - emit_fabss(15,15); - if((source[i]&0x3f)==7) // neg - emit_fnegs(15,15); - emit_fsts(15,temp); - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - emit_vldr(temp,7); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - } - if((source[i]&0x3f)==4) 
// sqrt - emit_fsqrtd(7,7); - if((source[i]&0x3f)==5) // abs - emit_fabsd(7,7); - if((source[i]&0x3f)==7) // neg - emit_fnegd(7,7); - emit_vstr(7,temp); - } - return; - } - if((source[i]&0x3f)<4) - { - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp); - } - if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp); - } - if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) { - if(opcode2[i]==0x10) { - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG); - emit_flds(temp,15); - emit_flds(HOST_TEMPREG,13); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - } - } - if((source[i]&0x3f)==0) emit_fadds(15,13,15); - if((source[i]&0x3f)==1) emit_fsubs(15,13,15); - if((source[i]&0x3f)==2) emit_fmuls(15,13,15); - if((source[i]&0x3f)==3) emit_fdivs(15,13,15); - if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) { - emit_fsts(15,HOST_TEMPREG); - }else{ - emit_fsts(15,temp); - } - } - else if(opcode2[i]==0x11) { - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG); - emit_vldr(temp,7); - emit_vldr(HOST_TEMPREG,6); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - } - } - if((source[i]&0x3f)==0) emit_faddd(7,6,7); - if((source[i]&0x3f)==1) emit_fsubd(7,6,7); - if((source[i]&0x3f)==2) emit_fmuld(7,6,7); - if((source[i]&0x3f)==3) emit_fdivd(7,6,7); - if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) { - emit_vstr(7,HOST_TEMPREG); - }else{ - emit_vstr(7,temp); - } - } - } - else { - if(opcode2[i]==0x10) { - emit_flds(temp,15); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp); - } - if((source[i]&0x3f)==0) emit_fadds(15,15,15); - if((source[i]&0x3f)==1) emit_fsubs(15,15,15); - 
if((source[i]&0x3f)==2) emit_fmuls(15,15,15); - if((source[i]&0x3f)==3) emit_fdivs(15,15,15); - emit_fsts(15,temp); - } - else if(opcode2[i]==0x11) { - emit_vldr(temp,7); - if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) { - emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp); - } - if((source[i]&0x3f)==0) emit_faddd(7,7,7); - if((source[i]&0x3f)==1) emit_fsubd(7,7,7); - if((source[i]&0x3f)==2) emit_fmuld(7,7,7); - if((source[i]&0x3f)==3) emit_fdivd(7,7,7); - emit_vstr(7,temp); - } - } - return; - } - #endif - - u_int hr,reglist=0; - for(hr=0;hrregmap[hr]>=0) reglist|=1<>11)&0x1f],ARG1_REG); - if((source[i]&0x3f)<4) { - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG); - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG); - }else{ - emit_readword((int)®_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG); - } - switch(source[i]&0x3f) - { - case 0x00: emit_call((int)add_s);break; - case 0x01: emit_call((int)sub_s);break; - case 0x02: emit_call((int)mul_s);break; - case 0x03: emit_call((int)div_s);break; - case 0x04: emit_call((int)sqrt_s);break; - case 0x05: emit_call((int)abs_s);break; - case 0x06: emit_call((int)mov_s);break; - case 0x07: emit_call((int)neg_s);break; - } - restore_regs(reglist); - } - if(opcode2[i]==0x11) { // Double precision - save_regs(reglist); - emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],ARG1_REG); - if((source[i]&0x3f)<4) { - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],ARG2_REG); - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG); - }else{ - emit_readword((int)®_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG); - } - switch(source[i]&0x3f) - { - case 0x00: emit_call((int)add_d);break; - case 0x01: emit_call((int)sub_d);break; - case 0x02: emit_call((int)mul_d);break; - case 0x03: emit_call((int)div_d);break; - case 0x04: emit_call((int)sqrt_d);break; - case 0x05: emit_call((int)abs_d);break; - case 0x06: emit_call((int)mov_d);break; - case 0x07: emit_call((int)neg_d);break; - 
} - restore_regs(reglist); - } -#else cop1_unusable(i, i_regs); -#endif } void multdiv_assemble_arm(int i,struct regstat *i_regs) @@ -5397,183 +4272,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) } } else // 64-bit -#ifndef FORCE32 - { - if(opcode2[i]==0x1C) // DMULT - { - assert(opcode2[i]!=0x1C); - signed char m1h=get_reg(i_regs->regmap,rs1[i]|64); - signed char m1l=get_reg(i_regs->regmap,rs1[i]); - signed char m2h=get_reg(i_regs->regmap,rs2[i]|64); - signed char m2l=get_reg(i_regs->regmap,rs2[i]); - assert(m1h>=0); - assert(m2h>=0); - assert(m1l>=0); - assert(m2l>=0); - emit_pushreg(m2h); - emit_pushreg(m2l); - emit_pushreg(m1h); - emit_pushreg(m1l); - emit_call((int)&mult64); - emit_popreg(m1l); - emit_popreg(m1h); - emit_popreg(m2l); - emit_popreg(m2h); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - if(hih>=0) emit_loadreg(HIREG|64,hih); - if(hil>=0) emit_loadreg(HIREG,hil); - signed char loh=get_reg(i_regs->regmap,LOREG|64); - signed char lol=get_reg(i_regs->regmap,LOREG); - if(loh>=0) emit_loadreg(LOREG|64,loh); - if(lol>=0) emit_loadreg(LOREG,lol); - } - if(opcode2[i]==0x1D) // DMULTU - { - signed char m1h=get_reg(i_regs->regmap,rs1[i]|64); - signed char m1l=get_reg(i_regs->regmap,rs1[i]); - signed char m2h=get_reg(i_regs->regmap,rs2[i]|64); - signed char m2l=get_reg(i_regs->regmap,rs2[i]); - assert(m1h>=0); - assert(m2h>=0); - assert(m1l>=0); - assert(m2l>=0); - save_regs(CALLER_SAVE_REGS); - if(m1l!=0) emit_mov(m1l,0); - if(m1h==0) emit_readword((int)&dynarec_local,1); - else if(m1h>1) emit_mov(m1h,1); - if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2); - else if(m2l>2) emit_mov(m2l,2); - if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3); - else if(m2h>3) emit_mov(m2h,3); - emit_call((int)&multu64); - restore_regs(CALLER_SAVE_REGS); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - signed char 
loh=get_reg(i_regs->regmap,LOREG|64); - signed char lol=get_reg(i_regs->regmap,LOREG); - /*signed char temp=get_reg(i_regs->regmap,-1); - signed char rh=get_reg(i_regs->regmap,HIREG|64); - signed char rl=get_reg(i_regs->regmap,HIREG); - assert(m1h>=0); - assert(m2h>=0); - assert(m1l>=0); - assert(m2l>=0); - assert(temp>=0); - //emit_mov(m1l,EAX); - //emit_mul(m2l); - emit_umull(rl,rh,m1l,m2l); - emit_storereg(LOREG,rl); - emit_mov(rh,temp); - //emit_mov(m1h,EAX); - //emit_mul(m2l); - emit_umull(rl,rh,m1h,m2l); - emit_adds(rl,temp,temp); - emit_adcimm(rh,0,rh); - emit_storereg(HIREG,rh); - //emit_mov(m2h,EAX); - //emit_mul(m1l); - emit_umull(rl,rh,m1l,m2h); - emit_adds(rl,temp,temp); - emit_adcimm(rh,0,rh); - emit_storereg(LOREG|64,temp); - emit_mov(rh,temp); - //emit_mov(m2h,EAX); - //emit_mul(m1h); - emit_umull(rl,rh,m1h,m2h); - emit_adds(rl,temp,rl); - emit_loadreg(HIREG,temp); - emit_adcimm(rh,0,rh); - emit_adds(rl,temp,rl); - emit_adcimm(rh,0,rh); - // DEBUG - /* - emit_pushreg(m2h); - emit_pushreg(m2l); - emit_pushreg(m1h); - emit_pushreg(m1l); - emit_call((int)&multu64); - emit_popreg(m1l); - emit_popreg(m1h); - emit_popreg(m2l); - emit_popreg(m2h); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG - if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG - */ - // Shouldn't be necessary - //char loh=get_reg(i_regs->regmap,LOREG|64); - //char lol=get_reg(i_regs->regmap,LOREG); - //if(loh>=0) emit_loadreg(LOREG|64,loh); - //if(lol>=0) emit_loadreg(LOREG,lol); - } - if(opcode2[i]==0x1E) // DDIV - { - signed char d1h=get_reg(i_regs->regmap,rs1[i]|64); - signed char d1l=get_reg(i_regs->regmap,rs1[i]); - signed char d2h=get_reg(i_regs->regmap,rs2[i]|64); - signed char d2l=get_reg(i_regs->regmap,rs2[i]); - assert(d1h>=0); - assert(d2h>=0); - assert(d1l>=0); - assert(d2l>=0); - save_regs(CALLER_SAVE_REGS); - if(d1l!=0) emit_mov(d1l,0); - if(d1h==0) 
emit_readword((int)&dynarec_local,1); - else if(d1h>1) emit_mov(d1h,1); - if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2); - else if(d2l>2) emit_mov(d2l,2); - if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3); - else if(d2h>3) emit_mov(d2h,3); - emit_call((int)&div64); - restore_regs(CALLER_SAVE_REGS); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - signed char loh=get_reg(i_regs->regmap,LOREG|64); - signed char lol=get_reg(i_regs->regmap,LOREG); - if(hih>=0) emit_loadreg(HIREG|64,hih); - if(hil>=0) emit_loadreg(HIREG,hil); - if(loh>=0) emit_loadreg(LOREG|64,loh); - if(lol>=0) emit_loadreg(LOREG,lol); - } - if(opcode2[i]==0x1F) // DDIVU - { - //u_int hr,reglist=0; - //for(hr=0;hrregmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<regmap,rs1[i]|64); - signed char d1l=get_reg(i_regs->regmap,rs1[i]); - signed char d2h=get_reg(i_regs->regmap,rs2[i]|64); - signed char d2l=get_reg(i_regs->regmap,rs2[i]); - assert(d1h>=0); - assert(d2h>=0); - assert(d1l>=0); - assert(d2l>=0); - save_regs(CALLER_SAVE_REGS); - if(d1l!=0) emit_mov(d1l,0); - if(d1h==0) emit_readword((int)&dynarec_local,1); - else if(d1h>1) emit_mov(d1h,1); - if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2); - else if(d2l>2) emit_mov(d2l,2); - if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3); - else if(d2h>3) emit_mov(d2h,3); - emit_call((int)&divu64); - restore_regs(CALLER_SAVE_REGS); - signed char hih=get_reg(i_regs->regmap,HIREG|64); - signed char hil=get_reg(i_regs->regmap,HIREG); - signed char loh=get_reg(i_regs->regmap,LOREG|64); - signed char lol=get_reg(i_regs->regmap,LOREG); - if(hih>=0) emit_loadreg(HIREG|64,hih); - if(hil>=0) emit_loadreg(HIREG,hil); - if(loh>=0) emit_loadreg(LOREG|64,loh); - if(lol>=0) emit_loadreg(LOREG,lol); - } - } -#else - assert(0); -#endif + assert(0); } else { @@ -5634,32 +4333,6 @@ void do_miniht_insert(u_int return_address,int rt,int temp) { #endif } -// Sign-extend to 64 bits and write out 
upper half of a register -// This is useful where we have a 32-bit value in a register, and want to -// keep it in a 32-bit register, but can't guarantee that it won't be read -// as a 64-bit value later. -void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu) -{ -#ifndef FORCE32 - if(is32_pre==is32) return; - int hr,reg; - for(hr=0;hr=0) { - if((dirty>>hr)&1) { - if( ((is32_pre&~is32&~uu)>>reg)&1 ) { - emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(reg|64,HOST_TEMPREG); - } - } - } - //} - } - } -#endif -} - void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) { //if(dirty_pre==dirty) return; @@ -5777,13 +4450,7 @@ void do_clear_cache() } // CPU-architecture-specific initialization -void arch_init() { -#ifndef DISABLE_COP1 - rounding_modes[0]=0x0<<22; // round - rounding_modes[1]=0x3<<22; // trunc - rounding_modes[2]=0x1<<22; // ceil - rounding_modes[3]=0x2<<22; // floor -#endif +static void arch_init() { } // vim:shiftwidth=2:expandtab diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index 2254638..2d10ac7 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -9,11 +9,6 @@ #define USE_MINI_HT 1 //#define REG_PREFETCH 1 #define HAVE_CONDITIONAL_CALL 1 -#define DISABLE_TLB 1 -//#define MUPEN64 -#define FORCE32 1 -#define DISABLE_COP1 1 -#define PCSX 1 #define RAM_SIZE 0x200000 #ifndef __ARM_ARCH_7A__ @@ -25,11 +20,7 @@ #define BASE_ADDR_FIXED 0 #endif -#ifdef FORCE32 #define REG_SHIFT 2 -#else -#define REG_SHIFT 3 -#endif /* ARM calling convention: r0-r3, r12: caller-save diff --git a/libpcsxcore/new_dynarec/fpu.c b/libpcsxcore/new_dynarec/fpu.c deleted file mode 100644 index a189a53..0000000 --- a/libpcsxcore/new_dynarec/fpu.c +++ /dev/null @@ -1,394 +0,0 @@ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Mupen64plus - 
fpu.c * - * Copyright (C) 2010 Ari64 * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#include - -extern int FCR0, FCR31; - -void cvt_s_w(int *source,float *dest) -{ - *dest = *source; -} -void cvt_d_w(int *source,double *dest) -{ - *dest = *source; -} -void cvt_s_l(long long *source,float *dest) -{ - *dest = *source; -} -void cvt_d_l(long long *source,double *dest) -{ - *dest = *source; -} -void cvt_d_s(float *source,double *dest) -{ - *dest = *source; -} -void cvt_s_d(double *source,float *dest) -{ - *dest = *source; -} - -void round_l_s(float *source,long long *dest) -{ - *dest = roundf(*source); -} -void round_w_s(float *source,int *dest) -{ - *dest = roundf(*source); -} -void trunc_l_s(float *source,long long *dest) -{ - *dest = truncf(*source); -} -void trunc_w_s(float *source,int *dest) -{ - *dest = truncf(*source); -} -void ceil_l_s(float *source,long long *dest) -{ - *dest = ceilf(*source); -} -void ceil_w_s(float *source,int *dest) -{ - *dest = ceilf(*source); -} -void floor_l_s(float *source,long long *dest) -{ - *dest = floorf(*source); -} -void floor_w_s(float *source,int *dest) -{ - *dest = floorf(*source); -} - -void round_l_d(double *source,long long *dest) -{ - 
*dest = round(*source); -} -void round_w_d(double *source,int *dest) -{ - *dest = round(*source); -} -void trunc_l_d(double *source,long long *dest) -{ - *dest = trunc(*source); -} -void trunc_w_d(double *source,int *dest) -{ - *dest = trunc(*source); -} -void ceil_l_d(double *source,long long *dest) -{ - *dest = ceil(*source); -} -void ceil_w_d(double *source,int *dest) -{ - *dest = ceil(*source); -} -void floor_l_d(double *source,long long *dest) -{ - *dest = floor(*source); -} -void floor_w_d(double *source,int *dest) -{ - *dest = floor(*source); -} - -void cvt_w_s(float *source,int *dest) -{ - switch(FCR31&3) - { - case 0: round_w_s(source,dest);return; - case 1: trunc_w_s(source,dest);return; - case 2: ceil_w_s(source,dest);return; - case 3: floor_w_s(source,dest);return; - } -} -void cvt_w_d(double *source,int *dest) -{ - switch(FCR31&3) - { - case 0: round_w_d(source,dest);return; - case 1: trunc_w_d(source,dest);return; - case 2: ceil_w_d(source,dest);return; - case 3: floor_w_d(source,dest);return; - } -} -void cvt_l_s(float *source,long long *dest) -{ - switch(FCR31&3) - { - case 0: round_l_s(source,dest);return; - case 1: trunc_l_s(source,dest);return; - case 2: ceil_l_s(source,dest);return; - case 3: floor_l_s(source,dest);return; - } -} -void cvt_l_d(double *source,long long *dest) -{ - switch(FCR31&3) - { - case 0: round_l_d(source,dest);return; - case 1: trunc_l_d(source,dest);return; - case 2: ceil_l_d(source,dest);return; - case 3: floor_l_d(source,dest);return; - } -} - -void c_f_s() -{ - FCR31 &= ~0x800000; -} -void c_un_s(float *source,float *target) -{ - FCR31=(isnan(*source) || isnan(*target)) ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_eq_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source==*target ? 
FCR31|0x800000 : FCR31&~0x800000; -} -void c_ueq_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_olt_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ult_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_ole_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ule_s(float *source,float *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_sf_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31&=~0x800000; -} -void c_ngle_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31&=~0x800000; -} - -void c_seq_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ngl_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_lt_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_nge_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<*target ? 
FCR31|0x800000 : FCR31&~0x800000; -} - -void c_le_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ngt_s(float *source,float *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_f_d() -{ - FCR31 &= ~0x800000; -} -void c_un_d(double *source,double *target) -{ - FCR31=(isnan(*source) || isnan(*target)) ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_eq_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ueq_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_olt_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ult_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_ole_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31&=~0x800000;return;} - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ule_d(double *source,double *target) -{ - if (isnan(*source) || isnan(*target)) {FCR31|=0x800000;return;} - FCR31 = *source<=*target ? 
FCR31|0x800000 : FCR31&~0x800000; -} - -void c_sf_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31&=~0x800000; -} -void c_ngle_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31&=~0x800000; -} - -void c_seq_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ngl_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source==*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_lt_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_nge_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<*target ? FCR31|0x800000 : FCR31&~0x800000; -} - -void c_le_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<=*target ? FCR31|0x800000 : FCR31&~0x800000; -} -void c_ngt_d(double *source,double *target) -{ - //if (isnan(*source) || isnan(*target)) // FIXME - exception - FCR31 = *source<=*target ? 
FCR31|0x800000 : FCR31&~0x800000; -} - - -void add_s(float *source1,float *source2,float *target) -{ - *target=(*source1)+(*source2); -} -void sub_s(float *source1,float *source2,float *target) -{ - *target=(*source1)-(*source2); -} -void mul_s(float *source1,float *source2,float *target) -{ - *target=(*source1)*(*source2); -} -void div_s(float *source1,float *source2,float *target) -{ - *target=(*source1)/(*source2); -} -void sqrt_s(float *source,float *target) -{ - *target=sqrtf(*source); -} -void abs_s(float *source,float *target) -{ - *target=fabsf(*source); -} -void mov_s(float *source,float *target) -{ - *target=*source; -} -void neg_s(float *source,float *target) -{ - *target=-(*source); -} -void add_d(double *source1,double *source2,double *target) -{ - *target=(*source1)+(*source2); -} -void sub_d(double *source1,double *source2,double *target) -{ - *target=(*source1)-(*source2); -} -void mul_d(double *source1,double *source2,double *target) -{ - *target=(*source1)*(*source2); -} -void div_d(double *source1,double *source2,double *target) -{ - *target=(*source1)/(*source2); -} -void sqrt_d(double *source,double *target) -{ - *target=sqrt(*source); -} -void abs_d(double *source,double *target) -{ - *target=fabs(*source); -} -void mov_d(double *source,double *target) -{ - *target=*source; -} -void neg_d(double *source,double *target) -{ - *target=-(*source); -} - diff --git a/libpcsxcore/new_dynarec/fpu.h b/libpcsxcore/new_dynarec/fpu.h deleted file mode 100644 index 881ddbe..0000000 --- a/libpcsxcore/new_dynarec/fpu.h +++ /dev/null @@ -1,74 +0,0 @@ -void cvt_s_w(int *source,float *dest); -void cvt_d_w(int *source,double *dest); -void cvt_s_l(long long *source,float *dest); -void cvt_d_l(long long *source,double *dest); -void cvt_w_s(float *source,int *dest); -void cvt_w_d(double *source,int *dest); -void cvt_l_s(float *source,long long *dest); -void cvt_l_d(double *source,long long *dest); -void cvt_d_s(float *source,double *dest); -void cvt_s_d(double 
*source,float *dest); -void round_l_s(float *source,long long *dest); -void round_w_s(float *source,int *dest); -void trunc_l_s(float *source,long long *dest); -void trunc_w_s(float *source,int *dest); -void ceil_l_s(float *source,long long *dest); -void ceil_w_s(float *source,int *dest); -void floor_l_s(float *source,long long *dest); -void floor_w_s(float *source,int *dest); -void round_l_d(double *source,long long *dest); -void round_w_d(double *source,int *dest); -void trunc_l_d(double *source,long long *dest); -void trunc_w_d(double *source,int *dest); -void ceil_l_d(double *source,long long *dest); -void ceil_w_d(double *source,int *dest); -void floor_l_d(double *source,long long *dest); -void floor_w_d(double *source,int *dest); -void c_f_s(); -void c_un_s(float *source,float *target); -void c_eq_s(float *source,float *target); -void c_ueq_s(float *source,float *target); -void c_olt_s(float *source,float *target); -void c_ult_s(float *source,float *target); -void c_ole_s(float *source,float *target); -void c_ule_s(float *source,float *target); -void c_sf_s(float *source,float *target); -void c_ngle_s(float *source,float *target); -void c_seq_s(float *source,float *target); -void c_ngl_s(float *source,float *target); -void c_lt_s(float *source,float *target); -void c_nge_s(float *source,float *target); -void c_le_s(float *source,float *target); -void c_ngt_s(float *source,float *target); -void c_f_d(); -void c_un_d(double *source,double *target); -void c_eq_d(double *source,double *target); -void c_ueq_d(double *source,double *target); -void c_olt_d(double *source,double *target); -void c_ult_d(double *source,double *target); -void c_ole_d(double *source,double *target); -void c_ule_d(double *source,double *target); -void c_sf_d(double *source,double *target); -void c_ngle_d(double *source,double *target); -void c_seq_d(double *source,double *target); -void c_ngl_d(double *source,double *target); -void c_lt_d(double *source,double *target); -void 
c_nge_d(double *source,double *target); -void c_le_d(double *source,double *target); -void c_ngt_d(double *source,double *target); -void add_s(float *source1,float *source2,float *target); -void sub_s(float *source1,float *source2,float *target); -void mul_s(float *source1,float *source2,float *target); -void div_s(float *source1,float *source2,float *target); -void sqrt_s(float *source,float *target); -void abs_s(float *source,float *target); -void mov_s(float *source,float *target); -void neg_s(float *source,float *target); -void add_d(double *source1,double *source2,double *target); -void sub_d(double *source1,double *source2,double *target); -void mul_d(double *source1,double *source2,double *target); -void div_d(double *source1,double *source2,double *target); -void sqrt_d(double *source,double *target); -void abs_d(double *source,double *target); -void mov_d(double *source,double *target); -void neg_d(double *source,double *target); diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 5120df0..f1034e6 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -147,11 +147,7 @@ struct ll_entry char shadow[1048576] __attribute__((aligned(16))); void *copy; int expirep; -#ifndef PCSX - u_int using_tlb; -#else static const u_int using_tlb=0; -#endif int new_dynarec_did_compile; int new_dynarec_hacks; u_int stop_after_jal; @@ -303,87 +299,21 @@ static int CLOCK_ADJUST(int x) return (x * cycle_multiplier + s * 50) / 100; } -static void tlb_hacks() -{ -#ifndef DISABLE_TLB - // Goldeneye hack - if (strncmp((char *) ROM_HEADER->nom, "GOLDENEYE",9) == 0) - { - u_int addr; - int n; - switch (ROM_HEADER->Country_code&0xFF) - { - case 0x45: // U - addr=0x34b30; - break; - case 0x4A: // J - addr=0x34b70; - break; - case 0x50: // E - addr=0x329f0; - break; - default: - // Unknown country code - addr=0; - break; - } - u_int rom_addr=(u_int)rom; - #ifdef ROM_COPY - // Since memory_map is 
32-bit, on 64-bit systems the rom needs to be - // in the lower 4G of memory to use this hack. Copy it if necessary. - if((void *)rom>(void *)0xffffffff) { - munmap(ROM_COPY, 67108864); - if(mmap(ROM_COPY, 12582912, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0) <= 0) {printf("mmap() failed\n");} - memcpy(ROM_COPY,rom,12582912); - rom_addr=(u_int)ROM_COPY; - } - #endif - if(addr) { - for(n=0x7F000;n<0x80000;n++) { - memory_map[n]=(((u_int)(rom_addr+addr-0x7F000000))>>2)|0x40000000; - } - } - } -#endif -} - static u_int get_page(u_int vaddr) { -#ifndef PCSX - u_int page=(vaddr^0x80000000)>>12; -#else u_int page=vaddr&~0xe0000000; if (page < 0x1000000) page &= ~0x0e00000; // RAM mirrors page>>=12; -#endif -#ifndef DISABLE_TLB - if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12; -#endif if(page>2048) page=2048+(page&2047); return page; } -#ifndef PCSX -static u_int get_vpage(u_int vaddr) -{ - u_int vpage=(vaddr^0x80000000)>>12; -#ifndef DISABLE_TLB - if(vpage>262143&&tlb_LUT_r[vaddr>>12]) vpage&=2047; // jump_dirty uses a hash of the virtual address instead -#endif - if(vpage>2048) vpage=2048+(vpage&2047); - return vpage; -} -#else // no virtual mem in PCSX static u_int get_vpage(u_int vaddr) { return get_page(vaddr); } -#endif // Get address from virtual address // This is called from the recompiled JR/JALR instructions @@ -416,16 +346,7 @@ void *get_addr(u_int vaddr) //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; inv_code_start=inv_code_end=~0; -#ifndef DISABLE_TLB - memory_map[vaddr>>12]|=0x40000000; -#endif if(vpage<2048) { -#ifndef DISABLE_TLB - if(tlb_LUT_r[vaddr>>12]) { - invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0; - memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000; - } -#endif restore_candidate[vpage>>3]|=1<<(vpage&7); } else restore_candidate[page>>3]|=1<<(page&7); @@ -802,119 +723,6 @@ void alloc_all(struct regstat *cur,int i) } } 
-#ifndef FORCE32 -void div64(int64_t dividend,int64_t divisor) -{ - lo=dividend/divisor; - hi=dividend%divisor; - //printf("TRACE: ddiv %8x%8x %8x%8x\n" ,(int)reg[HIREG],(int)(reg[HIREG]>>32) - // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); -} -void divu64(uint64_t dividend,uint64_t divisor) -{ - lo=dividend/divisor; - hi=dividend%divisor; - //printf("TRACE: ddivu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32) - // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); -} - -void mult64(uint64_t m1,uint64_t m2) -{ - unsigned long long int op1, op2, op3, op4; - unsigned long long int result1, result2, result3, result4; - unsigned long long int temp1, temp2, temp3, temp4; - int sign = 0; - - if (m1 < 0) - { - op2 = -m1; - sign = 1 - sign; - } - else op2 = m1; - if (m2 < 0) - { - op4 = -m2; - sign = 1 - sign; - } - else op4 = m2; - - op1 = op2 & 0xFFFFFFFF; - op2 = (op2 >> 32) & 0xFFFFFFFF; - op3 = op4 & 0xFFFFFFFF; - op4 = (op4 >> 32) & 0xFFFFFFFF; - - temp1 = op1 * op3; - temp2 = (temp1 >> 32) + op1 * op4; - temp3 = op2 * op3; - temp4 = (temp3 >> 32) + op2 * op4; - - result1 = temp1 & 0xFFFFFFFF; - result2 = temp2 + (temp3 & 0xFFFFFFFF); - result3 = (result2 >> 32) + temp4; - result4 = (result3 >> 32); - - lo = result1 | (result2 << 32); - hi = (result3 & 0xFFFFFFFF) | (result4 << 32); - if (sign) - { - hi = ~hi; - if (!lo) hi++; - else lo = ~lo + 1; - } -} - -void multu64(uint64_t m1,uint64_t m2) -{ - unsigned long long int op1, op2, op3, op4; - unsigned long long int result1, result2, result3, result4; - unsigned long long int temp1, temp2, temp3, temp4; - - op1 = m1 & 0xFFFFFFFF; - op2 = (m1 >> 32) & 0xFFFFFFFF; - op3 = m2 & 0xFFFFFFFF; - op4 = (m2 >> 32) & 0xFFFFFFFF; - - temp1 = op1 * op3; - temp2 = (temp1 >> 32) + op1 * op4; - temp3 = op2 * op3; - temp4 = (temp3 >> 32) + op2 * op4; - - result1 = temp1 & 0xFFFFFFFF; - result2 = temp2 + (temp3 & 0xFFFFFFFF); - result3 = (result2 >> 32) + temp4; - result4 = (result3 >> 32); - - lo = result1 | (result2 << 32); - hi = 
(result3 & 0xFFFFFFFF) | (result4 << 32); - - //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32) - // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); -} - -uint64_t ldl_merge(uint64_t original,uint64_t loaded,u_int bits) -{ - if(bits) { - original<<=64-bits; - original>>=64-bits; - loaded<<=bits; - original|=loaded; - } - else original=loaded; - return original; -} -uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits) -{ - if(bits^56) { - original>>=64-(bits^56); - original<<=64-(bits^56); - loaded>>=bits^56; - original|=loaded; - } - else original=loaded; - return original; -} -#endif - #ifdef __i386__ #include "assem_x86.c" #endif @@ -1107,18 +915,6 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) // Don't trap writes invalid_code[block]=1; -#ifndef DISABLE_TLB - // If there is a valid TLB entry for this page, remove write protect - if(tlb_LUT_w[block]) { - assert(tlb_LUT_r[block]==tlb_LUT_w[block]); - // CHECK: Is this right? 
- memory_map[block]=((tlb_LUT_w[block]&0xFFFFF000)-(block<<12)+(unsigned int)rdram-0x80000000)>>2; - u_int real_block=tlb_LUT_w[block]>>12; - invalid_code[real_block]=1; - if(real_block>=0x80000&&real_block<0x80800) memory_map[real_block]=((u_int)rdram-0x80000000)>>2; - } - else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2; -#endif #ifdef USE_MINI_HT memset(mini_ht,-1,sizeof(mini_ht)); @@ -1147,14 +943,6 @@ void invalidate_block(u_int block) if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; } } -#ifndef DISABLE_TLB - if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { - if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { - if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; - if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; - } - } -#endif } head=head->next; } @@ -1163,7 +951,6 @@ void invalidate_block(u_int block) void invalidate_addr(u_int addr) { -#ifdef PCSX //static int rhits; // this check is done by the caller //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } @@ -1217,7 +1004,6 @@ void invalidate_addr(u_int addr) return; } } -#endif invalidate_block(addr>>12); } @@ -1239,19 +1025,6 @@ void invalidate_all_pages() #ifdef USE_MINI_HT memset(mini_ht,-1,sizeof(mini_ht)); #endif - #ifndef DISABLE_TLB - // TLB - for(page=0;page<0x100000;page++) { - if(tlb_LUT_r[page]) { - memory_map[page]=((tlb_LUT_r[page]&0xFFFFF000)-(page<<12)+(unsigned int)rdram-0x80000000)>>2; - if(!tlb_LUT_w[page]||!invalid_code[page]) - memory_map[page]|=0x40000000; // Write protect - } - else memory_map[page]=-1; - if(page==0x80000) page=0xC0000; - } - tlb_hacks(); - #endif } // Add an entry to jump_out after making a link @@ -1290,13 +1063,6 @@ void clean_blocks(u_int page) 
inv|=invalid_code[i]; } } -#ifndef DISABLE_TLB - if((signed int)head->vaddr>=(signed int)0xC0000000) { - u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2)); - //printf("addr=%x start=%x end=%x\n",addr,start,end); - if(addr=end) inv=1; - } -#endif else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) { inv=1; } @@ -1304,9 +1070,6 @@ void clean_blocks(u_int page) void * clean_addr=(void *)get_clean_addr((int)head->addr); if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { u_int ppage=page; -#ifndef DISABLE_TLB - if(page<2048&&tlb_LUT_r[head->vaddr>>12]) ppage=(tlb_LUT_r[head->vaddr>>12]^0x80000000)>>12; -#endif inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr); //printf("page=%x, addr=%x\n",page,head->vaddr); //assert(head->vaddr>>12==(page|0x80000)); @@ -2044,12 +1807,6 @@ void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32 if((dirty>>hr)&1) { if(regmap[hr]<64) { emit_storereg(r,hr); -#ifndef FORCE32 - if((is32>>regmap[hr])&1) { - emit_sarimm(hr,31,hr); - emit_storereg(r|64,hr); - } -#endif }else{ emit_storereg(r|64,hr); } @@ -2087,12 +1844,6 @@ void rlist() for(i=0;i<32;i++) printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]); printf("\n"); -#ifndef DISABLE_COP1 - printf("TRACE: "); - for(i=0;i<32;i++) - printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i])); - printf("\n"); -#endif } void enabletrace() @@ -2818,7 +2569,6 @@ void load_assemble(int i,struct regstat *i_regs) //printf("load_assemble: c=%d\n",c); //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset); // FIXME: Even if the load is a NOP, we should check for pagefaults... 
-#ifdef PCSX if(tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80) ||rt1[i]==0) { // could be FIFO, must perform the read @@ -2827,7 +2577,6 @@ void load_assemble(int i,struct regstat *i_regs) tl=get_reg(i_regs->regmap,-1); assert(tl>=0); } -#endif if(offset||s<0||c) addr=tl; else addr=s; //if(tl<0) tl=get_reg(i_regs->regmap,-1); @@ -3138,32 +2887,7 @@ void store_assemble(int i,struct regstat *i_regs) else addr=s; if(!using_tlb) { if(!c) { - #ifndef PCSX - #ifdef R29_HACK - // Strmnnrmn's speed hack - if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) - #endif - emit_cmpimm(addr,RAM_SIZE); - #ifdef DESTRUCTIVE_SHIFT - if(s==addr) emit_mov(s,temp); - #endif - #ifdef R29_HACK - memtarget=1; - if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) - #endif - { - jaddr=(int)out; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Hint to branch predictor that the branch is unlikely to be taken - if(rs1[i]>=28) - emit_jno_unlikely(0); - else - #endif - emit_jno(0); - } - #else - jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); - #endif + jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override); } else if(ram_offset&&memtarget) { emit_addimm(addr,ram_offset,HOST_TEMPREG); @@ -3245,14 +2969,12 @@ void store_assemble(int i,struct regstat *i_regs) } type=STORED_STUB; } -#ifdef PCSX if(jaddr) { // PCSX store handlers don't check invcode again reglist|=1<waswritten&(1<regmap,FTEMP|64); - tl=get_reg(i_regs->regmap,FTEMP); - s=get_reg(i_regs->regmap,rs1[i]); - temp=get_reg(i_regs->regmap,agr); - if(temp<0) temp=get_reg(i_regs->regmap,-1); - offset=imm[i]; - assert(tl>=0); - assert(rs1[i]>0); - assert(temp>=0); - for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap[HOST_CCREG]==CCREG) reglist&=~(1<wasconst>>s)&1; - if(s>=0) c=(i_regs->wasconst>>s)&1; - // Check cop1 unusable - if(!cop1_usable) { - signed char rs=get_reg(i_regs->regmap,CSREG); - assert(rs>=0); - emit_testimm(rs,0x20000000); - jaddr=(int)out; - emit_jeq(0); - 
add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } - if (opcode[i]==0x39) { // SWC1 (get float address) - emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],tl); - } - if (opcode[i]==0x3D) { // SDC1 (get double address) - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],tl); - } - // Generate address + offset - if(!using_tlb) { - if(!c) - emit_cmpimm(offset||c||s<0?ar:s,RAM_SIZE); - } - else - { - map=get_reg(i_regs->regmap,TLREG); - assert(map>=0); - reglist&=~(1<>16)&0x1f],temp); - } - if (opcode[i]==0x35) { // LDC1 (get target address) - emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],temp); - } - if(!using_tlb) { - if(!c) { - jaddr2=(int)out; - emit_jno(0); - } - else if(((signed int)(constmap[i][s]+offset))>=(signed int)0x80000000+RAM_SIZE) { - jaddr2=(int)out; - emit_jmp(0); // inline_readstub/inline_writestub? Very rare case - } - #ifdef DESTRUCTIVE_SHIFT - if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1 - if(!offset&&!c&&s>=0) emit_mov(s,ar); - } - #endif - }else{ - if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1 - do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr2); - } - if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1 - do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr2); - } - } - if (opcode[i]==0x31) { // LWC1 - //if(s>=0&&!c&&!offset) emit_mov(s,tl); - //gen_tlb_addr_r(ar,map); - //emit_readword_indexed((int)rdram-0x80000000,tl,tl); - #ifdef HOST_IMM_ADDR32 - if(c) emit_readword_tlb(constmap[i][s]+offset,map,tl); - else - #endif - emit_readword_indexed_tlb(0,offset||c||s<0?tl:s,map,tl); - type=LOADW_STUB; - } - if (opcode[i]==0x35) { // LDC1 - assert(th>=0); - //if(s>=0&&!c&&!offset) emit_mov(s,tl); - //gen_tlb_addr_r(ar,map); - //emit_readword_indexed((int)rdram-0x80000000,tl,th); - //emit_readword_indexed((int)rdram-0x7FFFFFFC,tl,tl); - #ifdef HOST_IMM_ADDR32 - if(c) emit_readdword_tlb(constmap[i][s]+offset,map,th,tl); - else - #endif - 
emit_readdword_indexed_tlb(0,offset||c||s<0?tl:s,map,th,tl); - type=LOADD_STUB; - } - if (opcode[i]==0x39) { // SWC1 - //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp); - emit_writeword_indexed_tlb(tl,0,offset||c||s<0?temp:s,map,temp); - type=STOREW_STUB; - } - if (opcode[i]==0x3D) { // SDC1 - assert(th>=0); - //emit_writeword_indexed(th,(int)rdram-0x80000000,temp); - //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp); - emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp); - type=STORED_STUB; - } - if(!using_tlb&&!(i_regs->waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,temp,1); - #else - emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1); - #endif - #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) - emit_callne(invalidate_addr_reg[temp]); - #else - jaddr3=(int)out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr3,(int)out,reglist|(1<regmap,CCREG)<0) - emit_loadreg(CCREG,HOST_CCREG); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - emit_call((int)memdebug); - emit_popa(); - }/**/ -#else cop1_unusable(i, i_regs); -#endif } void c2ls_assemble(int i,struct regstat *i_regs) @@ -4004,7 +3544,7 @@ int internal_branch(uint64_t i_is32,int addr) if(addr&1) return 0; // Indirect (register) jump if(addr>=start && addr>2; + //int t=(addr-start)>>2; // Delay slots are not valid branch targets //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0; // 64 -> 32 bit transition requires a recompile @@ -4014,11 +3554,7 @@ int internal_branch(uint64_t i_is32,int addr) else printf("optimizable: yes\n"); }*/ //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0; -#ifndef FORCE32 - if(requires_32bit[t]&~i_is32) return 0; - else -#endif - return 1; + return 1; } return 0; } @@ -4203,22 +3739,6 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) // 
printf("poor load scheduling!\n"); } else if(c) { -#ifndef DISABLE_TLB - if(rm>=0) { - if(!entry||entry[rm]!=mgr) { - if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a) { - // Stores to memory go thru the mapper to detect self-modifying - // code, loads don't. - if((unsigned int)(constmap[i][rs]+offset)>=0xC0000000 || - (unsigned int)(constmap[i][rs]+offset)<0x80000000+RAM_SIZE ) - generate_map_const(constmap[i][rs]+offset,rm); - }else{ - if((signed int)(constmap[i][rs]+offset)>=(signed int)0xC0000000) - generate_map_const(constmap[i][rs]+offset,rm); - } - } - } -#endif if(rs1[i]!=rt1[i]||itype[i]!=LOAD) { if(!entry||entry[ra]!=agr) { if (opcode[i]==0x22||opcode[i]==0x26) { @@ -4248,32 +3768,6 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) // Preload constants for next instruction if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) { int agr,ra; - #if !defined(HOST_IMM_ADDR32) && !defined(DISABLE_TLB) - // Mapper entry - agr=MGEN1+((i+1)&1); - ra=get_reg(i_regs->regmap,agr); - if(ra>=0) { - int rs=get_reg(regs[i+1].regmap,rs1[i+1]); - int offset=imm[i+1]; - int c=(regs[i+1].wasconst>>rs)&1; - if(c) { - if(itype[i+1]==STORE||itype[i+1]==STORELR - ||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1, SWC2/SDC2 - // Stores to memory go thru the mapper to detect self-modifying - // code, loads don't. 
- if((unsigned int)(constmap[i+1][rs]+offset)>=0xC0000000 || - (unsigned int)(constmap[i+1][rs]+offset)<0x80000000+RAM_SIZE ) - generate_map_const(constmap[i+1][rs]+offset,ra); - }else{ - if((signed int)(constmap[i+1][rs]+offset)>=(signed int)0xC0000000) - generate_map_const(constmap[i+1][rs]+offset,ra); - } - } - /*else if(rs1[i]==0) { - generate_map_const(offset,ra); - }*/ - } - #endif // Actual address agr=AGEN1+((i+1)&1); ra=get_reg(i_regs->regmap,agr); @@ -4490,17 +3984,6 @@ void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty) if((i_dirty>>hr)&1) { if(i_regmap[hr]<64) { emit_storereg(i_regmap[hr],hr); -#ifndef FORCE32 - if( ((i_is32>>i_regmap[hr])&1) ) { - #ifdef DESTRUCTIVE_WRITEBACK - emit_sarimm(hr,31,hr); - emit_storereg(i_regmap[hr]|64,hr); - #else - emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(i_regmap[hr]|64,HOST_TEMPREG); - #endif - } -#endif }else{ if( !((i_is32>>(i_regmap[hr]&63))&1) ) { emit_storereg(i_regmap[hr],hr); @@ -4526,17 +4009,6 @@ void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,in if((i_dirty>>hr)&1) { if(i_regmap[hr]<64) { emit_storereg(i_regmap[hr],hr); -#ifndef FORCE32 - if( ((i_is32>>i_regmap[hr])&1) ) { - #ifdef DESTRUCTIVE_WRITEBACK - emit_sarimm(hr,31,hr); - emit_storereg(i_regmap[hr]|64,hr); - #else - emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(i_regmap[hr]|64,HOST_TEMPREG); - #endif - } -#endif }else{ if( !((i_is32>>(i_regmap[hr]&63))&1) ) { emit_storereg(i_regmap[hr],hr); @@ -4792,9 +4264,6 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) } } //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0; -#ifndef FORCE32 - if(requires_32bit[t]&~i_is32) return 0; -#endif // Delay slots are not valid branch targets //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0; // Delay slots require additional processing, so do not match @@ -5416,13 +4885,11 @@ void rjump_assemble(int i,struct 
regstat *i_regs) //assert(adj==0); emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG); add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0); -#ifdef PCSX if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10) // special case for RFE emit_jmp(0); else -#endif - emit_jns(0); + emit_jns(0); //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1); #ifdef USE_MINI_HT if(rs1[i]==31) { @@ -7026,389 +6493,35 @@ void unneeded_registers(int istart,int iend,int r) } printf("\n");*/ } -#ifdef FORCE32 for (i=iend;i>=istart;i--) { unneeded_reg_upper[i]=branch_unneeded_reg_upper[i]=-1LL; } -#endif } -// Identify registers which are likely to contain 32-bit values -// This is used to predict whether any branches will jump to a -// location with 64-bit values in registers. -static void provisional_32bit() +// Write back dirty registers as soon as we will no longer modify them, +// so that we don't end up with lots of writes at the branches. +void clean_registers(int istart,int iend,int wr) { - int i,j; - uint64_t is32=1; - uint64_t lastbranch=1; - - for(i=0;i=istart;i--) { - if(i>0) { - if(itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP) { - if(i>1) is32=lastbranch; - else is32=1; - } - } - if(i>1) + if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) { - if(itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP) { - if(likely[i-2]) { - if(i>2) is32=lastbranch; - else is32=1; - } - } - if((opcode[i-2]&0x2f)==0x05) // BNE/BNEL + if(ba[i]=(start+slen*4)) { - if(rs1[i-2]==0||rs2[i-2]==0) - { - if(rs1[i-2]) { - is32|=1LL<=0;j--) - { - if(ba[j]==start+i*4) - //temp_is32&=branch_regs[j].is32; - temp_is32&=p32[j]; - } - for(j=i;j>s1)&1LL)<>s1)&1LL); - is32&=~(1LL<=0x20&&op2<=0x23) { // ADD/ADDU/SUB/SUBU - is32|=1LL<=0x24&&op2<=0x27) { // AND/OR/XOR/NOR - uint64_t sr=((is32>>s1)&(is32>>s2)&1LL); - is32&=~(1LL<=0x2c&&op2<=0x2d) { // DADD/DADDU - if(s1==0&&s2==0) { - is32|=1LL<>s1)&1LL); - 
is32&=~(1LL<>s2)&1LL); - is32&=~(1LL<=0x2e&&op2<=0x2f) { // DSUB/DSUBU - if(s1==0&&s2==0) { - is32|=1LL<>s1)&1LL); - is32&=~(1LL<=0x1c&&op2<=0x1f) { // DMULT/DMULTU/DDIV/DDIVU - is32&=~((1LL<>s1)&1LL); - is32&=~(1LL<=0x14&&op2<=0x17) is32&=~(1LL<=0x38&&op2<0x3f) is32&=~(1LL<0) - { - if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000) - { - if(rt1[i-1]==31) // JAL/JALR - { - // Subroutine call will return here, don't alloc any registers - is32=1; - } - else if(i+1=0;i--) - { - int hr; - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) - { - if(ba[i]=(start+slen*4)) - { - // Branch out of this block, don't need anything - r32=0; - } - else - { - // Internal branch - // Need whatever matches the target - // (and doesn't get overwritten by the delay slot instruction) - r32=0; - int t=(ba[i]-start)>>2; - if(ba[i]>start+i*4) { - // Forward branch - //if(!(requires_32bit[t]&~regs[i].was32)) - // r32|=requires_32bit[t]&(~(1LL<>16)!=0x1000) - { - if(i0) - { - if((regs[i].was32>>us1[i+1])&1) r32|=1LL<0) - { - if((regs[i].was32>>us2[i+1])&1) r32|=1LL<>dep1[i+1])&1)) - { - if((regs[i].was32>>dep1[i+1])&1) r32|=1LL<>dep2[i+1])&1)) - { - if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<0) - { - if((regs[i].was32>>us1[i])&1) r32|=1LL<0) - { - if((regs[i].was32>>us2[i])&1) r32|=1LL<>dep1[i])&1)) - { - if((regs[i].was32>>dep1[i])&1) r32|=1LL<>dep2[i])&1)) - { - if((regs[i].was32>>dep2[i])&1) r32|=1LL<0&®s[i].regmap_entry[hr]<64) { - if((regs[i].was32>>regs[i].regmap_entry[hr])&(regs[i].wasdirty>>hr)&1) { - if(!((unneeded_reg_upper[i]>>regs[i].regmap_entry[hr])&1)) - pr32[i]|=1LL<=istart;i--) - { - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) - { - if(ba[i]=(start+slen*4)) - { - // Branch out of this block, flush all regs - if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) + // Branch out of this block, flush all regs + if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000) 
{ // Unconditional branch will_dirty_i=0; @@ -7953,15 +7066,6 @@ void new_dynarec_clear_full() stop_after_jal=0; inv_code_start=inv_code_end=~0; // TLB -#ifndef DISABLE_TLB - using_tlb=0; - for(n=0;n<524288;n++) // 0 .. 0x7FFFFFFF - memory_map[n]=-1; - for(n=524288;n<526336;n++) // 0x80000000 .. 0x807FFFFF - memory_map[n]=((u_int)rdram-0x80000000)>>2; - for(n=526336;n<1048576;n++) // 0x80800000 .. 0xFFFFFFFF - memory_map[n]=-1; -#endif for(n=0;n<4096;n++) ll_clear(jump_in+n); for(n=0;n<4096;n++) ll_clear(jump_out+n); for(n=0;n<4096;n++) ll_clear(jump_dirty+n); @@ -7982,12 +7086,6 @@ void new_dynarec_init() // not all systems allow execute in data segment by default if (mprotect(out, 1<>21)&0x1f; //if (op2 & 0x10) { @@ -8550,7 +7605,6 @@ int new_recompile_block(int addr) case 0x32: strcpy(insn[i],"LWC2"); type=C2LS; break; case 0x3A: strcpy(insn[i],"SWC2"); type=C2LS; break; case 0x3B: strcpy(insn[i],"HLECALL"); type=HLECALL; break; -#endif default: strcpy(insn[i],"???"); type=NI; SysPrintf("NI %08x @%08x (%08x)\n", source[i], addr + i*4, addr); break; @@ -8821,7 +7875,6 @@ int new_recompile_block(int addr) else if(type==CJUMP||type==SJUMP||type==FJUMP) ba[i]=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14); else ba[i]=-1; -#ifdef PCSX if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) { int do_in_intrp=0; // branch in delay slot? @@ -8855,7 +7908,6 @@ int new_recompile_block(int addr) i--; // don't compile the DS } } -#endif /* Is this the end of the block? 
*/ if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(rt1[i-1]==0) { // Continue past subroutine call (JAL) @@ -8921,9 +7973,6 @@ int new_recompile_block(int addr) int cc=0; int hr=-1; -#ifndef FORCE32 - provisional_32bit(); -#endif if((u_int)addr&1) { // First instruction is delay slot cc=-1; @@ -8966,126 +8015,13 @@ int new_recompile_block(int addr) } } } -#ifndef FORCE32 - // If something jumps here with 64-bit values - // then promote those registers to 64 bits - if(bt[i]) - { - uint64_t temp_is32=current.is32; - for(j=i-1;j>=0;j--) - { - if(ba[j]==start+i*4) - temp_is32&=branch_regs[j].is32; - } - for(j=i;j0&&r<64) - { - if((current.dirty>>hr)&((current.is32&~temp_is32)>>r)&1) { - temp_is32|=1LL<=0;j--) - { - if(ba[j]==start+i*4+4) - temp_is32&=branch_regs[j].is32; - } - for(j=i;j0) - { - if((current.dirty>>hr)&((current.is32&~temp_is32)>>(r&63))&1) { - if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) - { - if(rs1[i]!=(r&63)&&rs2[i]!=(r&63)) - { - //printf("dump %d/r%d\n",hr,r); - current.regmap[hr]=-1; - if(get_reg(current.regmap,r|64)>=0) - current.regmap[get_reg(current.regmap,r|64)]=-1; - } - } - } - } - } - } - } - else if(i>16)!=0x1000&&(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)) - { - uint64_t temp_is32=current.is32; - for(j=i-1;j>=0;j--) - { - if(ba[j]==start+i*4+8) - temp_is32&=branch_regs[j].is32; - } - for(j=i;j0) - { - if((current.dirty>>hr)&((current.is32&~temp_is32)>>(r&63))&1) { - if(rs1[i]!=(r&63)&&rs2[i]!=(r&63)&&rs1[i+1]!=(r&63)&&rs2[i+1]!=(r&63)) - { - //printf("dump %d/r%d\n",hr,r); - current.regmap[hr]=-1; - if(get_reg(current.regmap,r|64)>=0) - current.regmap[get_reg(current.regmap,r|64)]=-1; - } - } - } - } - } - } - #endif if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) { if(i+12) { // GTE runs in parallel until accessed, divide by 2 for a rough guess @@ -10927,121 +9863,6 @@ int new_recompile_block(int addr) 
clean_registers(0,slen-1,1); /* Pass 7 - Identify 32-bit registers */ -#ifndef FORCE32 - provisional_r32(); - - u_int r32=0; - - for (i=slen-1;i>=0;i--) - { - int hr; - if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) - { - if(ba[i]=(start+slen*4)) - { - // Branch out of this block, don't need anything - r32=0; - } - else - { - // Internal branch - // Need whatever matches the target - // (and doesn't get overwritten by the delay slot instruction) - r32=0; - int t=(ba[i]-start)>>2; - if(ba[i]>start+i*4) { - // Forward branch - if(!(requires_32bit[t]&~regs[i].was32)) - r32|=requires_32bit[t]&(~(1LL<>16)!=0x1000) - { - if(i0) - { - if((regs[i].was32>>us1[i+1])&1) r32|=1LL<0) - { - if((regs[i].was32>>us2[i+1])&1) r32|=1LL<>dep1[i+1])&1)) - { - if((regs[i].was32>>dep1[i+1])&1) r32|=1LL<>dep2[i+1])&1)) - { - if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<0) - { - if((regs[i].was32>>us1[i])&1) r32|=1LL<0) - { - if((regs[i].was32>>us2[i])&1) r32|=1LL<>dep1[i])&1)) - { - if((regs[i].was32>>dep1[i])&1) r32|=1LL<>dep2[i])&1)) - { - if((regs[i].was32>>dep2[i])&1) r32|=1LL<0&®s[i].regmap_entry[hr]<64) { - if((regs[i].was32>>regs[i].regmap_entry[hr])&(regs[i].wasdirty>>hr)&1) { - if(!((unneeded_reg_upper[i]>>regs[i].regmap_entry[hr])&1)) - requires_32bit[i]|=1LL<=0;i--) { if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) @@ -11054,7 +9875,6 @@ int new_recompile_block(int addr) } } } -#endif if(itype[slen-1]==SPAN) { bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception @@ -11073,26 +9893,6 @@ int new_recompile_block(int addr) else printf(" r%d",r); } } -#ifndef FORCE32 - printf(" UU:"); - for(r=1;r<=CCREG;r++) { - if(((unneeded_reg_upper[i]&~unneeded_reg[i])>>r)&1) { - if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - printf(" 32:"); - for(r=0;r<=CCREG;r++) { - //if(((is32[i]>>r)&(~unneeded_reg[i]>>r))&1) { - if((regs[i].was32>>r)&1) { - if(r==CCREG) printf(" 
CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } -#endif printf("\n"); #if defined(__i386__) || defined(__x86_64__) printf("pre: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7]); @@ -11214,18 +10014,6 @@ int new_recompile_block(int addr) #endif printf("\n"); } -#ifndef FORCE32 - printf(" 32:"); - for(r=0;r<=CCREG;r++) { - if((regs[i].is32>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - printf("\n"); -#endif /*printf(" p32:"); for(r=0;r<=CCREG;r++) { if((p32[i]>>r)&1) { @@ -11263,18 +10051,6 @@ int new_recompile_block(int addr) if((branch_regs[i].dirty>>10)&1) printf("r10 "); if((branch_regs[i].dirty>>12)&1) printf("r12 "); #endif -#ifndef FORCE32 - printf(" 32:"); - for(r=0;r<=CCREG;r++) { - if((branch_regs[i].is32>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - printf("\n"); -#endif } } #endif // DISASM @@ -11292,7 +10068,6 @@ int new_recompile_block(int addr) } u_int instr_addr0_override=0; -#ifdef PCSX if (start == 0x80030000) { // nasty hack for fastbios thing // override block entry to this code @@ -11306,7 +10081,6 @@ int new_recompile_block(int addr) emit_cmp(0,1); emit_jne((int)new_dyna_leave); } -#endif for(i=0;i>16)!=0x1000)) { - wb_sx(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,is32_pre,regs[i].was32, - unneeded_reg[i],unneeded_reg_upper[i]); wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,is32_pre, unneeded_reg[i],unneeded_reg_upper[i]); } @@ -11595,26 +10367,15 @@ int new_recompile_block(int addr) // Trap writes to any of the pages we compiled for(i=start>>12;i<=(start+slen*4)>>12;i++) { invalid_code[i]=0; -#ifndef DISABLE_TLB - 
memory_map[i]|=0x40000000; - if((signed int)start>=(signed int)0xC0000000) { - assert(using_tlb); - j=(((u_int)i<<12)+(memory_map[i]<<2)-(u_int)rdram+(u_int)0x80000000)>>12; - invalid_code[j]=0; - memory_map[j]|=0x40000000; - //printf("write protect physical page: %x (virtual %x)\n",j<<12,start); - } -#endif } inv_code_start=inv_code_end=~0; -#ifdef PCSX + // for PCSX we need to mark all mirrors too if(get_page(start)<(RAM_SIZE>>12)) for(i=start>>12;i<=(start+slen*4)>>12;i++) invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; -#endif /* Pass 10 - Free memory by expiring oldest blocks */ -- cgit v1.2.3 From 1edfcc68047e356a9c57c4734cc3bbe084922ce7 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 18 Sep 2016 20:04:25 +0300 Subject: drc: drop less obvious dead code --- libpcsxcore/new_dynarec/assem_arm.c | 436 ++-------------------------------- libpcsxcore/new_dynarec/new_dynarec.c | 290 ++++------------------ 2 files changed, 70 insertions(+), 656 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index c2f65ee..20a6956 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -45,9 +45,7 @@ extern int pending_exception; extern int branch_target; extern uint64_t readmem_dword; extern void *dynarec_local; -extern u_int memory_map[1048576]; extern u_int mini_ht[32][2]; -extern u_int rounding_modes[4]; void indirect_jump_indexed(); void indirect_jump(); @@ -814,26 +812,6 @@ char regname[16][4] = { "lr", "pc"}; -void output_byte(u_char byte) -{ - *(out++)=byte; -} -void output_modrm(u_char mod,u_char rm,u_char ext) -{ - assert(mod<4); - assert(rm<8); - assert(ext<8); - u_char byte=(mod<<6)|(ext<<3)|rm; - *(out++)=byte; -} -void output_sib(u_char scale,u_char index,u_char base) -{ - assert(scale<4); - assert(index<8); - assert(base<8); - u_char byte=(scale<<6)|(index<<3)|base; - *(out++)=byte; 
-} void output_w32(u_int word) { *((u_int *)out)=word; @@ -1207,29 +1185,7 @@ void emit_adcimm(u_int rs,int imm,u_int rt) assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm); output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); } -/*void emit_sbcimm(int imm,u_int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval); -}*/ -void emit_sbbimm(int imm,u_int rt) -{ - assem_debug("sbb $%d,%%%s\n",imm,regname[rt]); - assert(rt<8); - if(imm<128&&imm>=-128) { - output_byte(0x83); - output_modrm(3,rt,3); - output_byte(imm); - } - else - { - output_byte(0x81); - output_modrm(3,rt,3); - output_w32(imm); - } -} + void emit_rscimm(int rs,int imm,u_int rt) { assert(0); @@ -1248,13 +1204,6 @@ void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) emit_adcimm(rsh,0,rth); } -void emit_sbb(int rs1,int rs2) -{ - assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]); - output_byte(0x19); - output_modrm(3,rs1,rs2); -} - void emit_andimm(int rs,int imm,int rt) { u_int armval; @@ -1445,32 +1394,7 @@ void emit_sar(u_int rs,u_int shift,u_int rt) assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); } -void emit_shlcl(int r) -{ - assem_debug("shl %%%s,%%cl\n",regname[r]); - assert(0); -} -void emit_shrcl(int r) -{ - assem_debug("shr %%%s,%%cl\n",regname[r]); - assert(0); -} -void emit_sarcl(int r) -{ - assem_debug("sar %%%s,%%cl\n",regname[r]); - assert(0); -} -void emit_shldcl(int r1,int r2) -{ - assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]); - assert(0); -} -void emit_shrdcl(int r1,int r2) -{ - assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]); - assert(0); -} void emit_orrshl(u_int rs,u_int shift,u_int rt) { assert(rs<16); @@ -1510,21 +1434,6 @@ void emit_cmpimm(int rs,int imm) } } -void emit_cmovne(u_int *addr,int rt) -{ - assem_debug("cmovne 
%x,%%%s",(int)addr,regname[rt]); - assert(0); -} -void emit_cmovl(u_int *addr,int rt) -{ - assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]); - assert(0); -} -void emit_cmovs(u_int *addr,int rt) -{ - assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]); - assert(0); -} void emit_cmovne_imm(int imm,int rt) { assem_debug("movne %s,#%d\n",regname[rt],imm); @@ -1758,31 +1667,6 @@ void emit_jcc(int a) output_w32(0x3a000000|offset); } -void emit_pushimm(int imm) -{ - assem_debug("push $%x\n",imm); - assert(0); -} -void emit_pusha() -{ - assem_debug("pusha\n"); - assert(0); -} -void emit_popa() -{ - assem_debug("popa\n"); - assert(0); -} -void emit_pushreg(u_int r) -{ - assem_debug("push %%%s\n",regname[r]); - assert(0); -} -void emit_popreg(u_int r) -{ - assem_debug("pop %%%s\n",regname[r]); - assert(0); -} void emit_callreg(u_int r) { assert(r<15); @@ -1973,17 +1857,7 @@ void emit_movzwl(int addr, int rt) assem_debug("ldrh %s,fp+%d\n",regname[rt],offset); output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_movzwl_reg(int rs, int rt) -{ - assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]); - assert(0); -} -void emit_xchg(int rs, int rt) -{ - assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]); - assert(0); -} void emit_writeword_indexed(int rt, int offset, int rs) { assert(offset>-4096&&offset<4096); @@ -2096,27 +1970,7 @@ void emit_writebyte(int rt, int addr) assem_debug("strb %s,fp+%d\n",regname[rt],offset); output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); } -void emit_writeword_imm(int imm, int addr) -{ - assem_debug("movl $%x,%x\n",imm,addr); - assert(0); -} -void emit_writebyte_imm(int imm, int addr) -{ - assem_debug("movb $%x,%x\n",imm,addr); - assert(0); -} -void emit_mul(int rs) -{ - assem_debug("mul %%%s\n",regname[rs]); - assert(0); -} -void emit_imul(int rs) -{ - assem_debug("imul %%%s\n",regname[rs]); - assert(0); -} void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) { assem_debug("umull %s, %s, %s, 
%s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); @@ -2136,22 +1990,6 @@ void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); } -void emit_div(int rs) -{ - assem_debug("div %%%s\n",regname[rs]); - assert(0); -} -void emit_idiv(int rs) -{ - assem_debug("idiv %%%s\n",regname[rs]); - assert(0); -} -void emit_cdq() -{ - assem_debug("cdq\n"); - assert(0); -} - void emit_clz(int rs,int rt) { assem_debug("clz %s,%s\n",regname[rt],regname[rs]); @@ -2304,12 +2142,6 @@ void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) } } -// special case for checking invalid_code -void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm) -{ - assert(0); -} - // special case for checking invalid_code void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) { @@ -2320,13 +2152,6 @@ void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) emit_cmpimm(HOST_TEMPREG,imm); } -// special case for tlb mapping -void emit_addsr12(int rs1,int rs2,int rt) -{ - assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2)); -} - void emit_callne(int a) { assem_debug("blne %x\n",a); @@ -2335,14 +2160,6 @@ void emit_callne(int a) } // Used to preload hash table entries -void emit_prefetch(void *addr) -{ - assem_debug("prefetch %x\n",(int)addr); - output_byte(0x0F); - output_byte(0x18); - output_modrm(0,5,1); - output_w32((int)addr); -} void emit_prefetchreg(int r) { assem_debug("pld %s\n",regname[r]); @@ -2357,168 +2174,6 @@ void emit_ldreq_indexed(int rs, u_int offset, int rt) output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset); } -void emit_flds(int r,int sr) -{ - assem_debug("flds s%d,[%s]\n",sr,regname[r]); - output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16)); -} - -void emit_vldr(int r,int vr) -{ - assem_debug("vldr d%d,[%s]\n",vr,regname[r]); - output_w32(0xed900b00|(vr<<12)|(r<<16)); -} - -void emit_fsts(int sr,int r) -{ - assem_debug("fsts 
s%d,[%s]\n",sr,regname[r]); - output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16)); -} - -void emit_vstr(int vr,int r) -{ - assem_debug("vstr d%d,[%s]\n",vr,regname[r]); - output_w32(0xed800b00|(vr<<12)|(r<<16)); -} - -void emit_ftosizs(int s,int d) -{ - assem_debug("ftosizs s%d,s%d\n",d,s); - output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_ftosizd(int s,int d) -{ - assem_debug("ftosizd s%d,d%d\n",d,s); - output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7)); -} - -void emit_fsitos(int s,int d) -{ - assem_debug("fsitos s%d,s%d\n",d,s); - output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fsitod(int s,int d) -{ - assem_debug("fsitod d%d,s%d\n",d,s); - output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fcvtds(int s,int d) -{ - assem_debug("fcvtds d%d,s%d\n",d,s); - output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fcvtsd(int s,int d) -{ - assem_debug("fcvtsd s%d,d%d\n",d,s); - output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7)); -} - -void emit_fsqrts(int s,int d) -{ - assem_debug("fsqrts d%d,s%d\n",d,s); - output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fsqrtd(int s,int d) -{ - assem_debug("fsqrtd s%d,d%d\n",d,s); - output_w32(0xeeb10bc0|((d&7)<<12)|(s&7)); -} - -void emit_fabss(int s,int d) -{ - assem_debug("fabss d%d,s%d\n",d,s); - output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fabsd(int s,int d) -{ - assem_debug("fabsd s%d,d%d\n",d,s); - output_w32(0xeeb00bc0|((d&7)<<12)|(s&7)); -} - -void emit_fnegs(int s,int d) -{ - assem_debug("fnegs d%d,s%d\n",d,s); - output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5)); -} - -void emit_fnegd(int s,int d) -{ - assem_debug("fnegd s%d,d%d\n",d,s); - output_w32(0xeeb10b40|((d&7)<<12)|(s&7)); -} - -void emit_fadds(int s1,int s2,int d) -{ - assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2); - 
output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5)); -} - -void emit_faddd(int s1,int s2,int d) -{ - assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2); - output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7)); -} - -void emit_fsubs(int s1,int s2,int d) -{ - assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2); - output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5)); -} - -void emit_fsubd(int s1,int s2,int d) -{ - assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2); - output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7)); -} - -void emit_fmuls(int s1,int s2,int d) -{ - assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2); - output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5)); -} - -void emit_fmuld(int s1,int s2,int d) -{ - assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2); - output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7)); -} - -void emit_fdivs(int s1,int s2,int d) -{ - assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2); - output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5)); -} - -void emit_fdivd(int s1,int s2,int d) -{ - assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2); - output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7)); -} - -void emit_fcmps(int x,int y) -{ - assem_debug("fcmps s14, s15\n"); - output_w32(0xeeb47a67); -} - -void emit_fcmpd(int x,int y) -{ - assem_debug("fcmpd d6, d7\n"); - output_w32(0xeeb46b47); -} - -void emit_fmstat() -{ - assem_debug("fmstat\n"); - output_w32(0xeef1fa10); -} - void emit_bicne_imm(int rs,int imm,int rt) { u_int armval; @@ -2625,28 +2280,6 @@ static void restore_regs(u_int reglist) restore_regs_all(reglist); } -// Write back consts using r14 so we don't disturb the other registers -void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i) -{ - int hr; - for(hr=0;hr=0&&((i_dirty>>hr)&1)) { - if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) { - if(i_regmap[hr]<64 || 
!((i_is32>>(i_regmap[hr]&63))&1) ) { - int value=constmap[i][hr]; - if(value==0) { - emit_zeroreg(HOST_TEMPREG); - } - else { - emit_movimm(value,HOST_TEMPREG); - } - emit_storereg(i_regmap[hr],HOST_TEMPREG); - } - } - } - } -} - /* Stubs/epilogue */ void literal_pool(int n) @@ -3282,13 +2915,6 @@ do_cop1stub(int n) emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception); } -static int do_tlb_r(int a, ...) { return 0; } -static int do_tlb_r_branch(int a, ...) { return 0; } -static int gen_tlb_addr_r(int a, ...) { return 0; } -static int do_tlb_w(int a, ...) { return 0; } -static int do_tlb_w_branch(int a, ...) { return 0; } -static int gen_tlb_addr_w(int a, ...) { return 0; } - /* Special assem */ void shift_assemble_arm(int i,struct regstat *i_regs) @@ -3609,52 +3235,30 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) c=(i_regs->wasconst>>s)&1; if(c) { memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; } } - if(!using_tlb) { - if(!c) { - #ifdef RAM_OFFSET - map=get_reg(i_regs->regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - #endif - emit_shlimm(addr,3,temp); - if (opcode[i]==0x22||opcode[i]==0x26) { - emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR - }else{ - emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR - } - jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); + if(!c) { + #ifdef RAM_OFFSET + map=get_reg(i_regs->regmap,ROREG); + if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); + #endif + emit_shlimm(addr,3,temp); + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR + }else{ + emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR } - else { - if(ram_offset&&memtarget) { - emit_addimm(temp2,ram_offset,HOST_TEMPREG); - fastload_reg_override=HOST_TEMPREG; - } - if (opcode[i]==0x22||opcode[i]==0x26) { - emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR - }else{ - 
emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR - } + jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); + } + else { + if(ram_offset&&memtarget) { + emit_addimm(temp2,ram_offset,HOST_TEMPREG); + fastload_reg_override=HOST_TEMPREG; } - }else{ // using tlb - int a; - if(c) { - a=-1; - }else if (opcode[i]==0x22||opcode[i]==0x26) { - a=0xFFFFFFFC; // LWL/LWR + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR }else{ - a=0xFFFFFFF8; // LDL/LDR - } - map=get_reg(i_regs->regmap,TLREG); - assert(map>=0); - reglist&=~(1<is32|=1LL<=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU @@ -2563,7 +2521,6 @@ void load_assemble(int i,struct regstat *i_regs) c=(i_regs->wasconst>>s)&1; if (c) { memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; } } //printf("load_assemble: c=%d\n",c); @@ -2586,34 +2543,22 @@ void load_assemble(int i,struct regstat *i_regs) assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O reglist&=~(1<=0) reglist&=~(1<regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - #endif -//#define R29_HACK 1 - #ifdef R29_HACK - // Strmnnrmn's speed hack - if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) - #endif - { - jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); - } + if(!c) { + #ifdef RAM_OFFSET + map=get_reg(i_regs->regmap,ROREG); + if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); + #endif + #ifdef R29_HACK + // Strmnnrmn's speed hack + if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE) + #endif + { + jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override); } - else if(ram_offset&&memtarget) { - emit_addimm(addr,ram_offset,HOST_TEMPREG); - fastload_reg_override=HOST_TEMPREG; - } - }else{ // using tlb - int x=0; - if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU - if 
(opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU - map=get_reg(i_regs->regmap,TLREG); - assert(map>=0); - reglist&=~(1<regmap,rt1[i])); // ignore loads to r0 and unneeded reg if (opcode[i]==0x20) { // LB @@ -2626,7 +2571,6 @@ void load_assemble(int i,struct regstat *i_regs) #endif { //emit_xorimm(addr,3,tl); - //gen_tlb_addr_r(tl,map); //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl); int x=0,a=tl; #ifdef BIG_ENDIAN_MIPS @@ -2667,7 +2611,6 @@ void load_assemble(int i,struct regstat *i_regs) //emit_movswl_indexed_tlb(x,tl,map,tl); //else if(map>=0) { - gen_tlb_addr_r(a,map); emit_movswl_indexed(x,a,tl); }else{ #if 1 //def RAM_OFFSET @@ -2713,7 +2656,6 @@ void load_assemble(int i,struct regstat *i_regs) #endif { //emit_xorimm(addr,3,tl); - //gen_tlb_addr_r(tl,map); //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl); int x=0,a=tl; #ifdef BIG_ENDIAN_MIPS @@ -2754,7 +2696,6 @@ void load_assemble(int i,struct regstat *i_regs) //emit_movzwl_indexed_tlb(x,tl,map,tl); //#else if(map>=0) { - gen_tlb_addr_r(a,map); emit_movzwl_indexed(x,a,tl); }else{ #if 1 //def RAM_OFFSET @@ -2798,7 +2739,6 @@ void load_assemble(int i,struct regstat *i_regs) if(!dummy) { int a=addr; if(fastload_reg_override) a=fastload_reg_override; - //gen_tlb_addr_r(tl,map); //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th); //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl); #ifdef HOST_IMM_ADDR32 @@ -2874,7 +2814,6 @@ void store_assemble(int i,struct regstat *i_regs) c=(i_regs->wasconst>>s)&1; if(c) { memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; } } assert(tl>=0); @@ -2885,23 +2824,12 @@ void store_assemble(int i,struct regstat *i_regs) if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<regmap,TLREG); - assert(map>=0); - reglist&=~(1<=0) { - gen_tlb_addr_w(a,map); emit_writehword_indexed(tl,x,a); }else 
//emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a); @@ -2975,7 +2901,7 @@ void store_assemble(int i,struct regstat *i_regs) add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist); jaddr=0; } - if(!using_tlb&&!(i_regs->waswritten&(1<waswritten&(1<isconst>>s)&1; if(c) { memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; } } assert(tl>=0); @@ -3086,41 +3011,26 @@ void storelr_assemble(int i,struct regstat *i_regs) if(i_regs->regmap[hr]>=0) reglist|=1<=0); - if(!using_tlb) { - if(!c) { - emit_cmpimm(s<0||offset?temp:s,RAM_SIZE); - if(!offset&&s!=temp) emit_mov(s,temp); - jaddr=(int)out; - emit_jno(0); - } - else - { - if(!memtarget||!rs1[i]) { - jaddr=(int)out; - emit_jmp(0); - } - } - #ifdef RAM_OFFSET - int map=get_reg(i_regs->regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - gen_tlb_addr_w(temp,map); - #else - if((u_int)rdram!=0x80000000) - emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); - #endif - }else{ // using tlb - int map=get_reg(i_regs->regmap,TLREG); - assert(map>=0); - reglist&=~(1<=0) emit_mov(s,temp); - do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr); - if(!jaddr&&!memtarget) { + if(!c) { + emit_cmpimm(s<0||offset?temp:s,RAM_SIZE); + if(!offset&&s!=temp) emit_mov(s,temp); + jaddr=(int)out; + emit_jno(0); + } + else + { + if(!memtarget||!rs1[i]) { jaddr=(int)out; emit_jmp(0); } - gen_tlb_addr_w(temp,map); } + #ifdef RAM_OFFSET + int map=get_reg(i_regs->regmap,ROREG); + if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); + #else + if((u_int)rdram!=0x80000000) + emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); + #endif if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR temp2=get_reg(i_regs->regmap,FTEMP); @@ -3263,7 +3173,7 @@ void storelr_assemble(int i,struct regstat *i_regs) } if(!c||!memtarget) add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist); - 
if(!using_tlb&&!(i_regs->waswritten&(1<waswritten&(1<regmap,ROREG); if(map<0) map=HOST_TEMPREG; @@ -3322,7 +3232,6 @@ void c2ls_assemble(int i,struct regstat *i_regs) offset=imm[i]; assert(rs1[i]>0); assert(tl>=0); - assert(!using_tlb); for(hr=0;hrregmap[hr]>=0) reglist|=1<regmap,rt1[i]); if(ra<0) ra=get_reg(i_regs->regmap,-1); @@ -3711,17 +3619,11 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } int rs=get_reg(i_regs->regmap,rs1[i]); - int rm=get_reg(i_regs->regmap,TLREG); if(ra>=0) { int offset=imm[i]; int c=(i_regs->wasconst>>rs)&1; if(rs1[i]==0) { // Using r0 as a base address - /*if(rm>=0) { - if(!entry||entry[rm]!=mgr) { - generate_map_const(offset,rm); - } // else did it in the previous cycle - }*/ if(!entry||entry[ra]!=agr) { if (opcode[i]==0x22||opcode[i]==0x26) { emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR @@ -3747,8 +3649,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR }else{ #ifdef HOST_IMM_ADDR32 - if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32) || // LWC1/LDC1/LWC2/LDC2 - (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000)) + if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2 #endif emit_movimm(constmap[i][rs]+offset,ra); regs[i].loadedconst|=1<=(signed int)0xC0000000)) + if((itype[i+1]!=LOAD&&(opcode[i+1]&0x3b)!=0x31&&(opcode[i+1]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2 #endif emit_movimm(constmap[i+1][rs]+offset,ra); regs[i+1].loadedconst|=1<>hr)&1)) { - #ifdef HOST_IMM_ADDR32 - if(!using_tlb||((signed int)constmap[i][hr]+imm[i+2])<(signed int)0xC0000000) return 0; - #endif // Precompute load address *value=constmap[i][hr]+imm[i+2]; return 1; @@ -3832,9 +3729,6 @@ int get_final_value(int hr, int i, int *value) } if(itype[i+1]==LOAD&&rs1[i+1]==reg&&rt1[i+1]==reg) { - #ifdef HOST_IMM_ADDR32 - if(!using_tlb||((signed 
int)constmap[i][hr]+imm[i+1])<(signed int)0xC0000000) return 0; - #endif // Precompute load address *value=constmap[i][hr]+imm[i+1]; //printf("c=%x imm=%x\n",(int)constmap[i][hr],imm[i+1]); @@ -9028,12 +8922,6 @@ int new_recompile_block(int addr) d1=dep1[i+1]; d2=dep2[i+1]; } - if(using_tlb) { - if(itype[i+1]==LOAD || itype[i+1]==LOADLR || - itype[i+1]==STORE || itype[i+1]==STORELR || - itype[i+1]==C1LS || itype[i+1]==C2LS) - map=TLREG; - } else if(itype[i+1]==STORE || itype[i+1]==STORELR || (opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2 map=INVCP; @@ -9088,12 +8976,7 @@ int new_recompile_block(int addr) d1=dep1[i]; d2=dep2[i]; } - if(using_tlb) { - if(itype[i]==LOAD || itype[i]==LOADLR || - itype[i]==STORE || itype[i]==STORELR || - itype[i]==C1LS || itype[i]==C2LS) - map=TLREG; - } else if(itype[i]==STORE || itype[i]==STORELR || + if(itype[i]==STORE || itype[i]==STORELR || (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2 map=INVCP; } @@ -9483,14 +9366,14 @@ int new_recompile_block(int addr) // Cache memory offset or tlb map pointer if a register is available #ifndef HOST_IMM_ADDR32 #ifndef RAM_OFFSET - if(using_tlb) + if(0) #endif { int earliest_available[HOST_REGS]; int loop_start[HOST_REGS]; int score[HOST_REGS]; int end[HOST_REGS]; - int reg=using_tlb?MMREG:ROREG; + int reg=ROREG; // Init for(hr=0;hr=0) { - int sr=get_reg(regs[i+1].regmap,rs1[i+1]); - if(sr>=0&&((regs[i+1].wasconst>>sr)&1)) { - int nr; - if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) - { - regs[i].regmap[hr]=MGEN1+((i+1)&1); - regmap_pre[i+1][hr]=MGEN1+((i+1)&1); - regs[i+1].regmap_entry[hr]=MGEN1+((i+1)&1); - regs[i].isconst&=~(1<=0) - { - // move it to another register - regs[i+1].regmap[hr]=-1; - regmap_pre[i+2][hr]=-1; - regs[i+1].regmap[nr]=TLREG; - regmap_pre[i+2][nr]=TLREG; - regs[i].regmap[nr]=MGEN1+((i+1)&1); - regmap_pre[i+1][nr]=MGEN1+((i+1)&1); - regs[i+1].regmap_entry[nr]=MGEN1+((i+1)&1); - 
regs[i].isconst&=~(1<>5)&1) printf("ebp "); if((needed_reg[i]>>6)&1) printf("esi "); if((needed_reg[i]>>7)&1) printf("edi "); - printf("r:"); - for(r=0;r<=CCREG;r++) { - //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) { - if((requires_32bit[i]>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } printf("\n"); - /*printf("pr:"); - for(r=0;r<=CCREG;r++) { - //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) { - if((pr32[i]>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - if(pr32[i]!=requires_32bit[i]) printf(" OOPS"); - printf("\n");*/ #if defined(__i386__) || defined(__x86_64__) printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]); printf("dirty: "); @@ -10014,17 +9835,6 @@ int new_recompile_block(int addr) #endif printf("\n"); } - /*printf(" p32:"); - for(r=0;r<=CCREG;r++) { - if((p32[i]>>r)&1) { - if(r==CCREG) printf(" CC"); - else if(r==HIREG) printf(" HI"); - else if(r==LOREG) printf(" LO"); - else printf(" r%d",r); - } - } - if(p32[i]!=regs[i].is32) printf(" NO MATCH\n"); - else printf("\n");*/ if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) { #if defined(__i386__) || defined(__x86_64__) printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); -- cgit v1.2.3 From 9f51b4b9aa4fffa7b9411ca274f5dfb179a6a30a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 18 Sep 2016 20:10:06 +0300 Subject: drc: strip eol blanks Because people send patches while using editors 
that strip blanks and the patch becomes a mess. --- libpcsxcore/new_dynarec/assem_arm.c | 44 ++++++------- libpcsxcore/new_dynarec/new_dynarec.c | 118 +++++++++++++++++----------------- 2 files changed, 81 insertions(+), 81 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 20a6956..592cc88 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -186,8 +186,8 @@ add_literal(int addr,int val) assert(literalcountu>>reg)&1) return; - + // see if it's already allocated for(hr=0;hrregmap[hr]==reg) return; } - + // Keep the same mapping if the register was already allocated in a loop preferred_reg = loop_reg(i,reg,preferred_reg); - + // Try to allocate the preferred register if(cur->regmap[preferred_reg]==-1) { cur->regmap[preferred_reg]=reg; @@ -348,7 +348,7 @@ void alloc_reg(struct regstat *cur,int i,signed char reg) cur->isconst&=~(1<uu>>reg)&1) return; - + // see if the upper half is already allocated for(hr=0;hrregmap[hr]==reg+64) return; } - + // Keep the same mapping if the register was already allocated in a loop preferred_reg = loop_reg(i,reg,preferred_reg); - + // Try to allocate the preferred register if(cur->regmap[preferred_reg]==-1) { cur->regmap[preferred_reg]=reg|64; @@ -514,7 +514,7 @@ void alloc_reg64(struct regstat *cur,int i,signed char reg) cur->isconst&=~(1<regmap[hr]==reg) return; } - + // Try to allocate any available register for(hr=HOST_REGS-1;hr>=0;hr--) { if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) { @@ -663,7 +663,7 @@ void alloc_reg_temp(struct regstat *cur,int i,signed char reg) return; } } - + // Find an unneeded register for(hr=HOST_REGS-1;hr>=0;hr--) { @@ -692,7 +692,7 @@ void alloc_reg_temp(struct regstat *cur,int i,signed char reg) } } } - + // Ok, now we have to evict someone // Pick a register we hopefully won't need soon // TODO: we might want to follow unconditional jumps here @@ -767,7 +767,7 @@ void alloc_arm_reg(struct regstat *cur,int 
i,signed char reg,char hr) { int n; int dirty=0; - + // see if it's already allocated (and dealloc it) for(n=0;nregmap[n]=-1; } } - + cur->regmap[hr]=reg; cur->dirty&=~(1<dirty|=dirty<dirty>>hr)&1) { reg=cur->regmap[hr]; - if(reg>=64) + if(reg>=64) if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1; } } @@ -574,7 +574,7 @@ int needed_again(int r, int i) int j; int b=-1; int rn=10; - + if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(ba[i-1]start+slen*4-4) @@ -677,7 +677,7 @@ int loop_reg(int i, int r, int hr) void alloc_all(struct regstat *cur,int i) { int hr; - + for(hr=0;hrregmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&& @@ -789,7 +789,7 @@ void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift) { struct ll_entry *next; while(*head) { - if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || + if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) { inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr); @@ -885,7 +885,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) #ifdef __arm__ do_clear_cache(); #endif - + // Don't trap writes invalid_code[block]=1; @@ -3028,7 +3028,7 @@ void storelr_assemble(int i,struct regstat *i_regs) int map=get_reg(i_regs->regmap,ROREG); if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); #else - if((u_int)rdram!=0x80000000) + if((u_int)rdram!=0x80000000) emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); #endif @@ -3600,7 +3600,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) int agr=AGEN1+(i&1); if(itype[i]==LOAD) { ra=get_reg(i_regs->regmap,rt1[i]); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg(i_regs->regmap,-1); assert(ra>=0); } if(itype[i]==LOADLR) { @@ -4120,7 +4120,7 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) { return 0; } - else + else if((i_dirty>>hr)&1) { 
if(i_regmap[hr]>s1l)&(branch_regs[i].is32>>rs1[i])&1) emit_loadreg(rs1[i],s1l); - } + } else { if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1) emit_loadreg(rs2[i],s1l); @@ -4554,7 +4554,7 @@ void do_ccstub(int n) load_all_regs(branch_regs[i].regmap); } emit_jmp(stubs[n][2]); // return address - + /* This works but uses a lot of memory... emit_readword((int)&last_count,ECX); emit_add(HOST_CCREG,ECX,EAX); @@ -4588,7 +4588,7 @@ add_to_linker(int addr,int target,int ext) { link_addr[linkcount][0]=addr; link_addr[linkcount][1]=target; - link_addr[linkcount][2]=ext; + link_addr[linkcount][2]=ext; linkcount++; } @@ -4614,7 +4614,7 @@ static void ujump_assemble_write_ra(int i) #endif { #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -4635,10 +4635,10 @@ void ujump_assemble(int i,struct regstat *i_regs) address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); - if(rt1[i]==31&&temp>=0) + if(rt1[i]==31&&temp>=0) { int return_address=start+i*4+8; - if(get_reg(branch_regs[i].regmap,31)>0) + if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } #endif @@ -4689,7 +4689,7 @@ static void rjump_assemble_write_ra(int i) assert(rt>=0); return_address=start+i*4+8; #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -4718,7 +4718,7 @@ void rjump_assemble(int i,struct regstat *i_regs) } address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH - if(rt1[i]==31) + if(rt1[i]==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { int return_address=start+i*4+8; @@ -4850,7 +4850,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK 
if(i>(ba[i]-start)>>2) invert=1; #endif - + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); @@ -4905,7 +4905,7 @@ void cjump_assemble(int i,struct regstat *i_regs) load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); //assem_debug("cycle count (adj)\n"); @@ -4977,7 +4977,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(0); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if(opcode[i]==4) // BEQ @@ -5107,7 +5107,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(1); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if((opcode[i]&0x2f)==4) // BEQ @@ -5290,7 +5290,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); assem_debug("cycle count (adj)\n"); @@ -5377,7 +5377,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } } // if(!only32) - + if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { @@ -5625,7 
+5625,7 @@ void fjump_assemble(int i,struct regstat *i_regs) { } } // if(!only32) - + if(invert) { if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -6133,14 +6133,14 @@ void unneeded_registers(int istart,int iend,int r) { // If subroutine call, flag return address as a possible branch target if(rt1[i]==31 && i=(start+slen*4)) { // Branch out of this block, flush all regs u=1; uu=1; gte_u=gte_u_unknown; - /* Hexagon hack + /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { uu=u=0x300C00F; // Discard at, v0-v1, t6-t9 @@ -6730,7 +6730,7 @@ void clean_registers(int istart,int iend,int wr) if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<istart) { - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) + if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) { // Don't store a register immediately after writing it, // may prevent dual-issue. @@ -7140,7 +7140,7 @@ int new_recompile_block(int addr) assem_debug("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); - //if(debug) + //if(debug) //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); /*if(Count>=312978186) { @@ -7196,7 +7196,7 @@ int new_recompile_block(int addr) unsigned int type,op,op2; //printf("addr = %x source = %x %x\n", addr,source,source[0]); - + /* Pass 1 disassembly */ for(i=0;!done;i++) { @@ -7849,7 +7849,7 @@ int new_recompile_block(int addr) /* Pass 2 - Register dependencies and branch targets */ unneeded_registers(0,slen-1,0); - + /* Pass 3 - Register allocation */ struct regstat current; // Current register allocations/status @@ -7876,7 +7876,7 @@ int new_recompile_block(int addr) unneeded_reg_upper[0]=1; 
current.regmap[HOST_BTREG]=BTREG; } - + for(i=0;i=0;i--) { int hr; @@ -8892,7 +8892,7 @@ int new_recompile_block(int addr) } // Save it needed_reg[i]=nr; - + // Deallocate unneeded registers for(hr=0;hr=start && ba[i]<(start+i*4)) + if(ba[i]>=start && ba[i]<(start+i*4)) if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS @@ -9071,10 +9071,10 @@ int new_recompile_block(int addr) } } if(ooo[i]) { - if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; }else{ - if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; } // Avoid dirty->clean transition @@ -9244,10 +9244,10 @@ int new_recompile_block(int addr) if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { if(ooo[j]) { - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; }else{ - if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; } if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { @@ -9320,7 +9320,7 @@ int new_recompile_block(int addr) regs[k].isconst&=~(1<i&&f_regmap[HOST_CCREG]==CCREG) @@ -9362,7 +9362,7 @@ int new_recompile_block(int addr) } } } - + // Cache memory offset or tlb map pointer if a register is available #ifndef HOST_IMM_ADDR32 #ifndef RAM_OFFSET @@ -9542,7 +9542,7 @@ int new_recompile_block(int addr) } } #endif - + // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs. 
for(i=0;i=0) { @@ -9677,7 +9677,7 @@ int new_recompile_block(int addr) } } if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) { - if(itype[i+1]==LOAD) + if(itype[i+1]==LOAD) hr=get_reg(regs[i+1].regmap,rt1[i+1]); if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); @@ -9701,10 +9701,10 @@ int new_recompile_block(int addr) } } } - + /* Pass 6 - Optimize clean/dirty state */ clean_registers(0,slen-1,1); - + /* Pass 7 - Identify 32-bit registers */ for (i=slen-1;i>=0;i--) { @@ -10165,15 +10165,15 @@ int new_recompile_block(int addr) //printf("shadow buffer: %x-%x\n",(int)copy,(int)copy+slen*4); memcpy(copy,source,slen*4); copy+=slen*4; - + #ifdef __arm__ __clear_cache((void *)beginning,out); #endif - + // If we're within 256K of the end of the buffer, // start over from the beginning. (Is 256K enough?) if((u_int)out>(u_int)BASE_ADDR+(1<>12;i<=(start+slen*4)>>12;i++) { invalid_code[i]=0; @@ -10186,9 +10186,9 @@ int new_recompile_block(int addr) invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; - + /* Pass 10 - Free memory by expiring oldest blocks */ - + int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { @@ -10230,7 +10230,7 @@ int new_recompile_block(int addr) case 3: // Clear jump_out #ifdef __arm__ - if((expirep&2047)==0) + if((expirep&2047)==0) do_clear_cache(); #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); -- cgit v1.2.3 From e2b5e7aa45f75cd13ef238fa4ff9516891dabef5 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 18 Sep 2016 23:43:04 +0300 Subject: drc: mark things static --- libpcsxcore/new_dynarec/assem_arm.c | 521 ++++++++++++++++++++-------------- libpcsxcore/new_dynarec/new_dynarec.c | 135 ++++----- 2 files changed, 374 insertions(+), 282 
deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 592cc88..6bd9e49 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -38,6 +38,8 @@ char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); #define CALLER_SAVE_REGS 0x120f #endif +#define unused __attribute__((unused)) + extern int cycle_count; extern int last_count; extern int pcaddr; @@ -112,11 +114,11 @@ const u_int invalidate_addr_reg[16] = { 0, 0}; -unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; +static unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; /* Linker */ -void set_jump_target(int addr,u_int target) +static void set_jump_target(int addr,u_int target) { u_char *ptr=(u_char *)addr; u_int *ptr2=(u_int *)ptr; @@ -150,7 +152,8 @@ void set_jump_target(int addr,u_int target) // This optionally copies the instruction from the target of the branch into // the space before the branch. Works, but the difference in speed is // usually insignificant. 
-void set_jump_target_fillslot(int addr,u_int target,int copy) +#if 0 +static void set_jump_target_fillslot(int addr,u_int target,int copy) { u_char *ptr=(u_char *)addr; u_int *ptr2=(u_int *)ptr; @@ -179,9 +182,10 @@ void set_jump_target_fillslot(int addr,u_int target,int copy) *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); } } +#endif /* Literal pool */ -add_literal(int addr,int val) +static void add_literal(int addr,int val) { assert(literalcount=33554432) { @@ -865,96 +875,99 @@ u_int genjmp(u_int addr) return ((u_int)offset>>2)&0xffffff; } -void emit_mov(int rs,int rt) +static void emit_mov(int rs,int rt) { assem_debug("mov %s,%s\n",regname[rt],regname[rs]); output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)); } -void emit_movs(int rs,int rt) +static void emit_movs(int rs,int rt) { assem_debug("movs %s,%s\n",regname[rt],regname[rs]); output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)); } -void emit_add(int rs1,int rs2,int rt) +static void emit_add(int rs1,int rs2,int rt) { assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_adds(int rs1,int rs2,int rt) +static void emit_adds(int rs1,int rs2,int rt) { assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2)); } -void emit_adcs(int rs1,int rs2,int rt) +static void emit_adcs(int rs1,int rs2,int rt) { assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_sbc(int rs1,int rs2,int rt) +static void emit_sbc(int rs1,int rs2,int rt) { assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_sbcs(int rs1,int rs2,int rt) +static void emit_sbcs(int rs1,int rs2,int rt) { assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_neg(int rs, int rt) +static void emit_neg(int rs, int rt) { 
assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]); output_w32(0xe2600000|rd_rn_rm(rt,rs,0)); } -void emit_negs(int rs, int rt) +static void emit_negs(int rs, int rt) { assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); output_w32(0xe2700000|rd_rn_rm(rt,rs,0)); } -void emit_sub(int rs1,int rs2,int rt) +static void emit_sub(int rs1,int rs2,int rt) { assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2)); } -void emit_subs(int rs1,int rs2,int rt) +static void emit_subs(int rs1,int rs2,int rt) { assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2)); } -void emit_zeroreg(int rt) +static void emit_zeroreg(int rt) { assem_debug("mov %s,#0\n",regname[rt]); output_w32(0xe3a00000|rd_rn_rm(rt,0,0)); } -void emit_loadlp(u_int imm,u_int rt) +static void emit_loadlp(u_int imm,u_int rt) { add_literal((int)out,imm); assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm); output_w32(0xe5900000|rd_rn_rm(rt,15,0)); } -void emit_movw(u_int imm,u_int rt) + +static void emit_movw(u_int imm,u_int rt) { assert(imm<65536); assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm); output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000)); } -void emit_movt(u_int imm,u_int rt) + +static void emit_movt(u_int imm,u_int rt) { assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000); output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000)); } -void emit_movimm(u_int imm,u_int rt) + +static void emit_movimm(u_int imm,u_int rt) { u_int armval; if(genimm(imm,&armval)) { @@ -981,13 +994,14 @@ void emit_movimm(u_int imm,u_int rt) #endif } } -void emit_pcreladdr(u_int rt) + +static void emit_pcreladdr(u_int rt) { assem_debug("add %s,pc,#?\n",regname[rt]); output_w32(0xe2800000|rd_rn_rm(rt,15,0)); } -void emit_loadreg(int r, int hr) +static void emit_loadreg(int r, int hr) { if(r&64) { SysPrintf("64bit load in 32bit mode!\n"); 
@@ -1010,7 +1024,8 @@ void emit_loadreg(int r, int hr) output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset); } } -void emit_storereg(int r, int hr) + +static void emit_storereg(int r, int hr) { if(r&64) { SysPrintf("64bit store in 32bit mode!\n"); @@ -1028,13 +1043,13 @@ void emit_storereg(int r, int hr) output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset); } -void emit_test(int rs, int rt) +static void emit_test(int rs, int rt) { assem_debug("tst %s,%s\n",regname[rs],regname[rt]); output_w32(0xe1100000|rd_rn_rm(0,rs,rt)); } -void emit_testimm(int rs,int imm) +static void emit_testimm(int rs,int imm) { u_int armval; assem_debug("tst %s,#%d\n",regname[rs],imm); @@ -1042,7 +1057,7 @@ void emit_testimm(int rs,int imm) output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval); } -void emit_testeqimm(int rs,int imm) +static void emit_testeqimm(int rs,int imm) { u_int armval; assem_debug("tsteq %s,$%d\n",regname[rs],imm); @@ -1050,36 +1065,37 @@ void emit_testeqimm(int rs,int imm) output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval); } -void emit_not(int rs,int rt) +static void emit_not(int rs,int rt) { assem_debug("mvn %s,%s\n",regname[rt],regname[rs]); output_w32(0xe1e00000|rd_rn_rm(rt,0,rs)); } -void emit_mvnmi(int rs,int rt) +static void emit_mvnmi(int rs,int rt) { assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); output_w32(0x41e00000|rd_rn_rm(rt,0,rs)); } -void emit_and(u_int rs1,u_int rs2,u_int rt) +static void emit_and(u_int rs1,u_int rs2,u_int rt) { assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2)); } -void emit_or(u_int rs1,u_int rs2,u_int rt) +static void emit_or(u_int rs1,u_int rs2,u_int rt) { assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_or_and_set_flags(int rs1,int rs2,int rt) + +static void emit_or_and_set_flags(int rs1,int rs2,int rt) { assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); 
output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2)); } -void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) +static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) { assert(rs<16); assert(rt<16); @@ -1088,7 +1104,7 @@ void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7)); } -void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) +static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) { assert(rs<16); assert(rt<16); @@ -1097,13 +1113,13 @@ void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7)); } -void emit_xor(u_int rs1,u_int rs2,u_int rt) +static void emit_xor(u_int rs1,u_int rs2,u_int rt) { assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2)); } -void emit_addimm(u_int rs,int imm,u_int rt) +static void emit_addimm(u_int rs,int imm,u_int rt) { assert(rs<16); assert(rt<16); @@ -1144,7 +1160,7 @@ void emit_addimm(u_int rs,int imm,u_int rt) else if(rs!=rt) emit_mov(rs,rt); } -void emit_addimm_and_set_flags(int imm,int rt) +static void emit_addimm_and_set_flags(int imm,int rt) { assert(imm>-65536&&imm<65536); u_int armval; @@ -1166,19 +1182,20 @@ void emit_addimm_and_set_flags(int imm,int rt) output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); } } -void emit_addimm_no_flags(u_int imm,u_int rt) + +static void emit_addimm_no_flags(u_int imm,u_int rt) { emit_addimm(rt,imm,rt); } -void emit_addnop(u_int r) +static void emit_addnop(u_int r) { assert(r<16); assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]); output_w32(0xe2800000|rd_rn_rm(r,r,0)); } -void emit_adcimm(u_int rs,int imm,u_int rt) +static void emit_adcimm(u_int rs,int imm,u_int rt) { u_int armval; genimm_checked(imm,&armval); @@ -1186,7 +1203,7 @@ void emit_adcimm(u_int rs,int imm,u_int rt) output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_rscimm(int rs,int imm,u_int rt) +static void emit_rscimm(int rs,int imm,u_int rt) { assert(0); u_int armval; 
@@ -1195,7 +1212,7 @@ void emit_rscimm(int rs,int imm,u_int rt) output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) +static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) { // TODO: if(genimm(imm,&armval)) ... // else @@ -1204,7 +1221,7 @@ void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) emit_adcimm(rsh,0,rth); } -void emit_andimm(int rs,int imm,int rt) +static void emit_andimm(int rs,int imm,int rt) { u_int armval; if(imm==0) { @@ -1240,7 +1257,7 @@ void emit_andimm(int rs,int imm,int rt) } } -void emit_orimm(int rs,int imm,int rt) +static void emit_orimm(int rs,int imm,int rt) { u_int armval; if(imm==0) { @@ -1257,7 +1274,7 @@ void emit_orimm(int rs,int imm,int rt) } } -void emit_xorimm(int rs,int imm,int rt) +static void emit_xorimm(int rs,int imm,int rt) { u_int armval; if(imm==0) { @@ -1274,7 +1291,7 @@ void emit_xorimm(int rs,int imm,int rt) } } -void emit_shlimm(int rs,u_int imm,int rt) +static void emit_shlimm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1283,7 +1300,7 @@ void emit_shlimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } -void emit_lsls_imm(int rs,int imm,int rt) +static void emit_lsls_imm(int rs,int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1291,7 +1308,7 @@ void emit_lsls_imm(int rs,int imm,int rt) output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } -void emit_lslpls_imm(int rs,int imm,int rt) +static unused void emit_lslpls_imm(int rs,int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1299,7 +1316,7 @@ void emit_lslpls_imm(int rs,int imm,int rt) output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } -void emit_shrimm(int rs,u_int imm,int rt) +static void emit_shrimm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1307,7 +1324,7 @@ void emit_shrimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); } -void emit_sarimm(int rs,u_int imm,int rt) +static 
void emit_sarimm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1315,7 +1332,7 @@ void emit_sarimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7)); } -void emit_rorimm(int rs,u_int imm,int rt) +static void emit_rorimm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -1323,7 +1340,7 @@ void emit_rorimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7)); } -void emit_shldimm(int rs,int rs2,u_int imm,int rt) +static void emit_shldimm(int rs,int rs2,u_int imm,int rt) { assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); assert(imm>0); @@ -1335,7 +1352,7 @@ void emit_shldimm(int rs,int rs2,u_int imm,int rt) output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); } -void emit_shrdimm(int rs,int rs2,u_int imm,int rt) +static void emit_shrdimm(int rs,int rs2,u_int imm,int rt) { assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); assert(imm>0); @@ -1347,7 +1364,7 @@ void emit_shrdimm(int rs,int rs2,u_int imm,int rt) output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); } -void emit_signextend16(int rs,int rt) +static void emit_signextend16(int rs,int rt) { #ifndef HAVE_ARMV6 emit_shlimm(rs,16,rt); @@ -1358,7 +1375,7 @@ void emit_signextend16(int rs,int rt) #endif } -void emit_signextend8(int rs,int rt) +static void emit_signextend8(int rs,int rt) { #ifndef HAVE_ARMV6 emit_shlimm(rs,24,rt); @@ -1369,7 +1386,7 @@ void emit_signextend8(int rs,int rt) #endif } -void emit_shl(u_int rs,u_int shift,u_int rt) +static void emit_shl(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1378,7 +1395,8 @@ void emit_shl(u_int rs,u_int shift,u_int rt) assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8)); } -void emit_shr(u_int rs,u_int shift,u_int rt) + +static void emit_shr(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1386,7 +1404,8 @@ void emit_shr(u_int 
rs,u_int shift,u_int rt) assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8)); } -void emit_sar(u_int rs,u_int shift,u_int rt) + +static void emit_sar(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1395,7 +1414,7 @@ void emit_sar(u_int rs,u_int shift,u_int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); } -void emit_orrshl(u_int rs,u_int shift,u_int rt) +static void emit_orrshl(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1403,7 +1422,8 @@ void emit_orrshl(u_int rs,u_int shift,u_int rt) assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8)); } -void emit_orrshr(u_int rs,u_int shift,u_int rt) + +static void emit_orrshr(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -1412,7 +1432,7 @@ void emit_orrshr(u_int rs,u_int shift,u_int rt) output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8)); } -void emit_cmpimm(int rs,int imm) +static void emit_cmpimm(int rs,int imm) { u_int armval; if(genimm(imm,&armval)) { @@ -1434,70 +1454,79 @@ void emit_cmpimm(int rs,int imm) } } -void emit_cmovne_imm(int imm,int rt) +static void emit_cmovne_imm(int imm,int rt) { assem_debug("movne %s,#%d\n",regname[rt],imm); u_int armval; genimm_checked(imm,&armval); output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval); } -void emit_cmovl_imm(int imm,int rt) + +static void emit_cmovl_imm(int imm,int rt) { assem_debug("movlt %s,#%d\n",regname[rt],imm); u_int armval; genimm_checked(imm,&armval); output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval); } -void emit_cmovb_imm(int imm,int rt) + +static void emit_cmovb_imm(int imm,int rt) { assem_debug("movcc %s,#%d\n",regname[rt],imm); u_int armval; genimm_checked(imm,&armval); output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval); } -void emit_cmovs_imm(int imm,int rt) + +static void emit_cmovs_imm(int imm,int rt) { assem_debug("movmi 
%s,#%d\n",regname[rt],imm); u_int armval; genimm_checked(imm,&armval); output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); } -void emit_cmove_reg(int rs,int rt) + +static void emit_cmove_reg(int rs,int rt) { assem_debug("moveq %s,%s\n",regname[rt],regname[rs]); output_w32(0x01a00000|rd_rn_rm(rt,0,rs)); } -void emit_cmovne_reg(int rs,int rt) + +static void emit_cmovne_reg(int rs,int rt) { assem_debug("movne %s,%s\n",regname[rt],regname[rs]); output_w32(0x11a00000|rd_rn_rm(rt,0,rs)); } -void emit_cmovl_reg(int rs,int rt) + +static void emit_cmovl_reg(int rs,int rt) { assem_debug("movlt %s,%s\n",regname[rt],regname[rs]); output_w32(0xb1a00000|rd_rn_rm(rt,0,rs)); } -void emit_cmovs_reg(int rs,int rt) + +static void emit_cmovs_reg(int rs,int rt) { assem_debug("movmi %s,%s\n",regname[rt],regname[rs]); output_w32(0x41a00000|rd_rn_rm(rt,0,rs)); } -void emit_slti32(int rs,int imm,int rt) +static void emit_slti32(int rs,int imm,int rt) { if(rs!=rt) emit_zeroreg(rt); emit_cmpimm(rs,imm); if(rs==rt) emit_movimm(0,rt); emit_cmovl_imm(1,rt); } -void emit_sltiu32(int rs,int imm,int rt) + +static void emit_sltiu32(int rs,int imm,int rt) { if(rs!=rt) emit_zeroreg(rt); emit_cmpimm(rs,imm); if(rs==rt) emit_movimm(0,rt); emit_cmovb_imm(1,rt); } -void emit_slti64_32(int rsh,int rsl,int imm,int rt) + +static void emit_slti64_32(int rsh,int rsl,int imm,int rt) { assert(rsh!=rt); emit_slti32(rsl,imm,rt); @@ -1514,7 +1543,8 @@ void emit_slti64_32(int rsh,int rsl,int imm,int rt) emit_cmovl_imm(1,rt); } } -void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) + +static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) { assert(rsh!=rt); emit_sltiu32(rsl,imm,rt); @@ -1530,26 +1560,29 @@ void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) } } -void emit_cmp(int rs,int rt) +static void emit_cmp(int rs,int rt) { assem_debug("cmp %s,%s\n",regname[rs],regname[rt]); output_w32(0xe1500000|rd_rn_rm(0,rs,rt)); } -void emit_set_gz32(int rs, int rt) + +static void emit_set_gz32(int rs, int rt) { 
//assem_debug("set_gz32\n"); emit_cmpimm(rs,1); emit_movimm(1,rt); emit_cmovl_imm(0,rt); } -void emit_set_nz32(int rs, int rt) + +static void emit_set_nz32(int rs, int rt) { //assem_debug("set_nz32\n"); if(rs!=rt) emit_movs(rs,rt); else emit_test(rs,rs); emit_cmovne_imm(1,rt); } -void emit_set_gz64_32(int rsh, int rsl, int rt) + +static void emit_set_gz64_32(int rsh, int rsl, int rt) { //assem_debug("set_gz64\n"); emit_set_gz32(rsl,rt); @@ -1557,13 +1590,15 @@ void emit_set_gz64_32(int rsh, int rsl, int rt) emit_cmovne_imm(1,rt); emit_cmovs_imm(0,rt); } -void emit_set_nz64_32(int rsh, int rsl, int rt) + +static void emit_set_nz64_32(int rsh, int rsl, int rt) { //assem_debug("set_nz64\n"); emit_or_and_set_flags(rsh,rsl,rt); emit_cmovne_imm(1,rt); } -void emit_set_if_less32(int rs1, int rs2, int rt) + +static void emit_set_if_less32(int rs1, int rs2, int rt) { //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); @@ -1571,7 +1606,8 @@ void emit_set_if_less32(int rs1, int rs2, int rt) if(rs1==rt||rs2==rt) emit_movimm(0,rt); emit_cmovl_imm(1,rt); } -void emit_set_if_carry32(int rs1, int rs2, int rt) + +static void emit_set_if_carry32(int rs1, int rs2, int rt) { //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); @@ -1579,7 +1615,8 @@ void emit_set_if_carry32(int rs1, int rs2, int rt) if(rs1==rt||rs2==rt) emit_movimm(0,rt); emit_cmovb_imm(1,rt); } -void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) + +static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) { //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); assert(u1!=rt); @@ -1589,7 +1626,8 @@ void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) emit_sbcs(u1,u2,HOST_TEMPREG); emit_cmovl_imm(1,rt); } -void emit_set_if_carry64_32(int u1, int l1, int u2, int 
l2, int rt) + +static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) { //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); assert(u1!=rt); @@ -1600,86 +1638,97 @@ void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) emit_cmovb_imm(1,rt); } -void emit_call(int a) +static void emit_call(int a) { assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8); u_int offset=genjmp(a); output_w32(0xeb000000|offset); } -void emit_jmp(int a) + +static void emit_jmp(int a) { assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8); u_int offset=genjmp(a); output_w32(0xea000000|offset); } -void emit_jne(int a) + +static void emit_jne(int a) { assem_debug("bne %x\n",a); u_int offset=genjmp(a); output_w32(0x1a000000|offset); } -void emit_jeq(int a) + +static void emit_jeq(int a) { assem_debug("beq %x\n",a); u_int offset=genjmp(a); output_w32(0x0a000000|offset); } -void emit_js(int a) + +static void emit_js(int a) { assem_debug("bmi %x\n",a); u_int offset=genjmp(a); output_w32(0x4a000000|offset); } -void emit_jns(int a) + +static void emit_jns(int a) { assem_debug("bpl %x\n",a); u_int offset=genjmp(a); output_w32(0x5a000000|offset); } -void emit_jl(int a) + +static void emit_jl(int a) { assem_debug("blt %x\n",a); u_int offset=genjmp(a); output_w32(0xba000000|offset); } -void emit_jge(int a) + +static void emit_jge(int a) { assem_debug("bge %x\n",a); u_int offset=genjmp(a); output_w32(0xaa000000|offset); } -void emit_jno(int a) + +static void emit_jno(int a) { assem_debug("bvc %x\n",a); u_int offset=genjmp(a); output_w32(0x7a000000|offset); } -void emit_jc(int a) + +static void emit_jc(int a) { assem_debug("bcs %x\n",a); u_int offset=genjmp(a); output_w32(0x2a000000|offset); } -void emit_jcc(int a) + +static void emit_jcc(int a) { assem_debug("bcc %x\n",a); u_int offset=genjmp(a); output_w32(0x3a000000|offset); } -void emit_callreg(u_int r) +static void emit_callreg(u_int r) { 
assert(r<15); assem_debug("blx %s\n",regname[r]); output_w32(0xe12fff30|r); } -void emit_jmpreg(u_int r) + +static void emit_jmpreg(u_int r) { assem_debug("mov pc,%s\n",regname[r]); output_w32(0xe1a00000|rd_rn_rm(15,0,r)); } -void emit_readword_indexed(int offset, int rs, int rt) +static void emit_readword_indexed(int offset, int rs, int rt) { assert(offset>-4096&&offset<4096); assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1689,37 +1738,44 @@ void emit_readword_indexed(int offset, int rs, int rt) output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset)); } } -void emit_readword_dualindexedx4(int rs1, int rs2, int rt) + +static void emit_readword_dualindexedx4(int rs1, int rs2, int rt) { assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100); } -void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2)); } -void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2)); } -void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2)); } -void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) + +static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) { assem_debug("ldrccsh 
%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); } -void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) + +static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) { if(map<0) emit_readword_indexed(addr, rs, rt); else { @@ -1727,7 +1783,8 @@ void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) emit_readword_dualindexedx4(rs, map, rt); } } -void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) + +static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) { if(map<0) { if(rh>=0) emit_readword_indexed(addr, rs, rh); @@ -1739,7 +1796,8 @@ void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) emit_readword_indexed_tlb(addr, rs, map, rl); } } -void emit_movsbl_indexed(int offset, int rs, int rt) + +static void emit_movsbl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1749,7 +1807,8 @@ void emit_movsbl_indexed(int offset, int rs, int rt) output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } -void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) + +static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) { if(map<0) emit_movsbl_indexed(addr, rs, rt); else { @@ -1765,7 +1824,8 @@ void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) } } } -void emit_movswl_indexed(int offset, int rs, int rt) + +static void emit_movswl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1775,7 +1835,8 @@ void emit_movswl_indexed(int offset, int rs, int rt) output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } -void emit_movzbl_indexed(int offset, int rs, int rt) + +static void emit_movzbl_indexed(int offset, int rs, int rt) { assert(offset>-4096&&offset<4096); 
assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1785,12 +1846,14 @@ void emit_movzbl_indexed(int offset, int rs, int rt) output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset)); } } -void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) + +static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) { assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100); } -void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) + +static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) { if(map<0) emit_movzbl_indexed(addr, rs, rt); else { @@ -1802,7 +1865,8 @@ void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) } } } -void emit_movzwl_indexed(int offset, int rs, int rt) + +static void emit_movzwl_indexed(int offset, int rs, int rt) { assert(offset>-256&&offset<256); assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1812,6 +1876,7 @@ void emit_movzwl_indexed(int offset, int rs, int rt) output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } + static void emit_ldrd(int offset, int rs, int rt) { assert(offset>-256&&offset<256); @@ -1822,35 +1887,40 @@ static void emit_ldrd(int offset, int rs, int rt) output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } -void emit_readword(int addr, int rt) + +static void emit_readword(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); assem_debug("ldr %s,fp+%d\n",regname[rt],offset); output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); } -void emit_movsbl(int addr, int rt) + +static unused void emit_movsbl(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<256); assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset); output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_movswl(int addr, int rt) + +static unused void emit_movswl(int addr, int rt) { u_int offset = 
addr-(u_int)&dynarec_local; assert(offset<256); assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset); output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_movzbl(int addr, int rt) + +static unused void emit_movzbl(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); assem_debug("ldrb %s,fp+%d\n",regname[rt],offset); output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset); } -void emit_movzwl(int addr, int rt) + +static unused void emit_movzwl(int addr, int rt) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<256); @@ -1858,7 +1928,7 @@ void emit_movzwl(int addr, int rt) output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_writeword_indexed(int rt, int offset, int rs) +static void emit_writeword_indexed(int rt, int offset, int rs) { assert(offset>-4096&&offset<4096); assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1868,12 +1938,14 @@ void emit_writeword_indexed(int rt, int offset, int rs) output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset)); } } -void emit_writeword_dualindexedx4(int rt, int rs1, int rs2) + +static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2) { assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100); } -void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) + +static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) { if(map<0) emit_writeword_indexed(rt, addr, rs); else { @@ -1881,7 +1953,8 @@ void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) emit_writeword_dualindexedx4(rt, rs, map); } } -void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp) + +static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp) { if(map<0) { if(rh>=0) emit_writeword_indexed(rh, addr, rs); @@ -1897,7 +1970,8 @@ void 
emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int } } } -void emit_writehword_indexed(int rt, int offset, int rs) + +static void emit_writehword_indexed(int rt, int offset, int rs) { assert(offset>-256&&offset<256); assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1907,7 +1981,8 @@ void emit_writehword_indexed(int rt, int offset, int rs) output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); } } -void emit_writebyte_indexed(int rt, int offset, int rs) + +static void emit_writebyte_indexed(int rt, int offset, int rs) { assert(offset>-4096&&offset<4096); assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset); @@ -1917,12 +1992,14 @@ void emit_writebyte_indexed(int rt, int offset, int rs) output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset)); } } -void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) + +static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) { assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100); } -void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) + +static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) { if(map<0) emit_writebyte_indexed(rt, addr, rs); else { @@ -1934,36 +2011,42 @@ void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) } } } -void emit_strcc_dualindexed(int rs1, int rs2, int rt) + +static void emit_strcc_dualindexed(int rs1, int rs2, int rt) { assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_strccb_dualindexed(int rs1, int rs2, int rt) + +static void emit_strccb_dualindexed(int rs1, int rs2, int rt) { assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2)); } -void emit_strcch_dualindexed(int rs1, int rs2, int rt) + +static void emit_strcch_dualindexed(int 
rs1, int rs2, int rt) { assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2)); } -void emit_writeword(int rt, int addr) + +static void emit_writeword(int rt, int addr) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); assem_debug("str %s,fp+%d\n",regname[rt],offset); output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); } -void emit_writehword(int rt, int addr) + +static unused void emit_writehword(int rt, int addr) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<256); assem_debug("strh %s,fp+%d\n",regname[rt],offset); output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); } -void emit_writebyte(int rt, int addr) + +static unused void emit_writebyte(int rt, int addr) { u_int offset = addr-(u_int)&dynarec_local; assert(offset<4096); @@ -1971,7 +2054,7 @@ void emit_writebyte(int rt, int addr) output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); } -void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) +static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) { assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); assert(rs1<16); @@ -1980,7 +2063,8 @@ void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) assert(lo<16); output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); } -void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) + +static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) { assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); assert(rs1<16); @@ -1990,19 +2074,19 @@ void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo) output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); } -void emit_clz(int rs,int rt) +static void emit_clz(int rs,int rt) { assem_debug("clz %s,%s\n",regname[rt],regname[rs]); output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs)); } -void emit_subcs(int rs1,int rs2,int rt) +static void emit_subcs(int rs1,int rs2,int rt) { assem_debug("subcs 
%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2)); } -void emit_shrcc_imm(int rs,u_int imm,int rt) +static void emit_shrcc_imm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -2010,7 +2094,7 @@ void emit_shrcc_imm(int rs,u_int imm,int rt) output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); } -void emit_shrne_imm(int rs,u_int imm,int rt) +static void emit_shrne_imm(int rs,u_int imm,int rt) { assert(imm>0); assert(imm<32); @@ -2018,73 +2102,73 @@ void emit_shrne_imm(int rs,u_int imm,int rt) output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); } -void emit_negmi(int rs, int rt) +static void emit_negmi(int rs, int rt) { assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]); output_w32(0x42600000|rd_rn_rm(rt,rs,0)); } -void emit_negsmi(int rs, int rt) +static void emit_negsmi(int rs, int rt) { assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]); output_w32(0x42700000|rd_rn_rm(rt,rs,0)); } -void emit_orreq(u_int rs1,u_int rs2,u_int rt) +static void emit_orreq(u_int rs1,u_int rs2,u_int rt) { assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_orrne(u_int rs1,u_int rs2,u_int rt) +static void emit_orrne(u_int rs1,u_int rs2,u_int rt) { assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2)); } -void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); } -void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); } -void 
emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); } -void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); } -void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); } -void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) { assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); } -void emit_teq(int rs, int rt) +static void emit_teq(int rs, int rt) { assem_debug("teq %s,%s\n",regname[rs],regname[rt]); output_w32(0xe1300000|rd_rn_rm(0,rs,rt)); } -void emit_rsbimm(int rs, int imm, int rt) +static void emit_rsbimm(int rs, int imm, int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2093,7 +2177,7 @@ void emit_rsbimm(int rs, int imm, int rt) } // Load 2 immediates optimizing for small code size -void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) +static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) { emit_movimm(imm1,rt1); u_int armval; @@ -2109,7 +2193,7 @@ void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) // Conditionally select one of two immediates, optimizing for small code size // This will only be called if HAVE_CMOV_IMM is defined -void 
emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) +static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) { u_int armval; if(genimm(imm2-imm1,&armval)) { @@ -2143,7 +2227,7 @@ void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) } // special case for checking invalid_code -void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) +static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) { assert(imm<128&&imm>=0); assert(r>=0&&r<16); @@ -2152,7 +2236,7 @@ void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) emit_cmpimm(HOST_TEMPREG,imm); } -void emit_callne(int a) +static void emit_callne(int a) { assem_debug("blne %x\n",a); u_int offset=genjmp(a); @@ -2160,21 +2244,21 @@ void emit_callne(int a) } // Used to preload hash table entries -void emit_prefetchreg(int r) +static unused void emit_prefetchreg(int r) { assem_debug("pld %s\n",regname[r]); output_w32(0xf5d0f000|rd_rn_rm(0,r,0)); } // Special case for mini_ht -void emit_ldreq_indexed(int rs, u_int offset, int rt) +static void emit_ldreq_indexed(int rs, u_int offset, int rt) { assert(offset<4096); assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset); output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset); } -void emit_bicne_imm(int rs,int imm,int rt) +static unused void emit_bicne_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2182,7 +2266,7 @@ void emit_bicne_imm(int rs,int imm,int rt) output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_biccs_imm(int rs,int imm,int rt) +static unused void emit_biccs_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2190,7 +2274,7 @@ void emit_biccs_imm(int rs,int imm,int rt) output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_bicvc_imm(int rs,int imm,int rt) +static unused void emit_bicvc_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2198,7 +2282,7 @@ void emit_bicvc_imm(int rs,int imm,int rt) 
output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_bichi_imm(int rs,int imm,int rt) +static unused void emit_bichi_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2206,7 +2290,7 @@ void emit_bichi_imm(int rs,int imm,int rt) output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval); } -void emit_orrvs_imm(int rs,int imm,int rt) +static unused void emit_orrvs_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2214,7 +2298,7 @@ void emit_orrvs_imm(int rs,int imm,int rt) output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval); } -void emit_orrne_imm(int rs,int imm,int rt) +static void emit_orrne_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2222,7 +2306,7 @@ void emit_orrne_imm(int rs,int imm,int rt) output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval); } -void emit_andne_imm(int rs,int imm,int rt) +static void emit_andne_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2230,7 +2314,7 @@ void emit_andne_imm(int rs,int imm,int rt) output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval); } -void emit_addpl_imm(int rs,int imm,int rt) +static unused void emit_addpl_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); @@ -2238,7 +2322,7 @@ void emit_addpl_imm(int rs,int imm,int rt) output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval); } -void emit_jno_unlikely(int a) +static void emit_jno_unlikely(int a) { //emit_jno(a); assem_debug("addvc pc,pc,#? 
(%x)\n",/*a-(int)out-8,*/a); @@ -2256,6 +2340,7 @@ static void save_regs_all(u_int reglist) assem_debug("}\n"); output_w32(0xe88b0000|reglist); } + static void restore_regs_all(u_int reglist) { int i; @@ -2267,12 +2352,14 @@ static void restore_regs_all(u_int reglist) assem_debug("}\n"); output_w32(0xe89b0000|reglist); } + // Save registers before function call static void save_regs(u_int reglist) { reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12 save_regs_all(reglist); } + // Restore registers after function call static void restore_regs(u_int reglist) { @@ -2282,7 +2369,7 @@ static void restore_regs(u_int reglist) /* Stubs/epilogue */ -void literal_pool(int n) +static void literal_pool(int n) { if(!literalcount) return; if(n) { @@ -2314,7 +2401,7 @@ void literal_pool(int n) literalcount=0; } -void literal_pool_jumpover(int n) +static void literal_pool_jumpover(int n) { if(!literalcount) return; if(n) { @@ -2326,10 +2413,12 @@ void literal_pool_jumpover(int n) set_jump_target(jaddr,(int)out); } -emit_extjump2(u_int addr, int target, int linker) +static void emit_extjump2(u_int addr, int target, int linker) { u_char *ptr=(u_char *)addr; assert((ptr[3]&0x0e)==0xa); + (void)ptr; + emit_loadlp(target,0); emit_loadlp(addr,1); assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<>12]; @@ -2542,7 +2632,7 @@ u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) } } -inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,target); int rth=get_reg(regmap,target|64); @@ -2624,7 +2714,7 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i restore_regs(reglist); } -do_writestub(int n) +static void do_writestub(int n) { assem_debug("do_writestub %x\n",start+stubs[n][3]*4); literal_pool(256); @@ -2708,7 +2798,7 @@ 
do_writestub(int n) emit_jmp(ra); } -inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,-1); int rth=get_reg(regmap,target|64); @@ -2745,7 +2835,7 @@ inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, restore_regs(reglist); } -do_unalignedwritestub(int n) +static void do_unalignedwritestub(int n) { assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4); literal_pool(256); @@ -2834,12 +2924,7 @@ do_unalignedwritestub(int n) #endif } -void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a) -{ - printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]); -} - -do_invstub(int n) +static void do_invstub(int n) { literal_pool(20); u_int reglist=stubs[n][3]; @@ -2876,7 +2961,7 @@ int do_dirty_stub(int i) return entry; } -void do_dirty_stub_ds() +static void do_dirty_stub_ds() { // Careful about the code output here, verify_dirty needs to parse it. 
#ifndef HAVE_ARMV7 @@ -2894,7 +2979,7 @@ void do_dirty_stub_ds() emit_call((int)&verify_code_ds); } -do_cop1stub(int n) +static void do_cop1stub(int n) { literal_pool(256); assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4); @@ -2917,7 +3002,7 @@ do_cop1stub(int n) /* Special assem */ -void shift_assemble_arm(int i,struct regstat *i_regs) +static void shift_assemble_arm(int i,struct regstat *i_regs) { if(rt1[i]) { if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV @@ -3209,7 +3294,7 @@ static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) #define shift_assemble shift_assemble_arm -void loadlr_assemble_arm(int i,struct regstat *i_regs) +static void loadlr_assemble_arm(int i,struct regstat *i_regs) { int s,th,tl,temp,temp2,addr,map=-1; int offset; @@ -3338,7 +3423,7 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) } #define loadlr_assemble loadlr_assemble_arm -void cop0_assemble(int i,struct regstat *i_regs) +static void cop0_assemble(int i,struct regstat *i_regs) { if(opcode2[i]==0) // MFC0 { @@ -3525,7 +3610,7 @@ static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) } } -void cop2_assemble(int i,struct regstat *i_regs) +static void cop2_assemble(int i,struct regstat *i_regs) { u_int copr=(source[i]>>11)&0x1f; signed char temp=get_reg(i_regs->regmap,-1); @@ -3734,7 +3819,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) } } -void cop1_unusable(int i,struct regstat *i_regs) +static void cop1_unusable(int i,struct regstat *i_regs) { // XXX: should just just do the exception instead if(!cop1_usable) { @@ -3745,28 +3830,28 @@ void cop1_unusable(int i,struct regstat *i_regs) } } -void cop1_assemble(int i,struct regstat *i_regs) +static void cop1_assemble(int i,struct regstat *i_regs) { cop1_unusable(i, i_regs); } -void fconv_assemble_arm(int i,struct regstat *i_regs) +static void fconv_assemble_arm(int i,struct regstat *i_regs) { cop1_unusable(i, i_regs); } #define fconv_assemble fconv_assemble_arm -void fcomp_assemble(int 
i,struct regstat *i_regs) +static void fcomp_assemble(int i,struct regstat *i_regs) { cop1_unusable(i, i_regs); } -void float_assemble(int i,struct regstat *i_regs) +static void float_assemble(int i,struct regstat *i_regs) { cop1_unusable(i, i_regs); } -void multdiv_assemble_arm(int i,struct regstat *i_regs) +static void multdiv_assemble_arm(int i,struct regstat *i_regs) { // case 0x18: MULT // case 0x19: MULTU @@ -3891,25 +3976,25 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) } #define multdiv_assemble multdiv_assemble_arm -void do_preload_rhash(int r) { +static void do_preload_rhash(int r) { // Don't need this for ARM. On x86, this puts the value 0xf8 into the // register. On ARM the hash can be done with a single instruction (below) } -void do_preload_rhtbl(int ht) { +static void do_preload_rhtbl(int ht) { emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht); } -void do_rhash(int rs,int rh) { +static void do_rhash(int rs,int rh) { emit_andimm(rs,0xf8,rh); } -void do_miniht_load(int ht,int rh) { +static void do_miniht_load(int ht,int rh) { assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]); output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh)); } -void do_miniht_jump(int rs,int rh,int ht) { +static void do_miniht_jump(int rs,int rh,int ht) { emit_cmp(rh,rs); emit_ldreq_indexed(ht,4,15); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -3920,7 +4005,7 @@ void do_miniht_jump(int rs,int rh,int ht) { #endif } -void do_miniht_insert(u_int return_address,int rt,int temp) { +static void do_miniht_insert(u_int return_address,int rt,int temp) { #ifndef HAVE_ARMV7 emit_movimm(return_address,rt); // PC into link register add_to_linker((int)out,return_address,1); @@ -3937,7 +4022,7 @@ void do_miniht_insert(u_int return_address,int rt,int temp) { #endif } -void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) +static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int 
dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) { //if(dirty_pre==dirty) return; int hr,reg,new_hr; @@ -3966,7 +4051,7 @@ void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty, /* using strd could possibly help but you'd have to allocate registers in pairs -void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu) +static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu) { int hr; int wrote=-1; @@ -4023,7 +4108,7 @@ void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint // Clearing the cache is rather slow on ARM Linux, so mark the areas // that need to be cleared, and then only clear these areas once. -void do_clear_cache() +static void do_clear_cache() { int i,j; for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 21e7fed..6501d26 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -83,22 +83,29 @@ struct ll_entry struct ll_entry *next; }; - u_int start; - u_int *source; - char insn[MAXBLOCK][10]; - u_char itype[MAXBLOCK]; - u_char opcode[MAXBLOCK]; - u_char opcode2[MAXBLOCK]; - u_char bt[MAXBLOCK]; - u_char rs1[MAXBLOCK]; - u_char rs2[MAXBLOCK]; - u_char rt1[MAXBLOCK]; - u_char rt2[MAXBLOCK]; - u_char us1[MAXBLOCK]; - u_char us2[MAXBLOCK]; - u_char dep1[MAXBLOCK]; - u_char dep2[MAXBLOCK]; - u_char lt1[MAXBLOCK]; + // used by asm: + u_char *out; + u_int hash_table[65536][4] __attribute__((aligned(16))); + struct ll_entry *jump_in[4096] __attribute__((aligned(16))); + struct ll_entry *jump_dirty[4096]; + + static struct ll_entry *jump_out[4096]; + static u_int start; + static u_int *source; + static char insn[MAXBLOCK][10]; + static u_char itype[MAXBLOCK]; + static u_char opcode[MAXBLOCK]; + static u_char opcode2[MAXBLOCK]; + static u_char bt[MAXBLOCK]; + static 
u_char rs1[MAXBLOCK]; + static u_char rs2[MAXBLOCK]; + static u_char rt1[MAXBLOCK]; + static u_char rt2[MAXBLOCK]; + static u_char us1[MAXBLOCK]; + static u_char us2[MAXBLOCK]; + static u_char dep1[MAXBLOCK]; + static u_char dep2[MAXBLOCK]; + static u_char lt1[MAXBLOCK]; static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; @@ -107,52 +114,47 @@ struct ll_entry static u_int smrv_weak; // same, but somewhat less likely static u_int smrv_strong_next; // same, but after current insn executes static u_int smrv_weak_next; - int imm[MAXBLOCK]; - u_int ba[MAXBLOCK]; - char likely[MAXBLOCK]; - char is_ds[MAXBLOCK]; - char ooo[MAXBLOCK]; - uint64_t unneeded_reg[MAXBLOCK]; - uint64_t unneeded_reg_upper[MAXBLOCK]; - uint64_t branch_unneeded_reg[MAXBLOCK]; - uint64_t branch_unneeded_reg_upper[MAXBLOCK]; - uint64_t pr32[MAXBLOCK]; - signed char regmap_pre[MAXBLOCK][HOST_REGS]; + static int imm[MAXBLOCK]; + static u_int ba[MAXBLOCK]; + static char likely[MAXBLOCK]; + static char is_ds[MAXBLOCK]; + static char ooo[MAXBLOCK]; + static uint64_t unneeded_reg[MAXBLOCK]; + static uint64_t unneeded_reg_upper[MAXBLOCK]; + static uint64_t branch_unneeded_reg[MAXBLOCK]; + static uint64_t branch_unneeded_reg_upper[MAXBLOCK]; + static signed char regmap_pre[MAXBLOCK][HOST_REGS]; static uint64_t current_constmap[HOST_REGS]; static uint64_t constmap[MAXBLOCK][HOST_REGS]; static struct regstat regs[MAXBLOCK]; static struct regstat branch_regs[MAXBLOCK]; - signed char minimum_free_regs[MAXBLOCK]; - u_int needed_reg[MAXBLOCK]; - u_int wont_dirty[MAXBLOCK]; - u_int will_dirty[MAXBLOCK]; - int ccadj[MAXBLOCK]; - int slen; - u_int instr_addr[MAXBLOCK]; - u_int link_addr[MAXBLOCK][3]; - int linkcount; - u_int stubs[MAXBLOCK*3][8]; - int stubcount; - u_int literals[1024][2]; - int literalcount; - int is_delayslot; - int cop1_usable; - u_char *out; - struct ll_entry *jump_in[4096] __attribute__((aligned(16))); - 
struct ll_entry *jump_out[4096]; - struct ll_entry *jump_dirty[4096]; - u_int hash_table[65536][4] __attribute__((aligned(16))); - char shadow[1048576] __attribute__((aligned(16))); - void *copy; - int expirep; - int new_dynarec_did_compile; - int new_dynarec_hacks; - u_int stop_after_jal; + static signed char minimum_free_regs[MAXBLOCK]; + static u_int needed_reg[MAXBLOCK]; + static u_int wont_dirty[MAXBLOCK]; + static u_int will_dirty[MAXBLOCK]; + static int ccadj[MAXBLOCK]; + static int slen; + static u_int instr_addr[MAXBLOCK]; + static u_int link_addr[MAXBLOCK][3]; + static int linkcount; + static u_int stubs[MAXBLOCK*3][8]; + static int stubcount; + static u_int literals[1024][2]; + static int literalcount; + static int is_delayslot; + static int cop1_usable; + static char shadow[1048576] __attribute__((aligned(16))); + static void *copy; + static int expirep; + static u_int stop_after_jal; #ifndef RAM_FIXED static u_int ram_offset; #else static const u_int ram_offset=0; #endif + + int new_dynarec_hacks; + int new_dynarec_did_compile; extern u_char restore_candidate[512]; extern int cycle_count; @@ -254,15 +256,20 @@ void jump_intcall(); void new_dyna_leave(); // Needed by assembler -void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32); -void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty); -void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr); -void load_all_regs(signed char i_regmap[]); -void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); -void load_regs_entry(int t); -void load_all_consts(signed char regmap[],int is32,u_int dirty,int i); +static void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32); +static void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty); +static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr); +static void load_all_regs(signed char i_regmap[]); 
+static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]); +static void load_regs_entry(int t); +static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i); + +static int verify_dirty(u_int *ptr); +static int get_final_value(int hr, int i, int *value); +static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e); +static void add_to_linker(int addr,int target,int ext); -int tracedebug=0; +static int tracedebug=0; //#define DEBUG_CYCLE_COUNT 1 @@ -1747,7 +1754,7 @@ static void pagespan_alloc(struct regstat *current,int i) //else ... } -add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e) +static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e) { stubs[stubcount][0]=type; stubs[stubcount][1]=addr; @@ -3703,7 +3710,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) } } -int get_final_value(int hr, int i, int *value) +static int get_final_value(int hr, int i, int *value) { int reg=regs[i].regmap[hr]; while(iregmap; - int addr=get_reg(i_regmap,AGEN1+(i&1)); - int rth,rt; - int ds; + int rt; if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { - rth=get_reg(i_regmap,FTEMP|64); rt=get_reg(i_regmap,FTEMP); }else{ - rth=get_reg(i_regmap,rt1[i]|64); rt=get_reg(i_regmap,rt1[i]); } assert(rs>=0); @@ -2635,7 +2631,6 @@ static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_h static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,target); - int rth=get_reg(regmap,target|64); int rt=get_reg(regmap,target); if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); @@ -2725,14 +2720,10 @@ static void do_writestub(int n) struct regstat *i_regs=(struct regstat *)stubs[n][5]; u_int reglist=stubs[n][7]; signed char *i_regmap=i_regs->regmap; - int addr=get_reg(i_regmap,AGEN1+(i&1)); - int rth,rt,r; - int ds; + int rt,r; if(itype[i]==C1LS||itype[i]==C2LS) { - 
rth=get_reg(i_regmap,FTEMP|64); rt=get_reg(i_regmap,r=FTEMP); }else{ - rth=get_reg(i_regmap,rs2[i]|64); rt=get_reg(i_regmap,r=rs2[i]); } assert(rs>=0); @@ -2801,7 +2792,6 @@ static void do_writestub(int n) static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { int rs=get_reg(regmap,-1); - int rth=get_reg(regmap,target|64); int rt=get_reg(regmap,target); assert(rs>=0); assert(rt>=0); @@ -2848,7 +2838,6 @@ static void do_unalignedwritestub(int n) signed char *i_regmap=i_regs->regmap; int temp2=get_reg(i_regmap,FTEMP); int rt; - int ds, real_rs; rt=get_reg(i_regmap,rs2[i]); assert(rt>=0); assert(addr>=0); @@ -3232,7 +3221,7 @@ static int get_ptr_mem_type(u_int a) static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) { - int jaddr,type=0; + int jaddr=0,type=0; int mr=rs1[i]; if(((smrv_strong|smrv_weak)>>mr)&1) { type=get_ptr_mem_type(smrv[mr]); @@ -3700,7 +3689,6 @@ static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) static void c2op_assemble(int i,struct regstat *i_regs) { - signed char temp=get_reg(i_regs->regmap,-1); u_int c2op=source[i]&0x3f; u_int hr,reglist_full=0,reglist; int need_flags,need_ir; @@ -4025,7 +4013,7 @@ static void do_miniht_insert(u_int return_address,int rt,int temp) { static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) { //if(dirty_pre==dirty) return; - int hr,reg,new_hr; + int hr,reg; for(hr=0;hrvaddr==vaddr) { //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); - int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; ht_bin[3]=ht_bin[1]; ht_bin[2]=ht_bin[0]; - ht_bin[1]=(int)head->addr; + ht_bin[1]=(u_int)head->addr; ht_bin[0]=vaddr; return head->addr; } @@ -334,9 +334,9 @@ void *get_addr(u_int vaddr) restore_candidate[vpage>>3]|=1<<(vpage&7); } else 
restore_candidate[page>>3]|=1<<(page&7); - int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; if(ht_bin[0]==vaddr) { - ht_bin[1]=(int)head->addr; // Replace existing entry + ht_bin[1]=(u_int)head->addr; // Replace existing entry } else { @@ -366,7 +366,7 @@ void *get_addr(u_int vaddr) void *get_addr_ht(u_int vaddr) { //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr); - int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; if(ht_bin[0]==vaddr) return (void *)ht_bin[1]; if(ht_bin[2]==vaddr) return (void *)ht_bin[3]; return get_addr(vaddr); @@ -634,6 +634,7 @@ int needed_again(int r, int i) } }*/ if(rn<10) return 1; + (void)b; return 0; } @@ -781,7 +782,7 @@ void *check_addr(u_int vaddr) void remove_hash(int vaddr) { //printf("remove hash: %x\n",vaddr); - int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF]; if(ht_bin[2]==vaddr) { ht_bin[2]=ht_bin[3]=-1; } @@ -817,7 +818,7 @@ void ll_clear(struct ll_entry **head) { struct ll_entry *cur; struct ll_entry *next; - if(cur=*head) { + if((cur=*head)) { *head=0; while(cur) { next=cur->next; @@ -991,7 +992,7 @@ void invalidate_addr(u_int addr) // Anything could have changed, so invalidate everything. 
void invalidate_all_pages() { - u_int page,n; + u_int page; for(page=0;page<4096;page++) invalidate_page(page); for(page=0;page<1048576;page++) @@ -1014,6 +1015,7 @@ void add_link(u_int vaddr,void *src) inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page); int *ptr=(int *)(src+4); assert((*ptr&0x0fff0000)==0x059f0000); + (void)ptr; ll_add(jump_out+page,vaddr,src); //int ptr=get_pointer(src); //inv_debug("add_link: Pointer is to %x\n",(int)ptr); @@ -1033,7 +1035,7 @@ void clean_blocks(u_int page) // Don't restore blocks which are about to expire from the cache if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { u_int start,end; - if(verify_dirty((int)head->addr)) { + if(verify_dirty(head->addr)) { //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr); u_int i; u_int inv=0; @@ -1054,12 +1056,12 @@ void clean_blocks(u_int page) //printf("page=%x, addr=%x\n",page,head->vaddr); //assert(head->vaddr>>12==(page|0x80000)); ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr); - int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF]; if(ht_bin[0]==head->vaddr) { - ht_bin[1]=(int)clean_addr; // Replace existing entry + ht_bin[1]=(u_int)clean_addr; // Replace existing entry } if(ht_bin[2]==head->vaddr) { - ht_bin[3]=(int)clean_addr; // Replace existing entry + ht_bin[3]=(u_int)clean_addr; // Replace existing entry } } } @@ -2333,23 +2335,25 @@ void imm16_assemble(int i,struct regstat *i_regs) emit_mov(sh,th); } } - if(opcode[i]==0x0d) //ORI - if(sl<0) { - emit_orimm(tl,imm[i],tl); - }else{ - if(!((i_regs->wasconst>>sl)&1)) - emit_orimm(sl,imm[i],tl); - else - emit_movimm(constmap[i][sl]|imm[i],tl); + if(opcode[i]==0x0d) { // ORI + if(sl<0) { + emit_orimm(tl,imm[i],tl); + }else{ + if(!((i_regs->wasconst>>sl)&1)) + emit_orimm(sl,imm[i],tl); + else + emit_movimm(constmap[i][sl]|imm[i],tl); + } } - 
if(opcode[i]==0x0e) //XORI - if(sl<0) { - emit_xorimm(tl,imm[i],tl); - }else{ - if(!((i_regs->wasconst>>sl)&1)) - emit_xorimm(sl,imm[i],tl); - else - emit_movimm(constmap[i][sl]^imm[i],tl); + if(opcode[i]==0x0e) { // XORI + if(sl<0) { + emit_xorimm(tl,imm[i],tl); + }else{ + if(!((i_regs->wasconst>>sl)&1)) + emit_xorimm(sl,imm[i],tl); + else + emit_movimm(constmap[i][sl]^imm[i],tl); + } } } else { @@ -2533,7 +2537,7 @@ void load_assemble(int i,struct regstat *i_regs) //printf("load_assemble: c=%d\n",c); //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset); // FIXME: Even if the load is a NOP, we should check for pagefaults... - if(tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80) + if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80)) ||rt1[i]==0) { // could be FIFO, must perform the read // ||dummy read @@ -2790,7 +2794,7 @@ void load_assemble(int i,struct regstat *i_regs) emit_call((int)memdebug); //emit_popa(); restore_regs(0x100f); - }/**/ + }*/ } #ifndef loadlr_assemble @@ -2806,7 +2810,7 @@ void store_assemble(int i,struct regstat *i_regs) int s,th,tl,map=-1; int addr,temp; int offset; - int jaddr=0,jaddr2,type; + int jaddr=0,type; int memtarget=0,c=0; int agr=AGEN1+(i&1); int faststore_reg_override=0; @@ -2925,7 +2929,7 @@ void store_assemble(int i,struct regstat *i_regs) #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) emit_callne(invalidate_addr_reg[addr]); #else - jaddr2=(int)out; + int jaddr2=(int)out; emit_jne(0); add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<regmap,CCREG); assert(ccreg==HOST_CCREG); assert(!is_delayslot); + (void)ccreg; emit_movimm(start+i*4,EAX); // Get PC emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... 
emit_jmp((int)jump_syscall_hle); // XXX @@ -3381,6 +3386,7 @@ void hlecall_assemble(int i,struct regstat *i_regs) signed char ccreg=get_reg(i_regs->regmap,CCREG); assert(ccreg==HOST_CCREG); assert(!is_delayslot); + (void)ccreg; emit_movimm(start+i*4+4,0); // Get PC emit_movimm((int)psxHLEt[source[i]&7],1); emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX @@ -3392,6 +3398,7 @@ void intcall_assemble(int i,struct regstat *i_regs) signed char ccreg=get_reg(i_regs->regmap,CCREG); assert(ccreg==HOST_CCREG); assert(!is_delayslot); + (void)ccreg; emit_movimm(start+i*4,0); // Get PC emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); emit_jmp((int)jump_intcall); @@ -4636,7 +4643,6 @@ static void ujump_assemble_write_ra(int i) void ujump_assemble(int i,struct regstat *i_regs) { - signed char *i_regmap=i_regs->regmap; int ra_done=0; if(i==(ba[i]-start)>>2) assem_debug("idle loop\n"); address_generation(i+1,i_regs,regs[i].regmap_entry); @@ -4644,6 +4650,7 @@ void ujump_assemble(int i,struct regstat *i_regs) int temp=get_reg(branch_regs[i].regmap,PTEMP); if(rt1[i]==31&&temp>=0) { + signed char *i_regmap=i_regs->regmap; int return_address=start+i*4+8; if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); @@ -4709,9 +4716,8 @@ static void rjump_assemble_write_ra(int i) void rjump_assemble(int i,struct regstat *i_regs) { - signed char *i_regmap=i_regs->regmap; int temp; - int rs,cc,adj; + int rs,cc; int ra_done=0; rs=get_reg(branch_regs[i].regmap,rs1[i]); assert(rs>=0); @@ -4728,6 +4734,7 @@ void rjump_assemble(int i,struct regstat *i_regs) if(rt1[i]==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { + signed char *i_regmap=i_regs->regmap; int return_address=start+i*4+8; if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -4756,6 +4763,7 @@ void rjump_assemble(int i,struct regstat *i_regs) 
rjump_assemble_write_ra(i); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); + (void)cc; #ifdef USE_MINI_HT int rh=get_reg(branch_regs[i].regmap,RHASH); int ht=get_reg(branch_regs[i].regmap,RHTBL); @@ -5761,7 +5769,6 @@ static void pagespan_assemble(int i,struct regstat *i_regs) int s1h=get_reg(i_regs->regmap,rs1[i]|64); int s2l=get_reg(i_regs->regmap,rs2[i]); int s2h=get_reg(i_regs->regmap,rs2[i]|64); - void *nt_branch=NULL; int taken=0; int nottaken=0; int unconditional=0; @@ -5778,7 +5785,7 @@ static void pagespan_assemble(int i,struct regstat *i_regs) s1h=s2h=-1; } int hr=0; - int addr,alt,ntaddr; + int addr=-1,alt=-1,ntaddr=-1; if(i_regs->regmap[HOST_BTREG]<0) {addr=HOST_BTREG;} else { while(hr>r)&1));*/} + }else {/*printf("i: %x (%d) mismatch(+2): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/} } } } @@ -6783,7 +6790,7 @@ void clean_registers(int istart,int iend,int wr) if(r!=EXCLUDE_REG) { if(regs[i].regmap[r]==regmap_pre[i+1][r]) { regs[i+1].wasdirty&=wont_dirty_i|~(1<>r)&1));*/} + }else {/*printf("i: %x (%d) mismatch(+1): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/} } } } @@ -6826,7 +6833,7 @@ void clean_registers(int istart,int iend,int wr) wont_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<>r)&1));*/ + /*printf("i: %x (%d) mismatch: %d\n",start+i*4,i,r);assert(!((will_dirty>>r)&1));*/ } } } @@ -6988,7 +6995,6 @@ void new_dynarec_init() if (mprotect(out, 1<=0) { if(r!=regmap_pre[i][hr]) { @@ -10151,7 +10159,7 @@ int new_recompile_block(int addr) // replace it with the new address. // Don't add new entries. We'll insert the // ones that actually get used in check_addr(). 
- int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; if(ht_bin[0]==vaddr) { ht_bin[1]=entry_point; } @@ -10219,7 +10227,7 @@ int new_recompile_block(int addr) case 2: // Clear hash table for(i=0;i<32;i++) { - int *ht_bin=hash_table[((expirep&2047)<<5)+i]; + u_int *ht_bin=hash_table[((expirep&2047)<<5)+i]; if((ht_bin[3]>>shift)==(base>>shift) || ((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) { inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[2],ht_bin[3]); -- cgit v1.2.3 From 1fedf1ea555e4a6be68dd0ba384909ac21da65d0 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 19 Sep 2016 00:00:48 +0300 Subject: drc: remove unnecessary cache flushing should already be flushed or not matter at this point --- libpcsxcore/new_dynarec/new_dynarec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ca12ea9..bfe3961 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1000,9 +1000,6 @@ void invalidate_all_pages() restore_candidate[(page&2047)>>3]|=1<<(page&7); restore_candidate[((page&2047)>>3)+256]|=1<<(page&7); } - #ifdef __arm__ - __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1< #include #include +#ifdef __MACH__ +#include +#include +#endif #include "../libpcsxcore/misc.h" #include "../libpcsxcore/psxcounters.h" @@ -1202,6 +1206,11 @@ void retro_init(void) int i, ret; bool found_bios = false; +#ifdef __MACH__ + // magic sauce to make the dynarec work on iOS + syscall(SYS_ptrace, 0 /*PTRACE_TRACEME*/, 0, 0, 0); +#endif + ret = emu_core_preinit(); ret |= emu_core_init(); if (ret != 0) { diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 9a5e3a9..6c3826c 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -114,7 +114,7 @@ const u_int invalidate_addr_reg[16] = { 0, 0}; -static unsigned int 
needs_clear_cache[1<<(TARGET_SIZE_2-17)]; +static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; /* Linker */ @@ -193,15 +193,15 @@ static void add_literal(int addr,int val) literalcount++; } -static void *kill_pointer(void *stub) +// from a pointer to external jump stub (which was produced by emit_extjump2) +// find where the jumping insn is +static void *find_extjump_insn(void *stub) { int *ptr=(int *)(stub+4); - assert((*ptr&0x0ff00000)==0x05900000); + assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs] u_int offset=*ptr&0xfff; - int **l_ptr=(void *)ptr+offset+8; - int *i_ptr=*l_ptr; - set_jump_target((int)i_ptr,(int)stub); - return i_ptr; + void **l_ptr=(void *)ptr+offset+8; + return *l_ptr; } // find where external branch is liked to using addr of it's stub: @@ -211,11 +211,7 @@ static void *kill_pointer(void *stub) static int get_pointer(void *stub) { //printf("get_pointer(%x)\n",(int)stub); - int *ptr=(int *)(stub+4); - assert((*ptr&0x0fff0000)==0x059f0000); - u_int offset=*ptr&0xfff; - int **l_ptr=(void *)ptr+offset+8; - int *i_ptr=*l_ptr; + int *i_ptr=find_extjump_insn(stub); assert((*i_ptr&0x0f000000)==0x0a000000); return (int)i_ptr+((*i_ptr<<8)>>6)+8; } @@ -4094,6 +4090,17 @@ static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dir #define wb_invalidate wb_invalidate_arm */ +static void mark_clear_cache(void *target) +{ + u_long offset = (char *)target - (char *)BASE_ADDR; + u_int mask = 1u << ((offset >> 12) & 31); + if (!(needs_clear_cache[offset >> 17] & mask)) { + char *start = (char *)((u_long)target & ~4095ul); + start_tcache_write(start, start + 4096); + needs_clear_cache[offset >> 17] |= mask; + } +} + // Clearing the cache is rather slow on ARM Linux, so mark the areas // that need to be cleared, and then only clear these areas once. 
static void do_clear_cache() @@ -4115,7 +4122,7 @@ static void do_clear_cache() end+=4096; j++; }else{ - __clear_cache((void *)start,(void *)end); + end_tcache_write((void *)start,(void *)end); break; } } diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index 2d10ac7..acf65bd 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -5,21 +5,9 @@ #define HOST_IMM8 1 #define HAVE_CMOV_IMM 1 -#define CORTEX_A8_BRANCH_PREDICTION_HACK 1 -#define USE_MINI_HT 1 -//#define REG_PREFETCH 1 #define HAVE_CONDITIONAL_CALL 1 #define RAM_SIZE 0x200000 -#ifndef __ARM_ARCH_7A__ -//#undef CORTEX_A8_BRANCH_PREDICTION_HACK -//#undef USE_MINI_HT -#endif - -#ifndef BASE_ADDR_FIXED -#define BASE_ADDR_FIXED 0 -#endif - #define REG_SHIFT 2 /* ARM calling convention: diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 95af8b4..147b0df 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -20,6 +20,7 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #include "arm_features.h" +#include "new_dynarec_config.h" #include "linkage_offsets.h" @@ -160,9 +161,10 @@ ptr_hash_table: #endif .endm +/* r0 = virtual target address */ +/* r1 = instruction to patch */ .macro dyna_linker_main - /* r0 = virtual target address */ - /* r1 = instruction to patch */ +#ifndef NO_WRITE_EXEC load_varadr_ext r3, jump_in /* get_page */ lsr r2, r0, #12 @@ -242,6 +244,11 @@ ptr_hash_table: str r3, [r6, #12] mov pc, r1 8: +#else + /* XXX: should be able to do better than this... 
*/ + bl get_addr_ht + mov pc, r0 +#endif .endm diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index bfe3961..ec2a6fa 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -23,7 +23,11 @@ #include #include #include +#ifdef __MACH__ +#include +#endif +#include "new_dynarec_config.h" #include "emu_if.h" //emulator interface //#define DISASM @@ -42,19 +46,6 @@ #include "assem_arm.h" #endif -#ifdef __BLACKBERRY_QNX__ -#undef __clear_cache -#define __clear_cache(start,end) msync(start, (size_t)((void*)end - (void*)start), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); -#elif defined(__MACH__) -#include -#define __clear_cache mach_clear_cache -static void __clear_cache(void *start, void *end) { - size_t len = (char *)end - (char *)start; - sys_dcache_flush(start, len); - sys_icache_invalidate(start, len); -} -#endif - #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 @@ -271,6 +262,56 @@ static void add_to_linker(int addr,int target,int ext); static int tracedebug=0; +static void mprotect_w_x(void *start, void *end, int is_x) +{ +#ifdef NO_WRITE_EXEC + u_long mstart = (u_long)start & ~4095ul; + u_long mend = (u_long)end; + if (mprotect((void *)mstart, mend - mstart, + PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE)) != 0) + SysPrintf("mprotect(%c) failed: %s\n", is_x ? 
'x' : 'w', strerror(errno)); +#endif +} + +static void start_tcache_write(void *start, void *end) +{ + mprotect_w_x(start, end, 0); +} + +static void end_tcache_write(void *start, void *end) +{ +#ifdef __arm__ + size_t len = (char *)end - (char *)start; + #if defined(__BLACKBERRY_QNX__) + msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); + #elif defined(__MACH__) + sys_cache_control(kCacheFunctionPrepareForExecution, start, len); + #elif defined(VITA) + int block = sceKernelFindMemBlockByAddr(start, len); + sceKernelSyncVMDomain(block, start, len); + #else + __clear_cache(start, end); + #endif + (void)len; +#endif + + mprotect_w_x(start, end, 1); +} + +static void *start_block(void) +{ + u_char *end = out + MAX_OUTPUT_BLOCK_SIZE; + if (end > (u_char *)BASE_ADDR + (1<addr); @@ -838,10 +879,11 @@ void ll_kill_pointers(struct ll_entry *head,int addr,int shift) (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))) { inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr); - u_int host_addr=(u_int)kill_pointer(head->addr); + void *host_addr=find_extjump_insn(head->addr); #ifdef __arm__ - needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31); + mark_clear_cache(host_addr); #endif + set_jump_target((int)host_addr,(int)head->addr); } head=head->next; } @@ -865,10 +907,11 @@ void invalidate_page(u_int page) jump_out[page]=0; while(head!=NULL) { inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr); - u_int host_addr=(u_int)kill_pointer(head->addr); + void *host_addr=find_extjump_insn(head->addr); #ifdef __arm__ - needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31); + mark_clear_cache(host_addr); #endif + set_jump_target((int)host_addr,(int)head->addr); next=head->next; free(head); head=next; @@ -6936,13 +6979,14 @@ static void disassemble_inst(int i) {} static int new_dynarec_test(void) { int (*testfunc)(void) = (void *)out; + void 
*beginning; int ret; + + beginning = start_block(); emit_movimm(DRC_TEST_VAL,0); // test emit_jmpreg(14); literal_pool(0); -#ifdef __arm__ - __clear_cache((void *)testfunc, out); -#endif + end_block(beginning); SysPrintf("testing if we can run recompiled code..\n"); ret = testfunc(); if (ret == DRC_TEST_VAL) @@ -6987,7 +7031,7 @@ void new_dynarec_init() -1, 0) <= 0) { SysPrintf("mmap() failed: %s\n", strerror(errno)); } -#else +#elif !defined(NO_WRITE_EXEC) // not all systems allow execute in data segment by default if (mprotect(out, 1<>12]=0; emit_movimm(start,0); emit_writeword(0,(int)&pcaddr); emit_jmp((int)new_dyna_leave); literal_pool(0); -#ifdef __arm__ - __clear_cache((void *)beginning,out); -#endif + end_block(beginning); ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning); return 0; } @@ -9883,7 +9926,7 @@ int new_recompile_block(int addr) cop1_usable=0; uint64_t is32_pre=0; u_int dirty_pre=0; - u_int beginning=(u_int)out; + void *beginning=start_block(); if((u_int)addr&1) { ds=1; pagespan_ds(); @@ -10173,14 +10216,12 @@ int new_recompile_block(int addr) // Align code if(((u_int)out)&7) emit_addnop(13); #endif - assert((u_int)out-beginning>3)+256]|=1<<(page&7); } #ifdef __arm__ - #if defined(VITA) - sceKernelCloseVMDomain(); - #endif __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<Country_code&0xFF) + switch (ROM_HEADER->Country_code&0xFF) { case 0x45: // U addr=0x34b30; - break; - case 0x4A: // J - addr=0x34b70; - break; - case 0x50: // E + break; + case 0x4A: // J + addr=0x34b70; + break; + case 0x50: // E addr=0x329f0; - break; - default: + break; + default: // Unknown country code addr=0; break; @@ -533,7 +526,7 @@ static void flush_dirty_uppers(struct regstat *cur) for (hr=0;hrdirty>>hr)&1) { reg=cur->regmap[hr]; - if(reg>=64) + if(reg>=64) if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1; } } @@ -690,7 +683,7 @@ int needed_again(int r, int i) int j; int b=-1; int rn=10; - + 
if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) { if(ba[i-1]start+slen*4-4) @@ -793,7 +786,7 @@ int loop_reg(int i, int r, int hr) void alloc_all(struct regstat *cur,int i) { int hr; - + for(hr=0;hrregmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&& @@ -834,7 +827,7 @@ void mult64(uint64_t m1,uint64_t m2) unsigned long long int result1, result2, result3, result4; unsigned long long int temp1, temp2, temp3, temp4; int sign = 0; - + if (m1 < 0) { op2 = -m1; @@ -847,22 +840,22 @@ void mult64(uint64_t m1,uint64_t m2) sign = 1 - sign; } else op4 = m2; - + op1 = op2 & 0xFFFFFFFF; op2 = (op2 >> 32) & 0xFFFFFFFF; op3 = op4 & 0xFFFFFFFF; op4 = (op4 >> 32) & 0xFFFFFFFF; - + temp1 = op1 * op3; temp2 = (temp1 >> 32) + op1 * op4; temp3 = op2 * op3; temp4 = (temp3 >> 32) + op2 * op4; - + result1 = temp1 & 0xFFFFFFFF; result2 = temp2 + (temp3 & 0xFFFFFFFF); result3 = (result2 >> 32) + temp4; result4 = (result3 >> 32); - + lo = result1 | (result2 << 32); hi = (result3 & 0xFFFFFFFF) | (result4 << 32); if (sign) @@ -878,25 +871,25 @@ void multu64(uint64_t m1,uint64_t m2) unsigned long long int op1, op2, op3, op4; unsigned long long int result1, result2, result3, result4; unsigned long long int temp1, temp2, temp3, temp4; - + op1 = m1 & 0xFFFFFFFF; op2 = (m1 >> 32) & 0xFFFFFFFF; op3 = m2 & 0xFFFFFFFF; op4 = (m2 >> 32) & 0xFFFFFFFF; - + temp1 = op1 * op3; temp2 = (temp1 >> 32) + op1 * op4; temp3 = op2 * op3; temp4 = (temp3 >> 32) + op2 * op4; - + result1 = temp1 & 0xFFFFFFFF; result2 = temp2 + (temp3 & 0xFFFFFFFF); result3 = (result2 >> 32) + temp4; result4 = (result3 >> 32); - + lo = result1 | (result2 << 32); hi = (result3 & 0xFFFFFFFF) | (result4 << 32); - + //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32) // ,(int)reg[LOREG],(int)(reg[LOREG]>>32)); } @@ -1018,7 +1011,7 @@ void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift) { struct ll_entry *next; while(*head) { - 
if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || + if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)) { inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr); @@ -1114,7 +1107,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) #ifdef __arm__ do_clear_cache(); #endif - + // Don't trap writes invalid_code[block]=1; #ifndef DISABLE_TLB @@ -3393,7 +3386,7 @@ void storelr_assemble(int i,struct regstat *i_regs) if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); gen_tlb_addr_w(temp,map); #else - if((u_int)rdram!=0x80000000) + if((u_int)rdram!=0x80000000) emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp); #endif }else{ // using tlb @@ -4166,7 +4159,7 @@ void address_generation(int i,struct regstat *i_regs,signed char entry[]) int mgr=MGEN1+(i&1); if(itype[i]==LOAD) { ra=get_reg(i_regs->regmap,rt1[i]); - if(ra<0) ra=get_reg(i_regs->regmap,-1); + if(ra<0) ra=get_reg(i_regs->regmap,-1); assert(ra>=0); } if(itype[i]==LOADLR) { @@ -4764,7 +4757,7 @@ int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr) { return 0; } - else + else if((i_dirty>>hr)&1) { if(i_regmap[hr]>s1l)&(branch_regs[i].is32>>rs1[i])&1) emit_loadreg(rs1[i],s1l); - } + } else { if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1) emit_loadreg(rs2[i],s1l); @@ -5201,7 +5194,7 @@ void do_ccstub(int n) load_all_regs(branch_regs[i].regmap); } emit_jmp(stubs[n][2]); // return address - + /* This works but uses a lot of memory... 
emit_readword((int)&last_count,ECX); emit_add(HOST_CCREG,ECX,EAX); @@ -5235,7 +5228,7 @@ add_to_linker(int addr,int target,int ext) { link_addr[linkcount][0]=addr; link_addr[linkcount][1]=target; - link_addr[linkcount][2]=ext; + link_addr[linkcount][2]=ext; linkcount++; } @@ -5261,7 +5254,7 @@ static void ujump_assemble_write_ra(int i) #endif { #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -5282,10 +5275,10 @@ void ujump_assemble(int i,struct regstat *i_regs) address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH int temp=get_reg(branch_regs[i].regmap,PTEMP); - if(rt1[i]==31&&temp>=0) + if(rt1[i]==31&&temp>=0) { int return_address=start+i*4+8; - if(get_reg(branch_regs[i].regmap,31)>0) + if(get_reg(branch_regs[i].regmap,31)>0) if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } #endif @@ -5336,7 +5329,7 @@ static void rjump_assemble_write_ra(int i) assert(rt>=0); return_address=start+i*4+8; #ifdef REG_PREFETCH - if(temp>=0) + if(temp>=0) { if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp); } @@ -5365,7 +5358,7 @@ void rjump_assemble(int i,struct regstat *i_regs) } address_generation(i+1,i_regs,regs[i].regmap_entry); #ifdef REG_PREFETCH - if(rt1[i]==31) + if(rt1[i]==31) { if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) { int return_address=start+i*4+8; @@ -5499,7 +5492,7 @@ void cjump_assemble(int i,struct regstat *i_regs) #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(i>(ba[i]-start)>>2) invert=1; #endif - + if(ooo[i]) { s1l=get_reg(branch_regs[i].regmap,rs1[i]); s1h=get_reg(branch_regs[i].regmap,rs1[i]|64); @@ -5554,7 +5547,7 @@ void cjump_assemble(int i,struct regstat *i_regs) load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG); cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - 
if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); //assem_debug("cycle count (adj)\n"); @@ -5626,7 +5619,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(0); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if(opcode[i]==4) // BEQ @@ -5756,7 +5749,7 @@ void cjump_assemble(int i,struct regstat *i_regs) emit_jne(1); } } // if(!only32) - + //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(s1l>=0); if((opcode[i]&0x2f)==4) // BEQ @@ -5939,7 +5932,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } cc=get_reg(branch_regs[i].regmap,CCREG); assert(cc==HOST_CCREG); - if(unconditional) + if(unconditional) store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]); //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional); assem_debug("cycle count (adj)\n"); @@ -6026,7 +6019,7 @@ void sjump_assemble(int i,struct regstat *i_regs) } } } // if(!only32) - + if(invert) { #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) { @@ -6274,7 +6267,7 @@ void fjump_assemble(int i,struct regstat *i_regs) { } } // if(!only32) - + if(invert) { if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc); #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK @@ -6782,14 +6775,14 @@ void unneeded_registers(int istart,int iend,int r) { // If subroutine call, flag return address as a possible branch target if(rt1[i]==31 && 
i=(start+slen*4)) { // Branch out of this block, flush all regs u=1; uu=1; gte_u=gte_u_unknown; - /* Hexagon hack + /* Hexagon hack if(itype[i]==UJUMP&&rt1[i]==31) { uu=u=0x300C00F; // Discard at, v0-v1, t6-t9 @@ -7052,7 +7045,7 @@ static void provisional_32bit() int i,j; uint64_t is32=1; uint64_t lastbranch=1; - + for(i=0;i0) { @@ -7089,13 +7082,13 @@ static void provisional_32bit() uint64_t temp_is32=is32; for(j=i-1;j>=0;j--) { - if(ba[j]==start+i*4) + if(ba[j]==start+i*4) //temp_is32&=branch_regs[j].is32; temp_is32&=p32[j]; } for(j=i;j=0;i--) { int hr; @@ -7380,7 +7373,7 @@ static void provisional_r32() } //requires_32bit[i]=r32; pr32[i]=r32; - + // Dirty registers which are 32-bit, require 32-bit input // as they will be written as 32-bit values for(hr=0;hristart) { - if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) + if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP) { // Don't store a register immediately after writing it, // may prevent dual-issue. 
@@ -7981,28 +7974,17 @@ void new_dynarec_init() { SysPrintf("Init new dynarec\n"); out=(u_char *)BASE_ADDR; -#if defined(VITA) - +#if BASE_ADDR_FIXED if (mmap (out, 1< %x\n", (int)addr, (int)out); //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out); //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr); - //if(debug) + //if(debug) //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum()); //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29); /*if(Count>=312978186) { @@ -8264,7 +8246,7 @@ int new_recompile_block(int addr) unsigned int type,op,op2; //printf("addr = %x source = %x %x\n", addr,source,source[0]); - + /* Pass 1 disassembly */ for(i=0;!done;i++) { @@ -8924,7 +8906,7 @@ int new_recompile_block(int addr) /* Pass 2 - Register dependencies and branch targets */ unneeded_registers(0,slen-1,0); - + /* Pass 3 - Register allocation */ struct regstat current; // Current register allocations/status @@ -8954,7 +8936,7 @@ int new_recompile_block(int addr) unneeded_reg_upper[0]=1; current.regmap[HOST_BTREG]=BTREG; } - + for(i=0;i=0;j--) { - if(ba[j]==start+i*4) + if(ba[j]==start+i*4) temp_is32&=branch_regs[j].is32; } for(j=i;j=0;j--) { - if(ba[j]==start+i*4+4) + if(ba[j]==start+i*4+4) temp_is32&=branch_regs[j].is32; } for(j=i;j=0) + if(get_reg(current.regmap,r|64)>=0) current.regmap[get_reg(current.regmap,r|64)]=-1; } } @@ -9077,12 +9059,12 @@ int new_recompile_block(int addr) uint64_t temp_is32=current.is32; for(j=i-1;j>=0;j--) { - if(ba[j]==start+i*4+8) + if(ba[j]==start+i*4+8) temp_is32&=branch_regs[j].is32; } for(j=i;j=0) + if(get_reg(current.regmap,r|64)>=0) current.regmap[get_reg(current.regmap,r|64)]=-1; } } @@ -9190,7 +9172,7 @@ int new_recompile_block(int addr) } } else { // First instruction expects CCREG to be allocated - if(i==0&&hr==HOST_CCREG) + if(i==0&&hr==HOST_CCREG) regs[i].regmap_entry[hr]=CCREG; else regs[i].regmap_entry[hr]=-1; @@ -9525,7 +9507,7 @@ 
int new_recompile_block(int addr) pagespan_alloc(¤t,i); break; } - + // Drop the upper half of registers that have become 32-bit current.uu|=current.is32&((1LL<=0;i--) { int hr; @@ -10083,7 +10065,7 @@ int new_recompile_block(int addr) } // Save it needed_reg[i]=nr; - + // Deallocate unneeded registers for(hr=0;hr=start && ba[i]<(start+i*4)) + if(ba[i]>=start && ba[i]<(start+i*4)) if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS @@ -10273,10 +10255,10 @@ int new_recompile_block(int addr) } } if(ooo[i]) { - if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; }else{ - if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) + if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) f_regmap[hr]=branch_regs[i].regmap[hr]; } // Avoid dirty->clean transition @@ -10446,10 +10428,10 @@ int new_recompile_block(int addr) if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) { if(ooo[j]) { - if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break; }else{ - if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) + if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break; } if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) { @@ -10522,7 +10504,7 @@ int new_recompile_block(int addr) regs[k].isconst&=~(1<i&&f_regmap[HOST_CCREG]==CCREG) @@ -10564,7 +10546,7 @@ int new_recompile_block(int addr) } } } - + // Cache memory offset or tlb map pointer if a register is available #ifndef HOST_IMM_ADDR32 #ifndef RAM_OFFSET @@ -10744,7 +10726,7 @@ int new_recompile_block(int addr) } } #endif - + // This allocates registers (if possible) one instruction prior // to use, which can avoid a load-use penalty on certain CPUs. 
for(i=0;i=0) { @@ -10919,7 +10901,7 @@ int new_recompile_block(int addr) } } if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) { - if(itype[i+1]==LOAD) + if(itype[i+1]==LOAD) hr=get_reg(regs[i+1].regmap,rt1[i+1]); if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); @@ -10943,16 +10925,16 @@ int new_recompile_block(int addr) } } } - + /* Pass 6 - Optimize clean/dirty state */ clean_registers(0,slen-1,1); - + /* Pass 7 - Identify 32-bit registers */ #ifndef FORCE32 provisional_r32(); u_int r32=0; - + for (i=slen-1;i>=0;i--) { int hr; @@ -11048,7 +11030,7 @@ int new_recompile_block(int addr) if((regs[i].was32>>dep2[i])&1) r32|=1LL<(u_int)BASE_ADDR+(1<>12;i<=(start+slen*4)>>12;i++) { invalid_code[i]=0; @@ -11636,9 +11618,9 @@ int new_recompile_block(int addr) invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]= invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0; #endif - + /* Pass 10 - Free memory by expiring oldest blocks */ - + int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535; while(expirep!=end) { @@ -11680,7 +11662,7 @@ int new_recompile_block(int addr) case 3: // Clear jump_out #ifdef __arm__ - if((expirep&2047)==0) + if((expirep&2047)==0) do_clear_cache(); #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); -- cgit v1.2.3 From 186935dccdeb09590c0858b7510c769f5ccb06de Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 21 Sep 2016 01:06:32 +0300 Subject: drc: avoid MAP_FIXED it's almost never a good idea, it will override some other mapping and make things crash --- libpcsxcore/new_dynarec/new_dynarec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ec2a6fa..b0bfb23 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -7027,8 +7027,8 
@@ void new_dynarec_init() #if BASE_ADDR_FIXED if (mmap (out, 1< #endif +#ifdef _3DS +#include <3ds_utils.h> +#endif +#ifdef VITA +#include +static int sceBlock; +#endif #include "new_dynarec_config.h" #include "emu_if.h" //emulator interface @@ -265,11 +272,20 @@ static int tracedebug=0; static void mprotect_w_x(void *start, void *end, int is_x) { #ifdef NO_WRITE_EXEC + #if defined(VITA) + // *Open* enables write on all memory that was + // allocated by sceKernelAllocMemBlockForVM()? + if (is_x) + sceKernelCloseVMDomain(); + else + sceKernelOpenVMDomain(); + #else u_long mstart = (u_long)start & ~4095ul; u_long mend = (u_long)end; if (mprotect((void *)mstart, mend - mstart, PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE)) != 0) SysPrintf("mprotect(%c) failed: %s\n", is_x ? 'x' : 'w', strerror(errno)); + #endif #endif } @@ -287,8 +303,9 @@ static void end_tcache_write(void *start, void *end) #elif defined(__MACH__) sys_cache_control(kCacheFunctionPrepareForExecution, start, len); #elif defined(VITA) - int block = sceKernelFindMemBlockByAddr(start, len); - sceKernelSyncVMDomain(block, start, len); + sceKernelSyncVMDomain(sceBlock, start, len); + #elif defined(_3DS) + ctr_flush_invalidate_cache(); #else __clear_cache(start, end); #endif @@ -7023,19 +7040,43 @@ void new_dynarec_clear_full() void new_dynarec_init() { SysPrintf("Init new dynarec\n"); - out=(u_char *)BASE_ADDR; -#if BASE_ADDR_FIXED - if (mmap (out, 1<size == size) && (custom_map->tag == tag)) { - int block, ret; - char blockname[32]; - sprintf(blockname, "CODE 0x%08X",tag); - - block = sceKernelAllocMemBlock(blockname, 0x0c20d060, size + 0x1000, 0); - if(block<=0){ - sceClibPrintf("could not alloc mem block @0x%08X 0x%08X \n", block, tag); - exit(1); - } - - // get base address - ret = sceKernelGetMemBlockBase(block, &custom_map->buffer); - if (ret < 0) - { - sceClibPrintf("could get address @0x%08X 0x%08X 0x%08X \n", block, ret, tag); - exit(1); - } - custom_map->buffer = (((u32)custom_map->buffer) + 0xFFF) 
& ~0xFFF; - custom_map->target_map = block; - return custom_map->buffer; } } @@ -328,9 +333,6 @@ void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag) { if ((custom_map->buffer == ptr)) { - sceKernelFreeMemBlock(custom_map->target_map); - custom_map->buffer = NULL; - custom_map->target_map = NULL; return; } } @@ -1564,15 +1566,19 @@ void retro_init(void) psxUnmapHook = pl_3ds_munmap; #endif #ifdef VITA + if(init_vita_mmap()<0) + abort(); psxMapHook = pl_vita_mmap; psxUnmapHook = pl_vita_munmap; #endif ret = emu_core_preinit(); -#ifdef _3DS +#ifdef _3DS /* emu_core_preinit sets the cpu to dynarec */ if(!__ctr_svchax) Config.Cpu = CPU_INTERPRETER; #endif + Config.Cpu = CPU_INTERPRETER; + ret |= emu_core_init(); if (ret != 0) { SysPrintf("PCSX init failed.\n"); @@ -1652,6 +1658,8 @@ void retro_deinit(void) free(vout_buf); #endif vout_buf = NULL; + + deinit_vita_mmap(); } #ifdef VITA diff --git a/frontend/vita/pthread.h b/frontend/vita/pthread.h deleted file mode 100644 index e1afdc5..0000000 --- a/frontend/vita/pthread.h +++ /dev/null @@ -1,308 +0,0 @@ -/* Copyright (C) 2010-2016 The RetroArch team - * - * --------------------------------------------------------------------------------------- - * The following license statement only applies to this file (psp_pthread.h). - * --------------------------------------------------------------------------------------- - * - * Permission is hereby granted, free of charge, - * to any person obtaining a copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* FIXME: unfinished on PSP, mutexes and condition variables basically a stub. */ -#ifndef _PSP_PTHREAD_WRAP__ -#define _PSP_PTHREAD_WRAP__ - -#ifdef VITA -#include -#include -#else -#include -#include -#include -#endif -#include -#include - -#define STACKSIZE (8 * 1024) - -typedef SceUID pthread_t; -typedef SceUID pthread_mutex_t; -typedef void* pthread_mutexattr_t; -typedef int pthread_attr_t; - -typedef struct -{ - SceUID mutex; - SceUID sema; - int waiting; -} pthread_cond_t; - -typedef SceUID pthread_condattr_t; - -/* Use pointer values to create unique names for threads/mutexes */ -char name_buffer[256]; - -typedef void* (*sthreadEntry)(void *argp); - -typedef struct -{ - void* arg; - sthreadEntry start_routine; -} sthread_args_struct; - - -static int psp_thread_wrap(SceSize args, void *argp) -{ - sthread_args_struct* sthread_args = (sthread_args_struct*)argp; - - return (int)sthread_args->start_routine(sthread_args->arg); -} - -static INLINE int pthread_create(pthread_t *thread, - const pthread_attr_t *attr, void *(*start_routine)(void*), void *arg) -{ - sprintf(name_buffer, "0x%08X", (unsigned int) thread); - -#ifdef VITA - *thread = sceKernelCreateThread(name_buffer, psp_thread_wrap, - 0x10000100, 0x10000, 0, 0, NULL); -#else - *thread = sceKernelCreateThread(name_buffer, - psp_thread_wrap, 0x20, STACKSIZE, 0, NULL); -#endif - - sthread_args_struct sthread_args; - sthread_args.arg = arg; - sthread_args.start_routine = start_routine; - - return 
sceKernelStartThread(*thread, sizeof(sthread_args), &sthread_args); -} - -static INLINE int pthread_mutex_init(pthread_mutex_t *mutex, - const pthread_mutexattr_t *attr) -{ - sprintf(name_buffer, "0x%08X", (unsigned int) mutex); - -#ifdef VITA - *mutex = sceKernelCreateMutex(name_buffer, 0, 0, 0); - if(*mutex<0) - return *mutex; - return 0; -#else - return *mutex = sceKernelCreateSema(name_buffer, 0, 1, 1, NULL); -#endif -} - -static INLINE int pthread_mutex_destroy(pthread_mutex_t *mutex) -{ -#ifdef VITA - return sceKernelDeleteMutex(*mutex); -#else - return sceKernelDeleteSema(*mutex); -#endif -} - -static INLINE int pthread_mutex_lock(pthread_mutex_t *mutex) -{ -#ifdef VITA - int ret = sceKernelLockMutex(*mutex, 1, 0); - return ret; - -#else - /* FIXME: stub */ - return 1; -#endif -} - -static INLINE int pthread_mutex_unlock(pthread_mutex_t *mutex) -{ -#ifdef VITA - int ret = sceKernelUnlockMutex(*mutex, 1); - return ret; -#else - /* FIXME: stub */ - return 1; -#endif -} - - -static INLINE int pthread_join(pthread_t thread, void **retval) -{ -#ifdef VITA - int res = sceKernelWaitThreadEnd(thread, 0, 0); - if (res < 0) - return res; - return sceKernelDeleteThread(thread); -#else - SceUInt timeout = (SceUInt)-1; - sceKernelWaitThreadEnd(thread, &timeout); - exit_status = sceKernelGetThreadExitStatus(thread); - sceKernelDeleteThread(thread); - return exit_status; -#endif -} - -static INLINE int pthread_mutex_trylock(pthread_mutex_t *mutex) -{ -#ifdef VITA - return sceKernelTryLockMutex(*mutex, 1 /* not sure about this last param */); -#else - /* FIXME: stub */ - return 1; -#endif -} - -static INLINE int pthread_cond_wait(pthread_cond_t *cond, - pthread_mutex_t *mutex) -{ -#ifdef VITA - int ret = pthread_mutex_lock(&cond->mutex); - if (ret < 0) - return ret; - ++cond->waiting; - pthread_mutex_unlock(mutex); - pthread_mutex_unlock(&cond->mutex); - - ret = sceKernelWaitSema(cond->sema, 1, 0); - if (ret < 0) - sceClibPrintf("Premature wakeup: %08X", ret); - 
pthread_mutex_lock(mutex); - return ret; -#else - /* FIXME: stub */ - sceKernelDelayThread(10000); - return 1; -#endif -} - -static INLINE int pthread_cond_timedwait(pthread_cond_t *cond, - pthread_mutex_t *mutex, const struct timespec *abstime) -{ -#ifdef VITA - int ret = pthread_mutex_lock(&cond->mutex); - if (ret < 0) - return ret; - ++cond->waiting; - pthread_mutex_unlock(mutex); - pthread_mutex_unlock(&cond->mutex); - - SceUInt timeout = 0; - - timeout = abstime->tv_sec; - timeout += abstime->tv_nsec / 1.0e6; - - ret = sceKernelWaitSema(cond->sema, 1, &timeout); - if (ret < 0) - sceClibPrintf("Premature wakeup: %08X", ret); - pthread_mutex_lock(mutex); - return ret; - -#else - /* FIXME: stub */ - return 1; -#endif -} - -static INLINE int pthread_cond_init(pthread_cond_t *cond, - const pthread_condattr_t *attr) -{ -#ifdef VITA - - pthread_mutex_init(&cond->mutex,NULL); - if(cond->mutex<0){ - return cond->mutex; - } - sprintf(name_buffer, "0x%08X", (unsigned int) cond); - //cond->sema = sceKernelCreateCond(name_buffer, 0, cond->mutex, 0); - cond->sema = sceKernelCreateSema(name_buffer, 0, 0, 1, 0); - if(cond->sema<0){ - pthread_mutex_destroy(&cond->mutex); - return cond->sema; - } - - cond->waiting = 0; - - - return 0; - - -#else - /* FIXME: stub */ - return 1; -#endif -} - -static INLINE int pthread_cond_signal(pthread_cond_t *cond) -{ -#ifdef VITA - pthread_mutex_lock(&cond->mutex); - if (cond->waiting) - { - --cond->waiting; - sceKernelSignalSema(cond->sema, 1); - } - pthread_mutex_unlock(&cond->mutex); - return 0; -#else - /* FIXME: stub */ - return 1; -#endif -} - -static INLINE int pthread_cond_broadcast(pthread_cond_t *cond) -{ - /* FIXME: stub */ - return 1; -} - -static INLINE int pthread_cond_destroy(pthread_cond_t *cond) -{ -#ifdef VITA - int ret = sceKernelDeleteSema(cond->sema); - if(ret < 0) - return ret; - - return sceKernelDeleteMutex(cond->mutex); -#else - /* FIXME: stub */ - return 1; -#endif -} - - -static INLINE int pthread_detach(pthread_t 
thread) -{ - return 0; -} - -static INLINE void pthread_exit(void *retval) -{ -#ifdef VITA - sceKernelExitDeleteThread(sceKernelGetThreadId()); -#endif -} - -static INLINE pthread_t pthread_self(void) -{ - /* zero 20-mar-2016: untested */ - return sceKernelGetThreadId(); -} - -static INLINE int pthread_equal(pthread_t t1, pthread_t t2) -{ - return t1 == t2; -} - -#endif //_PSP_PTHREAD_WRAP__ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 1618b0f..d8c2372 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -32,6 +32,7 @@ #ifdef VITA #include static int sceBlock; +int getVMBlock(); #endif #include "new_dynarec_config.h" @@ -53,6 +54,10 @@ static int sceBlock; #include "assem_arm.h" #endif +#ifdef VITA +int _newlib_vm_size_user = 1 << TARGET_SIZE_2; +#endif + #define MAXBLOCK 4096 #define MAX_OUTPUT_BLOCK_SIZE 262144 @@ -7054,12 +7059,13 @@ void new_dynarec_init() } #elif defined(BASE_ADDR_DYNAMIC) #ifdef VITA - sceBlock = sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2); + sceBlock = getVMBlock();//sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2); if (sceBlock < 0) SysPrintf("sceKernelAllocMemBlockForVM failed\n"); int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&translation_cache); if (ret < 0) SysPrintf("sceKernelGetMemBlockBase failed\n"); + sceClibPrintf("translation_cache = 0x%08X \n ", translation_cache); #else translation_cache = mmap (NULL, 1 << TARGET_SIZE_2, PROT_READ | PROT_WRITE | PROT_EXEC, @@ -7097,8 +7103,8 @@ void new_dynarec_cleanup() int n; #if defined(BASE_ADDR_FIXED) || defined(BASE_ADDR_DYNAMIC) #ifdef VITA - sceKernelFreeMemBlock(sceBlock); - sceBlock = -1; + //sceKernelFreeMemBlock(sceBlock); + //sceBlock = -1; #else if (munmap ((void *)BASE_ADDR, 1<>2)|0xF00; + //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2); + } + else if(ptr[3]==0x72) { + // generated by emit_jno_unlikely + if((target-(u_int)ptr2-8)<1024) { + 
assert((addr&3)==0); + assert((target&3)==0); + *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00; + } + else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) { + assert((addr&3)==0); + assert((target&3)==0); + *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00; + } + else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8); + } + else { + assert((ptr[3]&0x0e)==0xa); + *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); + } +} + +// This optionally copies the instruction from the target of the branch into +// the space before the branch. Works, but the difference in speed is +// usually insignificant. +#if 0 +static void set_jump_target_fillslot(int addr,u_int target,int copy) +{ + u_char *ptr=(u_char *)addr; + u_int *ptr2=(u_int *)ptr; + assert(!copy||ptr2[-1]==0xe28dd000); + if(ptr[3]==0xe2) { + assert(!copy); + assert((target-(u_int)ptr2-8)<4096); + *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8); + } + else { + assert((ptr[3]&0x0e)==0xa); + u_int target_insn=*(u_int *)target; + if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags + copy=0; + } + if((target_insn&0x0c100000)==0x04100000) { // Load + copy=0; + } + if(target_insn&0x08000000) { + copy=0; + } + if(copy) { + ptr2[-1]=target_insn; + target+=4; + } + *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); + } +} +#endif + +/* Literal pool */ +static void add_literal(int addr,int val) +{ + assert(literalcount>6)+8; +} + +// Find the "clean" entry point from a "dirty" entry point +// by skipping past the call to verify_code +static u_int get_clean_addr(int addr) +{ + int *ptr=(int *)addr; + #ifndef HAVE_ARMV7 + ptr+=4; + #else + ptr+=6; + #endif + if((*ptr&0xFF000000)!=0xeb000000) ptr++; + assert((*ptr&0xFF000000)==0xeb000000); // bl instruction + ptr++; + if((*ptr&0xFF000000)==0xea000000) { + return (int)ptr+((*ptr<<8)>>6)+8; // follow jump + } + return (u_int)ptr; +} + +static int verify_dirty(u_int *ptr) +{ + #ifndef HAVE_ARMV7 + // get from literal 
pool + assert((*ptr&0xFFFF0000)==0xe59f0000); + u_int offset=*ptr&0xfff; + u_int *l_ptr=(void *)ptr+offset+8; + u_int source=l_ptr[0]; + u_int copy=l_ptr[1]; + u_int len=l_ptr[2]; + ptr+=4; + #else + // ARMv7 movw/movt + assert((*ptr&0xFFF00000)==0xe3000000); + u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); + u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); + u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); + ptr+=6; + #endif + if((*ptr&0xFF000000)!=0xeb000000) ptr++; + assert((*ptr&0xFF000000)==0xeb000000); // bl instruction + //printf("verify_dirty: %x %x %x\n",source,copy,len); + return !memcmp((void *)source,(void *)copy,len); +} + +// This doesn't necessarily find all clean entry points, just +// guarantees that it's not dirty +static int isclean(int addr) +{ + #ifndef HAVE_ARMV7 + u_int *ptr=((u_int *)addr)+4; + #else + u_int *ptr=((u_int *)addr)+6; + #endif + if((*ptr&0xFF000000)!=0xeb000000) ptr++; + if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction + if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0; + if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0; + if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0; + return 1; +} + +// get source that block at addr was compiled from (host pointers) +static void get_bounds(int addr,u_int *start,u_int *end) +{ + u_int *ptr=(u_int *)addr; + #ifndef HAVE_ARMV7 + // get from literal pool + assert((*ptr&0xFFFF0000)==0xe59f0000); + u_int offset=*ptr&0xfff; + u_int *l_ptr=(void *)ptr+offset+8; + u_int source=l_ptr[0]; + //u_int copy=l_ptr[1]; + u_int len=l_ptr[2]; + ptr+=4; + #else + // ARMv7 movw/movt + assert((*ptr&0xFFF00000)==0xe3000000); + u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); + //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); + u_int 
len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); + ptr+=6; + #endif + if((*ptr&0xFF000000)!=0xeb000000) ptr++; + assert((*ptr&0xFF000000)==0xeb000000); // bl instruction + *start=source; + *end=source+len; +} + +/* Register allocation */ + +// Note: registers are allocated clean (unmodified state) +// if you intend to modify the register, you must call dirty_reg(). +static void alloc_reg(struct regstat *cur,int i,signed char reg) +{ + int r,hr; + int preferred_reg = (reg&7); + if(reg==CCREG) preferred_reg=HOST_CCREG; + if(reg==PTEMP||reg==FTEMP) preferred_reg=12; + + // Don't allocate unused registers + if((cur->u>>reg)&1) return; + + // see if it's already allocated + for(hr=0;hrregmap[hr]==reg) return; + } + + // Keep the same mapping if the register was already allocated in a loop + preferred_reg = loop_reg(i,reg,preferred_reg); + + // Try to allocate the preferred register + if(cur->regmap[preferred_reg]==-1) { + cur->regmap[preferred_reg]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; + if(r<64&&((cur->u>>r)&1)) { + cur->regmap[preferred_reg]=reg; + cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { + cur->regmap[preferred_reg]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]; + if(r>=0) { + if(r<64) { + if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} + } + else + { + if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} + } + } + } + // Try to allocate any available register, but prefer + // registers that have not been used recently. 
+ if(i>0) { + for(hr=0;hrregmap[hr]==-1) { + if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); + //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); + if(i>0) { + // Don't evict the cycle count at entry points, otherwise the entry + // stub will have to write it. + if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; + for(j=10;j>=3;j--) + { + // Alloc preferred register if available + if(hsn[r=cur->regmap[preferred_reg]&63]==j) { + for(hr=0;hrregmap[hr]&63)==r) { + cur->regmap[hr]=-1; + cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg; + return; + } + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<=0;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<uu>>reg)&1) return; + + // see if the upper half is already allocated + for(hr=0;hrregmap[hr]==reg+64) return; + } + + // Keep the same mapping if the register was already allocated in a loop + preferred_reg = loop_reg(i,reg,preferred_reg); + + // Try to allocate the preferred register + if(cur->regmap[preferred_reg]==-1) { + cur->regmap[preferred_reg]=reg|64; + 
cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; + if(r<64&&((cur->u>>r)&1)) { + cur->regmap[preferred_reg]=reg|64; + cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { + cur->regmap[preferred_reg]=reg|64; + cur->dirty&=~(1<isconst&=~(1<=0;hr--) + { + r=cur->regmap[hr]; + if(r>=0) { + if(r<64) { + if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} + } + else + { + if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} + } + } + } + // Try to allocate any available register, but prefer + // registers that have not been used recently. + if(i>0) { + for(hr=0;hrregmap[hr]==-1) { + if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); + //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); + if(i>0) { + // Don't evict the cycle count at entry points, otherwise the entry + // stub will have to write it. 
+ if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; + for(j=10;j>=3;j--) + { + // Alloc preferred register if available + if(hsn[r=cur->regmap[preferred_reg]&63]==j) { + for(hr=0;hrregmap[hr]&63)==r) { + cur->regmap[hr]=-1; + cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg|64; + return; + } + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<=0;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg|64; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==reg) return; + } + + // Try to allocate any available register + for(hr=HOST_REGS-1;hr>=0;hr--) { + if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<=0;hr--) + { + r=cur->regmap[hr]; + if(r>=0) { + if(r<64) { + if((cur->u>>r)&1) { + if(i==0||((unneeded_reg[i-1]>>r)&1)) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<uu>>(r&63))&1) { + if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); + if(i>0) { + // Don't evict the cycle count at entry points, otherwise the entry + // stub will have to write it. 
+ if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; + if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; + for(j=10;j>=3;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { + for(hr=0;hr2) { + if(cur->regmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<2) { + if(cur->regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<=0;j--) + { + for(r=1;r<=MAXREG;r++) + { + if(hsn[r]==j) { + for(hr=0;hrregmap[hr]==r+64) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { + cur->regmap[hr]=reg; + cur->dirty&=~(1<isconst&=~(1<regmap[n]==reg) { + dirty=(cur->dirty>>n)&1; + cur->regmap[n]=-1; + } + } + + cur->regmap[hr]=reg; + cur->dirty&=~(1<dirty|=dirty<isconst&=~(1<0) + { + if(imm<256) { + *encoded=((i&30)<<7)|imm; + return 1; + } + imm=(imm>>2)|(imm<<30);i-=2; + } + return 0; +} + +static void genimm_checked(u_int imm,u_int *encoded) +{ + u_int ret=genimm(imm,encoded); + assert(ret); + (void)ret; +} + +static u_int genjmp(u_int addr) +{ + int offset=addr-(int)out-8; + if(offset<-33554432||offset>=33554432) { + if (addr>2) { + SysPrintf("genjmp: out of range: %08x\n", offset); + exit(1); + } + return 0; + } + return ((u_int)offset>>2)&0xffffff; +} + +static void emit_mov(int rs,int rt) +{ + assem_debug("mov %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_movs(int rs,int rt) +{ + assem_debug("movs %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_add(int rs1,int rs2,int rt) +{ + assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_adds(int rs1,int rs2,int rt) +{ + assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2)); +} + +static void 
emit_adcs(int rs1,int rs2,int rt) +{ + assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_sbc(int rs1,int rs2,int rt) +{ + assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_sbcs(int rs1,int rs2,int rt) +{ + assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_neg(int rs, int rt) +{ + assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0xe2600000|rd_rn_rm(rt,rs,0)); +} + +static void emit_negs(int rs, int rt) +{ + assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0xe2700000|rd_rn_rm(rt,rs,0)); +} + +static void emit_sub(int rs1,int rs2,int rt) +{ + assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_subs(int rs1,int rs2,int rt) +{ + assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_zeroreg(int rt) +{ + assem_debug("mov %s,#0\n",regname[rt]); + output_w32(0xe3a00000|rd_rn_rm(rt,0,0)); +} + +static void emit_loadlp(u_int imm,u_int rt) +{ + add_literal((int)out,imm); + assem_debug("ldr %s,pc+? 
[=%x]\n",regname[rt],imm); + output_w32(0xe5900000|rd_rn_rm(rt,15,0)); +} + +static void emit_movw(u_int imm,u_int rt) +{ + assert(imm<65536); + assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm); + output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000)); +} + +static void emit_movt(u_int imm,u_int rt) +{ + assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000); + output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000)); +} + +static void emit_movimm(u_int imm,u_int rt) +{ + u_int armval; + if(genimm(imm,&armval)) { + assem_debug("mov %s,#%d\n",regname[rt],imm); + output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); + }else if(genimm(~imm,&armval)) { + assem_debug("mvn %s,#%d\n",regname[rt],imm); + output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); + }else if(imm<65536) { + #ifndef HAVE_ARMV7 + assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00); + output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8)); + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); + output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + #else + emit_movw(imm,rt); + #endif + }else{ + #ifndef HAVE_ARMV7 + emit_loadlp(imm,rt); + #else + emit_movw(imm&0x0000FFFF,rt); + emit_movt(imm&0xFFFF0000,rt); + #endif + } +} + +static void emit_pcreladdr(u_int rt) +{ + assem_debug("add %s,pc,#?\n",regname[rt]); + output_w32(0xe2800000|rd_rn_rm(rt,15,0)); +} + +/* Emit an fp-relative ldr that loads register r into host register hr. r==0 is emitted as a zero move instead; an r with bit 64 set is reported and asserted against. The slot offset is measured from dynarec_local and must stay below 4096. */ +static void emit_loadreg(int r, int hr) +{ + if(r&64) { + SysPrintf("64bit load in 32bit mode!\n"); + assert(0); + return; + } + if((r&63)==0) + emit_zeroreg(hr); + else { + /* default slot: reg[] base plus the register index scaled by REG_SHIFT; the special registers below override it */ + int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4); + if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); + if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); + if(r==CCREG) addr=(int)&cycle_count; + if(r==CSREG) addr=(int)&Status; + if(r==FSREG) addr=(int)&FCR31; + if(r==INVCP) addr=(int)&invc_ptr; + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("ldr %s,fp+%d\n",regname[hr],offset); +
output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset); + } +} + +/* Emit an fp-relative str that stores host register hr into the slot of register r. An r with bit 64 set is reported and asserted against. The slot offset is measured from dynarec_local and must stay below 4096. */ +static void emit_storereg(int r, int hr) +{ + if(r&64) { + SysPrintf("64bit store in 32bit mode!\n"); + assert(0); + return; + } + /* default slot: reg[] base plus the register index scaled by REG_SHIFT; the special registers below override it */ + int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4); + if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); + if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); + if(r==CCREG) addr=(int)&cycle_count; + if(r==FSREG) addr=(int)&FCR31; + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("str %s,fp+%d\n",regname[hr],offset); + output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset); +} + +static void emit_test(int rs, int rt) +{ + assem_debug("tst %s,%s\n",regname[rs],regname[rt]); + output_w32(0xe1100000|rd_rn_rm(0,rs,rt)); +} + +static void emit_testimm(int rs,int imm) +{ + u_int armval; + assem_debug("tst %s,#%d\n",regname[rs],imm); + genimm_checked(imm,&armval); + output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval); +} + +static void emit_testeqimm(int rs,int imm) +{ + u_int armval; + assem_debug("tsteq %s,$%d\n",regname[rs],imm); + genimm_checked(imm,&armval); + output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval); +} + +static void emit_not(int rs,int rt) +{ + assem_debug("mvn %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe1e00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_mvnmi(int rs,int rt) +{ + assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); + output_w32(0x41e00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_and(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_or(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_or_and_set_flags(int rs1,int rs2,int rt) +{ + assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) +{ +
assert(rs<16); + assert(rt<16); + assert(imm<32); + assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm); + output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7)); +} + +static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(imm<32); + assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm); + output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7)); +} + +static void emit_xor(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2)); +} + +/* Emit rt = rs + imm. Tries, in order: one add or one sub when genimm accepts imm or -imm; with HAVE_ARMV7 and rt!=rs, a movw of the 16-bit magnitude followed by add/sub; two subs for a negative magnitude below 65536; otherwise a chain of adds, each taking one 8-bit chunk of imm. imm==0 emits only a mov, and only when rs!=rt. */ +static void emit_addimm(u_int rs,int imm,u_int rt) +{ + assert(rs<16); + assert(rt<16); + if(imm!=0) { + u_int armval; + if(genimm(imm,&armval)) { + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); + }else if(genimm(-imm,&armval)) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm); + output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); + #ifdef HAVE_ARMV7 + }else if(rt!=rs&&(u_int)imm<65536) { + emit_movw(imm&0x0000ffff,rt); + emit_add(rs,rt,rt); + }else if(rt!=rs&&(u_int)-imm<65536) { + emit_movw(-imm&0x0000ffff,rt); + emit_sub(rs,rt,rt); + #endif + }else if((u_int)-imm<65536) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00); + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); + output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8)); + output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); + }else { + do { + /* even bit position of the lowest set bit of imm */ + int shift = (ffs(imm) - 1) & ~1; + /* build the mask unsigned: 0xff<<shift overflows a signed int once shift reaches 24 */ + int imm8 = imm & (0xffu << shift); + genimm_checked(imm8,&armval); + assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8); + output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); + /* later chunks accumulate onto rt */ + rs = rt; + imm &= ~imm8; + } + while (imm != 0); + } + } + else if(rs!=rt) emit_mov(rs,rt); +} + +static void emit_addimm_and_set_flags(int imm,int rt) +{ + assert(imm>-65536&&imm<65536); + u_int armval; +
if(genimm(imm,&armval)) { + assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm); + output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval); + }else if(genimm(-imm,&armval)) { + assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm); + output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval); + }else if(imm<0) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00); + assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); + output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8)); + output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); + }else{ + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00); + assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); + output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8)); + output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + } +} + +static void emit_addimm_no_flags(u_int imm,u_int rt) +{ + emit_addimm(rt,imm,rt); +} + +static void emit_addnop(u_int r) +{ + assert(r<16); + assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]); + output_w32(0xe2800000|rd_rn_rm(r,r,0)); +} + +static void emit_adcimm(u_int rs,int imm,u_int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_rscimm(int rs,int imm,u_int rt) +{ + assert(0); + u_int armval; + genimm_checked(imm,&armval); + assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) +{ + // TODO: if(genimm(imm,&armval)) ... 
+ // else + emit_movimm(imm,HOST_TEMPREG); + emit_adds(HOST_TEMPREG,rsl,rtl); + emit_adcimm(rsh,0,rth); +} + +static void emit_andimm(int rs,int imm,int rt) +{ + u_int armval; + if(imm==0) { + emit_zeroreg(rt); + }else if(genimm(imm,&armval)) { + assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval); + }else if(genimm(~imm,&armval)) { + assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval); + }else if(imm==65535) { + #ifndef HAVE_ARMV6 + assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]); + output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF); + assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]); + output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF); + #else + assem_debug("uxth %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs)); + #endif + }else{ + assert(imm>0&&imm<65535); + #ifndef HAVE_ARMV7 + assem_debug("mov r14,#%d\n",imm&0xFF00); + output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8)); + assem_debug("add r14,r14,#%d\n",imm&0xFF); + output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0)); + #else + emit_movw(imm,HOST_TEMPREG); + #endif + assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]); + output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG)); + } +} + +static void emit_orimm(int rs,int imm,int rt) +{ + u_int armval; + if(imm==0) { + if(rs!=rt) emit_mov(rs,rt); + }else if(genimm(imm,&armval)) { + assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval); + }else{ + assert(imm>0&&imm<65536); + assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); + assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF); + output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8)); + output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + } +} + +static void emit_xorimm(int rs,int imm,int rt) +{ + u_int armval; + if(imm==0) { 
+ if(rs!=rt) emit_mov(rs,rt); + }else if(genimm(imm,&armval)) { + assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval); + }else{ + assert(imm>0&&imm<65536); + assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); + assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF); + output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8)); + output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); + } +} + +static void emit_shlimm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + //if(imm==1) ... + assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); +} + +static void emit_lsls_imm(int rs,int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); +} + +static unused void emit_lslpls_imm(int rs,int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); +} + +static void emit_shrimm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); +} + +static void emit_sarimm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7)); +} + +static void emit_rorimm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7)); +} + +static void emit_shldimm(int rs,int rs2,u_int imm,int rt) +{ + assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); + assert(imm>0); + assert(imm<32); + //if(imm==1) ... 
+ assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); + assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); + output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); +} + +static void emit_shrdimm(int rs,int rs2,u_int imm,int rt) +{ + assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); + assert(imm>0); + assert(imm<32); + //if(imm==1) ... + assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7)); + assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); + output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); +} + +static void emit_signextend16(int rs,int rt) +{ + #ifndef HAVE_ARMV6 + emit_shlimm(rs,16,rt); + emit_sarimm(rt,16,rt); + #else + assem_debug("sxth %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs)); + #endif +} + +static void emit_signextend8(int rs,int rt) +{ + #ifndef HAVE_ARMV6 + emit_shlimm(rs,24,rt); + emit_sarimm(rt,24,rt); + #else + assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe6af0070|rd_rn_rm(rt,0,rs)); + #endif +} + +static void emit_shl(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + //if(imm==1) ... 
+ assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8)); +} + +static void emit_shr(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8)); +} + +static void emit_sar(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); + output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); +} + +static void emit_orrshl(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); + output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8)); +} + +static void emit_orrshr(u_int rs,u_int shift,u_int rt) +{ + assert(rs<16); + assert(rt<16); + assert(shift<16); + assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); + output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8)); +} + +static void emit_cmpimm(int rs,int imm) +{ + u_int armval; + if(genimm(imm,&armval)) { + assem_debug("cmp %s,#%d\n",regname[rs],imm); + output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval); + }else if(genimm(-imm,&armval)) { + assem_debug("cmn %s,#%d\n",regname[rs],imm); + output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval); + }else if(imm>0) { + assert(imm<65536); + emit_movimm(imm,HOST_TEMPREG); + assem_debug("cmp %s,r14\n",regname[rs]); + output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG)); + }else{ + assert(imm>-65536); + emit_movimm(-imm,HOST_TEMPREG); + assem_debug("cmn %s,r14\n",regname[rs]); + output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG)); + } +} + +static void emit_cmovne_imm(int imm,int rt) +{ + assem_debug("movne %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + 
output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval); +} + +static void emit_cmovl_imm(int imm,int rt) +{ + assem_debug("movlt %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval); +} + +static void emit_cmovb_imm(int imm,int rt) +{ + assem_debug("movcc %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval); +} + +static void emit_cmovs_imm(int imm,int rt) +{ + assem_debug("movmi %s,#%d\n",regname[rt],imm); + u_int armval; + genimm_checked(imm,&armval); + output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); +} + +static void emit_cmove_reg(int rs,int rt) +{ + assem_debug("moveq %s,%s\n",regname[rt],regname[rs]); + output_w32(0x01a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_cmovne_reg(int rs,int rt) +{ + assem_debug("movne %s,%s\n",regname[rt],regname[rs]); + output_w32(0x11a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_cmovl_reg(int rs,int rt) +{ + assem_debug("movlt %s,%s\n",regname[rt],regname[rs]); + output_w32(0xb1a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_cmovs_reg(int rs,int rt) +{ + assem_debug("movmi %s,%s\n",regname[rt],regname[rs]); + output_w32(0x41a00000|rd_rn_rm(rt,0,rs)); +} + +static void emit_slti32(int rs,int imm,int rt) +{ + if(rs!=rt) emit_zeroreg(rt); + emit_cmpimm(rs,imm); + if(rs==rt) emit_movimm(0,rt); + emit_cmovl_imm(1,rt); +} + +static void emit_sltiu32(int rs,int imm,int rt) +{ + if(rs!=rt) emit_zeroreg(rt); + emit_cmpimm(rs,imm); + if(rs==rt) emit_movimm(0,rt); + emit_cmovb_imm(1,rt); +} + +static void emit_slti64_32(int rsh,int rsl,int imm,int rt) +{ + assert(rsh!=rt); + emit_slti32(rsl,imm,rt); + if(imm>=0) + { + emit_test(rsh,rsh); + emit_cmovne_imm(0,rt); + emit_cmovs_imm(1,rt); + } + else + { + emit_cmpimm(rsh,-1); + emit_cmovne_imm(0,rt); + emit_cmovl_imm(1,rt); + } +} + +static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) +{ + assert(rsh!=rt); + emit_sltiu32(rsl,imm,rt); + 
if(imm>=0) + { + emit_test(rsh,rsh); + emit_cmovne_imm(0,rt); + } + else + { + emit_cmpimm(rsh,-1); + emit_cmovne_imm(1,rt); + } +} + +static void emit_cmp(int rs,int rt) +{ + assem_debug("cmp %s,%s\n",regname[rs],regname[rt]); + output_w32(0xe1500000|rd_rn_rm(0,rs,rt)); +} + +static void emit_set_gz32(int rs, int rt) +{ + //assem_debug("set_gz32\n"); + emit_cmpimm(rs,1); + emit_movimm(1,rt); + emit_cmovl_imm(0,rt); +} + +static void emit_set_nz32(int rs, int rt) +{ + //assem_debug("set_nz32\n"); + if(rs!=rt) emit_movs(rs,rt); + else emit_test(rs,rs); + emit_cmovne_imm(1,rt); +} + +static void emit_set_gz64_32(int rsh, int rsl, int rt) +{ + //assem_debug("set_gz64\n"); + emit_set_gz32(rsl,rt); + emit_test(rsh,rsh); + emit_cmovne_imm(1,rt); + emit_cmovs_imm(0,rt); +} + +static void emit_set_nz64_32(int rsh, int rsl, int rt) +{ + //assem_debug("set_nz64\n"); + emit_or_and_set_flags(rsh,rsl,rt); + emit_cmovne_imm(1,rt); +} + +static void emit_set_if_less32(int rs1, int rs2, int rt) +{ + //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); + if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); + emit_cmp(rs1,rs2); + if(rs1==rt||rs2==rt) emit_movimm(0,rt); + emit_cmovl_imm(1,rt); +} + +static void emit_set_if_carry32(int rs1, int rs2, int rt) +{ + //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); + if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); + emit_cmp(rs1,rs2); + if(rs1==rt||rs2==rt) emit_movimm(0,rt); + emit_cmovb_imm(1,rt); +} + +static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) +{ + //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); + assert(u1!=rt); + assert(u2!=rt); + emit_cmp(l1,l2); + emit_movimm(0,rt); + emit_sbcs(u1,u2,HOST_TEMPREG); + emit_cmovl_imm(1,rt); +} + +static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) +{ + //assem_debug("set if carry64 
(%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); + assert(u1!=rt); + assert(u2!=rt); + emit_cmp(l1,l2); + emit_movimm(0,rt); + emit_sbcs(u1,u2,HOST_TEMPREG); + emit_cmovb_imm(1,rt); +} + +static void emit_call(int a) +{ + assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8); + u_int offset=genjmp(a); + output_w32(0xeb000000|offset); +} + +static void emit_jmp(int a) +{ + assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8); + u_int offset=genjmp(a); + output_w32(0xea000000|offset); +} + +static void emit_jne(int a) +{ + assem_debug("bne %x\n",a); + u_int offset=genjmp(a); + output_w32(0x1a000000|offset); +} + +static void emit_jeq(int a) +{ + assem_debug("beq %x\n",a); + u_int offset=genjmp(a); + output_w32(0x0a000000|offset); +} + +static void emit_js(int a) +{ + assem_debug("bmi %x\n",a); + u_int offset=genjmp(a); + output_w32(0x4a000000|offset); +} + +static void emit_jns(int a) +{ + assem_debug("bpl %x\n",a); + u_int offset=genjmp(a); + output_w32(0x5a000000|offset); +} + +static void emit_jl(int a) +{ + assem_debug("blt %x\n",a); + u_int offset=genjmp(a); + output_w32(0xba000000|offset); +} + +static void emit_jge(int a) +{ + assem_debug("bge %x\n",a); + u_int offset=genjmp(a); + output_w32(0xaa000000|offset); +} + +static void emit_jno(int a) +{ + assem_debug("bvc %x\n",a); + u_int offset=genjmp(a); + output_w32(0x7a000000|offset); +} + +static void emit_jc(int a) +{ + assem_debug("bcs %x\n",a); + u_int offset=genjmp(a); + output_w32(0x2a000000|offset); +} + +static void emit_jcc(int a) +{ + assem_debug("bcc %x\n",a); + u_int offset=genjmp(a); + output_w32(0x3a000000|offset); +} + +static void emit_callreg(u_int r) +{ + assert(r<15); + assem_debug("blx %s\n",regname[r]); + output_w32(0xe12fff30|r); +} + +static void emit_jmpreg(u_int r) +{ + assem_debug("mov pc,%s\n",regname[r]); + output_w32(0xe1a00000|rd_rn_rm(15,0,r)); +} + +static void emit_readword_indexed(int offset, int rs, int rt) +{ + 
assert(offset>-4096&&offset<4096); + assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset); + }else{ + output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset)); + } +} + +static void emit_readword_dualindexedx4(int rs1, int rs2, int rt) +{ + assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100); +} + +static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) +{ + if(map<0) emit_readword_indexed(addr, rs, rt); + else { + assert(addr==0); + emit_readword_dualindexedx4(rs, map, rt); + } +} + +static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) +{ + if(map<0) { + if(rh>=0) emit_readword_indexed(addr, rs, rh); + emit_readword_indexed(addr+4, rs, rl); + }else{ + assert(rh!=rs); + if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh); + emit_addimm(map,1,map); + emit_readword_indexed_tlb(addr, rs, map, rl); + } +} + +static void 
emit_movsbl_indexed(int offset, int rs, int rt) +{ + assert(offset>-256&&offset<256); + assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) +{ + if(map<0) emit_movsbl_indexed(addr, rs, rt); + else { + if(addr==0) { + emit_shlimm(map,2,map); + assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]); + output_w32(0xe19000d0|rd_rn_rm(rt,rs,map)); + }else{ + assert(addr>-256&&addr<256); + assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]); + output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7)); + emit_movsbl_indexed(addr, rt, rt); + } + } +} + +static void emit_movswl_indexed(int offset, int rs, int rt) +{ + assert(offset>-256&&offset<256); + assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_movzbl_indexed(int offset, int rs, int rt) +{ + assert(offset>-4096&&offset<4096); + assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset); + }else{ + output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset)); + } +} + +static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) +{ + assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100); +} + +static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) +{ + if(map<0) emit_movzbl_indexed(addr, rs, rt); + else { + if(addr==0) { + emit_movzbl_dualindexedx4(rs, map, rt); + }else{ + emit_addimm(rs,addr,rt); + emit_movzbl_dualindexedx4(rt, map, rt); 
+ } + } +} + +static void emit_movzwl_indexed(int offset, int rs, int rt) +{ + assert(offset>-256&&offset<256); + assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_ldrd(int offset, int rs, int rt) +{ + assert(offset>-256&&offset<256); + assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_readword(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("ldr %s,fp+%d\n",regname[rt],offset); + output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); +} + +static unused void emit_movsbl(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<256); + assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset); + output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); +} + +static unused void emit_movswl(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<256); + assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset); + output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); +} + +static unused void emit_movzbl(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("ldrb %s,fp+%d\n",regname[rt],offset); + output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset); +} + +static unused void emit_movzwl(int addr, int rt) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<256); + assem_debug("ldrh %s,fp+%d\n",regname[rt],offset); + output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); +} + +static void emit_writeword_indexed(int rt, int offset, 
int rs) +{ + assert(offset>-4096&&offset<4096); + assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset); + }else{ + output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset)); + } +} + +static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2) +{ + assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100); +} + +static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) +{ + if(map<0) emit_writeword_indexed(rt, addr, rs); + else { + assert(addr==0); + emit_writeword_dualindexedx4(rt, rs, map); + } +} + +static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp) +{ + if(map<0) { + if(rh>=0) emit_writeword_indexed(rh, addr, rs); + emit_writeword_indexed(rl, addr+4, rs); + }else{ + assert(rh>=0); + if(temp!=rs) emit_addimm(map,1,temp); + emit_writeword_indexed_tlb(rh, addr, rs, map, temp); + if(temp!=rs) emit_writeword_indexed_tlb(rl, addr, rs, temp, temp); + else { + emit_addimm(rs,4,rs); + emit_writeword_indexed_tlb(rl, addr, rs, map, temp); + } + } +} + +static void emit_writehword_indexed(int rt, int offset, int rs) +{ + assert(offset>-256&&offset<256); + assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); + }else{ + output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); + } +} + +static void emit_writebyte_indexed(int rt, int offset, int rs) +{ + assert(offset>-4096&&offset<4096); + assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset); + if(offset>=0) { + output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset); + }else{ + output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset)); + } +} + +static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) +{ + assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); + 
output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100); +} + +static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) +{ + if(map<0) emit_writebyte_indexed(rt, addr, rs); + else { + if(addr==0) { + emit_writebyte_dualindexedx4(rt, rs, map); + }else{ + emit_addimm(rs,addr,temp); + emit_writebyte_dualindexedx4(rt, temp, map); + } + } +} + +static void emit_strcc_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_strccb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_strcch_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_writeword(int rt, int addr) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("str %s,fp+%d\n",regname[rt],offset); + output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); +} + +static unused void emit_writehword(int rt, int addr) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<256); + assem_debug("strh %s,fp+%d\n",regname[rt],offset); + output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); +} + +static unused void emit_writebyte(int rt, int addr) +{ + u_int offset = addr-(u_int)&dynarec_local; + assert(offset<4096); + assem_debug("strb %s,fp+%d\n",regname[rt],offset); + output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); +} + +static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) +{ + assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); + assert(rs1<16); + assert(rs2<16); + assert(hi<16); + assert(lo<16); + output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} + +static void emit_smull(u_int rs1, u_int rs2, u_int 
hi, u_int lo) +{ + assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); + assert(rs1<16); + assert(rs2<16); + assert(hi<16); + assert(lo<16); + output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); +} + +static void emit_clz(int rs,int rt) +{ + assem_debug("clz %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs)); +} + +static void emit_subcs(int rs1,int rs2,int rt) +{ + assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_shrcc_imm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); +} + +static void emit_shrne_imm(int rs,u_int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); +} + +static void emit_negmi(int rs, int rt) +{ + assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0x42600000|rd_rn_rm(rt,rs,0)); +} + +static void emit_negsmi(int rs, int rt) +{ + assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]); + output_w32(0x42700000|rd_rn_rm(rt,rs,0)); +} + +static void emit_orreq(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_orrne(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2)); +} + +static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); +} + +static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("biceq %s,%s,%s 
lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); +} + +static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); +} + +static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); +} + +static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); +} + +static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) +{ + assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); + output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); +} + +static void emit_teq(int rs, int rt) +{ + assem_debug("teq %s,%s\n",regname[rs],regname[rt]); + output_w32(0xe1300000|rd_rn_rm(0,rs,rt)); +} + +static void emit_rsbimm(int rs, int imm, int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval); +} + +// Load 2 immediates optimizing for small code size +static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) +{ + emit_movimm(imm1,rt1); + u_int armval; + if(genimm(imm2-imm1,&armval)) { + assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1); + output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval); + }else if(genimm(imm1-imm2,&armval)) { + assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2); + output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval); + } + else emit_movimm(imm2,rt2); +} + +// Conditionally select one of two 
immediates, optimizing for small code size +// This will only be called if HAVE_CMOV_IMM is defined +static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) +{ + u_int armval; + if(genimm(imm2-imm1,&armval)) { + emit_movimm(imm1,rt); + assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1); + output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval); + }else if(genimm(imm1-imm2,&armval)) { + emit_movimm(imm1,rt); + assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2); + output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval); + } + else { + #ifndef HAVE_ARMV7 + emit_movimm(imm1,rt); + add_literal((int)out,imm2); + assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2); + output_w32(0x15900000|rd_rn_rm(rt,15,0)); + #else + emit_movw(imm1&0x0000FFFF,rt); + if((imm1&0xFFFF)!=(imm2&0xFFFF)) { + assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF); + output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000)); + } + emit_movt(imm1&0xFFFF0000,rt); + if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) { + assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000); + output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000)); + } + #endif + } +} + +// special case for checking invalid_code +static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) +{ + assert(imm<128&&imm>=0); + assert(r>=0&&r<16); + assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]); + output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620); + emit_cmpimm(HOST_TEMPREG,imm); +} + +static void emit_callne(int a) +{ + assem_debug("blne %x\n",a); + u_int offset=genjmp(a); + output_w32(0x1b000000|offset); +} + +// Used to preload hash table entries +static unused void emit_prefetchreg(int r) +{ + assem_debug("pld %s\n",regname[r]); + output_w32(0xf5d0f000|rd_rn_rm(0,r,0)); +} + +// Special case for mini_ht +static void emit_ldreq_indexed(int rs, u_int offset, int rt) +{ + assert(offset<4096); + 
assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset); + output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset); +} + +static unused void emit_bicne_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_biccs_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_bicvc_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_bichi_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_orrvs_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_orrne_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_andne_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval); +} + +static unused void emit_addpl_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval); +} + +static void emit_jno_unlikely(int a) +{ + //emit_jno(a); + 
assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a); + output_w32(0x72800000|rd_rn_rm(15,15,0)); +} + +static void save_regs_all(u_int reglist) +{ + int i; + if(!reglist) return; + assem_debug("stmia fp,{"); + for(i=0;i<16;i++) + if(reglist&(1<=BASE_ADDR&&addr<(BASE_ADDR+(1<=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000)); +//DEBUG > +#ifdef DEBUG_CYCLE_COUNT + emit_readword((int)&last_count,ECX); + emit_add(HOST_CCREG,ECX,HOST_CCREG); + emit_readword((int)&next_interupt,ECX); + emit_writeword(HOST_CCREG,(int)&Count); + emit_sub(HOST_CCREG,ECX,HOST_CCREG); + emit_writeword(ECX,(int)&last_count); +#endif +//DEBUG < + emit_jmp(linker); +} + +static void emit_extjump(int addr, int target) +{ + emit_extjump2(addr, target, (int)dyna_linker); +} + +static void emit_extjump_ds(int addr, int target) +{ + emit_extjump2(addr, target, (int)dyna_linker_ds); +} + +// put rt_val into rt, potentially making use of rs with value rs_val +static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt) +{ + u_int armval; + int diff; + if(genimm(rt_val,&armval)) { + assem_debug("mov %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); + return; + } + if(genimm(~rt_val,&armval)) { + assem_debug("mvn %s,#%d\n",regname[rt],rt_val); + output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); + return; + } + diff=rt_val-rs_val; + if(genimm(diff,&armval)) { + assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff); + output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); + return; + }else if(genimm(-diff,&armval)) { + assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff); + output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); + return; + } + emit_movimm(rt_val,rt); +} + +// return 1 if above function can do its job cheaply +static int is_similar_value(u_int v1,u_int v2) +{ + u_int xs; + int diff; + if(v1==v2) return 1; + diff=v2-v1; + for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2) + ; + if(xs<0x100) return 1; +
for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2) + ; + if(xs<0x100) return 1; + return 0; +} + +// trashes r2 +static void pass_args(int a0, int a1) +{ + if(a0==1&&a1==0) { + // must swap + emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0); + } + else if(a0!=0&&a1==0) { + emit_mov(a1,1); + if (a0>=0) emit_mov(a0,0); + } + else { + if(a0>=0&&a0!=0) emit_mov(a0,0); + if(a1>=0&&a1!=1) emit_mov(a1,1); + } +} + +static void mov_loadtype_adj(int type,int rs,int rt) +{ + switch(type) { + case LOADB_STUB: emit_signextend8(rs,rt); break; + case LOADBU_STUB: emit_andimm(rs,0xff,rt); break; + case LOADH_STUB: emit_signextend16(rs,rt); break; + case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break; + case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break; + default: assert(0); + } +} + +#include "../backends/psx/pcsxmem.h" +#include "../backends/psx/pcsxmem_inline.c" + +static void do_readstub(int n) +{ + assem_debug("do_readstub %x\n",start+stubs[n][3]*4); + literal_pool(256); + set_jump_target(stubs[n][1],(int)out); + int type=stubs[n][0]; + int i=stubs[n][3]; + int rs=stubs[n][4]; + struct regstat *i_regs=(struct regstat *)stubs[n][5]; + u_int reglist=stubs[n][7]; + signed char *i_regmap=i_regs->regmap; + int rt; + if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { + rt=get_reg(i_regmap,FTEMP); + }else{ + rt=get_reg(i_regmap,rt1[i]); + } + assert(rs>=0); + int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0; + reglist|=(1<=0&&rt1[i]!=0) + reglist&=~(1<=0&&rt1[i]!=0)) { + switch(type) { + case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break; + case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break; + case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break; + case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break; + case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break; + } + } + if(regs_saved) { + restore_jump=(int)out; + emit_jcc(0); // jump to reg restore + } + else + emit_jcc(stubs[n][2]); // return address + + if(!regs_saved) + 
save_regs(reglist); + int handler=0; + if(type==LOADB_STUB||type==LOADBU_STUB) + handler=(int)jump_handler_read8; + if(type==LOADH_STUB||type==LOADHU_STUB) + handler=(int)jump_handler_read16; + if(type==LOADW_STUB) + handler=(int)jump_handler_read32; + assert(handler!=0); + pass_args(rs,temp2); + int cc=get_reg(i_regmap,CCREG); + if(cc<0) + emit_loadreg(CCREG,2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2); + emit_call(handler); + if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { + mov_loadtype_adj(type,0,rt); + } + if(restore_jump) + set_jump_target(restore_jump,(int)out); + restore_regs(reglist); + emit_jmp(stubs[n][2]); // return address +} + +// return memhandler, or get directly accessible address and return 0 +static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) +{ + u_int l1,l2=0; + l1=((u_int *)table)[addr>>12]; + if((l1&(1<<31))==0) { + u_int v=l1<<1; + *addr_host=v+addr; + return 0; + } + else { + l1<<=1; + if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB) + l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)]; + else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB) + l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2]; + else + l2=((u_int *)l1)[(addr&0xfff)/4]; + if((l2&(1<<31))==0) { + u_int v=l2<<1; + *addr_host=v+(addr&0xfff); + return 0; + } + return l2<<1; + } +} + +static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) +{ + int rs=get_reg(regmap,target); + int rt=get_reg(regmap,target); + if(rs<0) rs=get_reg(regmap,-1); + assert(rs>=0); + u_int handler,host_addr=0,is_dynamic,far_call=0; + int cc=get_reg(regmap,CCREG); + if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) + return; + handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr); + if (handler==0) { + if(rt<0||rt1[i]==0) + return; + if(addr!=host_addr) + emit_movimm_from(addr,rs,host_addr,rs); + switch(type) { + case LOADB_STUB: 
emit_movsbl_indexed(0,rs,rt); break; + case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; + case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break; + case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break; + case LOADW_STUB: emit_readword_indexed(0,rs,rt); break; + default: assert(0); + } + return; + } + is_dynamic=pcsxmem_is_handler_dynamic(addr); + if(is_dynamic) { + if(type==LOADB_STUB||type==LOADBU_STUB) + handler=(int)jump_handler_read8; + if(type==LOADH_STUB||type==LOADHU_STUB) + handler=(int)jump_handler_read16; + if(type==LOADW_STUB) + handler=(int)jump_handler_read32; + } + + // call a memhandler + if(rt>=0&&rt1[i]!=0) + reglist&=~(1<=33554432) { + // unreachable memhandler, a plugin func perhaps + emit_movimm(handler,12); + far_call=1; + } + if(cc<0) + emit_loadreg(CCREG,2); + if(is_dynamic) { + emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + } + else { + emit_readword((int)&last_count,3); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + emit_add(2,3,2); + emit_writeword(2,(int)&Count); + } + + if(far_call) + emit_callreg(12); + else + emit_call(handler); + + if(rt>=0&&rt1[i]!=0) { + switch(type) { + case LOADB_STUB: emit_signextend8(0,rt); break; + case LOADBU_STUB: emit_andimm(0,0xff,rt); break; + case LOADH_STUB: emit_signextend16(0,rt); break; + case LOADHU_STUB: emit_andimm(0,0xffff,rt); break; + case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break; + default: assert(0); + } + } + restore_regs(reglist); +} + +static void do_writestub(int n) +{ + assem_debug("do_writestub %x\n",start+stubs[n][3]*4); + literal_pool(256); + set_jump_target(stubs[n][1],(int)out); + int type=stubs[n][0]; + int i=stubs[n][3]; + int rs=stubs[n][4]; + struct regstat *i_regs=(struct regstat *)stubs[n][5]; + u_int reglist=stubs[n][7]; + signed char *i_regmap=i_regs->regmap; + int rt,r; + if(itype[i]==C1LS||itype[i]==C2LS) { + rt=get_reg(i_regmap,r=FTEMP); + }else{ + rt=get_reg(i_regmap,r=rs2[i]); + } + assert(rs>=0); + 
assert(rt>=0); + int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra; + int reglist2=reglist|(1<=0); + assert(rt>=0); + u_int handler,host_addr=0; + handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr); + if (handler==0) { + if(addr!=host_addr) + emit_movimm_from(addr,rs,host_addr,rs); + switch(type) { + case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break; + case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break; + case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break; + default: assert(0); + } + return; + } + + // call a memhandler + save_regs(reglist); + pass_args(rs,rt); + int cc=get_reg(regmap,CCREG); + if(cc<0) + emit_loadreg(CCREG,2); + emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); + emit_movimm(handler,3); + // returns new cycle_count + emit_call((int)jump_handler_write_h); + emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc); + if(cc<0) + emit_storereg(CCREG,2); + restore_regs(reglist); +} + +static void do_unalignedwritestub(int n) +{ + assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4); + literal_pool(256); + set_jump_target(stubs[n][1],(int)out); + + int i=stubs[n][3]; + struct regstat *i_regs=(struct regstat *)stubs[n][4]; + int addr=stubs[n][5]; + u_int reglist=stubs[n][7]; + signed char *i_regmap=i_regs->regmap; + int temp2=get_reg(i_regmap,FTEMP); + int rt; + rt=get_reg(i_regmap,rs2[i]); + assert(rt>=0); + assert(addr>=0); + assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented + reglist|=(1<regmap_entry,i_regs->was32,i_regs->wasdirty); + if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); + emit_movimm(start+(i-ds)*4,EAX); // Get PC + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... 
+ emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception); +} + +/* Special assem */ + +static void shift_assemble_arm(int i,struct regstat *i_regs) +{ + if(rt1[i]) { + if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV + { + signed char s,t,shift; + t=get_reg(i_regs->regmap,rt1[i]); + s=get_reg(i_regs->regmap,rs1[i]); + shift=get_reg(i_regs->regmap,rs2[i]); + if(t>=0){ + if(rs1[i]==0) + { + emit_zeroreg(t); + } + else if(rs2[i]==0) + { + assert(s>=0); + if(s!=t) emit_mov(s,t); + } + else + { + emit_andimm(shift,31,HOST_TEMPREG); + if(opcode2[i]==4) // SLLV + { + emit_shl(s,HOST_TEMPREG,t); + } + if(opcode2[i]==6) // SRLV + { + emit_shr(s,HOST_TEMPREG,t); + } + if(opcode2[i]==7) // SRAV + { + emit_sar(s,HOST_TEMPREG,t); + } + } + } + } else { // DSLLV/DSRLV/DSRAV + signed char sh,sl,th,tl,shift; + th=get_reg(i_regs->regmap,rt1[i]|64); + tl=get_reg(i_regs->regmap,rt1[i]); + sh=get_reg(i_regs->regmap,rs1[i]|64); + sl=get_reg(i_regs->regmap,rs1[i]); + shift=get_reg(i_regs->regmap,rs2[i]); + if(tl>=0){ + if(rs1[i]==0) + { + emit_zeroreg(tl); + if(th>=0) emit_zeroreg(th); + } + else if(rs2[i]==0) + { + assert(sl>=0); + if(sl!=tl) emit_mov(sl,tl); + if(th>=0&&sh!=th) emit_mov(sh,th); + } + else + { + // FIXME: What if shift==tl ? 
+ assert(shift!=tl); + int temp=get_reg(i_regs->regmap,-1); + int real_th=th; + if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register + assert(sl>=0); + assert(sh>=0); + emit_andimm(shift,31,HOST_TEMPREG); + if(opcode2[i]==0x14) // DSLLV + { + if(th>=0) emit_shl(sh,HOST_TEMPREG,th); + emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); + emit_orrshr(sl,HOST_TEMPREG,th); + emit_andimm(shift,31,HOST_TEMPREG); + emit_testimm(shift,32); + emit_shl(sl,HOST_TEMPREG,tl); + if(th>=0) emit_cmovne_reg(tl,th); + emit_cmovne_imm(0,tl); + } + if(opcode2[i]==0x16) // DSRLV + { + assert(th>=0); + emit_shr(sl,HOST_TEMPREG,tl); + emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); + emit_orrshl(sh,HOST_TEMPREG,tl); + emit_andimm(shift,31,HOST_TEMPREG); + emit_testimm(shift,32); + emit_shr(sh,HOST_TEMPREG,th); + emit_cmovne_reg(th,tl); + if(real_th>=0) emit_cmovne_imm(0,th); + } + if(opcode2[i]==0x17) // DSRAV + { + assert(th>=0); + emit_shr(sl,HOST_TEMPREG,tl); + emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); + if(real_th>=0) { + assert(temp>=0); + emit_sarimm(th,31,temp); + } + emit_orrshl(sh,HOST_TEMPREG,tl); + emit_andimm(shift,31,HOST_TEMPREG); + emit_testimm(shift,32); + emit_sar(sh,HOST_TEMPREG,th); + emit_cmovne_reg(th,tl); + if(real_th>=0) emit_cmovne_reg(temp,th); + } + } + } + } + } +} + +static void speculate_mov(int rs,int rt) +{ + if(rt!=0) { + smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); + else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]); + else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); + else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]); + else { + smrv_strong_next&=~(1<=0) { + if(get_final_value(hr,i,&value)) + smrv[rt1[i]]=value; + else smrv[rt1[i]]=constmap[i][hr]; + smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); + else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); + } + break; + case LOAD: + if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) { + // special 
case for BIOS + smrv[rt1[i]]=0xa0000000; + smrv_strong_next|=1<>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); +#endif +} + +enum { + MTYPE_8000 = 0, + MTYPE_8020, + MTYPE_0000, + MTYPE_A000, + MTYPE_1F80, +}; + +static int get_ptr_mem_type(u_int a) +{ + if(a < 0x00200000) { + if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0)) + // return wrong, must use memhandler for BIOS self-test to pass + // 007 does similar stuff from a00 mirror, weird stuff + return MTYPE_8000; + return MTYPE_0000; + } + if(0x1f800000 <= a && a < 0x1f801000) + return MTYPE_1F80; + if(0x80200000 <= a && a < 0x80800000) + return MTYPE_8020; + if(0xa0000000 <= a && a < 0xa0200000) + return MTYPE_A000; + return MTYPE_8000; +} + +static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) +{ + int jaddr=0,type=0; + int mr=rs1[i]; + if(((smrv_strong|smrv_weak)>>mr)&1) { + type=get_ptr_mem_type(smrv[mr]); + //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); + } + else { + // use the mirror we are running on + type=get_ptr_mem_type(start); + //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type); + } + + if(type==MTYPE_8020) { // RAM 80200000+ mirror + emit_andimm(addr,~0x00e00000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_0000) { // RAM 0 mirror + emit_orimm(addr,0x80000000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_A000) { // RAM A mirror + emit_andimm(addr,~0x20000000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_1F80) { // scratchpad + if (psxH == (void *)0x1f800000) { + emit_addimm(addr,-0x1f800000,HOST_TEMPREG); + emit_cmpimm(HOST_TEMPREG,0x1000); + jaddr=(int)out; + emit_jc(0); + } + else { + // do usual RAM check, jump will go to the right handler + type=0; + } + } + + if(type==0) + { + emit_cmpimm(addr,RAM_SIZE); + jaddr=(int)out; + #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK + // Hint to branch predictor 
that the branch is unlikely to be taken + if(rs1[i]>=28) + emit_jno_unlikely(0); + else + #endif + emit_jno(0); + if(ram_offset!=0) { + emit_addimm(addr,ram_offset,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + } + } + + return jaddr; +} + +#define shift_assemble shift_assemble_arm + +static void loadlr_assemble_arm(int i,struct regstat *i_regs) +{ + int s,th,tl,temp,temp2,addr,map=-1; + int offset; + int jaddr=0; + int memtarget=0,c=0; + int fastload_reg_override=0; + u_int hr,reglist=0; + th=get_reg(i_regs->regmap,rt1[i]|64); + tl=get_reg(i_regs->regmap,rt1[i]); + s=get_reg(i_regs->regmap,rs1[i]); + temp=get_reg(i_regs->regmap,-1); + temp2=get_reg(i_regs->regmap,FTEMP); + addr=get_reg(i_regs->regmap,AGEN1+(i&1)); + assert(addr<0); + offset=imm[i]; + for(hr=0;hrregmap[hr]>=0) reglist|=1<=0) { + c=(i_regs->wasconst>>s)&1; + if(c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + } + } + if(!c) { + #ifdef RAM_OFFSET + map=get_reg(i_regs->regmap,ROREG); + if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); + #endif + emit_shlimm(addr,3,temp); + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR + }else{ + emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR + } + jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); + } + else { + if(ram_offset&&memtarget) { + emit_addimm(temp2,ram_offset,HOST_TEMPREG); + fastload_reg_override=HOST_TEMPREG; + } + if (opcode[i]==0x22||opcode[i]==0x26) { + emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR + }else{ + emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR + } + } + if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR + if(!c||memtarget) { + int a=temp2; + if(fastload_reg_override) a=fastload_reg_override; + //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2); + emit_readword_indexed_tlb(0,a,map,temp2); + if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); + } + else + 
inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist); + if(rt1[i]) { + assert(tl>=0); + emit_andimm(temp,24,temp); +#ifdef BIG_ENDIAN_MIPS + if (opcode[i]==0x26) // LWR +#else + if (opcode[i]==0x22) // LWL +#endif + emit_xorimm(temp,24,temp); + emit_movimm(-1,HOST_TEMPREG); + if (opcode[i]==0x26) { + emit_shr(temp2,temp,temp2); + emit_bic_lsr(tl,HOST_TEMPREG,temp,tl); + }else{ + emit_shl(temp2,temp,temp2); + emit_bic_lsl(tl,HOST_TEMPREG,temp,tl); + } + emit_or(temp2,tl,tl); + } + //emit_storereg(rt1[i],tl); // DEBUG + } + if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR + // FIXME: little endian, fastload_reg_override + int temp2h=get_reg(i_regs->regmap,FTEMP|64); + if(!c||memtarget) { + //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h); + //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2); + emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2); + if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); + } + else + inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist); + if(rt1[i]) { + assert(th>=0); + assert(tl>=0); + emit_testimm(temp,32); + emit_andimm(temp,24,temp); + if (opcode[i]==0x1A) { // LDL + emit_rsbimm(temp,32,HOST_TEMPREG); + emit_shl(temp2h,temp,temp2h); + emit_orrshr(temp2,HOST_TEMPREG,temp2h); + emit_movimm(-1,HOST_TEMPREG); + emit_shl(temp2,temp,temp2); + emit_cmove_reg(temp2h,th); + emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl); + emit_bicne_lsl(th,HOST_TEMPREG,temp,th); + emit_orreq(temp2,tl,tl); + emit_orrne(temp2,th,th); + } + if (opcode[i]==0x1B) { // LDR + emit_xorimm(temp,24,temp); + emit_rsbimm(temp,32,HOST_TEMPREG); + emit_shr(temp2,temp,temp2); + emit_orrshl(temp2h,HOST_TEMPREG,temp2); + emit_movimm(-1,HOST_TEMPREG); + emit_shr(temp2h,temp,temp2h); + emit_cmovne_reg(temp2,tl); + emit_bicne_lsr(th,HOST_TEMPREG,temp,th); + emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl); + emit_orrne(temp2h,th,th); 
+ emit_orreq(temp2h,tl,tl); + } + } + } +} +#define loadlr_assemble loadlr_assemble_arm + +static void cop0_assemble(int i,struct regstat *i_regs) +{ + if(opcode2[i]==0) // MFC0 + { + signed char t=get_reg(i_regs->regmap,rt1[i]); + char copr=(source[i]>>11)&0x1f; + //assert(t>=0); // Why does this happen? OOT is weird + if(t>=0&&rt1[i]!=0) { + emit_readword((int)®_cop0+copr*4,t); + } + } + else if(opcode2[i]==4) // MTC0 + { + signed char s=get_reg(i_regs->regmap,rs1[i]); + char copr=(source[i]>>11)&0x1f; + assert(s>=0); + wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); + if(copr==9||copr==11||copr==12||copr==13) { + emit_readword((int)&last_count,HOST_TEMPREG); + emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc + emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); + emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); + emit_writeword(HOST_CCREG,(int)&Count); + } + // What a mess. The status register (12) can enable interrupts, + // so needs a special case to handle a pending interrupt. + // The interrupt must be taken immediately, because a subsequent + // instruction might disable interrupts again. + if(copr==12||copr==13) { + if (is_delayslot) { + // burn cycles to cause cc_interrupt, which will + // reschedule next_interupt. Relies on CCREG from above. 
+ assem_debug("MTC0 DS %d\n", copr); + emit_writeword(HOST_CCREG,(int)&last_count); + emit_movimm(0,HOST_CCREG); + emit_storereg(CCREG,HOST_CCREG); + emit_loadreg(rs1[i],1); + emit_movimm(copr,0); + emit_call((int)pcsx_mtc0_ds); + emit_loadreg(rs1[i],s); + return; + } + emit_movimm(start+i*4+4,HOST_TEMPREG); + emit_writeword(HOST_TEMPREG,(int)&pcaddr); + emit_movimm(0,HOST_TEMPREG); + emit_writeword(HOST_TEMPREG,(int)&pending_exception); + } + //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); + //else + if(s==HOST_CCREG) + emit_loadreg(rs1[i],1); + else if(s!=1) + emit_mov(s,1); + emit_movimm(copr,0); + emit_call((int)pcsx_mtc0); + if(copr==9||copr==11||copr==12||copr==13) { + emit_readword((int)&Count,HOST_CCREG); + emit_readword((int)&next_interupt,HOST_TEMPREG); + emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); + emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); + emit_writeword(HOST_TEMPREG,(int)&last_count); + emit_storereg(CCREG,HOST_CCREG); + } + if(copr==12||copr==13) { + assert(!is_delayslot); + emit_readword((int)&pending_exception,14); + emit_test(14,14); + emit_jne((int)&do_interrupt); + } + emit_loadreg(rs1[i],s); + if(get_reg(i_regs->regmap,rs1[i]|64)>=0) + emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); + cop1_usable=0; + } + else + { + assert(opcode2[i]==0x10); + if((source[i]&0x3f)==0x10) // RFE + { + emit_readword((int)&Status,0); + emit_andimm(0,0x3c,1); + emit_andimm(0,~0xf,0); + emit_orrshr_imm(1,2,0); + emit_writeword(0,(int)&Status); + } + } +} + +static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) +{ + switch (copr) { + case 1: + case 3: + case 5: + case 8: + case 9: + case 10: + case 11: + emit_readword((int)®_cop2d[copr],tl); + emit_signextend16(tl,tl); + emit_writeword(tl,(int)®_cop2d[copr]); // hmh + break; + case 7: + case 16: + case 17: + case 18: + case 19: + emit_readword((int)®_cop2d[copr],tl); + emit_andimm(tl,0xffff,tl); + emit_writeword(tl,(int)®_cop2d[copr]); + break; + case 15: + 
emit_readword((int)®_cop2d[14],tl); // SXY2 + emit_writeword(tl,(int)®_cop2d[copr]); + break; + case 28: + case 29: + emit_readword((int)®_cop2d[9],temp); + emit_testimm(temp,0x8000); // do we need this? + emit_andimm(temp,0xf80,temp); + emit_andne_imm(temp,0,temp); + emit_shrimm(temp,7,tl); + emit_readword((int)®_cop2d[10],temp); + emit_testimm(temp,0x8000); + emit_andimm(temp,0xf80,temp); + emit_andne_imm(temp,0,temp); + emit_orrshr_imm(temp,2,tl); + emit_readword((int)®_cop2d[11],temp); + emit_testimm(temp,0x8000); + emit_andimm(temp,0xf80,temp); + emit_andne_imm(temp,0,temp); + emit_orrshl_imm(temp,3,tl); + emit_writeword(tl,(int)®_cop2d[copr]); + break; + default: + emit_readword((int)®_cop2d[copr],tl); + break; + } +} + +static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) +{ + switch (copr) { + case 15: + emit_readword((int)®_cop2d[13],temp); // SXY1 + emit_writeword(sl,(int)®_cop2d[copr]); + emit_writeword(temp,(int)®_cop2d[12]); // SXY0 + emit_readword((int)®_cop2d[14],temp); // SXY2 + emit_writeword(sl,(int)®_cop2d[14]); + emit_writeword(temp,(int)®_cop2d[13]); // SXY1 + break; + case 28: + emit_andimm(sl,0x001f,temp); + emit_shlimm(temp,7,temp); + emit_writeword(temp,(int)®_cop2d[9]); + emit_andimm(sl,0x03e0,temp); + emit_shlimm(temp,2,temp); + emit_writeword(temp,(int)®_cop2d[10]); + emit_andimm(sl,0x7c00,temp); + emit_shrimm(temp,3,temp); + emit_writeword(temp,(int)®_cop2d[11]); + emit_writeword(sl,(int)®_cop2d[28]); + break; + case 30: + emit_movs(sl,temp); + emit_mvnmi(temp,temp); +#ifdef HAVE_ARMV5 + emit_clz(temp,temp); +#else + emit_movs(temp,HOST_TEMPREG); + emit_movimm(0,temp); + emit_jeq((int)out+4*4); + emit_addpl_imm(temp,1,temp); + emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_jns((int)out-2*4); +#endif + emit_writeword(sl,(int)®_cop2d[30]); + emit_writeword(temp,(int)®_cop2d[31]); + break; + case 31: + break; + default: + emit_writeword(sl,(int)®_cop2d[copr]); + break; + } +} + +static void cop2_assemble(int i,struct 
regstat *i_regs) +{ + u_int copr=(source[i]>>11)&0x1f; + signed char temp=get_reg(i_regs->regmap,-1); + if (opcode2[i]==0) { // MFC2 + signed char tl=get_reg(i_regs->regmap,rt1[i]); + if(tl>=0&&rt1[i]!=0) + cop2_get_dreg(copr,tl,temp); + } + else if (opcode2[i]==4) { // MTC2 + signed char sl=get_reg(i_regs->regmap,rs1[i]); + cop2_put_dreg(copr,sl,temp); + } + else if (opcode2[i]==2) // CFC2 + { + signed char tl=get_reg(i_regs->regmap,rt1[i]); + if(tl>=0&&rt1[i]!=0) + emit_readword((int)®_cop2c[copr],tl); + } + else if (opcode2[i]==6) // CTC2 + { + signed char sl=get_reg(i_regs->regmap,rs1[i]); + switch(copr) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + emit_signextend16(sl,temp); + break; + case 31: + //value = value & 0x7ffff000; + //if (value & 0x7f87e000) value |= 0x80000000; + emit_shrimm(sl,12,temp); + emit_shlimm(temp,12,temp); + emit_testimm(temp,0x7f000000); + emit_testeqimm(temp,0x00870000); + emit_testeqimm(temp,0x0000e000); + emit_orrne_imm(temp,0x80000000,temp); + break; + default: + temp=sl; + break; + } + emit_writeword(temp,(int)®_cop2c[copr]); + assert(sl>=0); + } +} + +static void c2op_prologue(u_int op,u_int reglist) +{ + save_regs_all(reglist); +#ifdef PCNT + emit_movimm(op,0); + emit_call((int)pcnt_gte_start); +#endif + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs +} + +static void c2op_epilogue(u_int op,u_int reglist) +{ +#ifdef PCNT + emit_movimm(op,0); + emit_call((int)pcnt_gte_end); +#endif + restore_regs_all(reglist); +} + +static void c2op_call_MACtoIR(int lm,int need_flags) +{ + if(need_flags) + emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); + else + emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); +} + +static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) +{ + emit_call((int)func); + // func is C code and trashes r0 + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + if(need_flags||need_ir) + 
c2op_call_MACtoIR(lm,need_flags); + emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); +} + +static void c2op_assemble(int i,struct regstat *i_regs) +{ + u_int c2op=source[i]&0x3f; + u_int hr,reglist_full=0,reglist; + int need_flags,need_ir; + for(hr=0;hrregmap[hr]>=0) reglist_full|=1<>63); // +1 because of how liveness detection works + need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; + assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n", + source[i],gte_unneeded[i+1],need_flags,need_ir); + if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) + need_flags=0; + int shift = (source[i] >> 19) & 1; + int lm = (source[i] >> 10) & 1; + switch(c2op) { +#ifndef DRC_DBG + case GTE_MVMVA: { +#ifdef HAVE_ARMV5 + int v = (source[i] >> 15) & 3; + int cv = (source[i] >> 13) & 3; + int mx = (source[i] >> 17) & 3; + reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7} + c2op_prologue(c2op,reglist); + /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ + if(v<3) + emit_ldrd(v*8,0,4); + else { + emit_movzwl_indexed(9*4,0,4); // gteIR + emit_movzwl_indexed(10*4,0,6); + emit_movzwl_indexed(11*4,0,5); + emit_orrshl_imm(6,16,4); + } + if(mx<3) + emit_addimm(0,32*4+mx*8*4,6); + else + emit_readword((int)&zeromem_ptr,6); + if(cv<3) + emit_addimm(0,32*4+(cv*8+5)*4,7); + else + emit_readword((int)&zeromem_ptr,7); +#ifdef __ARM_NEON__ + emit_movimm(source[i],1); // opcode + emit_call((int)gteMVMVA_part_neon); + if(need_flags) { + emit_movimm(lm,1); + emit_call((int)gteMACtoIR_flags_neon); + } +#else + if(cv==3&&shift) + emit_call((int)gteMVMVA_part_cv3sh12_arm); + else { + emit_movimm(shift,1); + emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); + } + if(need_flags||need_ir) + c2op_call_MACtoIR(lm,need_flags); +#endif +#else /* if not HAVE_ARMV5 */ + c2op_prologue(c2op,reglist); + emit_movimm(source[i],1); // opcode + emit_writeword(1,(int)&psxRegs.code); + emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); +#endif + break; + } + 
case GTE_OP: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DPCS: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); + break; + case GTE_INTPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); + break; + case GTE_SQR: + c2op_prologue(c2op,reglist); + emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); + if(need_flags||need_ir) { + emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); + c2op_call_MACtoIR(lm,need_flags); + } + break; + case GTE_DCPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); + break; + case GTE_GPF: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); + break; + case GTE_GPL: + c2op_prologue(c2op,reglist); + c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); + break; +#endif + default: + c2op_prologue(c2op,reglist); +#ifdef DRC_DBG + emit_movimm(source[i],1); // opcode + emit_writeword(1,(int)&psxRegs.code); +#endif + emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); + break; + } + c2op_epilogue(c2op,reglist); + } +} + +static void cop1_unusable(int i,struct regstat *i_regs) +{ + // XXX: should just just do the exception instead + if(!cop1_usable) { + int jaddr=(int)out; + emit_jmp(0); + add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0); + cop1_usable=1; + } +} + +static void cop1_assemble(int i,struct regstat *i_regs) +{ + cop1_unusable(i, i_regs); +} + +static void fconv_assemble_arm(int i,struct regstat *i_regs) +{ + cop1_unusable(i, i_regs); +} +#define fconv_assemble fconv_assemble_arm + +static void 
fcomp_assemble(int i,struct regstat *i_regs) +{ + cop1_unusable(i, i_regs); +} + +static void float_assemble(int i,struct regstat *i_regs) +{ + cop1_unusable(i, i_regs); +} + +static void multdiv_assemble_arm(int i,struct regstat *i_regs) +{ + // case 0x18: MULT + // case 0x19: MULTU + // case 0x1A: DIV + // case 0x1B: DIVU + // case 0x1C: DMULT + // case 0x1D: DMULTU + // case 0x1E: DDIV + // case 0x1F: DDIVU + if(rs1[i]&&rs2[i]) + { + if((opcode2[i]&4)==0) // 32-bit + { + if(opcode2[i]==0x18) // MULT + { + signed char m1=get_reg(i_regs->regmap,rs1[i]); + signed char m2=get_reg(i_regs->regmap,rs2[i]); + signed char hi=get_reg(i_regs->regmap,HIREG); + signed char lo=get_reg(i_regs->regmap,LOREG); + assert(m1>=0); + assert(m2>=0); + assert(hi>=0); + assert(lo>=0); + emit_smull(m1,m2,hi,lo); + } + if(opcode2[i]==0x19) // MULTU + { + signed char m1=get_reg(i_regs->regmap,rs1[i]); + signed char m2=get_reg(i_regs->regmap,rs2[i]); + signed char hi=get_reg(i_regs->regmap,HIREG); + signed char lo=get_reg(i_regs->regmap,LOREG); + assert(m1>=0); + assert(m2>=0); + assert(hi>=0); + assert(lo>=0); + emit_umull(m1,m2,hi,lo); + } + if(opcode2[i]==0x1A) // DIV + { + signed char d1=get_reg(i_regs->regmap,rs1[i]); + signed char d2=get_reg(i_regs->regmap,rs2[i]); + assert(d1>=0); + assert(d2>=0); + signed char quotient=get_reg(i_regs->regmap,LOREG); + signed char remainder=get_reg(i_regs->regmap,HIREG); + assert(quotient>=0); + assert(remainder>=0); + emit_movs(d1,remainder); + emit_movimm(0xffffffff,quotient); + emit_negmi(quotient,quotient); // .. quotient and .. + emit_negmi(remainder,remainder); // .. 
remainder for div0 case (will be negated back after jump) + emit_movs(d2,HOST_TEMPREG); + emit_jeq((int)out+52); // Division by zero + emit_negsmi(HOST_TEMPREG,HOST_TEMPREG); +#ifdef HAVE_ARMV5 + emit_clz(HOST_TEMPREG,quotient); + emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); +#else + emit_movimm(0,quotient); + emit_addpl_imm(quotient,1,quotient); + emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_jns((int)out-2*4); +#endif + emit_orimm(quotient,1<<31,quotient); + emit_shr(quotient,quotient,quotient); + emit_cmp(remainder,HOST_TEMPREG); + emit_subcs(remainder,HOST_TEMPREG,remainder); + emit_adcs(quotient,quotient,quotient); + emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_jcc((int)out-16); // -4 + emit_teq(d1,d2); + emit_negmi(quotient,quotient); + emit_test(d1,d1); + emit_negmi(remainder,remainder); + } + if(opcode2[i]==0x1B) // DIVU + { + signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend + signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor + assert(d1>=0); + assert(d2>=0); + signed char quotient=get_reg(i_regs->regmap,LOREG); + signed char remainder=get_reg(i_regs->regmap,HIREG); + assert(quotient>=0); + assert(remainder>=0); + emit_mov(d1,remainder); + emit_movimm(0xffffffff,quotient); // div0 case + emit_test(d2,d2); + emit_jeq((int)out+40); // Division by zero +#ifdef HAVE_ARMV5 + emit_clz(d2,HOST_TEMPREG); + emit_movimm(1<<31,quotient); + emit_shl(d2,HOST_TEMPREG,d2); +#else + emit_movimm(0,HOST_TEMPREG); + emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_lslpls_imm(d2,1,d2); + emit_jns((int)out-2*4); + emit_movimm(1<<31,quotient); +#endif + emit_shr(quotient,HOST_TEMPREG,quotient); + emit_cmp(remainder,d2); + emit_subcs(remainder,d2,remainder); + emit_adcs(quotient,quotient,quotient); + emit_shrcc_imm(d2,1,d2); + emit_jcc((int)out-16); // -4 + } + } + else // 64-bit + assert(0); + } + else + { + // Multiply by zero is zero. + // MIPS does not have a divide by zero exception. + // The result is undefined, we return zero. 
+ signed char hr=get_reg(i_regs->regmap,HIREG); + signed char lr=get_reg(i_regs->regmap,LOREG); + if(hr>=0) emit_zeroreg(hr); + if(lr>=0) emit_zeroreg(lr); + } +} +#define multdiv_assemble multdiv_assemble_arm + +static void do_preload_rhash(int r) { + // Don't need this for ARM. On x86, this puts the value 0xf8 into the + // register. On ARM the hash can be done with a single instruction (below) +} + +static void do_preload_rhtbl(int ht) { + emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht); +} + +static void do_rhash(int rs,int rh) { + emit_andimm(rs,0xf8,rh); +} + +static void do_miniht_load(int ht,int rh) { + assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]); + output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh)); +} + +static void do_miniht_jump(int rs,int rh,int ht) { + emit_cmp(rh,rs); + emit_ldreq_indexed(ht,4,15); + #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK + emit_mov(rs,7); + emit_jmp(jump_vaddr_reg[7]); + #else + emit_jmp(jump_vaddr_reg[rs]); + #endif +} + +static void do_miniht_insert(u_int return_address,int rt,int temp) { + #ifndef HAVE_ARMV7 + emit_movimm(return_address,rt); // PC into link register + add_to_linker((int)out,return_address,1); + emit_pcreladdr(temp); + emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); + emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); + #else + emit_movw(return_address&0x0000FFFF,rt); + add_to_linker((int)out,return_address,1); + emit_pcreladdr(temp); + emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); + emit_movt(return_address&0xFFFF0000,rt); + emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); + #endif +} + +static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) +{ + //if(dirty_pre==dirty) return; + int hr,reg; + for(hr=0;hr>(reg&63))&1) { + if(reg>0) { + if(((dirty_pre&~dirty)>>hr)&1) { + if(reg>0&®<34) { + emit_storereg(reg,hr); + if( ((is32_pre&~uu)>>reg)&1 ) { + 
emit_sarimm(hr,31,HOST_TEMPREG); + emit_storereg(reg|64,HOST_TEMPREG); + } + } + else if(reg>=64) { + emit_storereg(reg,hr); + } + } + } + } + } + } +} + + +/* using strd could possibly help but you'd have to allocate registers in pairs +static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu) +{ + int hr; + int wrote=-1; + for(hr=HOST_REGS-1;hr>=0;hr--) { + if(hr!=EXCLUDE_REG) { + if(pre[hr]!=entry[hr]) { + if(pre[hr]>=0) { + if((dirty>>hr)&1) { + if(get_reg(entry,pre[hr])<0) { + if(pre[hr]<64) { + if(!((u>>pre[hr])&1)) { + if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) { + if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { + emit_sarimm(hr,31,hr+1); + emit_strdreg(pre[hr],hr); + } + else + emit_storereg(pre[hr],hr); + }else{ + emit_storereg(pre[hr],hr); + if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { + emit_sarimm(hr,31,hr); + emit_storereg(pre[hr]|64,hr); + } + } + } + }else{ + if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) { + emit_storereg(pre[hr],hr); + } + } + wrote=hr; + } + } + } + } + } + } + for(hr=0;hr=0) { + int nr; + if((nr=get_reg(entry,pre[hr]))>=0) { + emit_mov(hr,nr); + } + } + } + } + } +} +#define wb_invalidate wb_invalidate_arm +*/ + +static void mark_clear_cache(void *target) +{ + u_long offset = (char *)target - (char *)BASE_ADDR; + u_int mask = 1u << ((offset >> 12) & 31); + if (!(needs_clear_cache[offset >> 17] & mask)) { + char *start = (char *)((u_long)target & ~4095ul); + start_tcache_write(start, start + 4096); + needs_clear_cache[offset >> 17] |= mask; + } +} + +// Clearing the cache is rather slow on ARM Linux, so mark the areas +// that need to be cleared, and then only clear these areas once. 
+static void do_clear_cache() +{ + int i,j; + for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) + { + u_int bitmap=needs_clear_cache[i]; + if(bitmap) { + u_int start,end; + for(j=0;j<32;j++) + { + if(bitmap&(1<> 16) +#else + mov \reg, #(\imm & 0x0000ff) + orr \reg, #(\imm & 0x00ff00) + orr \reg, #(\imm & 0xff0000) +#endif +.endm + +/* r0 = virtual target address */ +/* r1 = instruction to patch */ +.macro dyna_linker_main +#ifndef NO_WRITE_EXEC + load_varadr_ext r3, jump_in + /* get_page */ + lsr r2, r0, #12 + mov r6, #4096 + bic r2, r2, #0xe0000 + sub r6, r6, #1 + cmp r2, #0x1000 + ldr r7, [r1] + biclt r2, #0x0e00 + and r6, r6, r2 + cmp r2, #2048 + add r12, r7, #2 + orrcs r2, r6, #2048 + ldr r5, [r3, r2, lsl #2] + lsl r12, r12, #8 + add r6, r1, r12, asr #6 + mov r8, #0 + /* jump_in lookup */ +1: + movs r4, r5 + beq 2f + ldr r3, [r5] /* ll_entry .vaddr */ + ldrd r4, r5, [r4, #8] /* ll_entry .next, .addr */ + teq r3, r0 + bne 1b + teq r4, r6 + moveq pc, r4 /* Stale i-cache */ + mov r8, r4 + b 1b /* jump_in may have dupes, continue search */ +2: + tst r8, r8 + beq 3f /* r0 not in jump_in */ + + mov r5, r1 + mov r1, r6 + bl add_link + sub r2, r8, r5 + and r1, r7, #0xff000000 + lsl r2, r2, #6 + sub r1, r1, #2 + add r1, r1, r2, lsr #8 + str r1, [r5] + mov pc, r8 +3: + /* hash_table lookup */ + cmp r2, #2048 + load_varadr_ext r3, jump_dirty + eor r4, r0, r0, lsl #16 + lslcc r2, r0, #9 + load_varadr_ext r6, hash_table + lsr r4, r4, #12 + lsrcc r2, r2, #21 + bic r4, r4, #15 + ldr r5, [r3, r2, lsl #2] + ldr r7, [r6, r4]! + teq r7, r0 + ldreq pc, [r6, #4] + ldr r7, [r6, #8] + teq r7, r0 + ldreq pc, [r6, #12] + /* jump_dirty lookup */ +6: + movs r4, r5 + beq 8f + ldr r3, [r5] + ldr r5, [r4, #12] + teq r3, r0 + bne 6b +7: + ldr r1, [r4, #8] + /* hash_table insert */ + ldr r2, [r6] + ldr r3, [r6, #4] + str r0, [r6] + str r1, [r6, #4] + str r2, [r6, #8] + str r3, [r6, #12] + mov pc, r1 +8: +#else + /* XXX: should be able to do better than this... 
*/ + bl get_addr_ht + mov pc, r0 +#endif +.endm + + +FUNCTION(dyna_linker): + /* r0 = virtual target address */ + /* r1 = instruction to patch */ + dyna_linker_main + + mov r4, r0 + mov r5, r1 + bl new_recompile_block + tst r0, r0 + mov r0, r4 + mov r1, r5 + beq dyna_linker + /* pagefault */ + mov r1, r0 + mov r2, #8 + .size dyna_linker, .-dyna_linker + +FUNCTION(exec_pagefault): + /* r0 = instruction pointer */ + /* r1 = fault address */ + /* r2 = cause */ + ldr r3, [fp, #LO_reg_cop0+48] /* Status */ + mvn r6, #0xF000000F + ldr r4, [fp, #LO_reg_cop0+16] /* Context */ + bic r6, r6, #0x0F800000 + str r0, [fp, #LO_reg_cop0+56] /* EPC */ + orr r3, r3, #2 + str r1, [fp, #LO_reg_cop0+32] /* BadVAddr */ + bic r4, r4, r6 + str r3, [fp, #LO_reg_cop0+48] /* Status */ + and r5, r6, r1, lsr #9 + str r2, [fp, #LO_reg_cop0+52] /* Cause */ + and r1, r1, r6, lsl #9 + str r1, [fp, #LO_reg_cop0+40] /* EntryHi */ + orr r4, r4, r5 + str r4, [fp, #LO_reg_cop0+16] /* Context */ + mov r0, #0x80000000 + bl get_addr_ht + mov pc, r0 + .size exec_pagefault, .-exec_pagefault + +/* Special dynamic linker for the case where a page fault + may occur in a branch delay slot */ +FUNCTION(dyna_linker_ds): + /* r0 = virtual target address */ + /* r1 = instruction to patch */ + dyna_linker_main + + mov r4, r0 + bic r0, r0, #7 + mov r5, r1 + orr r0, r0, #1 + bl new_recompile_block + tst r0, r0 + mov r0, r4 + mov r1, r5 + beq dyna_linker_ds + /* pagefault */ + bic r1, r0, #7 + mov r2, #0x80000008 /* High bit set indicates pagefault in delay slot */ + sub r0, r1, #4 + b exec_pagefault + .size dyna_linker_ds, .-dyna_linker_ds + + .align 2 + +FUNCTION(jump_vaddr_r0): + eor r2, r0, r0, lsl #16 + b jump_vaddr + .size jump_vaddr_r0, .-jump_vaddr_r0 +FUNCTION(jump_vaddr_r1): + eor r2, r1, r1, lsl #16 + mov r0, r1 + b jump_vaddr + .size jump_vaddr_r1, .-jump_vaddr_r1 +FUNCTION(jump_vaddr_r2): + mov r0, r2 + eor r2, r2, r2, lsl #16 + b jump_vaddr + .size jump_vaddr_r2, .-jump_vaddr_r2 +FUNCTION(jump_vaddr_r3): 
+ eor r2, r3, r3, lsl #16 + mov r0, r3 + b jump_vaddr + .size jump_vaddr_r3, .-jump_vaddr_r3 +FUNCTION(jump_vaddr_r4): + eor r2, r4, r4, lsl #16 + mov r0, r4 + b jump_vaddr + .size jump_vaddr_r4, .-jump_vaddr_r4 +FUNCTION(jump_vaddr_r5): + eor r2, r5, r5, lsl #16 + mov r0, r5 + b jump_vaddr + .size jump_vaddr_r5, .-jump_vaddr_r5 +FUNCTION(jump_vaddr_r6): + eor r2, r6, r6, lsl #16 + mov r0, r6 + b jump_vaddr + .size jump_vaddr_r6, .-jump_vaddr_r6 +FUNCTION(jump_vaddr_r8): + eor r2, r8, r8, lsl #16 + mov r0, r8 + b jump_vaddr + .size jump_vaddr_r8, .-jump_vaddr_r8 +FUNCTION(jump_vaddr_r9): + eor r2, r9, r9, lsl #16 + mov r0, r9 + b jump_vaddr + .size jump_vaddr_r9, .-jump_vaddr_r9 +FUNCTION(jump_vaddr_r10): + eor r2, r10, r10, lsl #16 + mov r0, r10 + b jump_vaddr + .size jump_vaddr_r10, .-jump_vaddr_r10 +FUNCTION(jump_vaddr_r12): + eor r2, r12, r12, lsl #16 + mov r0, r12 + b jump_vaddr + .size jump_vaddr_r12, .-jump_vaddr_r12 +FUNCTION(jump_vaddr_r7): + eor r2, r7, r7, lsl #16 + add r0, r7, #0 + .size jump_vaddr_r7, .-jump_vaddr_r7 +FUNCTION(jump_vaddr): + load_varadr_ext r1, hash_table + mvn r3, #15 + and r2, r3, r2, lsr #12 + ldr r2, [r1, r2]! 
+ teq r2, r0 + ldreq pc, [r1, #4] + ldr r2, [r1, #8] + teq r2, r0 + ldreq pc, [r1, #12] + str r10, [fp, #LO_cycle_count] + bl get_addr + ldr r10, [fp, #LO_cycle_count] + mov pc, r0 + .size jump_vaddr, .-jump_vaddr + + .align 2 + +FUNCTION(verify_code_ds): + str r8, [fp, #LO_branch_target] +FUNCTION(verify_code_vm): +FUNCTION(verify_code): + /* r1 = source */ + /* r2 = target */ + /* r3 = length */ + tst r3, #4 + mov r4, #0 + add r3, r1, r3 + mov r5, #0 + ldrne r4, [r1], #4 + mov r12, #0 + ldrne r5, [r2], #4 + teq r1, r3 + beq .D3 +.D2: + ldr r7, [r1], #4 + eor r9, r4, r5 + ldr r8, [r2], #4 + orrs r9, r9, r12 + bne .D4 + ldr r4, [r1], #4 + eor r12, r7, r8 + ldr r5, [r2], #4 + cmp r1, r3 + bcc .D2 + teq r7, r8 +.D3: + teqeq r4, r5 +.D4: + ldr r8, [fp, #LO_branch_target] + moveq pc, lr +.D5: + bl get_addr + mov pc, r0 + .size verify_code, .-verify_code + .size verify_code_vm, .-verify_code_vm + + .align 2 +FUNCTION(cc_interrupt): + ldr r0, [fp, #LO_last_count] + mov r1, #0 + mov r2, #0x1fc + add r10, r0, r10 + str r1, [fp, #LO_pending_exception] + and r2, r2, r10, lsr #17 + add r3, fp, #LO_restore_candidate + str r10, [fp, #LO_cycle] /* PCSX cycles */ +@@ str r10, [fp, #LO_reg_cop0+36] /* Count */ + ldr r4, [r2, r3] + mov r10, lr + tst r4, r4 + bne .E4 +.E1: + bl gen_interupt + mov lr, r10 + ldr r10, [fp, #LO_cycle] + ldr r0, [fp, #LO_next_interupt] + ldr r1, [fp, #LO_pending_exception] + ldr r2, [fp, #LO_stop] + str r0, [fp, #LO_last_count] + sub r10, r10, r0 + tst r2, r2 + ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} + tst r1, r1 + moveq pc, lr +.E2: + ldr r0, [fp, #LO_pcaddr] + bl get_addr_ht + mov pc, r0 +.E4: + /* Move 'dirty' blocks to the 'clean' list */ + lsl r5, r2, #3 + str r1, [r2, r3] +.E5: + lsrs r4, r4, #1 + mov r0, r5 + add r5, r5, #1 + blcs clean_blocks + tst r5, #31 + bne .E5 + b .E1 + .size cc_interrupt, .-cc_interrupt + + .align 2 +FUNCTION(do_interrupt): + ldr r0, [fp, #LO_pcaddr] + bl get_addr_ht + add r10, r10, #2 + mov pc, r0 + .size 
do_interrupt, .-do_interrupt + + .align 2 +FUNCTION(fp_exception): + mov r2, #0x10000000 +.E7: + ldr r1, [fp, #LO_reg_cop0+48] /* Status */ + mov r3, #0x80000000 + str r0, [fp, #LO_reg_cop0+56] /* EPC */ + orr r1, #2 + add r2, r2, #0x2c + str r1, [fp, #LO_reg_cop0+48] /* Status */ + str r2, [fp, #LO_reg_cop0+52] /* Cause */ + add r0, r3, #0x80 + bl get_addr_ht + mov pc, r0 + .size fp_exception, .-fp_exception + .align 2 +FUNCTION(fp_exception_ds): + mov r2, #0x90000000 /* Set high bit if delay slot */ + b .E7 + .size fp_exception_ds, .-fp_exception_ds + + .align 2 +FUNCTION(jump_syscall): + ldr r1, [fp, #LO_reg_cop0+48] /* Status */ + mov r3, #0x80000000 + str r0, [fp, #LO_reg_cop0+56] /* EPC */ + orr r1, #2 + mov r2, #0x20 + str r1, [fp, #LO_reg_cop0+48] /* Status */ + str r2, [fp, #LO_reg_cop0+52] /* Cause */ + add r0, r3, #0x80 + bl get_addr_ht + mov pc, r0 + .size jump_syscall, .-jump_syscall + .align 2 + + .align 2 +FUNCTION(jump_syscall_hle): + str r0, [fp, #LO_pcaddr] /* PC must be set to EPC for psxException */ + ldr r2, [fp, #LO_last_count] + mov r1, #0 /* in delay slot */ + add r2, r2, r10 + mov r0, #0x20 /* cause */ + str r2, [fp, #LO_cycle] /* PCSX cycle counter */ + bl psxException + + /* note: psxException might do recursive recompiler call from it's HLE code, + * so be ready for this */ +pcsx_return: + ldr r1, [fp, #LO_next_interupt] + ldr r10, [fp, #LO_cycle] + ldr r0, [fp, #LO_pcaddr] + sub r10, r10, r1 + str r1, [fp, #LO_last_count] + bl get_addr_ht + mov pc, r0 + .size jump_syscall_hle, .-jump_syscall_hle + + .align 2 +FUNCTION(jump_hlecall): + ldr r2, [fp, #LO_last_count] + str r0, [fp, #LO_pcaddr] + add r2, r2, r10 + adr lr, pcsx_return + str r2, [fp, #LO_cycle] /* PCSX cycle counter */ + bx r1 + .size jump_hlecall, .-jump_hlecall + + .align 2 +FUNCTION(jump_intcall): + ldr r2, [fp, #LO_last_count] + str r0, [fp, #LO_pcaddr] + add r2, r2, r10 + adr lr, pcsx_return + str r2, [fp, #LO_cycle] /* PCSX cycle counter */ + b execI + .size 
jump_hlecall, .-jump_hlecall + + .align 2 +FUNCTION(new_dyna_leave): + ldr r0, [fp, #LO_last_count] + add r12, fp, #28 + add r10, r0, r10 + str r10, [fp, #LO_cycle] + ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} + .size new_dyna_leave, .-new_dyna_leave + + .align 2 +FUNCTION(invalidate_addr_r0): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + b invalidate_addr_call + .size invalidate_addr_r0, .-invalidate_addr_r0 + .align 2 +FUNCTION(invalidate_addr_r1): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r1 + b invalidate_addr_call + .size invalidate_addr_r1, .-invalidate_addr_r1 + .align 2 +FUNCTION(invalidate_addr_r2): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r2 + b invalidate_addr_call + .size invalidate_addr_r2, .-invalidate_addr_r2 + .align 2 +FUNCTION(invalidate_addr_r3): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r3 + b invalidate_addr_call + .size invalidate_addr_r3, .-invalidate_addr_r3 + .align 2 +FUNCTION(invalidate_addr_r4): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r4 + b invalidate_addr_call + .size invalidate_addr_r4, .-invalidate_addr_r4 + .align 2 +FUNCTION(invalidate_addr_r5): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r5 + b invalidate_addr_call + .size invalidate_addr_r5, .-invalidate_addr_r5 + .align 2 +FUNCTION(invalidate_addr_r6): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r6 + b invalidate_addr_call + .size invalidate_addr_r6, .-invalidate_addr_r6 + .align 2 +FUNCTION(invalidate_addr_r7): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r7 + b invalidate_addr_call + .size invalidate_addr_r7, .-invalidate_addr_r7 + .align 2 +FUNCTION(invalidate_addr_r8): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r8 + b invalidate_addr_call + .size invalidate_addr_r8, .-invalidate_addr_r8 + .align 2 +FUNCTION(invalidate_addr_r9): + stmia fp, {r0, r1, r2, r3, 
EXTRA_UNSAVED_REGS r12, lr} + mov r0, r9 + b invalidate_addr_call + .size invalidate_addr_r9, .-invalidate_addr_r9 + .align 2 +FUNCTION(invalidate_addr_r10): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r10 + b invalidate_addr_call + .size invalidate_addr_r10, .-invalidate_addr_r10 + .align 2 +FUNCTION(invalidate_addr_r12): + stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} + mov r0, r12 + .size invalidate_addr_r12, .-invalidate_addr_r12 + .align 2 +invalidate_addr_call: + ldr r12, [fp, #LO_inv_code_start] + ldr lr, [fp, #LO_inv_code_end] + cmp r0, r12 + cmpcs lr, r0 + blcc invalidate_addr + ldmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, pc} + .size invalidate_addr_call, .-invalidate_addr_call + + .align 2 +FUNCTION(new_dyna_start): + /* ip is stored to conform EABI alignment */ + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} + load_varadr fp, dynarec_local + ldr r0, [fp, #LO_pcaddr] + bl get_addr_ht + ldr r1, [fp, #LO_next_interupt] + ldr r10, [fp, #LO_cycle] + str r1, [fp, #LO_last_count] + sub r10, r10, r1 + mov pc, r0 + .size new_dyna_start, .-new_dyna_start + +/* --------------------------------------- */ + +.align 2 + +.macro pcsx_read_mem readop tab_shift + /* r0 = address, r1 = handler_tab, r2 = cycles */ + lsl r3, r0, #20 + lsr r3, #(20+\tab_shift) + ldr r12, [fp, #LO_last_count] + ldr r1, [r1, r3, lsl #2] + add r2, r2, r12 + lsls r1, #1 +.if \tab_shift == 1 + lsl r3, #1 + \readop r0, [r1, r3] +.else + \readop r0, [r1, r3, lsl #\tab_shift] +.endif + movcc pc, lr + str r2, [fp, #LO_cycle] + bx r1 +.endm + +FUNCTION(jump_handler_read8): + add r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part + pcsx_read_mem ldrbcc, 0 + +FUNCTION(jump_handler_read16): + add r1, #0x1000/4*4 @ shift to r16 part + pcsx_read_mem ldrhcc, 1 + +FUNCTION(jump_handler_read32): + pcsx_read_mem ldrcc, 2 + + +.macro pcsx_write_mem wrtop tab_shift + /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */ + lsl r12,r0, #20 + lsr r12, 
#(20+\tab_shift) + ldr r3, [r3, r12, lsl #2] + str r0, [fp, #LO_address] @ some handlers still need it.. + lsls r3, #1 + mov r0, r2 @ cycle return in case of direct store +.if \tab_shift == 1 + lsl r12, #1 + \wrtop r1, [r3, r12] +.else + \wrtop r1, [r3, r12, lsl #\tab_shift] +.endif + movcc pc, lr + ldr r12, [fp, #LO_last_count] + mov r0, r1 + add r2, r2, r12 + push {r2, lr} + str r2, [fp, #LO_cycle] + blx r3 + + ldr r0, [fp, #LO_next_interupt] + pop {r2, r3} + str r0, [fp, #LO_last_count] + sub r0, r2, r0 + bx r3 +.endm + +FUNCTION(jump_handler_write8): + add r3, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part + pcsx_write_mem strbcc, 0 + +FUNCTION(jump_handler_write16): + add r3, #0x1000/4*4 @ shift to r16 part + pcsx_write_mem strhcc, 1 + +FUNCTION(jump_handler_write32): + pcsx_write_mem strcc, 2 + +FUNCTION(jump_handler_write_h): + /* r0 = address, r1 = data, r2 = cycles, r3 = handler */ + ldr r12, [fp, #LO_last_count] + str r0, [fp, #LO_address] @ some handlers still need it.. + add r2, r2, r12 + mov r0, r1 + push {r2, lr} + str r2, [fp, #LO_cycle] + blx r3 + + ldr r0, [fp, #LO_next_interupt] + pop {r2, r3} + str r0, [fp, #LO_last_count] + sub r0, r2, r0 + bx r3 + +FUNCTION(jump_handle_swl): + /* r0 = address, r1 = data, r2 = cycles */ + ldr r3, [fp, #LO_mem_wtab] + mov r12,r0,lsr #12 + ldr r3, [r3, r12, lsl #2] + lsls r3, #1 + bcs 4f + add r3, r0, r3 + mov r0, r2 + tst r3, #2 + beq 101f + tst r3, #1 + beq 2f +3: + str r1, [r3, #-3] + bx lr +2: + lsr r2, r1, #8 + lsr r1, #24 + strh r2, [r3, #-2] + strb r1, [r3] + bx lr +101: + tst r3, #1 + lsrne r1, #16 @ 1 + lsreq r12, r1, #24 @ 0 + strhne r1, [r3, #-1] + strbeq r12, [r3] + bx lr +4: + mov r0, r2 +@ b abort + bx lr @ TODO? 
+ + +FUNCTION(jump_handle_swr): + /* r0 = address, r1 = data, r2 = cycles */ + ldr r3, [fp, #LO_mem_wtab] + mov r12,r0,lsr #12 + ldr r3, [r3, r12, lsl #2] + lsls r3, #1 + bcs 4f + add r3, r0, r3 + and r12,r3, #3 + mov r0, r2 + cmp r12,#2 + strbgt r1, [r3] @ 3 + strheq r1, [r3] @ 2 + cmp r12,#1 + strlt r1, [r3] @ 0 + bxne lr + lsr r2, r1, #8 @ 1 + strb r1, [r3] + strh r2, [r3, #1] + bx lr +4: + mov r0, r2 +@ b abort + bx lr @ TODO? + + +.macro rcntx_read_mode0 num + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #LO_rcnts+6*4+7*4*\num] @ cycleStart + mov r0, r2, lsl #16 + sub r0, r0, r3, lsl #16 + lsr r0, #16 + bx lr +.endm + +FUNCTION(rcnt0_read_count_m0): + rcntx_read_mode0 0 + +FUNCTION(rcnt1_read_count_m0): + rcntx_read_mode0 1 + +FUNCTION(rcnt2_read_count_m0): + rcntx_read_mode0 2 + +FUNCTION(rcnt0_read_count_m1): + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #LO_rcnts+6*4+7*4*0] @ cycleStart + mov_16 r1, 0x3334 + sub r2, r2, r3 + mul r0, r1, r2 @ /= 5 + lsr r0, #16 + bx lr + +FUNCTION(rcnt1_read_count_m1): + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #LO_rcnts+6*4+7*4*1] + mov_24 r1, 0x1e6cde + sub r2, r2, r3 + umull r3, r0, r1, r2 @ ~ /= hsync_cycles, max ~0x1e6cdd + bx lr + +FUNCTION(rcnt2_read_count_m1): + /* r0 = address, r2 = cycles */ + ldr r3, [fp, #LO_rcnts+6*4+7*4*2] + mov r0, r2, lsl #16-3 + sub r0, r0, r3, lsl #16-3 + lsr r0, #16 @ /= 8 + bx lr + +@ vim:filetype=armasm diff --git a/libpcsxcore/new_dynarec/arm/linkage_offsets.h b/libpcsxcore/new_dynarec/arm/linkage_offsets.h new file mode 100644 index 0000000..f7e1911 --- /dev/null +++ b/libpcsxcore/new_dynarec/arm/linkage_offsets.h @@ -0,0 +1,41 @@ + +#define LO_next_interupt 64 +#define LO_cycle_count (LO_next_interupt + 4) +#define LO_last_count (LO_cycle_count + 4) +#define LO_pending_exception (LO_last_count + 4) +#define LO_stop (LO_pending_exception + 4) +#define LO_invc_ptr (LO_stop + 4) +#define LO_address (LO_invc_ptr + 4) +#define LO_psxRegs (LO_address + 4) +#define LO_reg 
(LO_psxRegs) +#define LO_lo (LO_reg + 128) +#define LO_hi (LO_lo + 4) +#define LO_reg_cop0 (LO_hi + 4) +#define LO_reg_cop2d (LO_reg_cop0 + 128) +#define LO_reg_cop2c (LO_reg_cop2d + 128) +#define LO_PC (LO_reg_cop2c + 128) +#define LO_pcaddr (LO_PC) +#define LO_code (LO_PC + 4) +#define LO_cycle (LO_code + 4) +#define LO_interrupt (LO_cycle + 4) +#define LO_intCycle (LO_interrupt + 4) +#define LO_psxRegs_end (LO_intCycle + 256) +#define LO_rcnts (LO_psxRegs_end) +#define LO_rcnts_end (LO_rcnts + 7*4*4) +#define LO_mem_rtab (LO_rcnts_end) +#define LO_mem_wtab (LO_mem_rtab + 4) +#define LO_psxH_ptr (LO_mem_wtab + 4) +#define LO_zeromem_ptr (LO_psxH_ptr + 4) +#define LO_inv_code_start (LO_zeromem_ptr + 4) +#define LO_inv_code_end (LO_inv_code_start + 4) +#define LO_branch_target (LO_inv_code_end + 4) +#define LO_scratch_buf_ptr (LO_branch_target + 4) +#define LO_align0 (LO_scratch_buf_ptr + 4) +#define LO_mini_ht (LO_align0 + 12) +#define LO_restore_candidate (LO_mini_ht + 256) +#define LO_dynarec_local_size (LO_restore_candidate + 512) + +#define LO_FCR0 (LO_align0) +#define LO_FCR31 (LO_align0) + +#define LO_cop2_to_scratch_buf (LO_scratch_buf_ptr - LO_reg_cop2d) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c deleted file mode 100644 index 21640f8..0000000 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ /dev/null @@ -1,4143 +0,0 @@ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Mupen64plus/PCSX - assem_arm.c * - * Copyright (C) 2009-2011 Ari64 * - * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#include "../gte.h" -#define FLAGLESS -#include "../gte.h" -#undef FLAGLESS -#include "../gte_arm.h" -#include "../gte_neon.h" -#include "pcnt.h" -#include "arm_features.h" - -#if defined(BASE_ADDR_FIXED) -#elif defined(BASE_ADDR_DYNAMIC) -char *translation_cache; -#else -char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); -#endif - -#ifndef __MACH__ -#define CALLER_SAVE_REGS 0x100f -#else -#define CALLER_SAVE_REGS 0x120f -#endif - -#define unused __attribute__((unused)) - -extern int cycle_count; -extern int last_count; -extern int pcaddr; -extern int pending_exception; -extern int branch_target; -extern uint64_t readmem_dword; -extern void *dynarec_local; -extern u_int mini_ht[32][2]; - -void indirect_jump_indexed(); -void indirect_jump(); -void do_interrupt(); -void jump_vaddr_r0(); -void jump_vaddr_r1(); -void jump_vaddr_r2(); -void jump_vaddr_r3(); -void jump_vaddr_r4(); -void jump_vaddr_r5(); -void jump_vaddr_r6(); -void jump_vaddr_r7(); -void jump_vaddr_r8(); -void jump_vaddr_r9(); -void jump_vaddr_r10(); -void jump_vaddr_r12(); - -const u_int jump_vaddr_reg[16] = { - (int)jump_vaddr_r0, - (int)jump_vaddr_r1, - (int)jump_vaddr_r2, - (int)jump_vaddr_r3, - (int)jump_vaddr_r4, - (int)jump_vaddr_r5, - (int)jump_vaddr_r6, - (int)jump_vaddr_r7, - (int)jump_vaddr_r8, - (int)jump_vaddr_r9, - (int)jump_vaddr_r10, - 0, - (int)jump_vaddr_r12, - 0, - 0, - 0}; - -void 
invalidate_addr_r0(); -void invalidate_addr_r1(); -void invalidate_addr_r2(); -void invalidate_addr_r3(); -void invalidate_addr_r4(); -void invalidate_addr_r5(); -void invalidate_addr_r6(); -void invalidate_addr_r7(); -void invalidate_addr_r8(); -void invalidate_addr_r9(); -void invalidate_addr_r10(); -void invalidate_addr_r12(); - -const u_int invalidate_addr_reg[16] = { - (int)invalidate_addr_r0, - (int)invalidate_addr_r1, - (int)invalidate_addr_r2, - (int)invalidate_addr_r3, - (int)invalidate_addr_r4, - (int)invalidate_addr_r5, - (int)invalidate_addr_r6, - (int)invalidate_addr_r7, - (int)invalidate_addr_r8, - (int)invalidate_addr_r9, - (int)invalidate_addr_r10, - 0, - (int)invalidate_addr_r12, - 0, - 0, - 0}; - -static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; - -/* Linker */ - -static void set_jump_target(int addr,u_int target) -{ - u_char *ptr=(u_char *)addr; - u_int *ptr2=(u_int *)ptr; - if(ptr[3]==0xe2) { - assert((target-(u_int)ptr2-8)<1024); - assert((addr&3)==0); - assert((target&3)==0); - *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00; - //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2); - } - else if(ptr[3]==0x72) { - // generated by emit_jno_unlikely - if((target-(u_int)ptr2-8)<1024) { - assert((addr&3)==0); - assert((target&3)==0); - *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00; - } - else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) { - assert((addr&3)==0); - assert((target&3)==0); - *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00; - } - else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8); - } - else { - assert((ptr[3]&0x0e)==0xa); - *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); - } -} - -// This optionally copies the instruction from the target of the branch into -// the space before the branch. Works, but the difference in speed is -// usually insignificant. 
-#if 0 -static void set_jump_target_fillslot(int addr,u_int target,int copy) -{ - u_char *ptr=(u_char *)addr; - u_int *ptr2=(u_int *)ptr; - assert(!copy||ptr2[-1]==0xe28dd000); - if(ptr[3]==0xe2) { - assert(!copy); - assert((target-(u_int)ptr2-8)<4096); - *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8); - } - else { - assert((ptr[3]&0x0e)==0xa); - u_int target_insn=*(u_int *)target; - if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags - copy=0; - } - if((target_insn&0x0c100000)==0x04100000) { // Load - copy=0; - } - if(target_insn&0x08000000) { - copy=0; - } - if(copy) { - ptr2[-1]=target_insn; - target+=4; - } - *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); - } -} -#endif - -/* Literal pool */ -static void add_literal(int addr,int val) -{ - assert(literalcount>6)+8; -} - -// Find the "clean" entry point from a "dirty" entry point -// by skipping past the call to verify_code -static u_int get_clean_addr(int addr) -{ - int *ptr=(int *)addr; - #ifndef HAVE_ARMV7 - ptr+=4; - #else - ptr+=6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - assert((*ptr&0xFF000000)==0xeb000000); // bl instruction - ptr++; - if((*ptr&0xFF000000)==0xea000000) { - return (int)ptr+((*ptr<<8)>>6)+8; // follow jump - } - return (u_int)ptr; -} - -static int verify_dirty(u_int *ptr) -{ - #ifndef HAVE_ARMV7 - // get from literal pool - assert((*ptr&0xFFFF0000)==0xe59f0000); - u_int offset=*ptr&0xfff; - u_int *l_ptr=(void *)ptr+offset+8; - u_int source=l_ptr[0]; - u_int copy=l_ptr[1]; - u_int len=l_ptr[2]; - ptr+=4; - #else - // ARMv7 movw/movt - assert((*ptr&0xFFF00000)==0xe3000000); - u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); - u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); - u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); - ptr+=6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - assert((*ptr&0xFF000000)==0xeb000000); // bl instruction - 
//printf("verify_dirty: %x %x %x\n",source,copy,len); - return !memcmp((void *)source,(void *)copy,len); -} - -// This doesn't necessarily find all clean entry points, just -// guarantees that it's not dirty -static int isclean(int addr) -{ - #ifndef HAVE_ARMV7 - u_int *ptr=((u_int *)addr)+4; - #else - u_int *ptr=((u_int *)addr)+6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction - if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0; - if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0; - if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0; - return 1; -} - -// get source that block at addr was compiled from (host pointers) -static void get_bounds(int addr,u_int *start,u_int *end) -{ - u_int *ptr=(u_int *)addr; - #ifndef HAVE_ARMV7 - // get from literal pool - assert((*ptr&0xFFFF0000)==0xe59f0000); - u_int offset=*ptr&0xfff; - u_int *l_ptr=(void *)ptr+offset+8; - u_int source=l_ptr[0]; - //u_int copy=l_ptr[1]; - u_int len=l_ptr[2]; - ptr+=4; - #else - // ARMv7 movw/movt - assert((*ptr&0xFFF00000)==0xe3000000); - u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000); - //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000); - u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000); - ptr+=6; - #endif - if((*ptr&0xFF000000)!=0xeb000000) ptr++; - assert((*ptr&0xFF000000)==0xeb000000); // bl instruction - *start=source; - *end=source+len; -} - -/* Register allocation */ - -// Note: registers are allocated clean (unmodified state) -// if you intend to modify the register, you must call dirty_reg(). 
-static void alloc_reg(struct regstat *cur,int i,signed char reg) -{ - int r,hr; - int preferred_reg = (reg&7); - if(reg==CCREG) preferred_reg=HOST_CCREG; - if(reg==PTEMP||reg==FTEMP) preferred_reg=12; - - // Don't allocate unused registers - if((cur->u>>reg)&1) return; - - // see if it's already allocated - for(hr=0;hrregmap[hr]==reg) return; - } - - // Keep the same mapping if the register was already allocated in a loop - preferred_reg = loop_reg(i,reg,preferred_reg); - - // Try to allocate the preferred register - if(cur->regmap[preferred_reg]==-1) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; - if(r<64&&((cur->u>>r)&1)) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { - cur->regmap[preferred_reg]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]; - if(r>=0) { - if(r<64) { - if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} - } - else - { - if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} - } - } - } - // Try to allocate any available register, but prefer - // registers that have not been used recently. - if(i>0) { - for(hr=0;hrregmap[hr]==-1) { - if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); - //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. 
- if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - // Alloc preferred register if available - if(hsn[r=cur->regmap[preferred_reg]&63]==j) { - for(hr=0;hrregmap[hr]&63)==r) { - cur->regmap[hr]=-1; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg; - return; - } - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<uu>>reg)&1) return; - - // see if the upper half is already allocated - for(hr=0;hrregmap[hr]==reg+64) return; - } - - // Keep the same mapping if the register was already allocated in a loop - preferred_reg = loop_reg(i,reg,preferred_reg); - - // Try to allocate the preferred register - if(cur->regmap[preferred_reg]==-1) { - cur->regmap[preferred_reg]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]; - if(r<64&&((cur->u>>r)&1)) { - cur->regmap[preferred_reg]=reg|64; - cur->dirty&=~(1<isconst&=~(1<=64&&((cur->uu>>(r&63))&1)) { - cur->regmap[preferred_reg]=reg|64; - cur->dirty&=~(1<isconst&=~(1<=0;hr--) - { - r=cur->regmap[hr]; - if(r>=0) { - if(r<64) { - if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;} - } - else - { - if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;} - } - } - } - // Try to allocate any available register, but prefer - // registers that have not been used recently. 
- if(i>0) { - for(hr=0;hrregmap[hr]==-1) { - if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==-1) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]); - //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. - if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - // Alloc preferred register if available - if(hsn[r=cur->regmap[preferred_reg]&63]==j) { - for(hr=0;hrregmap[hr]&63)==r) { - cur->regmap[hr]=-1; - cur->dirty&=~(1<isconst&=~(1<regmap[preferred_reg]=reg|64; - return; - } - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg|64; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==reg) return; - } - - // Try to allocate any available register - for(hr=HOST_REGS-1;hr>=0;hr--) { - if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;hr--) - { - r=cur->regmap[hr]; - if(r>=0) { - if(r<64) { - if((cur->u>>r)&1) { - if(i==0||((unneeded_reg[i-1]>>r)&1)) { - cur->regmap[hr]=reg; 
- cur->dirty&=~(1<isconst&=~(1<uu>>(r&63))&1) { - if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]); - if(i>0) { - // Don't evict the cycle count at entry points, otherwise the entry - // stub will have to write it. - if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2; - if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2; - for(j=10;j>=3;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) { - for(hr=0;hr2) { - if(cur->regmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<2) { - if(cur->regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<=0;j--) - { - for(r=1;r<=MAXREG;r++) - { - if(hsn[r]==j) { - for(hr=0;hrregmap[hr]==r+64) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[hr]==r) { - cur->regmap[hr]=reg; - cur->dirty&=~(1<isconst&=~(1<regmap[n]==reg) { - dirty=(cur->dirty>>n)&1; - cur->regmap[n]=-1; - } - } - - cur->regmap[hr]=reg; - cur->dirty&=~(1<dirty|=dirty<isconst&=~(1<0) - { - if(imm<256) { - *encoded=((i&30)<<7)|imm; - return 1; - } - imm=(imm>>2)|(imm<<30);i-=2; - } - return 0; -} - -static void genimm_checked(u_int imm,u_int *encoded) -{ - u_int ret=genimm(imm,encoded); - assert(ret); - (void)ret; -} - -static u_int genjmp(u_int addr) -{ - int offset=addr-(int)out-8; - if(offset<-33554432||offset>=33554432) { - if (addr>2) { - SysPrintf("genjmp: out of range: %08x\n", offset); - exit(1); - } - return 0; - } - return ((u_int)offset>>2)&0xffffff; -} - -static void emit_mov(int rs,int rt) -{ - assem_debug("mov %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_movs(int rs,int rt) -{ - assem_debug("movs 
%s,%s\n",regname[rt],regname[rs]); - output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_add(int rs1,int rs2,int rt) -{ - assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_adds(int rs1,int rs2,int rt) -{ - assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_adcs(int rs1,int rs2,int rt) -{ - assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_sbc(int rs1,int rs2,int rt) -{ - assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_sbcs(int rs1,int rs2,int rt) -{ - assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_neg(int rs, int rt) -{ - assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]); - output_w32(0xe2600000|rd_rn_rm(rt,rs,0)); -} - -static void emit_negs(int rs, int rt) -{ - assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]); - output_w32(0xe2700000|rd_rn_rm(rt,rs,0)); -} - -static void emit_sub(int rs1,int rs2,int rt) -{ - assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_subs(int rs1,int rs2,int rt) -{ - assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_zeroreg(int rt) -{ - assem_debug("mov %s,#0\n",regname[rt]); - output_w32(0xe3a00000|rd_rn_rm(rt,0,0)); -} - -static void emit_loadlp(u_int imm,u_int rt) -{ - add_literal((int)out,imm); - assem_debug("ldr %s,pc+? 
[=%x]\n",regname[rt],imm); - output_w32(0xe5900000|rd_rn_rm(rt,15,0)); -} - -static void emit_movw(u_int imm,u_int rt) -{ - assert(imm<65536); - assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm); - output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000)); -} - -static void emit_movt(u_int imm,u_int rt) -{ - assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000); - output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000)); -} - -static void emit_movimm(u_int imm,u_int rt) -{ - u_int armval; - if(genimm(imm,&armval)) { - assem_debug("mov %s,#%d\n",regname[rt],imm); - output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); - }else if(genimm(~imm,&armval)) { - assem_debug("mvn %s,#%d\n",regname[rt],imm); - output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); - }else if(imm<65536) { - #ifndef HAVE_ARMV7 - assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00); - output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8)); - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); - output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); - #else - emit_movw(imm,rt); - #endif - }else{ - #ifndef HAVE_ARMV7 - emit_loadlp(imm,rt); - #else - emit_movw(imm&0x0000FFFF,rt); - emit_movt(imm&0xFFFF0000,rt); - #endif - } -} - -static void emit_pcreladdr(u_int rt) -{ - assem_debug("add %s,pc,#?\n",regname[rt]); - output_w32(0xe2800000|rd_rn_rm(rt,15,0)); -} - -static void emit_loadreg(int r, int hr) -{ - if(r&64) { - SysPrintf("64bit load in 32bit mode!\n"); - assert(0); - return; - } - if((r&63)==0) - emit_zeroreg(hr); - else { - int addr=((int)reg)+((r&63)<>4); - if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); - if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); - if(r==CCREG) addr=(int)&cycle_count; - if(r==CSREG) addr=(int)&Status; - if(r==FSREG) addr=(int)&FCR31; - if(r==INVCP) addr=(int)&invc_ptr; - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("ldr %s,fp+%d\n",regname[hr],offset); - 
output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset); - } -} - -static void emit_storereg(int r, int hr) -{ - if(r&64) { - SysPrintf("64bit store in 32bit mode!\n"); - assert(0); - return; - } - int addr=((int)reg)+((r&63)<>4); - if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4); - if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4); - if(r==CCREG) addr=(int)&cycle_count; - if(r==FSREG) addr=(int)&FCR31; - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("str %s,fp+%d\n",regname[hr],offset); - output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset); -} - -static void emit_test(int rs, int rt) -{ - assem_debug("tst %s,%s\n",regname[rs],regname[rt]); - output_w32(0xe1100000|rd_rn_rm(0,rs,rt)); -} - -static void emit_testimm(int rs,int imm) -{ - u_int armval; - assem_debug("tst %s,#%d\n",regname[rs],imm); - genimm_checked(imm,&armval); - output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval); -} - -static void emit_testeqimm(int rs,int imm) -{ - u_int armval; - assem_debug("tsteq %s,$%d\n",regname[rs],imm); - genimm_checked(imm,&armval); - output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval); -} - -static void emit_not(int rs,int rt) -{ - assem_debug("mvn %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe1e00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_mvnmi(int rs,int rt) -{ - assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]); - output_w32(0x41e00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_and(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_or(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_or_and_set_flags(int rs1,int rs2,int rt) -{ - assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt) -{ - 
assert(rs<16); - assert(rt<16); - assert(imm<32); - assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm); - output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7)); -} - -static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(imm<32); - assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm); - output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7)); -} - -static void emit_xor(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_addimm(u_int rs,int imm,u_int rt) -{ - assert(rs<16); - assert(rt<16); - if(imm!=0) { - u_int armval; - if(genimm(imm,&armval)) { - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); - }else if(genimm(-imm,&armval)) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm); - output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); - #ifdef HAVE_ARMV7 - }else if(rt!=rs&&(u_int)imm<65536) { - emit_movw(imm&0x0000ffff,rt); - emit_add(rs,rt,rt); - }else if(rt!=rs&&(u_int)-imm<65536) { - emit_movw(-imm&0x0000ffff,rt); - emit_sub(rs,rt,rt); - #endif - }else if((u_int)-imm<65536) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00); - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); - output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8)); - output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); - }else { - do { - int shift = (ffs(imm) - 1) & ~1; - int imm8 = imm & (0xff << shift); - genimm_checked(imm8,&armval); - assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8); - output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); - rs = rt; - imm &= ~imm8; - } - while (imm != 0); - } - } - else if(rs!=rt) emit_mov(rs,rt); -} - -static void emit_addimm_and_set_flags(int imm,int rt) -{ - assert(imm>-65536&&imm<65536); - u_int armval; - 
if(genimm(imm,&armval)) { - assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval); - }else if(genimm(-imm,&armval)) { - assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm); - output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval); - }else if(imm<0) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00); - assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); - output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8)); - output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); - }else{ - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00); - assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); - output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8)); - output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); - } -} - -static void emit_addimm_no_flags(u_int imm,u_int rt) -{ - emit_addimm(rt,imm,rt); -} - -static void emit_addnop(u_int r) -{ - assert(r<16); - assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]); - output_w32(0xe2800000|rd_rn_rm(r,r,0)); -} - -static void emit_adcimm(u_int rs,int imm,u_int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_rscimm(int rs,int imm,u_int rt) -{ - assert(0); - u_int armval; - genimm_checked(imm,&armval); - assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl) -{ - // TODO: if(genimm(imm,&armval)) ... 
- // else - emit_movimm(imm,HOST_TEMPREG); - emit_adds(HOST_TEMPREG,rsl,rtl); - emit_adcimm(rsh,0,rth); -} - -static void emit_andimm(int rs,int imm,int rt) -{ - u_int armval; - if(imm==0) { - emit_zeroreg(rt); - }else if(genimm(imm,&armval)) { - assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval); - }else if(genimm(~imm,&armval)) { - assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval); - }else if(imm==65535) { - #ifndef HAVE_ARMV6 - assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]); - output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF); - assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]); - output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF); - #else - assem_debug("uxth %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs)); - #endif - }else{ - assert(imm>0&&imm<65535); - #ifndef HAVE_ARMV7 - assem_debug("mov r14,#%d\n",imm&0xFF00); - output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8)); - assem_debug("add r14,r14,#%d\n",imm&0xFF); - output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0)); - #else - emit_movw(imm,HOST_TEMPREG); - #endif - assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]); - output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG)); - } -} - -static void emit_orimm(int rs,int imm,int rt) -{ - u_int armval; - if(imm==0) { - if(rs!=rt) emit_mov(rs,rt); - }else if(genimm(imm,&armval)) { - assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval); - }else{ - assert(imm>0&&imm<65536); - assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); - assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF); - output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8)); - output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); - } -} - -static void emit_xorimm(int rs,int imm,int rt) -{ - u_int armval; - if(imm==0) { 
- if(rs!=rt) emit_mov(rs,rt); - }else if(genimm(imm,&armval)) { - assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval); - }else{ - assert(imm>0&&imm<65536); - assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); - assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF); - output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8)); - output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0)); - } -} - -static void emit_shlimm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - //if(imm==1) ... - assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); -} - -static void emit_lsls_imm(int rs,int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); -} - -static unused void emit_lslpls_imm(int rs,int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); -} - -static void emit_shrimm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); -} - -static void emit_sarimm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7)); -} - -static void emit_rorimm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7)); -} - -static void emit_shldimm(int rs,int rs2,u_int imm,int rt) -{ - assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); - assert(imm>0); - assert(imm<32); - //if(imm==1) ... 
- assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); - assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); - output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); -} - -static void emit_shrdimm(int rs,int rs2,u_int imm,int rt) -{ - assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm); - assert(imm>0); - assert(imm<32); - //if(imm==1) ... - assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7)); - assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm); - output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7)); -} - -static void emit_signextend16(int rs,int rt) -{ - #ifndef HAVE_ARMV6 - emit_shlimm(rs,16,rt); - emit_sarimm(rt,16,rt); - #else - assem_debug("sxth %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs)); - #endif -} - -static void emit_signextend8(int rs,int rt) -{ - #ifndef HAVE_ARMV6 - emit_shlimm(rs,24,rt); - emit_sarimm(rt,24,rt); - #else - assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe6af0070|rd_rn_rm(rt,0,rs)); - #endif -} - -static void emit_shl(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - //if(imm==1) ... 
- assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8)); -} - -static void emit_shr(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8)); -} - -static void emit_sar(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]); - output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); -} - -static void emit_orrshl(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); - output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8)); -} - -static void emit_orrshr(u_int rs,u_int shift,u_int rt) -{ - assert(rs<16); - assert(rt<16); - assert(shift<16); - assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]); - output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8)); -} - -static void emit_cmpimm(int rs,int imm) -{ - u_int armval; - if(genimm(imm,&armval)) { - assem_debug("cmp %s,#%d\n",regname[rs],imm); - output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval); - }else if(genimm(-imm,&armval)) { - assem_debug("cmn %s,#%d\n",regname[rs],imm); - output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval); - }else if(imm>0) { - assert(imm<65536); - emit_movimm(imm,HOST_TEMPREG); - assem_debug("cmp %s,r14\n",regname[rs]); - output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG)); - }else{ - assert(imm>-65536); - emit_movimm(-imm,HOST_TEMPREG); - assem_debug("cmn %s,r14\n",regname[rs]); - output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG)); - } -} - -static void emit_cmovne_imm(int imm,int rt) -{ - assem_debug("movne %s,#%d\n",regname[rt],imm); - u_int armval; - genimm_checked(imm,&armval); - 
output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval); -} - -static void emit_cmovl_imm(int imm,int rt) -{ - assem_debug("movlt %s,#%d\n",regname[rt],imm); - u_int armval; - genimm_checked(imm,&armval); - output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval); -} - -static void emit_cmovb_imm(int imm,int rt) -{ - assem_debug("movcc %s,#%d\n",regname[rt],imm); - u_int armval; - genimm_checked(imm,&armval); - output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval); -} - -static void emit_cmovs_imm(int imm,int rt) -{ - assem_debug("movmi %s,#%d\n",regname[rt],imm); - u_int armval; - genimm_checked(imm,&armval); - output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); -} - -static void emit_cmove_reg(int rs,int rt) -{ - assem_debug("moveq %s,%s\n",regname[rt],regname[rs]); - output_w32(0x01a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_cmovne_reg(int rs,int rt) -{ - assem_debug("movne %s,%s\n",regname[rt],regname[rs]); - output_w32(0x11a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_cmovl_reg(int rs,int rt) -{ - assem_debug("movlt %s,%s\n",regname[rt],regname[rs]); - output_w32(0xb1a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_cmovs_reg(int rs,int rt) -{ - assem_debug("movmi %s,%s\n",regname[rt],regname[rs]); - output_w32(0x41a00000|rd_rn_rm(rt,0,rs)); -} - -static void emit_slti32(int rs,int imm,int rt) -{ - if(rs!=rt) emit_zeroreg(rt); - emit_cmpimm(rs,imm); - if(rs==rt) emit_movimm(0,rt); - emit_cmovl_imm(1,rt); -} - -static void emit_sltiu32(int rs,int imm,int rt) -{ - if(rs!=rt) emit_zeroreg(rt); - emit_cmpimm(rs,imm); - if(rs==rt) emit_movimm(0,rt); - emit_cmovb_imm(1,rt); -} - -static void emit_slti64_32(int rsh,int rsl,int imm,int rt) -{ - assert(rsh!=rt); - emit_slti32(rsl,imm,rt); - if(imm>=0) - { - emit_test(rsh,rsh); - emit_cmovne_imm(0,rt); - emit_cmovs_imm(1,rt); - } - else - { - emit_cmpimm(rsh,-1); - emit_cmovne_imm(0,rt); - emit_cmovl_imm(1,rt); - } -} - -static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt) -{ - assert(rsh!=rt); - emit_sltiu32(rsl,imm,rt); - 
if(imm>=0) - { - emit_test(rsh,rsh); - emit_cmovne_imm(0,rt); - } - else - { - emit_cmpimm(rsh,-1); - emit_cmovne_imm(1,rt); - } -} - -static void emit_cmp(int rs,int rt) -{ - assem_debug("cmp %s,%s\n",regname[rs],regname[rt]); - output_w32(0xe1500000|rd_rn_rm(0,rs,rt)); -} - -static void emit_set_gz32(int rs, int rt) -{ - //assem_debug("set_gz32\n"); - emit_cmpimm(rs,1); - emit_movimm(1,rt); - emit_cmovl_imm(0,rt); -} - -static void emit_set_nz32(int rs, int rt) -{ - //assem_debug("set_nz32\n"); - if(rs!=rt) emit_movs(rs,rt); - else emit_test(rs,rs); - emit_cmovne_imm(1,rt); -} - -static void emit_set_gz64_32(int rsh, int rsl, int rt) -{ - //assem_debug("set_gz64\n"); - emit_set_gz32(rsl,rt); - emit_test(rsh,rsh); - emit_cmovne_imm(1,rt); - emit_cmovs_imm(0,rt); -} - -static void emit_set_nz64_32(int rsh, int rsl, int rt) -{ - //assem_debug("set_nz64\n"); - emit_or_and_set_flags(rsh,rsl,rt); - emit_cmovne_imm(1,rt); -} - -static void emit_set_if_less32(int rs1, int rs2, int rt) -{ - //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); - if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); - emit_cmp(rs1,rs2); - if(rs1==rt||rs2==rt) emit_movimm(0,rt); - emit_cmovl_imm(1,rt); -} - -static void emit_set_if_carry32(int rs1, int rs2, int rt) -{ - //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]); - if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt); - emit_cmp(rs1,rs2); - if(rs1==rt||rs2==rt) emit_movimm(0,rt); - emit_cmovb_imm(1,rt); -} - -static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt) -{ - //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); - assert(u1!=rt); - assert(u2!=rt); - emit_cmp(l1,l2); - emit_movimm(0,rt); - emit_sbcs(u1,u2,HOST_TEMPREG); - emit_cmovl_imm(1,rt); -} - -static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt) -{ - //assem_debug("set if carry64 
(%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]); - assert(u1!=rt); - assert(u2!=rt); - emit_cmp(l1,l2); - emit_movimm(0,rt); - emit_sbcs(u1,u2,HOST_TEMPREG); - emit_cmovb_imm(1,rt); -} - -static void emit_call(int a) -{ - assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8); - u_int offset=genjmp(a); - output_w32(0xeb000000|offset); -} - -static void emit_jmp(int a) -{ - assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8); - u_int offset=genjmp(a); - output_w32(0xea000000|offset); -} - -static void emit_jne(int a) -{ - assem_debug("bne %x\n",a); - u_int offset=genjmp(a); - output_w32(0x1a000000|offset); -} - -static void emit_jeq(int a) -{ - assem_debug("beq %x\n",a); - u_int offset=genjmp(a); - output_w32(0x0a000000|offset); -} - -static void emit_js(int a) -{ - assem_debug("bmi %x\n",a); - u_int offset=genjmp(a); - output_w32(0x4a000000|offset); -} - -static void emit_jns(int a) -{ - assem_debug("bpl %x\n",a); - u_int offset=genjmp(a); - output_w32(0x5a000000|offset); -} - -static void emit_jl(int a) -{ - assem_debug("blt %x\n",a); - u_int offset=genjmp(a); - output_w32(0xba000000|offset); -} - -static void emit_jge(int a) -{ - assem_debug("bge %x\n",a); - u_int offset=genjmp(a); - output_w32(0xaa000000|offset); -} - -static void emit_jno(int a) -{ - assem_debug("bvc %x\n",a); - u_int offset=genjmp(a); - output_w32(0x7a000000|offset); -} - -static void emit_jc(int a) -{ - assem_debug("bcs %x\n",a); - u_int offset=genjmp(a); - output_w32(0x2a000000|offset); -} - -static void emit_jcc(int a) -{ - assem_debug("bcc %x\n",a); - u_int offset=genjmp(a); - output_w32(0x3a000000|offset); -} - -static void emit_callreg(u_int r) -{ - assert(r<15); - assem_debug("blx %s\n",regname[r]); - output_w32(0xe12fff30|r); -} - -static void emit_jmpreg(u_int r) -{ - assem_debug("mov pc,%s\n",regname[r]); - output_w32(0xe1a00000|rd_rn_rm(15,0,r)); -} - -static void emit_readword_indexed(int offset, int rs, int rt) -{ - 
assert(offset>-4096&&offset<4096); - assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset); - }else{ - output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset)); - } -} - -static void emit_readword_dualindexedx4(int rs1, int rs2, int rt) -{ - assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) -{ - if(map<0) emit_readword_indexed(addr, rs, rt); - else { - assert(addr==0); - emit_readword_dualindexedx4(rs, map, rt); - } -} - -static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl) -{ - if(map<0) { - if(rh>=0) emit_readword_indexed(addr, rs, rh); - emit_readword_indexed(addr+4, rs, rl); - }else{ - assert(rh!=rs); - if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh); - emit_addimm(map,1,map); - emit_readword_indexed_tlb(addr, rs, map, rl); - } -} - -static void 
emit_movsbl_indexed(int offset, int rs, int rt) -{ - assert(offset>-256&&offset<256); - assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt) -{ - if(map<0) emit_movsbl_indexed(addr, rs, rt); - else { - if(addr==0) { - emit_shlimm(map,2,map); - assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]); - output_w32(0xe19000d0|rd_rn_rm(rt,rs,map)); - }else{ - assert(addr>-256&&addr<256); - assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]); - output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7)); - emit_movsbl_indexed(addr, rt, rt); - } - } -} - -static void emit_movswl_indexed(int offset, int rs, int rt) -{ - assert(offset>-256&&offset<256); - assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_movzbl_indexed(int offset, int rs, int rt) -{ - assert(offset>-4096&&offset<4096); - assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset); - }else{ - output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset)); - } -} - -static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt) -{ - assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt) -{ - if(map<0) emit_movzbl_indexed(addr, rs, rt); - else { - if(addr==0) { - emit_movzbl_dualindexedx4(rs, map, rt); - }else{ - emit_addimm(rs,addr,rt); - emit_movzbl_dualindexedx4(rt, map, rt); 
- } - } -} - -static void emit_movzwl_indexed(int offset, int rs, int rt) -{ - assert(offset>-256&&offset<256); - assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_ldrd(int offset, int rs, int rt) -{ - assert(offset>-256&&offset<256); - assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_readword(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("ldr %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset); -} - -static unused void emit_movsbl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static unused void emit_movswl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static unused void emit_movzbl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("ldrb %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset); -} - -static unused void emit_movzwl(int addr, int rt) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("ldrh %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static void emit_writeword_indexed(int rt, int offset, 
int rs) -{ - assert(offset>-4096&&offset<4096); - assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset); - }else{ - output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset)); - } -} - -static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2) -{ - assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp) -{ - if(map<0) emit_writeword_indexed(rt, addr, rs); - else { - assert(addr==0); - emit_writeword_dualindexedx4(rt, rs, map); - } -} - -static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp) -{ - if(map<0) { - if(rh>=0) emit_writeword_indexed(rh, addr, rs); - emit_writeword_indexed(rl, addr+4, rs); - }else{ - assert(rh>=0); - if(temp!=rs) emit_addimm(map,1,temp); - emit_writeword_indexed_tlb(rh, addr, rs, map, temp); - if(temp!=rs) emit_writeword_indexed_tlb(rl, addr, rs, temp, temp); - else { - emit_addimm(rs,4,rs); - emit_writeword_indexed_tlb(rl, addr, rs, map, temp); - } - } -} - -static void emit_writehword_indexed(int rt, int offset, int rs) -{ - assert(offset>-256&&offset<256); - assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf)); - }else{ - output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf)); - } -} - -static void emit_writebyte_indexed(int rt, int offset, int rs) -{ - assert(offset>-4096&&offset<4096); - assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset); - if(offset>=0) { - output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset); - }else{ - output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset)); - } -} - -static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2) -{ - assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); - 
output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100); -} - -static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) -{ - if(map<0) emit_writebyte_indexed(rt, addr, rs); - else { - if(addr==0) { - emit_writebyte_dualindexedx4(rt, rs, map); - }else{ - emit_addimm(rs,addr,temp); - emit_writebyte_dualindexedx4(rt, temp, map); - } - } -} - -static void emit_strcc_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_strccb_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_strcch_dualindexed(int rs1, int rs2, int rt) -{ - assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_writeword(int rt, int addr) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("str %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset); -} - -static unused void emit_writehword(int rt, int addr) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<256); - assem_debug("strh %s,fp+%d\n",regname[rt],offset); - output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf)); -} - -static unused void emit_writebyte(int rt, int addr) -{ - u_int offset = addr-(u_int)&dynarec_local; - assert(offset<4096); - assem_debug("strb %s,fp+%d\n",regname[rt],offset); - output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset); -} - -static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo) -{ - assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); - assert(rs1<16); - assert(rs2<16); - assert(hi<16); - assert(lo<16); - output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); -} - -static void emit_smull(u_int rs1, u_int rs2, u_int 
hi, u_int lo) -{ - assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]); - assert(rs1<16); - assert(rs2<16); - assert(hi<16); - assert(lo<16); - output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1); -} - -static void emit_clz(int rs,int rt) -{ - assem_debug("clz %s,%s\n",regname[rt],regname[rs]); - output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs)); -} - -static void emit_subcs(int rs1,int rs2,int rt) -{ - assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_shrcc_imm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); -} - -static void emit_shrne_imm(int rs,u_int imm,int rt) -{ - assert(imm>0); - assert(imm<32); - assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7)); -} - -static void emit_negmi(int rs, int rt) -{ - assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]); - output_w32(0x42600000|rd_rn_rm(rt,rs,0)); -} - -static void emit_negsmi(int rs, int rt) -{ - assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]); - output_w32(0x42700000|rd_rn_rm(rt,rs,0)); -} - -static void emit_orreq(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_orrne(u_int rs1,u_int rs2,u_int rt) -{ - assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); - output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2)); -} - -static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); -} - -static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("biceq %s,%s,%s 
lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); -} - -static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8)); -} - -static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); -} - -static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); -} - -static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) -{ - assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]); - output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8)); -} - -static void emit_teq(int rs, int rt) -{ - assem_debug("teq %s,%s\n",regname[rs],regname[rt]); - output_w32(0xe1300000|rd_rn_rm(0,rs,rt)); -} - -static void emit_rsbimm(int rs, int imm, int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval); -} - -// Load 2 immediates optimizing for small code size -static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2) -{ - emit_movimm(imm1,rt1); - u_int armval; - if(genimm(imm2-imm1,&armval)) { - assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1); - output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval); - }else if(genimm(imm1-imm2,&armval)) { - assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2); - output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval); - } - else emit_movimm(imm2,rt2); -} - -// Conditionally select one of two 
immediates, optimizing for small code size -// This will only be called if HAVE_CMOV_IMM is defined -static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) -{ - u_int armval; - if(genimm(imm2-imm1,&armval)) { - emit_movimm(imm1,rt); - assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1); - output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval); - }else if(genimm(imm1-imm2,&armval)) { - emit_movimm(imm1,rt); - assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2); - output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval); - } - else { - #ifndef HAVE_ARMV7 - emit_movimm(imm1,rt); - add_literal((int)out,imm2); - assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2); - output_w32(0x15900000|rd_rn_rm(rt,15,0)); - #else - emit_movw(imm1&0x0000FFFF,rt); - if((imm1&0xFFFF)!=(imm2&0xFFFF)) { - assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF); - output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000)); - } - emit_movt(imm1&0xFFFF0000,rt); - if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) { - assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000); - output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000)); - } - #endif - } -} - -// special case for checking invalid_code -static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) -{ - assert(imm<128&&imm>=0); - assert(r>=0&&r<16); - assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]); - output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620); - emit_cmpimm(HOST_TEMPREG,imm); -} - -static void emit_callne(int a) -{ - assem_debug("blne %x\n",a); - u_int offset=genjmp(a); - output_w32(0x1b000000|offset); -} - -// Used to preload hash table entries -static unused void emit_prefetchreg(int r) -{ - assem_debug("pld %s\n",regname[r]); - output_w32(0xf5d0f000|rd_rn_rm(0,r,0)); -} - -// Special case for mini_ht -static void emit_ldreq_indexed(int rs, u_int offset, int rt) -{ - assert(offset<4096); - 
assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset); - output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset); -} - -static unused void emit_bicne_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_biccs_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_bicvc_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_bichi_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_orrvs_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_orrne_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_andne_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval); -} - -static unused void emit_addpl_imm(int rs,int imm,int rt) -{ - u_int armval; - genimm_checked(imm,&armval); - assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm); - output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval); -} - -static void emit_jno_unlikely(int a) -{ - //emit_jno(a); - 
assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a); - output_w32(0x72800000|rd_rn_rm(15,15,0)); -} - -static void save_regs_all(u_int reglist) -{ - int i; - if(!reglist) return; - assem_debug("stmia fp,{"); - for(i=0;i<16;i++) - if(reglist&(1<=BASE_ADDR&&addr<(BASE_ADDR+(1<=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000)); -//DEBUG > -#ifdef DEBUG_CYCLE_COUNT - emit_readword((int)&last_count,ECX); - emit_add(HOST_CCREG,ECX,HOST_CCREG); - emit_readword((int)&next_interupt,ECX); - emit_writeword(HOST_CCREG,(int)&Count); - emit_sub(HOST_CCREG,ECX,HOST_CCREG); - emit_writeword(ECX,(int)&last_count); -#endif -//DEBUG < - emit_jmp(linker); -} - -static void emit_extjump(int addr, int target) -{ - emit_extjump2(addr, target, (int)dyna_linker); -} - -static void emit_extjump_ds(int addr, int target) -{ - emit_extjump2(addr, target, (int)dyna_linker_ds); -} - -// put rt_val into rt, potentially making use of rs with value rs_val -static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt) -{ - u_int armval; - int diff; - if(genimm(rt_val,&armval)) { - assem_debug("mov %s,#%d\n",regname[rt],rt_val); - output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval); - return; - } - if(genimm(~rt_val,&armval)) { - assem_debug("mvn %s,#%d\n",regname[rt],rt_val); - output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); - return; - } - diff=rt_val-rs_val; - if(genimm(diff,&armval)) { - assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff); - output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval); - return; - }else if(genimm(-diff,&armval)) { - assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff); - output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); - return; - } - emit_movimm(rt_val,rt); -} - -// return 1 if above function can do it's job cheaply -static int is_similar_value(u_int v1,u_int v2) -{ - u_int xs; - int diff; - if(v1==v2) return 1; - diff=v2-v1; - for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2) - ; - if(xs<0x100) return 1; - 
for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2) - ; - if(xs<0x100) return 1; - return 0; -} - -// trashes r2 -static void pass_args(int a0, int a1) -{ - if(a0==1&&a1==0) { - // must swap - emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0); - } - else if(a0!=0&&a1==0) { - emit_mov(a1,1); - if (a0>=0) emit_mov(a0,0); - } - else { - if(a0>=0&&a0!=0) emit_mov(a0,0); - if(a1>=0&&a1!=1) emit_mov(a1,1); - } -} - -static void mov_loadtype_adj(int type,int rs,int rt) -{ - switch(type) { - case LOADB_STUB: emit_signextend8(rs,rt); break; - case LOADBU_STUB: emit_andimm(rs,0xff,rt); break; - case LOADH_STUB: emit_signextend16(rs,rt); break; - case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break; - case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break; - default: assert(0); - } -} - -#include "pcsxmem.h" -#include "pcsxmem_inline.c" - -static void do_readstub(int n) -{ - assem_debug("do_readstub %x\n",start+stubs[n][3]*4); - literal_pool(256); - set_jump_target(stubs[n][1],(int)out); - int type=stubs[n][0]; - int i=stubs[n][3]; - int rs=stubs[n][4]; - struct regstat *i_regs=(struct regstat *)stubs[n][5]; - u_int reglist=stubs[n][7]; - signed char *i_regmap=i_regs->regmap; - int rt; - if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) { - rt=get_reg(i_regmap,FTEMP); - }else{ - rt=get_reg(i_regmap,rt1[i]); - } - assert(rs>=0); - int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0; - reglist|=(1<=0&&rt1[i]!=0) - reglist&=~(1<=0&&rt1[i]!=0)) { - switch(type) { - case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break; - case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break; - case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break; - case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break; - case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break; - } - } - if(regs_saved) { - restore_jump=(int)out; - emit_jcc(0); // jump to reg restore - } - else - emit_jcc(stubs[n][2]); // return address - - if(!regs_saved) - save_regs(reglist); - int handler=0; - 
if(type==LOADB_STUB||type==LOADBU_STUB) - handler=(int)jump_handler_read8; - if(type==LOADH_STUB||type==LOADHU_STUB) - handler=(int)jump_handler_read16; - if(type==LOADW_STUB) - handler=(int)jump_handler_read32; - assert(handler!=0); - pass_args(rs,temp2); - int cc=get_reg(i_regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2); - emit_call(handler); - if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { - mov_loadtype_adj(type,0,rt); - } - if(restore_jump) - set_jump_target(restore_jump,(int)out); - restore_regs(reglist); - emit_jmp(stubs[n][2]); // return address -} - -// return memhandler, or get directly accessable address and return 0 -static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) -{ - u_int l1,l2=0; - l1=((u_int *)table)[addr>>12]; - if((l1&(1<<31))==0) { - u_int v=l1<<1; - *addr_host=v+addr; - return 0; - } - else { - l1<<=1; - if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB) - l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)]; - else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB) - l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2]; - else - l2=((u_int *)l1)[(addr&0xfff)/4]; - if((l2&(1<<31))==0) { - u_int v=l2<<1; - *addr_host=v+(addr&0xfff); - return 0; - } - return l2<<1; - } -} - -static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) -{ - int rs=get_reg(regmap,target); - int rt=get_reg(regmap,target); - if(rs<0) rs=get_reg(regmap,-1); - assert(rs>=0); - u_int handler,host_addr=0,is_dynamic,far_call=0; - int cc=get_reg(regmap,CCREG); - if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt)) - return; - handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr); - if (handler==0) { - if(rt<0||rt1[i]==0) - return; - if(addr!=host_addr) - emit_movimm_from(addr,rs,host_addr,rs); - switch(type) { - case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break; - case 
LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; - case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break; - case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break; - case LOADW_STUB: emit_readword_indexed(0,rs,rt); break; - default: assert(0); - } - return; - } - is_dynamic=pcsxmem_is_handler_dynamic(addr); - if(is_dynamic) { - if(type==LOADB_STUB||type==LOADBU_STUB) - handler=(int)jump_handler_read8; - if(type==LOADH_STUB||type==LOADHU_STUB) - handler=(int)jump_handler_read16; - if(type==LOADW_STUB) - handler=(int)jump_handler_read32; - } - - // call a memhandler - if(rt>=0&&rt1[i]!=0) - reglist&=~(1<=33554432) { - // unreachable memhandler, a plugin func perhaps - emit_movimm(handler,12); - far_call=1; - } - if(cc<0) - emit_loadreg(CCREG,2); - if(is_dynamic) { - emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); - } - else { - emit_readword((int)&last_count,3); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); - emit_add(2,3,2); - emit_writeword(2,(int)&Count); - } - - if(far_call) - emit_callreg(12); - else - emit_call(handler); - - if(rt>=0&&rt1[i]!=0) { - switch(type) { - case LOADB_STUB: emit_signextend8(0,rt); break; - case LOADBU_STUB: emit_andimm(0,0xff,rt); break; - case LOADH_STUB: emit_signextend16(0,rt); break; - case LOADHU_STUB: emit_andimm(0,0xffff,rt); break; - case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break; - default: assert(0); - } - } - restore_regs(reglist); -} - -static void do_writestub(int n) -{ - assem_debug("do_writestub %x\n",start+stubs[n][3]*4); - literal_pool(256); - set_jump_target(stubs[n][1],(int)out); - int type=stubs[n][0]; - int i=stubs[n][3]; - int rs=stubs[n][4]; - struct regstat *i_regs=(struct regstat *)stubs[n][5]; - u_int reglist=stubs[n][7]; - signed char *i_regmap=i_regs->regmap; - int rt,r; - if(itype[i]==C1LS||itype[i]==C2LS) { - rt=get_reg(i_regmap,r=FTEMP); - }else{ - rt=get_reg(i_regmap,r=rs2[i]); - } - assert(rs>=0); - assert(rt>=0); - int 
rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra; - int reglist2=reglist|(1<=0); - assert(rt>=0); - u_int handler,host_addr=0; - handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr); - if (handler==0) { - if(addr!=host_addr) - emit_movimm_from(addr,rs,host_addr,rs); - switch(type) { - case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break; - case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break; - case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break; - default: assert(0); - } - return; - } - - // call a memhandler - save_regs(reglist); - pass_args(rs,rt); - int cc=get_reg(regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2); - emit_movimm(handler,3); - // returns new cycle_count - emit_call((int)jump_handler_write_h); - emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc); - if(cc<0) - emit_storereg(CCREG,2); - restore_regs(reglist); -} - -static void do_unalignedwritestub(int n) -{ - assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4); - literal_pool(256); - set_jump_target(stubs[n][1],(int)out); - - int i=stubs[n][3]; - struct regstat *i_regs=(struct regstat *)stubs[n][4]; - int addr=stubs[n][5]; - u_int reglist=stubs[n][7]; - signed char *i_regmap=i_regs->regmap; - int temp2=get_reg(i_regmap,FTEMP); - int rt; - rt=get_reg(i_regmap,rs2[i]); - assert(rt>=0); - assert(addr>=0); - assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented - reglist|=(1<regmap_entry,i_regs->was32,i_regs->wasdirty); - if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_movimm(start+(i-ds)*4,EAX); // Get PC - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... 
- emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception); -} - -/* Special assem */ - -static void shift_assemble_arm(int i,struct regstat *i_regs) -{ - if(rt1[i]) { - if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV - { - signed char s,t,shift; - t=get_reg(i_regs->regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); - shift=get_reg(i_regs->regmap,rs2[i]); - if(t>=0){ - if(rs1[i]==0) - { - emit_zeroreg(t); - } - else if(rs2[i]==0) - { - assert(s>=0); - if(s!=t) emit_mov(s,t); - } - else - { - emit_andimm(shift,31,HOST_TEMPREG); - if(opcode2[i]==4) // SLLV - { - emit_shl(s,HOST_TEMPREG,t); - } - if(opcode2[i]==6) // SRLV - { - emit_shr(s,HOST_TEMPREG,t); - } - if(opcode2[i]==7) // SRAV - { - emit_sar(s,HOST_TEMPREG,t); - } - } - } - } else { // DSLLV/DSRLV/DSRAV - signed char sh,sl,th,tl,shift; - th=get_reg(i_regs->regmap,rt1[i]|64); - tl=get_reg(i_regs->regmap,rt1[i]); - sh=get_reg(i_regs->regmap,rs1[i]|64); - sl=get_reg(i_regs->regmap,rs1[i]); - shift=get_reg(i_regs->regmap,rs2[i]); - if(tl>=0){ - if(rs1[i]==0) - { - emit_zeroreg(tl); - if(th>=0) emit_zeroreg(th); - } - else if(rs2[i]==0) - { - assert(sl>=0); - if(sl!=tl) emit_mov(sl,tl); - if(th>=0&&sh!=th) emit_mov(sh,th); - } - else - { - // FIXME: What if shift==tl ? 
- assert(shift!=tl); - int temp=get_reg(i_regs->regmap,-1); - int real_th=th; - if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register - assert(sl>=0); - assert(sh>=0); - emit_andimm(shift,31,HOST_TEMPREG); - if(opcode2[i]==0x14) // DSLLV - { - if(th>=0) emit_shl(sh,HOST_TEMPREG,th); - emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); - emit_orrshr(sl,HOST_TEMPREG,th); - emit_andimm(shift,31,HOST_TEMPREG); - emit_testimm(shift,32); - emit_shl(sl,HOST_TEMPREG,tl); - if(th>=0) emit_cmovne_reg(tl,th); - emit_cmovne_imm(0,tl); - } - if(opcode2[i]==0x16) // DSRLV - { - assert(th>=0); - emit_shr(sl,HOST_TEMPREG,tl); - emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); - emit_orrshl(sh,HOST_TEMPREG,tl); - emit_andimm(shift,31,HOST_TEMPREG); - emit_testimm(shift,32); - emit_shr(sh,HOST_TEMPREG,th); - emit_cmovne_reg(th,tl); - if(real_th>=0) emit_cmovne_imm(0,th); - } - if(opcode2[i]==0x17) // DSRAV - { - assert(th>=0); - emit_shr(sl,HOST_TEMPREG,tl); - emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG); - if(real_th>=0) { - assert(temp>=0); - emit_sarimm(th,31,temp); - } - emit_orrshl(sh,HOST_TEMPREG,tl); - emit_andimm(shift,31,HOST_TEMPREG); - emit_testimm(shift,32); - emit_sar(sh,HOST_TEMPREG,th); - emit_cmovne_reg(th,tl); - if(real_th>=0) emit_cmovne_reg(temp,th); - } - } - } - } - } -} - -static void speculate_mov(int rs,int rt) -{ - if(rt!=0) { - smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); - else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]); - else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); - else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]); - else { - smrv_strong_next&=~(1<=0) { - if(get_final_value(hr,i,&value)) - smrv[rt1[i]]=value; - else smrv[rt1[i]]=constmap[i][hr]; - smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); - else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); - } - break; - case LOAD: - if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) { - // special 
case for BIOS - smrv[rt1[i]]=0xa0000000; - smrv_strong_next|=1<>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); -#endif -} - -enum { - MTYPE_8000 = 0, - MTYPE_8020, - MTYPE_0000, - MTYPE_A000, - MTYPE_1F80, -}; - -static int get_ptr_mem_type(u_int a) -{ - if(a < 0x00200000) { - if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0)) - // return wrong, must use memhandler for BIOS self-test to pass - // 007 does similar stuff from a00 mirror, weird stuff - return MTYPE_8000; - return MTYPE_0000; - } - if(0x1f800000 <= a && a < 0x1f801000) - return MTYPE_1F80; - if(0x80200000 <= a && a < 0x80800000) - return MTYPE_8020; - if(0xa0000000 <= a && a < 0xa0200000) - return MTYPE_A000; - return MTYPE_8000; -} - -static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) -{ - int jaddr=0,type=0; - int mr=rs1[i]; - if(((smrv_strong|smrv_weak)>>mr)&1) { - type=get_ptr_mem_type(smrv[mr]); - //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); - } - else { - // use the mirror we are running on - type=get_ptr_mem_type(start); - //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type); - } - - if(type==MTYPE_8020) { // RAM 80200000+ mirror - emit_andimm(addr,~0x00e00000,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - type=0; - } - else if(type==MTYPE_0000) { // RAM 0 mirror - emit_orimm(addr,0x80000000,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - type=0; - } - else if(type==MTYPE_A000) { // RAM A mirror - emit_andimm(addr,~0x20000000,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - type=0; - } - else if(type==MTYPE_1F80) { // scratchpad - if (psxH == (void *)0x1f800000) { - emit_addimm(addr,-0x1f800000,HOST_TEMPREG); - emit_cmpimm(HOST_TEMPREG,0x1000); - jaddr=(int)out; - emit_jc(0); - } - else { - // do usual RAM check, jump will go to the right handler - type=0; - } - } - - if(type==0) - { - emit_cmpimm(addr,RAM_SIZE); - jaddr=(int)out; - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Hint to branch predictor 
that the branch is unlikely to be taken - if(rs1[i]>=28) - emit_jno_unlikely(0); - else - #endif - emit_jno(0); - if(ram_offset!=0) { - emit_addimm(addr,ram_offset,HOST_TEMPREG); - addr=*addr_reg_override=HOST_TEMPREG; - } - } - - return jaddr; -} - -#define shift_assemble shift_assemble_arm - -static void loadlr_assemble_arm(int i,struct regstat *i_regs) -{ - int s,th,tl,temp,temp2,addr,map=-1; - int offset; - int jaddr=0; - int memtarget=0,c=0; - int fastload_reg_override=0; - u_int hr,reglist=0; - th=get_reg(i_regs->regmap,rt1[i]|64); - tl=get_reg(i_regs->regmap,rt1[i]); - s=get_reg(i_regs->regmap,rs1[i]); - temp=get_reg(i_regs->regmap,-1); - temp2=get_reg(i_regs->regmap,FTEMP); - addr=get_reg(i_regs->regmap,AGEN1+(i&1)); - assert(addr<0); - offset=imm[i]; - for(hr=0;hrregmap[hr]>=0) reglist|=1<=0) { - c=(i_regs->wasconst>>s)&1; - if(c) { - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - } - } - if(!c) { - #ifdef RAM_OFFSET - map=get_reg(i_regs->regmap,ROREG); - if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG); - #endif - emit_shlimm(addr,3,temp); - if (opcode[i]==0x22||opcode[i]==0x26) { - emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR - }else{ - emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR - } - jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); - } - else { - if(ram_offset&&memtarget) { - emit_addimm(temp2,ram_offset,HOST_TEMPREG); - fastload_reg_override=HOST_TEMPREG; - } - if (opcode[i]==0x22||opcode[i]==0x26) { - emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR - }else{ - emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR - } - } - if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR - if(!c||memtarget) { - int a=temp2; - if(fastload_reg_override) a=fastload_reg_override; - //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2); - emit_readword_indexed_tlb(0,a,map,temp2); - if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); - } - else - 
inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist); - if(rt1[i]) { - assert(tl>=0); - emit_andimm(temp,24,temp); -#ifdef BIG_ENDIAN_MIPS - if (opcode[i]==0x26) // LWR -#else - if (opcode[i]==0x22) // LWL -#endif - emit_xorimm(temp,24,temp); - emit_movimm(-1,HOST_TEMPREG); - if (opcode[i]==0x26) { - emit_shr(temp2,temp,temp2); - emit_bic_lsr(tl,HOST_TEMPREG,temp,tl); - }else{ - emit_shl(temp2,temp,temp2); - emit_bic_lsl(tl,HOST_TEMPREG,temp,tl); - } - emit_or(temp2,tl,tl); - } - //emit_storereg(rt1[i],tl); // DEBUG - } - if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR - // FIXME: little endian, fastload_reg_override - int temp2h=get_reg(i_regs->regmap,FTEMP|64); - if(!c||memtarget) { - //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h); - //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2); - emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2); - if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist); - } - else - inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist); - if(rt1[i]) { - assert(th>=0); - assert(tl>=0); - emit_testimm(temp,32); - emit_andimm(temp,24,temp); - if (opcode[i]==0x1A) { // LDL - emit_rsbimm(temp,32,HOST_TEMPREG); - emit_shl(temp2h,temp,temp2h); - emit_orrshr(temp2,HOST_TEMPREG,temp2h); - emit_movimm(-1,HOST_TEMPREG); - emit_shl(temp2,temp,temp2); - emit_cmove_reg(temp2h,th); - emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl); - emit_bicne_lsl(th,HOST_TEMPREG,temp,th); - emit_orreq(temp2,tl,tl); - emit_orrne(temp2,th,th); - } - if (opcode[i]==0x1B) { // LDR - emit_xorimm(temp,24,temp); - emit_rsbimm(temp,32,HOST_TEMPREG); - emit_shr(temp2,temp,temp2); - emit_orrshl(temp2h,HOST_TEMPREG,temp2); - emit_movimm(-1,HOST_TEMPREG); - emit_shr(temp2h,temp,temp2h); - emit_cmovne_reg(temp2,tl); - emit_bicne_lsr(th,HOST_TEMPREG,temp,th); - emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl); - emit_orrne(temp2h,th,th); 
- emit_orreq(temp2h,tl,tl); - } - } - } -} -#define loadlr_assemble loadlr_assemble_arm - -static void cop0_assemble(int i,struct regstat *i_regs) -{ - if(opcode2[i]==0) // MFC0 - { - signed char t=get_reg(i_regs->regmap,rt1[i]); - char copr=(source[i]>>11)&0x1f; - //assert(t>=0); // Why does this happen? OOT is weird - if(t>=0&&rt1[i]!=0) { - emit_readword((int)®_cop0+copr*4,t); - } - } - else if(opcode2[i]==4) // MTC0 - { - signed char s=get_reg(i_regs->regmap,rs1[i]); - char copr=(source[i]>>11)&0x1f; - assert(s>=0); - wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32); - if(copr==9||copr==11||copr==12||copr==13) { - emit_readword((int)&last_count,HOST_TEMPREG); - emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc - emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_writeword(HOST_CCREG,(int)&Count); - } - // What a mess. The status register (12) can enable interrupts, - // so needs a special case to handle a pending interrupt. - // The interrupt must be taken immediately, because a subsequent - // instruction might disable interrupts again. - if(copr==12||copr==13) { - if (is_delayslot) { - // burn cycles to cause cc_interrupt, which will - // reschedule next_interupt. Relies on CCREG from above. 
- assem_debug("MTC0 DS %d\n", copr); - emit_writeword(HOST_CCREG,(int)&last_count); - emit_movimm(0,HOST_CCREG); - emit_storereg(CCREG,HOST_CCREG); - emit_loadreg(rs1[i],1); - emit_movimm(copr,0); - emit_call((int)pcsx_mtc0_ds); - emit_loadreg(rs1[i],s); - return; - } - emit_movimm(start+i*4+4,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,(int)&pcaddr); - emit_movimm(0,HOST_TEMPREG); - emit_writeword(HOST_TEMPREG,(int)&pending_exception); - } - //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12); - //else - if(s==HOST_CCREG) - emit_loadreg(rs1[i],1); - else if(s!=1) - emit_mov(s,1); - emit_movimm(copr,0); - emit_call((int)pcsx_mtc0); - if(copr==9||copr==11||copr==12||copr==13) { - emit_readword((int)&Count,HOST_CCREG); - emit_readword((int)&next_interupt,HOST_TEMPREG); - emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG); - emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); - emit_writeword(HOST_TEMPREG,(int)&last_count); - emit_storereg(CCREG,HOST_CCREG); - } - if(copr==12||copr==13) { - assert(!is_delayslot); - emit_readword((int)&pending_exception,14); - emit_test(14,14); - emit_jne((int)&do_interrupt); - } - emit_loadreg(rs1[i],s); - if(get_reg(i_regs->regmap,rs1[i]|64)>=0) - emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64)); - cop1_usable=0; - } - else - { - assert(opcode2[i]==0x10); - if((source[i]&0x3f)==0x10) // RFE - { - emit_readword((int)&Status,0); - emit_andimm(0,0x3c,1); - emit_andimm(0,~0xf,0); - emit_orrshr_imm(1,2,0); - emit_writeword(0,(int)&Status); - } - } -} - -static void cop2_get_dreg(u_int copr,signed char tl,signed char temp) -{ - switch (copr) { - case 1: - case 3: - case 5: - case 8: - case 9: - case 10: - case 11: - emit_readword((int)®_cop2d[copr],tl); - emit_signextend16(tl,tl); - emit_writeword(tl,(int)®_cop2d[copr]); // hmh - break; - case 7: - case 16: - case 17: - case 18: - case 19: - emit_readword((int)®_cop2d[copr],tl); - emit_andimm(tl,0xffff,tl); - emit_writeword(tl,(int)®_cop2d[copr]); - break; - case 15: - 
emit_readword((int)®_cop2d[14],tl); // SXY2 - emit_writeword(tl,(int)®_cop2d[copr]); - break; - case 28: - case 29: - emit_readword((int)®_cop2d[9],temp); - emit_testimm(temp,0x8000); // do we need this? - emit_andimm(temp,0xf80,temp); - emit_andne_imm(temp,0,temp); - emit_shrimm(temp,7,tl); - emit_readword((int)®_cop2d[10],temp); - emit_testimm(temp,0x8000); - emit_andimm(temp,0xf80,temp); - emit_andne_imm(temp,0,temp); - emit_orrshr_imm(temp,2,tl); - emit_readword((int)®_cop2d[11],temp); - emit_testimm(temp,0x8000); - emit_andimm(temp,0xf80,temp); - emit_andne_imm(temp,0,temp); - emit_orrshl_imm(temp,3,tl); - emit_writeword(tl,(int)®_cop2d[copr]); - break; - default: - emit_readword((int)®_cop2d[copr],tl); - break; - } -} - -static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) -{ - switch (copr) { - case 15: - emit_readword((int)®_cop2d[13],temp); // SXY1 - emit_writeword(sl,(int)®_cop2d[copr]); - emit_writeword(temp,(int)®_cop2d[12]); // SXY0 - emit_readword((int)®_cop2d[14],temp); // SXY2 - emit_writeword(sl,(int)®_cop2d[14]); - emit_writeword(temp,(int)®_cop2d[13]); // SXY1 - break; - case 28: - emit_andimm(sl,0x001f,temp); - emit_shlimm(temp,7,temp); - emit_writeword(temp,(int)®_cop2d[9]); - emit_andimm(sl,0x03e0,temp); - emit_shlimm(temp,2,temp); - emit_writeword(temp,(int)®_cop2d[10]); - emit_andimm(sl,0x7c00,temp); - emit_shrimm(temp,3,temp); - emit_writeword(temp,(int)®_cop2d[11]); - emit_writeword(sl,(int)®_cop2d[28]); - break; - case 30: - emit_movs(sl,temp); - emit_mvnmi(temp,temp); -#ifdef HAVE_ARMV5 - emit_clz(temp,temp); -#else - emit_movs(temp,HOST_TEMPREG); - emit_movimm(0,temp); - emit_jeq((int)out+4*4); - emit_addpl_imm(temp,1,temp); - emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_jns((int)out-2*4); -#endif - emit_writeword(sl,(int)®_cop2d[30]); - emit_writeword(temp,(int)®_cop2d[31]); - break; - case 31: - break; - default: - emit_writeword(sl,(int)®_cop2d[copr]); - break; - } -} - -static void cop2_assemble(int i,struct 
regstat *i_regs) -{ - u_int copr=(source[i]>>11)&0x1f; - signed char temp=get_reg(i_regs->regmap,-1); - if (opcode2[i]==0) { // MFC2 - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0&&rt1[i]!=0) - cop2_get_dreg(copr,tl,temp); - } - else if (opcode2[i]==4) { // MTC2 - signed char sl=get_reg(i_regs->regmap,rs1[i]); - cop2_put_dreg(copr,sl,temp); - } - else if (opcode2[i]==2) // CFC2 - { - signed char tl=get_reg(i_regs->regmap,rt1[i]); - if(tl>=0&&rt1[i]!=0) - emit_readword((int)®_cop2c[copr],tl); - } - else if (opcode2[i]==6) // CTC2 - { - signed char sl=get_reg(i_regs->regmap,rs1[i]); - switch(copr) { - case 4: - case 12: - case 20: - case 26: - case 27: - case 29: - case 30: - emit_signextend16(sl,temp); - break; - case 31: - //value = value & 0x7ffff000; - //if (value & 0x7f87e000) value |= 0x80000000; - emit_shrimm(sl,12,temp); - emit_shlimm(temp,12,temp); - emit_testimm(temp,0x7f000000); - emit_testeqimm(temp,0x00870000); - emit_testeqimm(temp,0x0000e000); - emit_orrne_imm(temp,0x80000000,temp); - break; - default: - temp=sl; - break; - } - emit_writeword(temp,(int)®_cop2c[copr]); - assert(sl>=0); - } -} - -static void c2op_prologue(u_int op,u_int reglist) -{ - save_regs_all(reglist); -#ifdef PCNT - emit_movimm(op,0); - emit_call((int)pcnt_gte_start); -#endif - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs -} - -static void c2op_epilogue(u_int op,u_int reglist) -{ -#ifdef PCNT - emit_movimm(op,0); - emit_call((int)pcnt_gte_end); -#endif - restore_regs_all(reglist); -} - -static void c2op_call_MACtoIR(int lm,int need_flags) -{ - if(need_flags) - emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0)); - else - emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf)); -} - -static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags) -{ - emit_call((int)func); - // func is C code and trashes r0 - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); - if(need_flags||need_ir) - 
c2op_call_MACtoIR(lm,need_flags); - emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf)); -} - -static void c2op_assemble(int i,struct regstat *i_regs) -{ - u_int c2op=source[i]&0x3f; - u_int hr,reglist_full=0,reglist; - int need_flags,need_ir; - for(hr=0;hrregmap[hr]>=0) reglist_full|=1<>63); // +1 because of how liveness detection works - need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00; - assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n", - source[i],gte_unneeded[i+1],need_flags,need_ir); - if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS) - need_flags=0; - int shift = (source[i] >> 19) & 1; - int lm = (source[i] >> 10) & 1; - switch(c2op) { -#ifndef DRC_DBG - case GTE_MVMVA: { -#ifdef HAVE_ARMV5 - int v = (source[i] >> 15) & 3; - int cv = (source[i] >> 13) & 3; - int mx = (source[i] >> 17) & 3; - reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7} - c2op_prologue(c2op,reglist); - /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ - if(v<3) - emit_ldrd(v*8,0,4); - else { - emit_movzwl_indexed(9*4,0,4); // gteIR - emit_movzwl_indexed(10*4,0,6); - emit_movzwl_indexed(11*4,0,5); - emit_orrshl_imm(6,16,4); - } - if(mx<3) - emit_addimm(0,32*4+mx*8*4,6); - else - emit_readword((int)&zeromem_ptr,6); - if(cv<3) - emit_addimm(0,32*4+(cv*8+5)*4,7); - else - emit_readword((int)&zeromem_ptr,7); -#ifdef __ARM_NEON__ - emit_movimm(source[i],1); // opcode - emit_call((int)gteMVMVA_part_neon); - if(need_flags) { - emit_movimm(lm,1); - emit_call((int)gteMACtoIR_flags_neon); - } -#else - if(cv==3&&shift) - emit_call((int)gteMVMVA_part_cv3sh12_arm); - else { - emit_movimm(shift,1); - emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm)); - } - if(need_flags||need_ir) - c2op_call_MACtoIR(lm,need_flags); -#endif -#else /* if not HAVE_ARMV5 */ - c2op_prologue(c2op,reglist); - emit_movimm(source[i],1); // opcode - emit_writeword(1,(int)&psxRegs.code); - emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); -#endif - break; - } - 
case GTE_OP: - c2op_prologue(c2op,reglist); - emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); - if(need_flags||need_ir) { - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); - c2op_call_MACtoIR(lm,need_flags); - } - break; - case GTE_DPCS: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags); - break; - case GTE_INTPL: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags); - break; - case GTE_SQR: - c2op_prologue(c2op,reglist); - emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift)); - if(need_flags||need_ir) { - emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); - c2op_call_MACtoIR(lm,need_flags); - } - break; - case GTE_DCPL: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags); - break; - case GTE_GPF: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags); - break; - case GTE_GPL: - c2op_prologue(c2op,reglist); - c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags); - break; -#endif - default: - c2op_prologue(c2op,reglist); -#ifdef DRC_DBG - emit_movimm(source[i],1); // opcode - emit_writeword(1,(int)&psxRegs.code); -#endif - emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); - break; - } - c2op_epilogue(c2op,reglist); - } -} - -static void cop1_unusable(int i,struct regstat *i_regs) -{ - // XXX: should just just do the exception instead - if(!cop1_usable) { - int jaddr=(int)out; - emit_jmp(0); - add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0); - cop1_usable=1; - } -} - -static void cop1_assemble(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - -static void fconv_assemble_arm(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} -#define fconv_assemble fconv_assemble_arm - -static void 
fcomp_assemble(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - -static void float_assemble(int i,struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); -} - -static void multdiv_assemble_arm(int i,struct regstat *i_regs) -{ - // case 0x18: MULT - // case 0x19: MULTU - // case 0x1A: DIV - // case 0x1B: DIVU - // case 0x1C: DMULT - // case 0x1D: DMULTU - // case 0x1E: DDIV - // case 0x1F: DDIVU - if(rs1[i]&&rs2[i]) - { - if((opcode2[i]&4)==0) // 32-bit - { - if(opcode2[i]==0x18) // MULT - { - signed char m1=get_reg(i_regs->regmap,rs1[i]); - signed char m2=get_reg(i_regs->regmap,rs2[i]); - signed char hi=get_reg(i_regs->regmap,HIREG); - signed char lo=get_reg(i_regs->regmap,LOREG); - assert(m1>=0); - assert(m2>=0); - assert(hi>=0); - assert(lo>=0); - emit_smull(m1,m2,hi,lo); - } - if(opcode2[i]==0x19) // MULTU - { - signed char m1=get_reg(i_regs->regmap,rs1[i]); - signed char m2=get_reg(i_regs->regmap,rs2[i]); - signed char hi=get_reg(i_regs->regmap,HIREG); - signed char lo=get_reg(i_regs->regmap,LOREG); - assert(m1>=0); - assert(m2>=0); - assert(hi>=0); - assert(lo>=0); - emit_umull(m1,m2,hi,lo); - } - if(opcode2[i]==0x1A) // DIV - { - signed char d1=get_reg(i_regs->regmap,rs1[i]); - signed char d2=get_reg(i_regs->regmap,rs2[i]); - assert(d1>=0); - assert(d2>=0); - signed char quotient=get_reg(i_regs->regmap,LOREG); - signed char remainder=get_reg(i_regs->regmap,HIREG); - assert(quotient>=0); - assert(remainder>=0); - emit_movs(d1,remainder); - emit_movimm(0xffffffff,quotient); - emit_negmi(quotient,quotient); // .. quotient and .. - emit_negmi(remainder,remainder); // .. 
remainder for div0 case (will be negated back after jump) - emit_movs(d2,HOST_TEMPREG); - emit_jeq((int)out+52); // Division by zero - emit_negsmi(HOST_TEMPREG,HOST_TEMPREG); -#ifdef HAVE_ARMV5 - emit_clz(HOST_TEMPREG,quotient); - emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); -#else - emit_movimm(0,quotient); - emit_addpl_imm(quotient,1,quotient); - emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_jns((int)out-2*4); -#endif - emit_orimm(quotient,1<<31,quotient); - emit_shr(quotient,quotient,quotient); - emit_cmp(remainder,HOST_TEMPREG); - emit_subcs(remainder,HOST_TEMPREG,remainder); - emit_adcs(quotient,quotient,quotient); - emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_jcc((int)out-16); // -4 - emit_teq(d1,d2); - emit_negmi(quotient,quotient); - emit_test(d1,d1); - emit_negmi(remainder,remainder); - } - if(opcode2[i]==0x1B) // DIVU - { - signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend - signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor - assert(d1>=0); - assert(d2>=0); - signed char quotient=get_reg(i_regs->regmap,LOREG); - signed char remainder=get_reg(i_regs->regmap,HIREG); - assert(quotient>=0); - assert(remainder>=0); - emit_mov(d1,remainder); - emit_movimm(0xffffffff,quotient); // div0 case - emit_test(d2,d2); - emit_jeq((int)out+40); // Division by zero -#ifdef HAVE_ARMV5 - emit_clz(d2,HOST_TEMPREG); - emit_movimm(1<<31,quotient); - emit_shl(d2,HOST_TEMPREG,d2); -#else - emit_movimm(0,HOST_TEMPREG); - emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG); - emit_lslpls_imm(d2,1,d2); - emit_jns((int)out-2*4); - emit_movimm(1<<31,quotient); -#endif - emit_shr(quotient,HOST_TEMPREG,quotient); - emit_cmp(remainder,d2); - emit_subcs(remainder,d2,remainder); - emit_adcs(quotient,quotient,quotient); - emit_shrcc_imm(d2,1,d2); - emit_jcc((int)out-16); // -4 - } - } - else // 64-bit - assert(0); - } - else - { - // Multiply by zero is zero. - // MIPS does not have a divide by zero exception. - // The result is undefined, we return zero. 
- signed char hr=get_reg(i_regs->regmap,HIREG); - signed char lr=get_reg(i_regs->regmap,LOREG); - if(hr>=0) emit_zeroreg(hr); - if(lr>=0) emit_zeroreg(lr); - } -} -#define multdiv_assemble multdiv_assemble_arm - -static void do_preload_rhash(int r) { - // Don't need this for ARM. On x86, this puts the value 0xf8 into the - // register. On ARM the hash can be done with a single instruction (below) -} - -static void do_preload_rhtbl(int ht) { - emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht); -} - -static void do_rhash(int rs,int rh) { - emit_andimm(rs,0xf8,rh); -} - -static void do_miniht_load(int ht,int rh) { - assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]); - output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh)); -} - -static void do_miniht_jump(int rs,int rh,int ht) { - emit_cmp(rh,rs); - emit_ldreq_indexed(ht,4,15); - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - emit_mov(rs,7); - emit_jmp(jump_vaddr_reg[7]); - #else - emit_jmp(jump_vaddr_reg[rs]); - #endif -} - -static void do_miniht_insert(u_int return_address,int rt,int temp) { - #ifndef HAVE_ARMV7 - emit_movimm(return_address,rt); // PC into link register - add_to_linker((int)out,return_address,1); - emit_pcreladdr(temp); - emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); - emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); - #else - emit_movw(return_address&0x0000FFFF,rt); - add_to_linker((int)out,return_address,1); - emit_pcreladdr(temp); - emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]); - emit_movt(return_address&0xFFFF0000,rt); - emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]); - #endif -} - -static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu) -{ - //if(dirty_pre==dirty) return; - int hr,reg; - for(hr=0;hr>(reg&63))&1) { - if(reg>0) { - if(((dirty_pre&~dirty)>>hr)&1) { - if(reg>0&®<34) { - emit_storereg(reg,hr); - if( ((is32_pre&~uu)>>reg)&1 ) { - 
emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(reg|64,HOST_TEMPREG); - } - } - else if(reg>=64) { - emit_storereg(reg,hr); - } - } - } - } - } - } -} - - -/* using strd could possibly help but you'd have to allocate registers in pairs -static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu) -{ - int hr; - int wrote=-1; - for(hr=HOST_REGS-1;hr>=0;hr--) { - if(hr!=EXCLUDE_REG) { - if(pre[hr]!=entry[hr]) { - if(pre[hr]>=0) { - if((dirty>>hr)&1) { - if(get_reg(entry,pre[hr])<0) { - if(pre[hr]<64) { - if(!((u>>pre[hr])&1)) { - if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) { - if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { - emit_sarimm(hr,31,hr+1); - emit_strdreg(pre[hr],hr); - } - else - emit_storereg(pre[hr],hr); - }else{ - emit_storereg(pre[hr],hr); - if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) { - emit_sarimm(hr,31,hr); - emit_storereg(pre[hr]|64,hr); - } - } - } - }else{ - if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) { - emit_storereg(pre[hr],hr); - } - } - wrote=hr; - } - } - } - } - } - } - for(hr=0;hr=0) { - int nr; - if((nr=get_reg(entry,pre[hr]))>=0) { - emit_mov(hr,nr); - } - } - } - } - } -} -#define wb_invalidate wb_invalidate_arm -*/ - -static void mark_clear_cache(void *target) -{ - u_long offset = (char *)target - (char *)BASE_ADDR; - u_int mask = 1u << ((offset >> 12) & 31); - if (!(needs_clear_cache[offset >> 17] & mask)) { - char *start = (char *)((u_long)target & ~4095ul); - start_tcache_write(start, start + 4096); - needs_clear_cache[offset >> 17] |= mask; - } -} - -// Clearing the cache is rather slow on ARM Linux, so mark the areas -// that need to be cleared, and then only clear these areas once. 
-static void do_clear_cache() -{ - int i,j; - for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) - { - u_int bitmap=needs_clear_cache[i]; - if(bitmap) { - u_int start,end; - for(j=0;j<32;j++) - { - if(bitmap&(1< + +#include "emu_if.h" +#include "pcsxmem.h" +#include "../../../psxhle.h" +#include "../../../r3000a.h" +#include "../../../cdrom.h" +#include "../../../psxdma.h" +#include "../../../mdec.h" +#include "../../../gte_arm.h" +#include "../../../gte_neon.h" +#define FLAGLESS +#include "../../../gte.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +//#define evprintf printf +#define evprintf(...) + +char invalid_code[0x100000]; +static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); +u32 event_cycles[PSXINT_COUNT]; + +static void schedule_timeslice(void) +{ + u32 i, c = psxRegs.cycle; + u32 irqs = psxRegs.interrupt; + s32 min, dif; + + min = PSXCLK; + for (i = 0; irqs != 0; i++, irqs >>= 1) { + if (!(irqs & 1)) + continue; + dif = event_cycles[i] - c; + //evprintf(" ev %d\n", dif); + if (0 < dif && dif < min) + min = dif; + } + next_interupt = c + min; +} + +typedef void (irq_func)(); + +static irq_func * const irq_funcs[] = { + [PSXINT_SIO] = sioInterrupt, + [PSXINT_CDR] = cdrInterrupt, + [PSXINT_CDREAD] = cdrReadInterrupt, + [PSXINT_GPUDMA] = gpuInterrupt, + [PSXINT_MDECOUTDMA] = mdec1Interrupt, + [PSXINT_SPUDMA] = spuInterrupt, + [PSXINT_MDECINDMA] = mdec0Interrupt, + [PSXINT_GPUOTCDMA] = gpuotcInterrupt, + [PSXINT_CDRDMA] = cdrDmaInterrupt, + [PSXINT_CDRLID] = cdrLidSeekInterrupt, + [PSXINT_CDRPLAY] = cdrPlayInterrupt, + [PSXINT_SPU_UPDATE] = spuUpdate, + [PSXINT_RCNT] = psxRcntUpdate, +}; + +/* local dupe of psxBranchTest, using event_cycles */ +static void irq_test(void) +{ + u32 irqs = psxRegs.interrupt; + u32 cycle = psxRegs.cycle; + u32 irq, irq_bits; + + // irq_funcs() may queue more irqs + psxRegs.interrupt = 0; + + for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) { + if (!(irq_bits & 1)) + continue; + if ((s32)(cycle - 
event_cycles[irq]) >= 0) { + irqs &= ~(1 << irq); + irq_funcs[irq](); + } + } + psxRegs.interrupt |= irqs; + + if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) { + psxException(0x400, 0); + pending_exception = 1; + } +} + +void gen_interupt() +{ + evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt); + + irq_test(); + //psxBranchTest(); + //pending_exception = 1; + + schedule_timeslice(); + + evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, + next_interupt, next_interupt - psxRegs.cycle); +} + +// from interpreter +extern void MTC0(int reg, u32 val); + +void pcsx_mtc0(u32 reg, u32 val) +{ + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); + MTC0(reg, val); + gen_interupt(); + if (Cause & Status & 0x0300) // possible sw irq + pending_exception = 1; +} + +void pcsx_mtc0_ds(u32 reg, u32 val) +{ + evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); + MTC0(reg, val); +} + +void new_dyna_before_save(void) +{ + psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat + + // psxRegs.intCycle is always maintained, no need to convert +} + +void new_dyna_after_save(void) +{ + psxRegs.interrupt |= 1 << PSXINT_RCNT; +} + +static void new_dyna_restore(void) +{ + int i; + for (i = 0; i < PSXINT_COUNT; i++) + event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle; + + event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter; + psxRegs.interrupt |= 1 << PSXINT_RCNT; + psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1; + + new_dyna_pcsx_mem_load_state(); +} + +void new_dyna_freeze(void *f, int mode) +{ + const char header_save[8] = "ariblks"; + uint32_t addrs[1024 * 4]; + int32_t size = 0; + int bytes; + char header[8]; + + if (mode != 0) { // save + size = new_dynarec_save_blocks(addrs, sizeof(addrs)); + if (size == 0) + return; + + SaveFuncs.write(f, header_save, sizeof(header_save)); + SaveFuncs.write(f, &size, sizeof(size)); + SaveFuncs.write(f, addrs, 
size); + } + else { + new_dyna_restore(); + + bytes = SaveFuncs.read(f, header, sizeof(header)); + if (bytes != sizeof(header) || strcmp(header, header_save)) { + if (bytes > 0) + SaveFuncs.seek(f, -bytes, SEEK_CUR); + return; + } + SaveFuncs.read(f, &size, sizeof(size)); + if (size <= 0) + return; + if (size > sizeof(addrs)) { + bytes = size - sizeof(addrs); + SaveFuncs.seek(f, bytes, SEEK_CUR); + size = sizeof(addrs); + } + bytes = SaveFuncs.read(f, addrs, size); + if (bytes != size) + return; + + new_dynarec_load_blocks(addrs, size); + } + + //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded"); +} + +/* GTE stuff */ +void *gte_handlers[64]; + +void *gte_handlers_nf[64] = { + NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00 + NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08 + gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10 + NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18 + gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 + gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28 + gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 + NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38 +}; + +const char *gte_regnames[64] = { + NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00 + NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08 + "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10 + NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18 + "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 + "SQR" , "DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28 + "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 + NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38 +}; + +/* from gte.txt.. not sure if this is any good. 
*/ +const char gte_cycletab[64] = { + /* 1 2 3 4 5 6 7 8 9 a b c d e f */ + 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0, + 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0, + 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0, + 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39, +}; + +#define GCBIT(x) \ + (1ll << (32+x)) +#define GDBIT(x) \ + (1ll << (x)) +#define GCBITS3(b0,b1,b2) \ + (GCBIT(b0) | GCBIT(b1) | GCBIT(b2)) +#define GDBITS2(b0,b1) \ + (GDBIT(b0) | GDBIT(b1)) +#define GDBITS3(b0,b1,b2) \ + (GDBITS2(b0,b1) | GDBIT(b2)) +#define GDBITS4(b0,b1,b2,b3) \ + (GDBITS3(b0,b1,b2) | GDBIT(b3)) +#define GDBITS5(b0,b1,b2,b3,b4) \ + (GDBITS4(b0,b1,b2,b3) | GDBIT(b4)) +#define GDBITS6(b0,b1,b2,b3,b4,b5) \ + (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5)) +#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \ + (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6)) +#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \ + (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7)) +#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \ + (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8)) +#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \ + (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9)) + +const uint64_t gte_reg_reads[64] = { + [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19), + [GTE_NCLIP] = GDBITS3(12,13,14), + [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11), + [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22), + [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), + [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further? 
+ [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22), + [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22), + [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8), + [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), + [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22), + [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), + [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), + [GTE_SQR] = GDBITS3(9,10,11), + [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), + [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22), + [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19), + [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19), + [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19), + [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22), + [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27), + [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), +}; + +// note: this excludes gteFLAG that is always written to +const uint64_t gte_reg_writes[64] = { + [GTE_RTPS] = 0x0f0f7f00ll, + [GTE_NCLIP] = GDBIT(24), + [GTE_OP] = GDBITS6(9,10,11,25,26,27), + [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27), + [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_SQR] = GDBITS6(9,10,11,25,26,27), + [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_AVSZ3] = GDBITS2(7,24), + [GTE_AVSZ4] = GDBITS2(7,24), + [GTE_RTPT] = 0x0f0f7f00ll, + [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27), + [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27), +}; + +static int 
ari64_init() +{ + extern void (*psxCP2[64])(); + extern void psxNULL(); + extern unsigned char *out; + size_t i; + + new_dynarec_init(); + new_dyna_pcsx_mem_init(); + + for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) + if (psxCP2[i] != psxNULL) + gte_handlers[i] = psxCP2[i]; + +#if defined(__arm__) && !defined(DRC_DBG) + gte_handlers[0x06] = gteNCLIP_arm; +#ifdef HAVE_ARMV5 + gte_handlers_nf[0x01] = gteRTPS_nf_arm; + gte_handlers_nf[0x30] = gteRTPT_nf_arm; +#endif +#ifdef __ARM_NEON__ + // compiler's _nf version is still a lot slower than neon + // _nf_arm RTPS is roughly the same, RTPT slower + gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; + gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; +#endif +#endif +#ifdef DRC_DBG + memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); +#endif + psxH_ptr = psxH; + zeromem_ptr = zero_mem; + scratch_buf_ptr = scratch_buf; + + SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n"); + SysPrintf("%08x/%08x/%08x/%08x/%08x\n", + psxM, psxH, psxR, mem_rtab, out); + + return 0; +} + +static void ari64_reset() +{ + printf("ari64_reset\n"); + new_dyna_pcsx_mem_reset(); + invalidate_all_pages(); + new_dyna_restore(); + pending_exception = 1; +} + +// execute until predefined leave points +// (HLE softcall exit and BIOS fastboot end) +static void ari64_execute_until() +{ + schedule_timeslice(); + + evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc, + psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); + + new_dyna_start(); + + evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc, + psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); +} + +static void ari64_execute() +{ + while (!stop) { + ari64_execute_until(); + evprintf("drc left @%08x\n", psxRegs.pc); + } +} + +static void ari64_clear(u32 addr, u32 size) +{ + u32 start, end, main_ram; + + size *= 4; /* PCSX uses DMA units (words) */ + + evprintf("ari64_clear %08x %04x\n", addr, size); + + /* check for RAM mirrors */ + main_ram 
= (addr & 0xffe00000) == 0x80000000;
+
+	start = addr >> 12;
+	end = (addr + size) >> 12;
+
+	for (; start <= end; start++)
+		if (!main_ram || !invalid_code[start])
+			invalidate_block(start);
+}
+
+static void ari64_shutdown()
+{
+	new_dynarec_cleanup();
+	new_dyna_pcsx_mem_shutdown();
+}
+
+extern void intExecute();
+extern void intExecuteT();
+extern void intExecuteBlock();
+extern void intExecuteBlockT();
+#ifndef DRC_DBG
+#define intExecuteT intExecute
+#define intExecuteBlockT intExecuteBlock
+#endif
+
+/* CPU backend vtable; with DRC_DISABLE the interpreter entry points
+ * are plugged in for the two execute slots instead of the dynarec ones */
+R3000Acpu psxRec = {
+	ari64_init,
+	ari64_reset,
+#ifndef DRC_DISABLE
+	ari64_execute,
+	ari64_execute_until,
+#else
+	intExecuteT,
+	intExecuteBlockT,
+#endif
+	ari64_clear,
+	ari64_shutdown
+};
+
+// TODO: rm
+#ifndef DRC_DBG
+void do_insn_trace() {}
+void do_insn_cmp() {}
+#endif
+
+/* no-op stand-ins for the dynarec symbols when it is compiled out */
+#ifdef DRC_DISABLE
+unsigned int address;
+int pending_exception, stop;
+u32 next_interupt;
+int new_dynarec_did_compile;
+int cycle_multiplier;
+int new_dynarec_hacks;
+void *psxH_ptr;
+void *zeromem_ptr;
+u8 zero_mem[0x1000];
+unsigned char *out;
+void *mem_rtab;
+void *scratch_buf_ptr;
+void new_dynarec_init() { (void)ari64_execute; }
+void new_dyna_start() {}
+void new_dynarec_cleanup() {}
+void new_dynarec_clear_full() {}
+void invalidate_all_pages() {}
+void invalidate_block(unsigned int block) {}
+void new_dyna_pcsx_mem_init(void) {}
+void new_dyna_pcsx_mem_reset(void) {}
+void new_dyna_pcsx_mem_load_state(void) {}
+void new_dyna_pcsx_mem_shutdown(void) {}
+int new_dynarec_save_blocks(void *save, int size) { return 0; }
+void new_dynarec_load_blocks(const void *save, int size) {}
+#endif
+
+#ifdef DRC_DBG
+
+#include <stddef.h>
+static FILE *f;
+extern u32 last_io_addr;
+
+/*
+ * Write 'size' bytes starting at 'mem' to the file 'fname'.
+ * If 'fname' can't be opened for writing, retry with just its last
+ * path component (i.e. in the current directory).  Failures are
+ * reported on stdout and otherwise ignored - this is a debug aid.
+ */
+static void dump_mem(const char *fname, void *mem, size_t size)
+{
+	const char *base;
+	FILE *f1 = fopen(fname, "wb");
+
+	if (f1 == NULL) {
+		// only fall back when there is a directory part to strip
+		base = strrchr(fname, '/');
+		if (base != NULL)
+			f1 = fopen(base + 1, "wb");
+	}
+	if (f1 == NULL) {
+		printf("dump_mem: can't open %s\n", fname);
+		return;
+	}
+	if (fwrite(mem, 1, size, f1) != size)
+		printf("dump_mem: short write to %s\n", fname);
+	fclose(f1);
+}
+
+static u32 memcheck_read(u32 a)
+{
+	if ((a >> 16) == 0x1f80)
+		// scratchpad/IO
+		return *(u32 *)(psxH
+ (a & 0xfffc)); + + if ((a >> 16) == 0x1f00) + // parallel + return *(u32 *)(psxP + (a & 0xfffc)); + +// if ((a & ~0xe0600000) < 0x200000) + // RAM + return *(u32 *)(psxM + (a & 0x1ffffc)); +} + +void do_insn_trace(void) +{ + static psxRegisters oldregs; + static u32 old_io_addr = (u32)-1; + static u32 old_io_data = 0xbad0c0de; + static u32 event_cycles_o[PSXINT_COUNT]; + u32 *allregs_p = (void *)&psxRegs; + u32 *allregs_o = (void *)&oldregs; + u32 io_data; + int i; + u8 byte; + + //last_io_addr = 0x5e2c8; + if (f == NULL) + f = fopen("tracelog", "wb"); + + // log reg changes + oldregs.code = psxRegs.code; // don't care + for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { + if (allregs_p[i] != allregs_o[i]) { + fwrite(&i, 1, 1, f); + fwrite(&allregs_p[i], 1, 4, f); + allregs_o[i] = allregs_p[i]; + } + } + // log event changes + for (i = 0; i < PSXINT_COUNT; i++) { + if (event_cycles[i] != event_cycles_o[i]) { + byte = 0xfc; + fwrite(&byte, 1, 1, f); + fwrite(&i, 1, 1, f); + fwrite(&event_cycles[i], 1, 4, f); + event_cycles_o[i] = event_cycles[i]; + } + } + // log last io + if (old_io_addr != last_io_addr) { + byte = 0xfd; + fwrite(&byte, 1, 1, f); + fwrite(&last_io_addr, 1, 4, f); + old_io_addr = last_io_addr; + } + io_data = memcheck_read(last_io_addr); + if (old_io_data != io_data) { + byte = 0xfe; + fwrite(&byte, 1, 1, f); + fwrite(&io_data, 1, 4, f); + old_io_data = io_data; + } + byte = 0xff; + fwrite(&byte, 1, 1, f); + +#if 0 + if (psxRegs.cycle == 190230) { + dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000); + dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000); + printf("dumped\n"); + exit(1); + } +#endif +} + +static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", + "lo", "hi", + "C0_0", 
"C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7", + "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15", + "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23", + "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31", + + "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7", + "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15", + "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23", + "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31", + + "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7", + "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15", + "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23", + "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31", + + "PC", "code", "cycle", "interrupt", +}; + +static struct { + int reg; + u32 val, val_expect; + u32 pc, cycle; +} miss_log[64]; +static int miss_log_i; +#define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0])) +#define miss_log_mask (miss_log_len-1) + +static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle) +{ + miss_log[miss_log_i].reg = reg; + miss_log[miss_log_i].val = val; + miss_log[miss_log_i].val_expect = val_expect; + miss_log[miss_log_i].pc = pc; + miss_log[miss_log_i].cycle = cycle; + miss_log_i = (miss_log_i + 1) & miss_log_mask; +} + +void breakme() {} + +void do_insn_cmp(void) +{ + static psxRegisters rregs; + static u32 mem_addr, mem_val; + u32 *allregs_p = (void *)&psxRegs; + u32 *allregs_e = (void *)&rregs; + static u32 ppc, failcount; + int i, ret, bad = 0, which_event = -1; + u32 ev_cycles = 0; + u8 code; + + if (f == NULL) + f = fopen("tracelog", "rb"); + + while (1) { + if ((ret = fread(&code, 1, 1, f)) <= 0) + break; + if (ret <= 0) + break; + if (code == 0xff) + break; + switch (code) { + case 0xfc: + which_event = 0; + fread(&which_event, 1, 1, f); + fread(&ev_cycles, 1, 4, f); + 
continue; + case 0xfd: + fread(&mem_addr, 1, 4, f); + continue; + case 0xfe: + fread(&mem_val, 1, 4, f); + continue; + } + fread(&allregs_e[code], 1, 4, f); + } + + if (ret <= 0) { + printf("EOF?\n"); + goto end; + } + + psxRegs.code = rregs.code; // don't care + psxRegs.cycle = rregs.cycle; + psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count + + //if (psxRegs.cycle == 166172) breakme(); + + if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 && + mem_val == memcheck_read(mem_addr) + ) { + failcount = 0; + goto ok; + } + + for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { + if (allregs_p[i] != allregs_e[i]) { + miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle); + bad++; + } + } + + if (mem_val != memcheck_read(mem_addr)) { + printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val); + goto end; + } + + if (which_event >= 0 && event_cycles[which_event] != ev_cycles) { + printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles); + goto end; + } + + if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) { + static int last_mcycle; + if (last_mcycle != psxRegs.cycle >> 20) { + printf("%u\n", psxRegs.cycle); + last_mcycle = psxRegs.cycle >> 20; + } + failcount++; + goto ok; + } + +end: + for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask) + printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n", + regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val, + miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle); + printf("-- %d\n", bad); + for (i = 0; i < 8; i++) + printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i], + i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]); + printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle); + dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000); + dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); + exit(1); +ok: + 
psxRegs.cycle = rregs.cycle + 2; // sync timing + ppc = psxRegs.pc; +} + +#endif diff --git a/libpcsxcore/new_dynarec/backends/psx/emu_if.h b/libpcsxcore/new_dynarec/backends/psx/emu_if.h new file mode 100644 index 0000000..d8c7990 --- /dev/null +++ b/libpcsxcore/new_dynarec/backends/psx/emu_if.h @@ -0,0 +1,108 @@ +#include "../../new_dynarec.h" +#include "../../../r3000a.h" + +extern char invalid_code[0x100000]; + +/* weird stuff */ +#define EAX 0 +#define ECX 1 + +/* same as psxRegs */ +extern int reg[]; + +/* same as psxRegs.GPR.n.* */ +extern int hi, lo; + +/* same as psxRegs.CP0.n.* */ +extern int reg_cop0[]; +#define Status psxRegs.CP0.n.Status +#define Cause psxRegs.CP0.n.Cause +#define EPC psxRegs.CP0.n.EPC +#define BadVAddr psxRegs.CP0.n.BadVAddr +#define Context psxRegs.CP0.n.Context +#define EntryHi psxRegs.CP0.n.EntryHi +#define Count psxRegs.cycle // psxRegs.CP0.n.Count + +/* COP2/GTE */ +enum gte_opcodes { + GTE_RTPS = 0x01, + GTE_NCLIP = 0x06, + GTE_OP = 0x0c, + GTE_DPCS = 0x10, + GTE_INTPL = 0x11, + GTE_MVMVA = 0x12, + GTE_NCDS = 0x13, + GTE_CDP = 0x14, + GTE_NCDT = 0x16, + GTE_NCCS = 0x1b, + GTE_CC = 0x1c, + GTE_NCS = 0x1e, + GTE_NCT = 0x20, + GTE_SQR = 0x28, + GTE_DCPL = 0x29, + GTE_DPCT = 0x2a, + GTE_AVSZ3 = 0x2d, + GTE_AVSZ4 = 0x2e, + GTE_RTPT = 0x30, + GTE_GPF = 0x3d, + GTE_GPL = 0x3e, + GTE_NCCT = 0x3f, +}; + +extern int reg_cop2d[], reg_cop2c[]; +extern void *gte_handlers[64]; +extern void *gte_handlers_nf[64]; +extern const char *gte_regnames[64]; +extern const char gte_cycletab[64]; +extern const uint64_t gte_reg_reads[64]; +extern const uint64_t gte_reg_writes[64]; + +/* dummy */ +extern int FCR0, FCR31; + +/* mem */ +extern void *mem_rtab; +extern void *mem_wtab; + +void jump_handler_read8(u32 addr, u32 *table, u32 cycles); +void jump_handler_read16(u32 addr, u32 *table, u32 cycles); +void jump_handler_read32(u32 addr, u32 *table, u32 cycles); +void jump_handler_write8(u32 addr, u32 data, u32 cycles, u32 *table); +void 
jump_handler_write16(u32 addr, u32 data, u32 cycles, u32 *table); +void jump_handler_write32(u32 addr, u32 data, u32 cycles, u32 *table); +void jump_handler_write_h(u32 addr, u32 data, u32 cycles, void *handler); +void jump_handle_swl(u32 addr, u32 data, u32 cycles); +void jump_handle_swr(u32 addr, u32 data, u32 cycles); +void rcnt0_read_count_m0(u32 addr, u32, u32 cycles); +void rcnt0_read_count_m1(u32 addr, u32, u32 cycles); +void rcnt1_read_count_m0(u32 addr, u32, u32 cycles); +void rcnt1_read_count_m1(u32 addr, u32, u32 cycles); +void rcnt2_read_count_m0(u32 addr, u32, u32 cycles); +void rcnt2_read_count_m1(u32 addr, u32, u32 cycles); + +extern unsigned int address; +extern void *psxH_ptr; +extern void *zeromem_ptr; +extern void *scratch_buf_ptr; + +// same as invalid_code, just a region for ram write checks (inclusive) +extern u32 inv_code_start, inv_code_end; + +/* cycles/irqs */ +extern u32 next_interupt; +extern int pending_exception; + +/* called by drc */ +void pcsx_mtc0(u32 reg, u32 val); +void pcsx_mtc0_ds(u32 reg, u32 val); + +/* misc */ +extern void (*psxHLEt[])(); + +extern void SysPrintf(const char *fmt, ...); + +#ifdef RAM_FIXED +#define rdram ((u_int)0x80000000) +#else +#define rdram ((u_int)psxM) +#endif diff --git a/libpcsxcore/new_dynarec/backends/psx/pcsxmem.c b/libpcsxcore/new_dynarec/backends/psx/pcsxmem.c new file mode 100644 index 0000000..647981e --- /dev/null +++ b/libpcsxcore/new_dynarec/backends/psx/pcsxmem.c @@ -0,0 +1,494 @@ +/* + * (C) Gražvydas "notaz" Ignotas, 2010-2011 + * + * This work is licensed under the terms of GNU GPL version 2 or later. + * See the COPYING file in the top-level directory. 
+ */
+
+#include <stdio.h>
+#include "../../../psxhw.h"
+#include "../../../cdrom.h"
+#include "../../../mdec.h"
+#include "../../../gpu.h"
+#include "../../../psxmem_map.h"
+#include "emu_if.h"
+#include "pcsxmem.h"
+
+#ifdef __thumb__
+#error the dynarec is incompatible with Thumb functions,
+#error please add -marm to compile flags
+#endif
+
+//#define memprintf printf
+#define memprintf(...)
+
+static u32 *mem_readtab;
+static u32 *mem_writetab;
+static u32 mem_iortab[(1+2+4) * 0x1000 / 4];
+static u32 mem_iowtab[(1+2+4) * 0x1000 / 4];
+static u32 mem_ffwtab[(1+2+4) * 0x1000 / 4];
+//static u32 mem_unmrtab[(1+2+4) * 0x1000 / 4];
+static u32 mem_unmwtab[(1+2+4) * 0x1000 / 4];
+
+/*
+ * Store one lookup table entry: the address 'h' shifted right by one,
+ * with 'flag' placed in bit 31.  'h' must have its LSB clear, since
+ * that bit is lost by the shift; otherwise this aborts.
+ */
+// When this is called in a loop, and 'h' is a function pointer, clang will crash.
+#ifdef __clang__
+static __attribute__ ((noinline)) void map_item(u32 *out, const void *h, u32 flag)
+#else
+static void map_item(u32 *out, const void *h, u32 flag)
+#endif
+{
+	u32 hv = (u32)h;
+	if (hv & 1) {
+		SysPrintf("FATAL: %p has LSB set\n", h);
+		abort();
+	}
+	*out = (hv >> 1) | (flag << 31);
+}
+
+// size must be power of 2, at least 4k
+// (all macro arguments are parenthesized so expressions can be passed)
+#define map_l1_mem(tab, i, addr, size, base) \
+	map_item(&(tab)[((addr)>>12) + (i)], \
+		(u8 *)(base) - (u32)(addr) - (((i) << 12) & ~((size) - 1)), 0)
+
+// index of an I/O register in the combined 32/16/8 bit handler tables
+#define IOMEM32(a) (((a) & 0xfff) / 4)
+#define IOMEM16(a) (0x1000/4 + (((a) & 0xfff) / 2))
+#define IOMEM8(a) (0x1000/4 + 0x1000/2 + ((a) & 0xfff))
+
+u8 zero_mem[0x1000];
+
+u32 read_mem_dummy()
+{
+	return 0;
+}
+
+static void write_mem_dummy(u32 data)
+{
+	memprintf("unmapped w %08x, %08x @%08x %u\n", address, data, psxRegs.pc, psxRegs.cycle);
+}
+
+/* IO handlers */
+
+/*
+ * 16 bit SIO data read, built from two byte reads.
+ * The reads are done in separate statements: the evaluation order of
+ * the operands of '|' is unspecified in C, and sioRead8() is called
+ * once per byte, so the first call must be the one that ends up in
+ * the low bits (same order io_write_sio16() uses for writes).
+ */
+static u32 io_read_sio16()
+{
+	u32 v;
+
+	v = sioRead8();
+	v |= (u32)sioRead8() << 8;
+	return v;
+}
+
+/*
+ * 32 bit SIO data read, built from four byte reads, low byte first.
+ * The casts keep the shifts in unsigned arithmetic so a byte with the
+ * top bit set is never shifted into the sign bit of an int.
+ */
+static u32 io_read_sio32()
+{
+	u32 v;
+
+	v = sioRead8();
+	v |= (u32)sioRead8() << 8;
+	v |= (u32)sioRead8() << 16;
+	v |= (u32)sioRead8() << 24;
+	return v;
+}
+
+static void io_write_sio16(u32 value)
+{
+	sioWrite8((unsigned char)value);
+	sioWrite8((unsigned char)(value>>8));
+}
+
+static void io_write_sio32(u32 value)
+{
+
sioWrite8((unsigned char)value); + sioWrite8((unsigned char)(value >> 8)); + sioWrite8((unsigned char)(value >> 16)); + sioWrite8((unsigned char)(value >> 24)); +} + +#ifndef DRC_DBG + +static void map_rcnt_rcount0(u32 mode) +{ + if (mode & 0x100) { // pixel clock + map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m1, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m1, 1); + } + else { + map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m0, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m0, 1); + } +} + +static void map_rcnt_rcount1(u32 mode) +{ + if (mode & 0x100) { // hcnt + map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m1, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m1, 1); + } + else { + map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m0, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m0, 1); + } +} + +static void map_rcnt_rcount2(u32 mode) +{ + if (mode & 0x01) { // gate + map_item(&mem_iortab[IOMEM32(0x1120)], &psxH[0x1000], 0); + map_item(&mem_iortab[IOMEM16(0x1120)], &psxH[0x1000], 0); + } + else if (mode & 0x200) { // clk/8 + map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m1, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m1, 1); + } + else { + map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m0, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m0, 1); + } +} + +#else +#define map_rcnt_rcount0(mode) +#define map_rcnt_rcount1(mode) +#define map_rcnt_rcount2(mode) +#endif + +#define make_rcnt_funcs(i) \ +static u32 io_rcnt_read_count##i() { return psxRcntRcount(i); } \ +static u32 io_rcnt_read_mode##i() { return psxRcntRmode(i); } \ +static u32 io_rcnt_read_target##i() { return psxRcntRtarget(i); } \ +static void io_rcnt_write_count##i(u32 val) { psxRcntWcount(i, val & 0xffff); } \ +static void io_rcnt_write_mode##i(u32 val) { psxRcntWmode(i, val); map_rcnt_rcount##i(val); } \ +static void io_rcnt_write_target##i(u32 val) { 
psxRcntWtarget(i, val & 0xffff); } + +make_rcnt_funcs(0) +make_rcnt_funcs(1) +make_rcnt_funcs(2) + +static void io_write_ireg16(u32 value) +{ + //if (Config.Sio) psxHu16ref(0x1070) |= 0x80; + if (Config.SpuIrq) psxHu16ref(0x1070) |= 0x200; + psxHu16ref(0x1070) &= value; +} + +static void io_write_imask16(u32 value) +{ + psxHu16ref(0x1074) = value; + if (psxHu16ref(0x1070) & value) + new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); +} + +static void io_write_ireg32(u32 value) +{ + //if (Config.Sio) psxHu32ref(0x1070) |= 0x80; + if (Config.SpuIrq) psxHu32ref(0x1070) |= 0x200; + psxHu32ref(0x1070) &= value; +} + +static void io_write_imask32(u32 value) +{ + psxHu32ref(0x1074) = value; + if (psxHu32ref(0x1070) & value) + new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); +} + +static void io_write_dma_icr32(u32 value) +{ + u32 tmp = value & 0x00ff803f; + tmp |= (SWAPu32(HW_DMA_ICR) & ~value) & 0x7f000000; + if ((tmp & HW_DMA_ICR_GLOBAL_ENABLE && tmp & 0x7f000000) + || tmp & HW_DMA_ICR_BUS_ERROR) { + if (!(SWAPu32(HW_DMA_ICR) & HW_DMA_ICR_IRQ_SENT)) + psxHu32ref(0x1070) |= SWAP32(8); + tmp |= HW_DMA_ICR_IRQ_SENT; + } + HW_DMA_ICR = SWAPu32(tmp); +} + +#define make_dma_func(n) \ +static void io_write_chcr##n(u32 value) \ +{ \ + HW_DMA##n##_CHCR = value; \ + if (value & 0x01000000 && HW_DMA_PCR & (8 << (n * 4))) { \ + psxDma##n(HW_DMA##n##_MADR, HW_DMA##n##_BCR, value); \ + } \ +} + +make_dma_func(0) +make_dma_func(1) +make_dma_func(2) +make_dma_func(3) +make_dma_func(4) +make_dma_func(6) + +static void io_spu_write16(u32 value) +{ + // meh + SPU_writeRegister(address, value, psxRegs.cycle); +} + +static void io_spu_write32(u32 value) +{ + SPUwriteRegister wfunc = SPU_writeRegister; + u32 a = address; + + wfunc(a, value & 0xffff, psxRegs.cycle); + wfunc(a + 2, value >> 16, psxRegs.cycle); +} + +static u32 io_gpu_read_status(void) +{ + u32 v; + + // meh2, syncing for img bit, might want to avoid it.. 
+ gpuSyncPluginSR(); + v = HW_GPU_STATUS; + + // XXX: because of large timeslices can't use hSyncCount, using rough + // approximization instead. Perhaps better use hcounter code here or something. + if (hSyncCount < 240 && (HW_GPU_STATUS & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) + v |= PSXGPU_LCF & (psxRegs.cycle << 20); + return v; +} + +static void io_gpu_write_status(u32 value) +{ + GPU_writeStatus(value); + gpuSyncPluginSR(); +} + +static void map_ram_write(void) +{ + int i; + + for (i = 0; i < (0x800000 >> 12); i++) { + map_l1_mem(mem_writetab, i, 0x80000000, 0x200000, psxM); + map_l1_mem(mem_writetab, i, 0x00000000, 0x200000, psxM); + map_l1_mem(mem_writetab, i, 0xa0000000, 0x200000, psxM); + } +} + +static void unmap_ram_write(void) +{ + int i; + + for (i = 0; i < (0x800000 >> 12); i++) { + map_item(&mem_writetab[0x80000|i], mem_unmwtab, 1); + map_item(&mem_writetab[0x00000|i], mem_unmwtab, 1); + map_item(&mem_writetab[0xa0000|i], mem_unmwtab, 1); + } +} + +static void write_biu(u32 value) +{ + memprintf("write_biu %08x, %08x @%08x %u\n", address, value, psxRegs.pc, psxRegs.cycle); + + if (address != 0xfffe0130) + return; + + switch (value) { + case 0x800: case 0x804: + unmap_ram_write(); + break; + case 0: case 0x1e988: + map_ram_write(); + break; + default: + printf("write_biu: unexpected val: %08x\n", value); + break; + } +} + +void new_dyna_pcsx_mem_load_state(void) +{ + map_rcnt_rcount0(rcnts[0].mode); + map_rcnt_rcount1(rcnts[1].mode); + map_rcnt_rcount2(rcnts[2].mode); +} + +int pcsxmem_is_handler_dynamic(unsigned int addr) +{ + if ((addr & 0xfffff000) != 0x1f801000) + return 0; + + addr &= 0xffff; + return addr == 0x1100 || addr == 0x1110 || addr == 0x1120; +} + +void new_dyna_pcsx_mem_init(void) +{ + int i; + + // have to map these further to keep tcache close to .text + mem_readtab = psxMap(0x08000000, 0x200000 * 4, 0, MAP_TAG_LUTS); + if (mem_readtab == NULL) { + SysPrintf("failed to map mem tables\n"); + exit(1); + } + mem_writetab = 
mem_readtab + 0x100000; + + // 1st level lookup: + // 0: direct mem + // 1: use 2nd lookup + // 2nd level lookup: + // 0: direct mem variable + // 1: memhandler + + // default/unmapped memhandlers + for (i = 0; i < 0x100000; i++) { + //map_item(&mem_readtab[i], mem_unmrtab, 1); + map_l1_mem(mem_readtab, i, 0, 0x1000, zero_mem); + map_item(&mem_writetab[i], mem_unmwtab, 1); + } + + // RAM and it's mirrors + for (i = 0; i < (0x800000 >> 12); i++) { + map_l1_mem(mem_readtab, i, 0x80000000, 0x200000, psxM); + map_l1_mem(mem_readtab, i, 0x00000000, 0x200000, psxM); + map_l1_mem(mem_readtab, i, 0xa0000000, 0x200000, psxM); + } + map_ram_write(); + + // BIOS and it's mirrors + for (i = 0; i < (0x80000 >> 12); i++) { + map_l1_mem(mem_readtab, i, 0x1fc00000, 0x80000, psxR); + map_l1_mem(mem_readtab, i, 0xbfc00000, 0x80000, psxR); + } + + // scratchpad + map_l1_mem(mem_readtab, 0, 0x1f800000, 0x1000, psxH); + map_l1_mem(mem_readtab, 0, 0x9f800000, 0x1000, psxH); + map_l1_mem(mem_writetab, 0, 0x1f800000, 0x1000, psxH); + map_l1_mem(mem_writetab, 0, 0x9f800000, 0x1000, psxH); + + // I/O + map_item(&mem_readtab[0x1f801000 >> 12], mem_iortab, 1); + map_item(&mem_readtab[0x9f801000 >> 12], mem_iortab, 1); + map_item(&mem_readtab[0xbf801000 >> 12], mem_iortab, 1); + map_item(&mem_writetab[0x1f801000 >> 12], mem_iowtab, 1); + map_item(&mem_writetab[0x9f801000 >> 12], mem_iowtab, 1); + map_item(&mem_writetab[0xbf801000 >> 12], mem_iowtab, 1); + + // L2 + // unmapped tables + for (i = 0; i < (1+2+4) * 0x1000 / 4; i++) + map_item(&mem_unmwtab[i], write_mem_dummy, 1); + + // fill IO tables + for (i = 0; i < 0x1000/4; i++) { + map_item(&mem_iortab[i], &psxH[0x1000], 0); + map_item(&mem_iowtab[i], &psxH[0x1000], 0); + } + for (; i < 0x1000/4 + 0x1000/2; i++) { + map_item(&mem_iortab[i], &psxH[0x1000], 0); + map_item(&mem_iowtab[i], &psxH[0x1000], 0); + } + for (; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) { + map_item(&mem_iortab[i], &psxH[0x1000], 0); + map_item(&mem_iowtab[i], 
&psxH[0x1000], 0); + } + + map_item(&mem_iortab[IOMEM32(0x1040)], io_read_sio32, 1); + map_item(&mem_iortab[IOMEM32(0x1100)], io_rcnt_read_count0, 1); + map_item(&mem_iortab[IOMEM32(0x1104)], io_rcnt_read_mode0, 1); + map_item(&mem_iortab[IOMEM32(0x1108)], io_rcnt_read_target0, 1); + map_item(&mem_iortab[IOMEM32(0x1110)], io_rcnt_read_count1, 1); + map_item(&mem_iortab[IOMEM32(0x1114)], io_rcnt_read_mode1, 1); + map_item(&mem_iortab[IOMEM32(0x1118)], io_rcnt_read_target1, 1); + map_item(&mem_iortab[IOMEM32(0x1120)], io_rcnt_read_count2, 1); + map_item(&mem_iortab[IOMEM32(0x1124)], io_rcnt_read_mode2, 1); + map_item(&mem_iortab[IOMEM32(0x1128)], io_rcnt_read_target2, 1); +// map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); + map_item(&mem_iortab[IOMEM32(0x1814)], io_gpu_read_status, 1); + map_item(&mem_iortab[IOMEM32(0x1820)], mdecRead0, 1); + map_item(&mem_iortab[IOMEM32(0x1824)], mdecRead1, 1); + + map_item(&mem_iortab[IOMEM16(0x1040)], io_read_sio16, 1); + map_item(&mem_iortab[IOMEM16(0x1044)], sioReadStat16, 1); + map_item(&mem_iortab[IOMEM16(0x1048)], sioReadMode16, 1); + map_item(&mem_iortab[IOMEM16(0x104a)], sioReadCtrl16, 1); + map_item(&mem_iortab[IOMEM16(0x104e)], sioReadBaud16, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], io_rcnt_read_count0, 1); + map_item(&mem_iortab[IOMEM16(0x1104)], io_rcnt_read_mode0, 1); + map_item(&mem_iortab[IOMEM16(0x1108)], io_rcnt_read_target0, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], io_rcnt_read_count1, 1); + map_item(&mem_iortab[IOMEM16(0x1114)], io_rcnt_read_mode1, 1); + map_item(&mem_iortab[IOMEM16(0x1118)], io_rcnt_read_target1, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], io_rcnt_read_count2, 1); + map_item(&mem_iortab[IOMEM16(0x1124)], io_rcnt_read_mode2, 1); + map_item(&mem_iortab[IOMEM16(0x1128)], io_rcnt_read_target2, 1); + + map_item(&mem_iortab[IOMEM8(0x1040)], sioRead8, 1); + map_item(&mem_iortab[IOMEM8(0x1800)], cdrRead0, 1); + map_item(&mem_iortab[IOMEM8(0x1801)], cdrRead1, 1); + 
map_item(&mem_iortab[IOMEM8(0x1802)], cdrRead2, 1); + map_item(&mem_iortab[IOMEM8(0x1803)], cdrRead3, 1); + + // write(u32 data) + map_item(&mem_iowtab[IOMEM32(0x1040)], io_write_sio32, 1); + map_item(&mem_iowtab[IOMEM32(0x1070)], io_write_ireg32, 1); + map_item(&mem_iowtab[IOMEM32(0x1074)], io_write_imask32, 1); + map_item(&mem_iowtab[IOMEM32(0x1088)], io_write_chcr0, 1); + map_item(&mem_iowtab[IOMEM32(0x1098)], io_write_chcr1, 1); + map_item(&mem_iowtab[IOMEM32(0x10a8)], io_write_chcr2, 1); + map_item(&mem_iowtab[IOMEM32(0x10b8)], io_write_chcr3, 1); + map_item(&mem_iowtab[IOMEM32(0x10c8)], io_write_chcr4, 1); + map_item(&mem_iowtab[IOMEM32(0x10e8)], io_write_chcr6, 1); + map_item(&mem_iowtab[IOMEM32(0x10f4)], io_write_dma_icr32, 1); + map_item(&mem_iowtab[IOMEM32(0x1100)], io_rcnt_write_count0, 1); + map_item(&mem_iowtab[IOMEM32(0x1104)], io_rcnt_write_mode0, 1); + map_item(&mem_iowtab[IOMEM32(0x1108)], io_rcnt_write_target0, 1); + map_item(&mem_iowtab[IOMEM32(0x1110)], io_rcnt_write_count1, 1); + map_item(&mem_iowtab[IOMEM32(0x1114)], io_rcnt_write_mode1, 1); + map_item(&mem_iowtab[IOMEM32(0x1118)], io_rcnt_write_target1, 1); + map_item(&mem_iowtab[IOMEM32(0x1120)], io_rcnt_write_count2, 1); + map_item(&mem_iowtab[IOMEM32(0x1124)], io_rcnt_write_mode2, 1); + map_item(&mem_iowtab[IOMEM32(0x1128)], io_rcnt_write_target2, 1); +// map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); + map_item(&mem_iowtab[IOMEM32(0x1814)], io_gpu_write_status, 1); + map_item(&mem_iowtab[IOMEM32(0x1820)], mdecWrite0, 1); + map_item(&mem_iowtab[IOMEM32(0x1824)], mdecWrite1, 1); + + map_item(&mem_iowtab[IOMEM16(0x1040)], io_write_sio16, 1); + map_item(&mem_iowtab[IOMEM16(0x1044)], sioWriteStat16, 1); + map_item(&mem_iowtab[IOMEM16(0x1048)], sioWriteMode16, 1); + map_item(&mem_iowtab[IOMEM16(0x104a)], sioWriteCtrl16, 1); + map_item(&mem_iowtab[IOMEM16(0x104e)], sioWriteBaud16, 1); + map_item(&mem_iowtab[IOMEM16(0x1070)], io_write_ireg16, 1); + 
map_item(&mem_iowtab[IOMEM16(0x1074)], io_write_imask16, 1); + map_item(&mem_iowtab[IOMEM16(0x1100)], io_rcnt_write_count0, 1); + map_item(&mem_iowtab[IOMEM16(0x1104)], io_rcnt_write_mode0, 1); + map_item(&mem_iowtab[IOMEM16(0x1108)], io_rcnt_write_target0, 1); + map_item(&mem_iowtab[IOMEM16(0x1110)], io_rcnt_write_count1, 1); + map_item(&mem_iowtab[IOMEM16(0x1114)], io_rcnt_write_mode1, 1); + map_item(&mem_iowtab[IOMEM16(0x1118)], io_rcnt_write_target1, 1); + map_item(&mem_iowtab[IOMEM16(0x1120)], io_rcnt_write_count2, 1); + map_item(&mem_iowtab[IOMEM16(0x1124)], io_rcnt_write_mode2, 1); + map_item(&mem_iowtab[IOMEM16(0x1128)], io_rcnt_write_target2, 1); + + map_item(&mem_iowtab[IOMEM8(0x1040)], sioWrite8, 1); + map_item(&mem_iowtab[IOMEM8(0x1800)], cdrWrite0, 1); + map_item(&mem_iowtab[IOMEM8(0x1801)], cdrWrite1, 1); + map_item(&mem_iowtab[IOMEM8(0x1802)], cdrWrite2, 1); + map_item(&mem_iowtab[IOMEM8(0x1803)], cdrWrite3, 1); + + for (i = 0x1c00; i < 0x1e00; i += 2) { + map_item(&mem_iowtab[IOMEM16(i)], io_spu_write16, 1); + map_item(&mem_iowtab[IOMEM32(i)], io_spu_write32, 1); + } + + // misc + map_item(&mem_writetab[0xfffe0130 >> 12], mem_ffwtab, 1); + for (i = 0; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) + map_item(&mem_ffwtab[i], write_biu, 1); + + mem_rtab = mem_readtab; + mem_wtab = mem_writetab; + + new_dyna_pcsx_mem_load_state(); +} + +void new_dyna_pcsx_mem_reset(void) +{ + int i; + + // plugins might change so update the pointers + map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); + + for (i = 0x1c00; i < 0x1e00; i += 2) + map_item(&mem_iortab[IOMEM16(i)], SPU_readRegister, 1); + + map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); +} + +void new_dyna_pcsx_mem_shutdown(void) +{ + psxUnmap(mem_readtab, 0x200000 * 4, MAP_TAG_LUTS); + mem_writetab = mem_readtab = NULL; +} diff --git a/libpcsxcore/new_dynarec/backends/psx/pcsxmem.h b/libpcsxcore/new_dynarec/backends/psx/pcsxmem.h new file mode 100644 index 0000000..72892a8 --- /dev/null +++ 
b/libpcsxcore/new_dynarec/backends/psx/pcsxmem.h @@ -0,0 +1,9 @@ + +extern u8 zero_mem[0x1000]; + +void new_dyna_pcsx_mem_init(void); +void new_dyna_pcsx_mem_reset(void); +void new_dyna_pcsx_mem_load_state(void); +void new_dyna_pcsx_mem_shutdown(void); + +int pcsxmem_is_handler_dynamic(unsigned int addr); diff --git a/libpcsxcore/new_dynarec/backends/psx/pcsxmem_inline.c b/libpcsxcore/new_dynarec/backends/psx/pcsxmem_inline.c new file mode 100644 index 0000000..305931a --- /dev/null +++ b/libpcsxcore/new_dynarec/backends/psx/pcsxmem_inline.c @@ -0,0 +1,66 @@ +/* + * (C) Gražvydas "notaz" Ignotas, 2011 + * + * This work is licensed under the terms of GNU GPL version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef DRC_DBG + +static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, int rt) +{ + if ((addr & 0xfffff000) == 0x1f801000) { + u_int t; + switch (addr & 0xffff) { + case 0x1120: // rcnt2 count + if (rt < 0) goto dont_care; + if (cc < 0) return 0; + emit_readword((int)&rcnts[2].mode, HOST_TEMPREG); + emit_readword((int)&rcnts[2].cycleStart, rt); + emit_testimm(HOST_TEMPREG, 0x200); + emit_readword((int)&last_count, HOST_TEMPREG); + emit_sub(HOST_TEMPREG, rt, HOST_TEMPREG); + emit_add(HOST_TEMPREG, cc, HOST_TEMPREG); + if (cc_adj) + emit_addimm(HOST_TEMPREG, cc_adj, rt); + emit_shrne_imm(rt, 3, rt); + mov_loadtype_adj(type!=LOADW_STUB?type:LOADH_STUB, rt, rt); + goto hit; + case 0x1104: + case 0x1114: + case 0x1124: // rcnt mode + if (rt < 0) return 0; + t = (addr >> 4) & 3; + emit_readword((int)&rcnts[t].mode, rt); + emit_andimm(rt, ~0x1800, HOST_TEMPREG); + emit_writeword(HOST_TEMPREG, (int)&rcnts[t].mode); + mov_loadtype_adj(type, rt, rt); + goto hit; + } + } + else { + if (rt < 0) + goto dont_care; + } + + return 0; + +hit: + assem_debug("pcsx_direct_read %08x end\n", addr); + return 1; + +dont_care: + assem_debug("pcsx_direct_read %08x dummy\n", addr); + return 1; +} + +#else + +static int 
pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, int rt) +{ + return 0; +} + +#endif + +// vim:shiftwidth=2:expandtab diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c deleted file mode 100644 index 8aebd64..0000000 --- a/libpcsxcore/new_dynarec/emu_if.c +++ /dev/null @@ -1,695 +0,0 @@ -/* - * (C) Gražvydas "notaz" Ignotas, 2010-2011 - * - * This work is licensed under the terms of GNU GPL version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include - -#include "emu_if.h" -#include "pcsxmem.h" -#include "../psxhle.h" -#include "../r3000a.h" -#include "../cdrom.h" -#include "../psxdma.h" -#include "../mdec.h" -#include "../gte_arm.h" -#include "../gte_neon.h" -#define FLAGLESS -#include "../gte.h" - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - -//#define evprintf printf -#define evprintf(...) - -char invalid_code[0x100000]; -static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); -u32 event_cycles[PSXINT_COUNT]; - -static void schedule_timeslice(void) -{ - u32 i, c = psxRegs.cycle; - u32 irqs = psxRegs.interrupt; - s32 min, dif; - - min = PSXCLK; - for (i = 0; irqs != 0; i++, irqs >>= 1) { - if (!(irqs & 1)) - continue; - dif = event_cycles[i] - c; - //evprintf(" ev %d\n", dif); - if (0 < dif && dif < min) - min = dif; - } - next_interupt = c + min; -} - -typedef void (irq_func)(); - -static irq_func * const irq_funcs[] = { - [PSXINT_SIO] = sioInterrupt, - [PSXINT_CDR] = cdrInterrupt, - [PSXINT_CDREAD] = cdrReadInterrupt, - [PSXINT_GPUDMA] = gpuInterrupt, - [PSXINT_MDECOUTDMA] = mdec1Interrupt, - [PSXINT_SPUDMA] = spuInterrupt, - [PSXINT_MDECINDMA] = mdec0Interrupt, - [PSXINT_GPUOTCDMA] = gpuotcInterrupt, - [PSXINT_CDRDMA] = cdrDmaInterrupt, - [PSXINT_CDRLID] = cdrLidSeekInterrupt, - [PSXINT_CDRPLAY] = cdrPlayInterrupt, - [PSXINT_SPU_UPDATE] = spuUpdate, - [PSXINT_RCNT] = psxRcntUpdate, -}; - -/* local dupe of psxBranchTest, using event_cycles */ -static void irq_test(void) -{ - 
u32 irqs = psxRegs.interrupt; - u32 cycle = psxRegs.cycle; - u32 irq, irq_bits; - - // irq_funcs() may queue more irqs - psxRegs.interrupt = 0; - - for (irq = 0, irq_bits = irqs; irq_bits != 0; irq++, irq_bits >>= 1) { - if (!(irq_bits & 1)) - continue; - if ((s32)(cycle - event_cycles[irq]) >= 0) { - irqs &= ~(1 << irq); - irq_funcs[irq](); - } - } - psxRegs.interrupt |= irqs; - - if ((psxHu32(0x1070) & psxHu32(0x1074)) && (Status & 0x401) == 0x401) { - psxException(0x400, 0); - pending_exception = 1; - } -} - -void gen_interupt() -{ - evprintf(" +ge %08x, %u->%u\n", psxRegs.pc, psxRegs.cycle, next_interupt); - - irq_test(); - //psxBranchTest(); - //pending_exception = 1; - - schedule_timeslice(); - - evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle, - next_interupt, next_interupt - psxRegs.cycle); -} - -// from interpreter -extern void MTC0(int reg, u32 val); - -void pcsx_mtc0(u32 reg, u32 val) -{ - evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); - gen_interupt(); - if (Cause & Status & 0x0300) // possible sw irq - pending_exception = 1; -} - -void pcsx_mtc0_ds(u32 reg, u32 val) -{ - evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); - MTC0(reg, val); -} - -void new_dyna_before_save(void) -{ - psxRegs.interrupt &= ~(1 << PSXINT_RCNT); // old savestate compat - - // psxRegs.intCycle is always maintained, no need to convert -} - -void new_dyna_after_save(void) -{ - psxRegs.interrupt |= 1 << PSXINT_RCNT; -} - -static void new_dyna_restore(void) -{ - int i; - for (i = 0; i < PSXINT_COUNT; i++) - event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle; - - event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter; - psxRegs.interrupt |= 1 << PSXINT_RCNT; - psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1; - - new_dyna_pcsx_mem_load_state(); -} - -void new_dyna_freeze(void *f, int mode) -{ - const char header_save[8] = "ariblks"; - uint32_t addrs[1024 * 4]; - int32_t size = 
0; - int bytes; - char header[8]; - - if (mode != 0) { // save - size = new_dynarec_save_blocks(addrs, sizeof(addrs)); - if (size == 0) - return; - - SaveFuncs.write(f, header_save, sizeof(header_save)); - SaveFuncs.write(f, &size, sizeof(size)); - SaveFuncs.write(f, addrs, size); - } - else { - new_dyna_restore(); - - bytes = SaveFuncs.read(f, header, sizeof(header)); - if (bytes != sizeof(header) || strcmp(header, header_save)) { - if (bytes > 0) - SaveFuncs.seek(f, -bytes, SEEK_CUR); - return; - } - SaveFuncs.read(f, &size, sizeof(size)); - if (size <= 0) - return; - if (size > sizeof(addrs)) { - bytes = size - sizeof(addrs); - SaveFuncs.seek(f, bytes, SEEK_CUR); - size = sizeof(addrs); - } - bytes = SaveFuncs.read(f, addrs, size); - if (bytes != size) - return; - - new_dynarec_load_blocks(addrs, size); - } - - //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded"); -} - -/* GTE stuff */ -void *gte_handlers[64]; - -void *gte_handlers_nf[64] = { - NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00 - NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08 - gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10 - NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18 - gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 - gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28 - gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 - NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38 -}; - -const char *gte_regnames[64] = { - NULL , "RTPS" , NULL , NULL , NULL , NULL , "NCLIP", NULL , // 00 - NULL , NULL , NULL , NULL , "OP" , NULL , NULL , NULL , // 08 - "DPCS", "INTPL", "MVMVA", "NCDS", "CDP", NULL , "NCDT" , NULL , // 10 - NULL , NULL , NULL , "NCCS", "CC" , NULL , "NCS" , NULL , // 18 - "NCT" , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 - "SQR" , 
"DCPL" , "DPCT" , NULL , NULL , "AVSZ3", "AVSZ4", NULL , // 28 - "RTPT", NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 - NULL , NULL , NULL , NULL , NULL , "GPF" , "GPL" , "NCCT", // 38 -}; - -/* from gte.txt.. not sure if this is any good. */ -const char gte_cycletab[64] = { - /* 1 2 3 4 5 6 7 8 9 a b c d e f */ - 0, 15, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 6, 0, 0, 0, - 8, 8, 8, 19, 13, 0, 44, 0, 0, 0, 0, 17, 11, 0, 14, 0, - 30, 0, 0, 0, 0, 0, 0, 0, 5, 8, 17, 0, 0, 5, 6, 0, - 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39, -}; - -#define GCBIT(x) \ - (1ll << (32+x)) -#define GDBIT(x) \ - (1ll << (x)) -#define GCBITS3(b0,b1,b2) \ - (GCBIT(b0) | GCBIT(b1) | GCBIT(b2)) -#define GDBITS2(b0,b1) \ - (GDBIT(b0) | GDBIT(b1)) -#define GDBITS3(b0,b1,b2) \ - (GDBITS2(b0,b1) | GDBIT(b2)) -#define GDBITS4(b0,b1,b2,b3) \ - (GDBITS3(b0,b1,b2) | GDBIT(b3)) -#define GDBITS5(b0,b1,b2,b3,b4) \ - (GDBITS4(b0,b1,b2,b3) | GDBIT(b4)) -#define GDBITS6(b0,b1,b2,b3,b4,b5) \ - (GDBITS5(b0,b1,b2,b3,b4) | GDBIT(b5)) -#define GDBITS7(b0,b1,b2,b3,b4,b5,b6) \ - (GDBITS6(b0,b1,b2,b3,b4,b5) | GDBIT(b6)) -#define GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) \ - (GDBITS7(b0,b1,b2,b3,b4,b5,b6) | GDBIT(b7)) -#define GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) \ - (GDBITS8(b0,b1,b2,b3,b4,b5,b6,b7) | GDBIT(b8)) -#define GDBITS10(b0,b1,b2,b3,b4,b5,b6,b7,b8,b9) \ - (GDBITS9(b0,b1,b2,b3,b4,b5,b6,b7,b8) | GDBIT(b9)) - -const uint64_t gte_reg_reads[64] = { - [GTE_RTPS] = 0x1f0000ff00000000ll | GDBITS7(0,1,13,14,17,18,19), - [GTE_NCLIP] = GDBITS3(12,13,14), - [GTE_OP] = GCBITS3(0,2,4) | GDBITS3(9,10,11), - [GTE_DPCS] = GCBITS3(21,22,23) | GDBITS4(6,8,21,22), - [GTE_INTPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), - [GTE_MVMVA] = 0x00ffffff00000000ll | GDBITS9(0,1,2,3,4,5,9,10,11), // XXX: maybe decode further? 
- [GTE_NCDS] = 0x00ffff0000000000ll | GDBITS6(0,1,6,8,21,22), - [GTE_CDP] = 0x00ffe00000000000ll | GDBITS7(6,8,9,10,11,21,22), - [GTE_NCDT] = 0x00ffff0000000000ll | GDBITS8(0,1,2,3,4,5,6,8), - [GTE_NCCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), - [GTE_CC] = 0x001fe00000000000ll | GDBITS6(6,9,10,11,21,22), - [GTE_NCS] = 0x001fff0000000000ll | GDBITS5(0,1,6,21,22), - [GTE_NCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), - [GTE_SQR] = GDBITS3(9,10,11), - [GTE_DCPL] = GCBITS3(21,22,23) | GDBITS7(6,8,9,10,11,21,22), - [GTE_DPCT] = GCBITS3(21,22,23) | GDBITS4(8,20,21,22), - [GTE_AVSZ3] = GCBIT(29) | GDBITS3(17,18,19), - [GTE_AVSZ4] = GCBIT(30) | GDBITS4(16,17,18,19), - [GTE_RTPT] = 0x1f0000ff00000000ll | GDBITS7(0,1,2,3,4,5,19), - [GTE_GPF] = GDBITS7(6,8,9,10,11,21,22), - [GTE_GPL] = GDBITS10(6,8,9,10,11,21,22,25,26,27), - [GTE_NCCT] = 0x001fff0000000000ll | GDBITS7(0,1,2,3,4,5,6), -}; - -// note: this excludes gteFLAG that is always written to -const uint64_t gte_reg_writes[64] = { - [GTE_RTPS] = 0x0f0f7f00ll, - [GTE_NCLIP] = GDBIT(24), - [GTE_OP] = GDBITS6(9,10,11,25,26,27), - [GTE_DPCS] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_INTPL] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_MVMVA] = GDBITS6(9,10,11,25,26,27), - [GTE_NCDS] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_CDP] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCDT] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCCS] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_CC] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCS] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCT] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_SQR] = GDBITS6(9,10,11,25,26,27), - [GTE_DCPL] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_DPCT] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_AVSZ3] = GDBITS2(7,24), - [GTE_AVSZ4] = GDBITS2(7,24), - [GTE_RTPT] = 0x0f0f7f00ll, - [GTE_GPF] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_GPL] = GDBITS9(9,10,11,20,21,22,25,26,27), - [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27), -}; - -static int 
ari64_init() -{ - extern void (*psxCP2[64])(); - extern void psxNULL(); - extern unsigned char *out; - size_t i; - - new_dynarec_init(); - new_dyna_pcsx_mem_init(); - - for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) - if (psxCP2[i] != psxNULL) - gte_handlers[i] = psxCP2[i]; - -#if defined(__arm__) && !defined(DRC_DBG) - gte_handlers[0x06] = gteNCLIP_arm; -#ifdef HAVE_ARMV5 - gte_handlers_nf[0x01] = gteRTPS_nf_arm; - gte_handlers_nf[0x30] = gteRTPT_nf_arm; -#endif -#ifdef __ARM_NEON__ - // compiler's _nf version is still a lot slower than neon - // _nf_arm RTPS is roughly the same, RTPT slower - gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; - gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; -#endif -#endif -#ifdef DRC_DBG - memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); -#endif - psxH_ptr = psxH; - zeromem_ptr = zero_mem; - scratch_buf_ptr = scratch_buf; - - SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n"); - SysPrintf("%08x/%08x/%08x/%08x/%08x\n", - psxM, psxH, psxR, mem_rtab, out); - - return 0; -} - -static void ari64_reset() -{ - printf("ari64_reset\n"); - new_dyna_pcsx_mem_reset(); - invalidate_all_pages(); - new_dyna_restore(); - pending_exception = 1; -} - -// execute until predefined leave points -// (HLE softcall exit and BIOS fastboot end) -static void ari64_execute_until() -{ - schedule_timeslice(); - - evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc, - psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); - - new_dyna_start(); - - evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc, - psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle); -} - -static void ari64_execute() -{ - while (!stop) { - ari64_execute_until(); - evprintf("drc left @%08x\n", psxRegs.pc); - } -} - -static void ari64_clear(u32 addr, u32 size) -{ - u32 start, end, main_ram; - - size *= 4; /* PCSX uses DMA units (words) */ - - evprintf("ari64_clear %08x %04x\n", addr, size); - - /* check for RAM mirrors */ - main_ram 
= (addr & 0xffe00000) == 0x80000000; - - start = addr >> 12; - end = (addr + size) >> 12; - - for (; start <= end; start++) - if (!main_ram || !invalid_code[start]) - invalidate_block(start); -} - -static void ari64_shutdown() -{ - new_dynarec_cleanup(); - new_dyna_pcsx_mem_shutdown(); -} - -extern void intExecute(); -extern void intExecuteT(); -extern void intExecuteBlock(); -extern void intExecuteBlockT(); -#ifndef DRC_DBG -#define intExecuteT intExecute -#define intExecuteBlockT intExecuteBlock -#endif - -R3000Acpu psxRec = { - ari64_init, - ari64_reset, -#ifndef DRC_DISABLE - ari64_execute, - ari64_execute_until, -#else - intExecuteT, - intExecuteBlockT, -#endif - ari64_clear, - ari64_shutdown -}; - -// TODO: rm -#ifndef DRC_DBG -void do_insn_trace() {} -void do_insn_cmp() {} -#endif - -#ifdef DRC_DISABLE -unsigned int address; -int pending_exception, stop; -u32 next_interupt; -int new_dynarec_did_compile; -int cycle_multiplier; -int new_dynarec_hacks; -void *psxH_ptr; -void *zeromem_ptr; -u8 zero_mem[0x1000]; -unsigned char *out; -void *mem_rtab; -void *scratch_buf_ptr; -void new_dynarec_init() { (void)ari64_execute; } -void new_dyna_start() {} -void new_dynarec_cleanup() {} -void new_dynarec_clear_full() {} -void invalidate_all_pages() {} -void invalidate_block(unsigned int block) {} -void new_dyna_pcsx_mem_init(void) {} -void new_dyna_pcsx_mem_reset(void) {} -void new_dyna_pcsx_mem_load_state(void) {} -void new_dyna_pcsx_mem_shutdown(void) {} -int new_dynarec_save_blocks(void *save, int size) { return 0; } -void new_dynarec_load_blocks(const void *save, int size) {} -#endif - -#ifdef DRC_DBG - -#include -static FILE *f; -extern u32 last_io_addr; - -static void dump_mem(const char *fname, void *mem, size_t size) -{ - FILE *f1 = fopen(fname, "wb"); - if (f1 == NULL) - f1 = fopen(strrchr(fname, '/') + 1, "wb"); - fwrite(mem, 1, size, f1); - fclose(f1); -} - -static u32 memcheck_read(u32 a) -{ - if ((a >> 16) == 0x1f80) - // scratchpad/IO - return *(u32 *)(psxH 
+ (a & 0xfffc)); - - if ((a >> 16) == 0x1f00) - // parallel - return *(u32 *)(psxP + (a & 0xfffc)); - -// if ((a & ~0xe0600000) < 0x200000) - // RAM - return *(u32 *)(psxM + (a & 0x1ffffc)); -} - -void do_insn_trace(void) -{ - static psxRegisters oldregs; - static u32 old_io_addr = (u32)-1; - static u32 old_io_data = 0xbad0c0de; - static u32 event_cycles_o[PSXINT_COUNT]; - u32 *allregs_p = (void *)&psxRegs; - u32 *allregs_o = (void *)&oldregs; - u32 io_data; - int i; - u8 byte; - - //last_io_addr = 0x5e2c8; - if (f == NULL) - f = fopen("tracelog", "wb"); - - // log reg changes - oldregs.code = psxRegs.code; // don't care - for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { - if (allregs_p[i] != allregs_o[i]) { - fwrite(&i, 1, 1, f); - fwrite(&allregs_p[i], 1, 4, f); - allregs_o[i] = allregs_p[i]; - } - } - // log event changes - for (i = 0; i < PSXINT_COUNT; i++) { - if (event_cycles[i] != event_cycles_o[i]) { - byte = 0xfc; - fwrite(&byte, 1, 1, f); - fwrite(&i, 1, 1, f); - fwrite(&event_cycles[i], 1, 4, f); - event_cycles_o[i] = event_cycles[i]; - } - } - // log last io - if (old_io_addr != last_io_addr) { - byte = 0xfd; - fwrite(&byte, 1, 1, f); - fwrite(&last_io_addr, 1, 4, f); - old_io_addr = last_io_addr; - } - io_data = memcheck_read(last_io_addr); - if (old_io_data != io_data) { - byte = 0xfe; - fwrite(&byte, 1, 1, f); - fwrite(&io_data, 1, 4, f); - old_io_data = io_data; - } - byte = 0xff; - fwrite(&byte, 1, 1, f); - -#if 0 - if (psxRegs.cycle == 190230) { - dump_mem("/mnt/ntz/dev/pnd/tmp/psxram_i.dump", psxM, 0x200000); - dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs_i.dump", psxH, 0x10000); - printf("dumped\n"); - exit(1); - } -#endif -} - -static const char *regnames[offsetof(psxRegisters, intCycle) / 4] = { - "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", - "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", - "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", - "lo", "hi", - "C0_0", 
"C0_1", "C0_2", "C0_3", "C0_4", "C0_5", "C0_6", "C0_7", - "C0_8", "C0_9", "C0_10", "C0_11", "C0_12", "C0_13", "C0_14", "C0_15", - "C0_16", "C0_17", "C0_18", "C0_19", "C0_20", "C0_21", "C0_22", "C0_23", - "C0_24", "C0_25", "C0_26", "C0_27", "C0_28", "C0_29", "C0_30", "C0_31", - - "C2D0", "C2D1", "C2D2", "C2D3", "C2D4", "C2D5", "C2D6", "C2D7", - "C2D8", "C2D9", "C2D10", "C2D11", "C2D12", "C2D13", "C2D14", "C2D15", - "C2D16", "C2D17", "C2D18", "C2D19", "C2D20", "C2D21", "C2D22", "C2D23", - "C2D24", "C2D25", "C2D26", "C2D27", "C2D28", "C2D29", "C2D30", "C2D31", - - "C2C0", "C2C1", "C2C2", "C2C3", "C2C4", "C2C5", "C2C6", "C2C7", - "C2C8", "C2C9", "C2C10", "C2C11", "C2C12", "C2C13", "C2C14", "C2C15", - "C2C16", "C2C17", "C2C18", "C2C19", "C2C20", "C2C21", "C2C22", "C2C23", - "C2C24", "C2C25", "C2C26", "C2C27", "C2C28", "C2C29", "C2C30", "C2C31", - - "PC", "code", "cycle", "interrupt", -}; - -static struct { - int reg; - u32 val, val_expect; - u32 pc, cycle; -} miss_log[64]; -static int miss_log_i; -#define miss_log_len (sizeof(miss_log)/sizeof(miss_log[0])) -#define miss_log_mask (miss_log_len-1) - -static void miss_log_add(int reg, u32 val, u32 val_expect, u32 pc, u32 cycle) -{ - miss_log[miss_log_i].reg = reg; - miss_log[miss_log_i].val = val; - miss_log[miss_log_i].val_expect = val_expect; - miss_log[miss_log_i].pc = pc; - miss_log[miss_log_i].cycle = cycle; - miss_log_i = (miss_log_i + 1) & miss_log_mask; -} - -void breakme() {} - -void do_insn_cmp(void) -{ - static psxRegisters rregs; - static u32 mem_addr, mem_val; - u32 *allregs_p = (void *)&psxRegs; - u32 *allregs_e = (void *)&rregs; - static u32 ppc, failcount; - int i, ret, bad = 0, which_event = -1; - u32 ev_cycles = 0; - u8 code; - - if (f == NULL) - f = fopen("tracelog", "rb"); - - while (1) { - if ((ret = fread(&code, 1, 1, f)) <= 0) - break; - if (ret <= 0) - break; - if (code == 0xff) - break; - switch (code) { - case 0xfc: - which_event = 0; - fread(&which_event, 1, 1, f); - fread(&ev_cycles, 1, 4, f); - 
continue; - case 0xfd: - fread(&mem_addr, 1, 4, f); - continue; - case 0xfe: - fread(&mem_val, 1, 4, f); - continue; - } - fread(&allregs_e[code], 1, 4, f); - } - - if (ret <= 0) { - printf("EOF?\n"); - goto end; - } - - psxRegs.code = rregs.code; // don't care - psxRegs.cycle = rregs.cycle; - psxRegs.CP0.r[9] = rregs.CP0.r[9]; // Count - - //if (psxRegs.cycle == 166172) breakme(); - - if (memcmp(&psxRegs, &rregs, offsetof(psxRegisters, intCycle)) == 0 && - mem_val == memcheck_read(mem_addr) - ) { - failcount = 0; - goto ok; - } - - for (i = 0; i < offsetof(psxRegisters, intCycle) / 4; i++) { - if (allregs_p[i] != allregs_e[i]) { - miss_log_add(i, allregs_p[i], allregs_e[i], psxRegs.pc, psxRegs.cycle); - bad++; - } - } - - if (mem_val != memcheck_read(mem_addr)) { - printf("bad mem @%08x: %08x %08x\n", mem_addr, memcheck_read(mem_addr), mem_val); - goto end; - } - - if (which_event >= 0 && event_cycles[which_event] != ev_cycles) { - printf("bad ev_cycles #%d: %08x %08x\n", which_event, event_cycles[which_event], ev_cycles); - goto end; - } - - if (psxRegs.pc == rregs.pc && bad < 6 && failcount < 32) { - static int last_mcycle; - if (last_mcycle != psxRegs.cycle >> 20) { - printf("%u\n", psxRegs.cycle); - last_mcycle = psxRegs.cycle >> 20; - } - failcount++; - goto ok; - } - -end: - for (i = 0; i < miss_log_len; i++, miss_log_i = (miss_log_i + 1) & miss_log_mask) - printf("bad %5s: %08x %08x, pc=%08x, cycle %u\n", - regnames[miss_log[miss_log_i].reg], miss_log[miss_log_i].val, - miss_log[miss_log_i].val_expect, miss_log[miss_log_i].pc, miss_log[miss_log_i].cycle); - printf("-- %d\n", bad); - for (i = 0; i < 8; i++) - printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i], - i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]); - printf("PC: %08x/%08x, cycle %u\n", psxRegs.pc, ppc, psxRegs.cycle); - dump_mem("/mnt/ntz/dev/pnd/tmp/psxram.dump", psxM, 0x200000); - dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000); - exit(1); -ok: - 
psxRegs.cycle = rregs.cycle + 2; // sync timing - ppc = psxRegs.pc; -} - -#endif diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h deleted file mode 100644 index 73f842b..0000000 --- a/libpcsxcore/new_dynarec/emu_if.h +++ /dev/null @@ -1,108 +0,0 @@ -#include "new_dynarec.h" -#include "../r3000a.h" - -extern char invalid_code[0x100000]; - -/* weird stuff */ -#define EAX 0 -#define ECX 1 - -/* same as psxRegs */ -extern int reg[]; - -/* same as psxRegs.GPR.n.* */ -extern int hi, lo; - -/* same as psxRegs.CP0.n.* */ -extern int reg_cop0[]; -#define Status psxRegs.CP0.n.Status -#define Cause psxRegs.CP0.n.Cause -#define EPC psxRegs.CP0.n.EPC -#define BadVAddr psxRegs.CP0.n.BadVAddr -#define Context psxRegs.CP0.n.Context -#define EntryHi psxRegs.CP0.n.EntryHi -#define Count psxRegs.cycle // psxRegs.CP0.n.Count - -/* COP2/GTE */ -enum gte_opcodes { - GTE_RTPS = 0x01, - GTE_NCLIP = 0x06, - GTE_OP = 0x0c, - GTE_DPCS = 0x10, - GTE_INTPL = 0x11, - GTE_MVMVA = 0x12, - GTE_NCDS = 0x13, - GTE_CDP = 0x14, - GTE_NCDT = 0x16, - GTE_NCCS = 0x1b, - GTE_CC = 0x1c, - GTE_NCS = 0x1e, - GTE_NCT = 0x20, - GTE_SQR = 0x28, - GTE_DCPL = 0x29, - GTE_DPCT = 0x2a, - GTE_AVSZ3 = 0x2d, - GTE_AVSZ4 = 0x2e, - GTE_RTPT = 0x30, - GTE_GPF = 0x3d, - GTE_GPL = 0x3e, - GTE_NCCT = 0x3f, -}; - -extern int reg_cop2d[], reg_cop2c[]; -extern void *gte_handlers[64]; -extern void *gte_handlers_nf[64]; -extern const char *gte_regnames[64]; -extern const char gte_cycletab[64]; -extern const uint64_t gte_reg_reads[64]; -extern const uint64_t gte_reg_writes[64]; - -/* dummy */ -extern int FCR0, FCR31; - -/* mem */ -extern void *mem_rtab; -extern void *mem_wtab; - -void jump_handler_read8(u32 addr, u32 *table, u32 cycles); -void jump_handler_read16(u32 addr, u32 *table, u32 cycles); -void jump_handler_read32(u32 addr, u32 *table, u32 cycles); -void jump_handler_write8(u32 addr, u32 data, u32 cycles, u32 *table); -void jump_handler_write16(u32 addr, u32 data, u32 cycles, u32 *table); 
-void jump_handler_write32(u32 addr, u32 data, u32 cycles, u32 *table); -void jump_handler_write_h(u32 addr, u32 data, u32 cycles, void *handler); -void jump_handle_swl(u32 addr, u32 data, u32 cycles); -void jump_handle_swr(u32 addr, u32 data, u32 cycles); -void rcnt0_read_count_m0(u32 addr, u32, u32 cycles); -void rcnt0_read_count_m1(u32 addr, u32, u32 cycles); -void rcnt1_read_count_m0(u32 addr, u32, u32 cycles); -void rcnt1_read_count_m1(u32 addr, u32, u32 cycles); -void rcnt2_read_count_m0(u32 addr, u32, u32 cycles); -void rcnt2_read_count_m1(u32 addr, u32, u32 cycles); - -extern unsigned int address; -extern void *psxH_ptr; -extern void *zeromem_ptr; -extern void *scratch_buf_ptr; - -// same as invalid_code, just a region for ram write checks (inclusive) -extern u32 inv_code_start, inv_code_end; - -/* cycles/irqs */ -extern u32 next_interupt; -extern int pending_exception; - -/* called by drc */ -void pcsx_mtc0(u32 reg, u32 val); -void pcsx_mtc0_ds(u32 reg, u32 val); - -/* misc */ -extern void (*psxHLEt[])(); - -extern void SysPrintf(const char *fmt, ...); - -#ifdef RAM_FIXED -#define rdram ((u_int)0x80000000) -#else -#define rdram ((u_int)psxM) -#endif diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S deleted file mode 100644 index 147b0df..0000000 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ /dev/null @@ -1,866 +0,0 @@ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * linkage_arm.s for PCSX * - * Copyright (C) 2009-2011 Ari64 * - * Copyright (C) 2010-2013 Gražvydas "notaz" Ignotas * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. 
* - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU General Public License * - * along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#include "arm_features.h" -#include "new_dynarec_config.h" -#include "linkage_offsets.h" - - -#ifdef __MACH__ -#define dynarec_local ESYM(dynarec_local) -#define add_link ESYM(add_link) -#define new_recompile_block ESYM(new_recompile_block) -#define get_addr ESYM(get_addr) -#define get_addr_ht ESYM(get_addr_ht) -#define clean_blocks ESYM(clean_blocks) -#define gen_interupt ESYM(gen_interupt) -#define psxException ESYM(psxException) -#define execI ESYM(execI) -#define invalidate_addr ESYM(invalidate_addr) -#endif - - .bss - .align 4 - .global dynarec_local - .type dynarec_local, %object - .size dynarec_local, LO_dynarec_local_size -dynarec_local: - .space LO_dynarec_local_size - -#define DRC_VAR_(name, vname, size_) \ - vname = dynarec_local + LO_##name; \ - .global vname; \ - .type vname, %object; \ - .size vname, size_ - -#define DRC_VAR(name, size_) \ - DRC_VAR_(name, ESYM(name), size_) - -DRC_VAR(next_interupt, 4) -DRC_VAR(cycle_count, 4) -DRC_VAR(last_count, 4) -DRC_VAR(pending_exception, 4) -DRC_VAR(stop, 4) -DRC_VAR(invc_ptr, 4) -DRC_VAR(address, 4) -DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) - -/* psxRegs */ -DRC_VAR(reg, 128) -DRC_VAR(lo, 4) -DRC_VAR(hi, 4) -DRC_VAR(reg_cop0, 128) -DRC_VAR(reg_cop2d, 128) -DRC_VAR(reg_cop2c, 128) -DRC_VAR(pcaddr, 4) -@DRC_VAR(code, 4) -@DRC_VAR(cycle, 4) -@DRC_VAR(interrupt, 4) -@DRC_VAR(intCycle, 256) - -DRC_VAR(rcnts, 7*4*4) -DRC_VAR(mem_rtab, 4) 
-DRC_VAR(mem_wtab, 4) -DRC_VAR(psxH_ptr, 4) -DRC_VAR(zeromem_ptr, 4) -DRC_VAR(inv_code_start, 4) -DRC_VAR(inv_code_end, 4) -DRC_VAR(branch_target, 4) -DRC_VAR(scratch_buf_ptr, 4) -@DRC_VAR(align0, 12) /* unused/alignment */ -DRC_VAR(mini_ht, 256) -DRC_VAR(restore_candidate, 512) - -/* unused */ -DRC_VAR(FCR0, 4) -DRC_VAR(FCR31, 4) - -#ifdef __MACH__ - .data - .align 2 -ptr_jump_in: - .word ESYM(jump_in) -ptr_jump_dirty: - .word ESYM(jump_dirty) -ptr_hash_table: - .word ESYM(hash_table) -#endif - - - .syntax unified - .text - .align 2 - -#ifndef HAVE_ARMV5 -.macro blx rd - mov lr, pc - bx \rd -.endm -#endif - -.macro load_varadr reg var -#if defined(HAVE_ARMV7) && !defined(__PIC__) - movw \reg, #:lower16:\var - movt \reg, #:upper16:\var -#elif defined(HAVE_ARMV7) && defined(__MACH__) - movw \reg, #:lower16:(\var-(1678f+8)) - movt \reg, #:upper16:(\var-(1678f+8)) -1678: - add \reg, pc -#else - ldr \reg, =\var -#endif -.endm - -.macro load_varadr_ext reg var -#if defined(HAVE_ARMV7) && defined(__MACH__) && defined(__PIC__) - movw \reg, #:lower16:(ptr_\var-(1678f+8)) - movt \reg, #:upper16:(ptr_\var-(1678f+8)) -1678: - ldr \reg, [pc, \reg] -#else - load_varadr \reg \var -#endif -.endm - -.macro mov_16 reg imm -#ifdef HAVE_ARMV7 - movw \reg, #\imm -#else - mov \reg, #(\imm & 0x00ff) - orr \reg, #(\imm & 0xff00) -#endif -.endm - -.macro mov_24 reg imm -#ifdef HAVE_ARMV7 - movw \reg, #(\imm & 0xffff) - movt \reg, #(\imm >> 16) -#else - mov \reg, #(\imm & 0x0000ff) - orr \reg, #(\imm & 0x00ff00) - orr \reg, #(\imm & 0xff0000) -#endif -.endm - -/* r0 = virtual target address */ -/* r1 = instruction to patch */ -.macro dyna_linker_main -#ifndef NO_WRITE_EXEC - load_varadr_ext r3, jump_in - /* get_page */ - lsr r2, r0, #12 - mov r6, #4096 - bic r2, r2, #0xe0000 - sub r6, r6, #1 - cmp r2, #0x1000 - ldr r7, [r1] - biclt r2, #0x0e00 - and r6, r6, r2 - cmp r2, #2048 - add r12, r7, #2 - orrcs r2, r6, #2048 - ldr r5, [r3, r2, lsl #2] - lsl r12, r12, #8 - add r6, r1, r12, asr #6 - 
mov r8, #0 - /* jump_in lookup */ -1: - movs r4, r5 - beq 2f - ldr r3, [r5] /* ll_entry .vaddr */ - ldrd r4, r5, [r4, #8] /* ll_entry .next, .addr */ - teq r3, r0 - bne 1b - teq r4, r6 - moveq pc, r4 /* Stale i-cache */ - mov r8, r4 - b 1b /* jump_in may have dupes, continue search */ -2: - tst r8, r8 - beq 3f /* r0 not in jump_in */ - - mov r5, r1 - mov r1, r6 - bl add_link - sub r2, r8, r5 - and r1, r7, #0xff000000 - lsl r2, r2, #6 - sub r1, r1, #2 - add r1, r1, r2, lsr #8 - str r1, [r5] - mov pc, r8 -3: - /* hash_table lookup */ - cmp r2, #2048 - load_varadr_ext r3, jump_dirty - eor r4, r0, r0, lsl #16 - lslcc r2, r0, #9 - load_varadr_ext r6, hash_table - lsr r4, r4, #12 - lsrcc r2, r2, #21 - bic r4, r4, #15 - ldr r5, [r3, r2, lsl #2] - ldr r7, [r6, r4]! - teq r7, r0 - ldreq pc, [r6, #4] - ldr r7, [r6, #8] - teq r7, r0 - ldreq pc, [r6, #12] - /* jump_dirty lookup */ -6: - movs r4, r5 - beq 8f - ldr r3, [r5] - ldr r5, [r4, #12] - teq r3, r0 - bne 6b -7: - ldr r1, [r4, #8] - /* hash_table insert */ - ldr r2, [r6] - ldr r3, [r6, #4] - str r0, [r6] - str r1, [r6, #4] - str r2, [r6, #8] - str r3, [r6, #12] - mov pc, r1 -8: -#else - /* XXX: should be able to do better than this... 
*/ - bl get_addr_ht - mov pc, r0 -#endif -.endm - - -FUNCTION(dyna_linker): - /* r0 = virtual target address */ - /* r1 = instruction to patch */ - dyna_linker_main - - mov r4, r0 - mov r5, r1 - bl new_recompile_block - tst r0, r0 - mov r0, r4 - mov r1, r5 - beq dyna_linker - /* pagefault */ - mov r1, r0 - mov r2, #8 - .size dyna_linker, .-dyna_linker - -FUNCTION(exec_pagefault): - /* r0 = instruction pointer */ - /* r1 = fault address */ - /* r2 = cause */ - ldr r3, [fp, #LO_reg_cop0+48] /* Status */ - mvn r6, #0xF000000F - ldr r4, [fp, #LO_reg_cop0+16] /* Context */ - bic r6, r6, #0x0F800000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r3, r3, #2 - str r1, [fp, #LO_reg_cop0+32] /* BadVAddr */ - bic r4, r4, r6 - str r3, [fp, #LO_reg_cop0+48] /* Status */ - and r5, r6, r1, lsr #9 - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - and r1, r1, r6, lsl #9 - str r1, [fp, #LO_reg_cop0+40] /* EntryHi */ - orr r4, r4, r5 - str r4, [fp, #LO_reg_cop0+16] /* Context */ - mov r0, #0x80000000 - bl get_addr_ht - mov pc, r0 - .size exec_pagefault, .-exec_pagefault - -/* Special dynamic linker for the case where a page fault - may occur in a branch delay slot */ -FUNCTION(dyna_linker_ds): - /* r0 = virtual target address */ - /* r1 = instruction to patch */ - dyna_linker_main - - mov r4, r0 - bic r0, r0, #7 - mov r5, r1 - orr r0, r0, #1 - bl new_recompile_block - tst r0, r0 - mov r0, r4 - mov r1, r5 - beq dyna_linker_ds - /* pagefault */ - bic r1, r0, #7 - mov r2, #0x80000008 /* High bit set indicates pagefault in delay slot */ - sub r0, r1, #4 - b exec_pagefault - .size dyna_linker_ds, .-dyna_linker_ds - - .align 2 - -FUNCTION(jump_vaddr_r0): - eor r2, r0, r0, lsl #16 - b jump_vaddr - .size jump_vaddr_r0, .-jump_vaddr_r0 -FUNCTION(jump_vaddr_r1): - eor r2, r1, r1, lsl #16 - mov r0, r1 - b jump_vaddr - .size jump_vaddr_r1, .-jump_vaddr_r1 -FUNCTION(jump_vaddr_r2): - mov r0, r2 - eor r2, r2, r2, lsl #16 - b jump_vaddr - .size jump_vaddr_r2, .-jump_vaddr_r2 -FUNCTION(jump_vaddr_r3): 
- eor r2, r3, r3, lsl #16 - mov r0, r3 - b jump_vaddr - .size jump_vaddr_r3, .-jump_vaddr_r3 -FUNCTION(jump_vaddr_r4): - eor r2, r4, r4, lsl #16 - mov r0, r4 - b jump_vaddr - .size jump_vaddr_r4, .-jump_vaddr_r4 -FUNCTION(jump_vaddr_r5): - eor r2, r5, r5, lsl #16 - mov r0, r5 - b jump_vaddr - .size jump_vaddr_r5, .-jump_vaddr_r5 -FUNCTION(jump_vaddr_r6): - eor r2, r6, r6, lsl #16 - mov r0, r6 - b jump_vaddr - .size jump_vaddr_r6, .-jump_vaddr_r6 -FUNCTION(jump_vaddr_r8): - eor r2, r8, r8, lsl #16 - mov r0, r8 - b jump_vaddr - .size jump_vaddr_r8, .-jump_vaddr_r8 -FUNCTION(jump_vaddr_r9): - eor r2, r9, r9, lsl #16 - mov r0, r9 - b jump_vaddr - .size jump_vaddr_r9, .-jump_vaddr_r9 -FUNCTION(jump_vaddr_r10): - eor r2, r10, r10, lsl #16 - mov r0, r10 - b jump_vaddr - .size jump_vaddr_r10, .-jump_vaddr_r10 -FUNCTION(jump_vaddr_r12): - eor r2, r12, r12, lsl #16 - mov r0, r12 - b jump_vaddr - .size jump_vaddr_r12, .-jump_vaddr_r12 -FUNCTION(jump_vaddr_r7): - eor r2, r7, r7, lsl #16 - add r0, r7, #0 - .size jump_vaddr_r7, .-jump_vaddr_r7 -FUNCTION(jump_vaddr): - load_varadr_ext r1, hash_table - mvn r3, #15 - and r2, r3, r2, lsr #12 - ldr r2, [r1, r2]! 
- teq r2, r0 - ldreq pc, [r1, #4] - ldr r2, [r1, #8] - teq r2, r0 - ldreq pc, [r1, #12] - str r10, [fp, #LO_cycle_count] - bl get_addr - ldr r10, [fp, #LO_cycle_count] - mov pc, r0 - .size jump_vaddr, .-jump_vaddr - - .align 2 - -FUNCTION(verify_code_ds): - str r8, [fp, #LO_branch_target] -FUNCTION(verify_code_vm): -FUNCTION(verify_code): - /* r1 = source */ - /* r2 = target */ - /* r3 = length */ - tst r3, #4 - mov r4, #0 - add r3, r1, r3 - mov r5, #0 - ldrne r4, [r1], #4 - mov r12, #0 - ldrne r5, [r2], #4 - teq r1, r3 - beq .D3 -.D2: - ldr r7, [r1], #4 - eor r9, r4, r5 - ldr r8, [r2], #4 - orrs r9, r9, r12 - bne .D4 - ldr r4, [r1], #4 - eor r12, r7, r8 - ldr r5, [r2], #4 - cmp r1, r3 - bcc .D2 - teq r7, r8 -.D3: - teqeq r4, r5 -.D4: - ldr r8, [fp, #LO_branch_target] - moveq pc, lr -.D5: - bl get_addr - mov pc, r0 - .size verify_code, .-verify_code - .size verify_code_vm, .-verify_code_vm - - .align 2 -FUNCTION(cc_interrupt): - ldr r0, [fp, #LO_last_count] - mov r1, #0 - mov r2, #0x1fc - add r10, r0, r10 - str r1, [fp, #LO_pending_exception] - and r2, r2, r10, lsr #17 - add r3, fp, #LO_restore_candidate - str r10, [fp, #LO_cycle] /* PCSX cycles */ -@@ str r10, [fp, #LO_reg_cop0+36] /* Count */ - ldr r4, [r2, r3] - mov r10, lr - tst r4, r4 - bne .E4 -.E1: - bl gen_interupt - mov lr, r10 - ldr r10, [fp, #LO_cycle] - ldr r0, [fp, #LO_next_interupt] - ldr r1, [fp, #LO_pending_exception] - ldr r2, [fp, #LO_stop] - str r0, [fp, #LO_last_count] - sub r10, r10, r0 - tst r2, r2 - ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} - tst r1, r1 - moveq pc, lr -.E2: - ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht - mov pc, r0 -.E4: - /* Move 'dirty' blocks to the 'clean' list */ - lsl r5, r2, #3 - str r1, [r2, r3] -.E5: - lsrs r4, r4, #1 - mov r0, r5 - add r5, r5, #1 - blcs clean_blocks - tst r5, #31 - bne .E5 - b .E1 - .size cc_interrupt, .-cc_interrupt - - .align 2 -FUNCTION(do_interrupt): - ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht - add r10, r10, #2 - mov pc, r0 - .size 
do_interrupt, .-do_interrupt - - .align 2 -FUNCTION(fp_exception): - mov r2, #0x10000000 -.E7: - ldr r1, [fp, #LO_reg_cop0+48] /* Status */ - mov r3, #0x80000000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r1, #2 - add r2, r2, #0x2c - str r1, [fp, #LO_reg_cop0+48] /* Status */ - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - add r0, r3, #0x80 - bl get_addr_ht - mov pc, r0 - .size fp_exception, .-fp_exception - .align 2 -FUNCTION(fp_exception_ds): - mov r2, #0x90000000 /* Set high bit if delay slot */ - b .E7 - .size fp_exception_ds, .-fp_exception_ds - - .align 2 -FUNCTION(jump_syscall): - ldr r1, [fp, #LO_reg_cop0+48] /* Status */ - mov r3, #0x80000000 - str r0, [fp, #LO_reg_cop0+56] /* EPC */ - orr r1, #2 - mov r2, #0x20 - str r1, [fp, #LO_reg_cop0+48] /* Status */ - str r2, [fp, #LO_reg_cop0+52] /* Cause */ - add r0, r3, #0x80 - bl get_addr_ht - mov pc, r0 - .size jump_syscall, .-jump_syscall - .align 2 - - .align 2 -FUNCTION(jump_syscall_hle): - str r0, [fp, #LO_pcaddr] /* PC must be set to EPC for psxException */ - ldr r2, [fp, #LO_last_count] - mov r1, #0 /* in delay slot */ - add r2, r2, r10 - mov r0, #0x20 /* cause */ - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - bl psxException - - /* note: psxException might do recursive recompiler call from it's HLE code, - * so be ready for this */ -pcsx_return: - ldr r1, [fp, #LO_next_interupt] - ldr r10, [fp, #LO_cycle] - ldr r0, [fp, #LO_pcaddr] - sub r10, r10, r1 - str r1, [fp, #LO_last_count] - bl get_addr_ht - mov pc, r0 - .size jump_syscall_hle, .-jump_syscall_hle - - .align 2 -FUNCTION(jump_hlecall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - bx r1 - .size jump_hlecall, .-jump_hlecall - - .align 2 -FUNCTION(jump_intcall): - ldr r2, [fp, #LO_last_count] - str r0, [fp, #LO_pcaddr] - add r2, r2, r10 - adr lr, pcsx_return - str r2, [fp, #LO_cycle] /* PCSX cycle counter */ - b execI - .size 
jump_hlecall, .-jump_hlecall - - .align 2 -FUNCTION(new_dyna_leave): - ldr r0, [fp, #LO_last_count] - add r12, fp, #28 - add r10, r0, r10 - str r10, [fp, #LO_cycle] - ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc} - .size new_dyna_leave, .-new_dyna_leave - - .align 2 -FUNCTION(invalidate_addr_r0): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - b invalidate_addr_call - .size invalidate_addr_r0, .-invalidate_addr_r0 - .align 2 -FUNCTION(invalidate_addr_r1): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r1 - b invalidate_addr_call - .size invalidate_addr_r1, .-invalidate_addr_r1 - .align 2 -FUNCTION(invalidate_addr_r2): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r2 - b invalidate_addr_call - .size invalidate_addr_r2, .-invalidate_addr_r2 - .align 2 -FUNCTION(invalidate_addr_r3): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r3 - b invalidate_addr_call - .size invalidate_addr_r3, .-invalidate_addr_r3 - .align 2 -FUNCTION(invalidate_addr_r4): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r4 - b invalidate_addr_call - .size invalidate_addr_r4, .-invalidate_addr_r4 - .align 2 -FUNCTION(invalidate_addr_r5): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r5 - b invalidate_addr_call - .size invalidate_addr_r5, .-invalidate_addr_r5 - .align 2 -FUNCTION(invalidate_addr_r6): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r6 - b invalidate_addr_call - .size invalidate_addr_r6, .-invalidate_addr_r6 - .align 2 -FUNCTION(invalidate_addr_r7): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r7 - b invalidate_addr_call - .size invalidate_addr_r7, .-invalidate_addr_r7 - .align 2 -FUNCTION(invalidate_addr_r8): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r8 - b invalidate_addr_call - .size invalidate_addr_r8, .-invalidate_addr_r8 - .align 2 -FUNCTION(invalidate_addr_r9): - stmia fp, {r0, r1, r2, r3, 
EXTRA_UNSAVED_REGS r12, lr} - mov r0, r9 - b invalidate_addr_call - .size invalidate_addr_r9, .-invalidate_addr_r9 - .align 2 -FUNCTION(invalidate_addr_r10): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r10 - b invalidate_addr_call - .size invalidate_addr_r10, .-invalidate_addr_r10 - .align 2 -FUNCTION(invalidate_addr_r12): - stmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, lr} - mov r0, r12 - .size invalidate_addr_r12, .-invalidate_addr_r12 - .align 2 -invalidate_addr_call: - ldr r12, [fp, #LO_inv_code_start] - ldr lr, [fp, #LO_inv_code_end] - cmp r0, r12 - cmpcs lr, r0 - blcc invalidate_addr - ldmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, pc} - .size invalidate_addr_call, .-invalidate_addr_call - - .align 2 -FUNCTION(new_dyna_start): - /* ip is stored to conform EABI alignment */ - stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} - load_varadr fp, dynarec_local - ldr r0, [fp, #LO_pcaddr] - bl get_addr_ht - ldr r1, [fp, #LO_next_interupt] - ldr r10, [fp, #LO_cycle] - str r1, [fp, #LO_last_count] - sub r10, r10, r1 - mov pc, r0 - .size new_dyna_start, .-new_dyna_start - -/* --------------------------------------- */ - -.align 2 - -.macro pcsx_read_mem readop tab_shift - /* r0 = address, r1 = handler_tab, r2 = cycles */ - lsl r3, r0, #20 - lsr r3, #(20+\tab_shift) - ldr r12, [fp, #LO_last_count] - ldr r1, [r1, r3, lsl #2] - add r2, r2, r12 - lsls r1, #1 -.if \tab_shift == 1 - lsl r3, #1 - \readop r0, [r1, r3] -.else - \readop r0, [r1, r3, lsl #\tab_shift] -.endif - movcc pc, lr - str r2, [fp, #LO_cycle] - bx r1 -.endm - -FUNCTION(jump_handler_read8): - add r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part - pcsx_read_mem ldrbcc, 0 - -FUNCTION(jump_handler_read16): - add r1, #0x1000/4*4 @ shift to r16 part - pcsx_read_mem ldrhcc, 1 - -FUNCTION(jump_handler_read32): - pcsx_read_mem ldrcc, 2 - - -.macro pcsx_write_mem wrtop tab_shift - /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */ - lsl r12,r0, #20 - lsr r12, 
#(20+\tab_shift) - ldr r3, [r3, r12, lsl #2] - str r0, [fp, #LO_address] @ some handlers still need it.. - lsls r3, #1 - mov r0, r2 @ cycle return in case of direct store -.if \tab_shift == 1 - lsl r12, #1 - \wrtop r1, [r3, r12] -.else - \wrtop r1, [r3, r12, lsl #\tab_shift] -.endif - movcc pc, lr - ldr r12, [fp, #LO_last_count] - mov r0, r1 - add r2, r2, r12 - push {r2, lr} - str r2, [fp, #LO_cycle] - blx r3 - - ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} - str r0, [fp, #LO_last_count] - sub r0, r2, r0 - bx r3 -.endm - -FUNCTION(jump_handler_write8): - add r3, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part - pcsx_write_mem strbcc, 0 - -FUNCTION(jump_handler_write16): - add r3, #0x1000/4*4 @ shift to r16 part - pcsx_write_mem strhcc, 1 - -FUNCTION(jump_handler_write32): - pcsx_write_mem strcc, 2 - -FUNCTION(jump_handler_write_h): - /* r0 = address, r1 = data, r2 = cycles, r3 = handler */ - ldr r12, [fp, #LO_last_count] - str r0, [fp, #LO_address] @ some handlers still need it.. - add r2, r2, r12 - mov r0, r1 - push {r2, lr} - str r2, [fp, #LO_cycle] - blx r3 - - ldr r0, [fp, #LO_next_interupt] - pop {r2, r3} - str r0, [fp, #LO_last_count] - sub r0, r2, r0 - bx r3 - -FUNCTION(jump_handle_swl): - /* r0 = address, r1 = data, r2 = cycles */ - ldr r3, [fp, #LO_mem_wtab] - mov r12,r0,lsr #12 - ldr r3, [r3, r12, lsl #2] - lsls r3, #1 - bcs 4f - add r3, r0, r3 - mov r0, r2 - tst r3, #2 - beq 101f - tst r3, #1 - beq 2f -3: - str r1, [r3, #-3] - bx lr -2: - lsr r2, r1, #8 - lsr r1, #24 - strh r2, [r3, #-2] - strb r1, [r3] - bx lr -101: - tst r3, #1 - lsrne r1, #16 @ 1 - lsreq r12, r1, #24 @ 0 - strhne r1, [r3, #-1] - strbeq r12, [r3] - bx lr -4: - mov r0, r2 -@ b abort - bx lr @ TODO? 
- - -FUNCTION(jump_handle_swr): - /* r0 = address, r1 = data, r2 = cycles */ - ldr r3, [fp, #LO_mem_wtab] - mov r12,r0,lsr #12 - ldr r3, [r3, r12, lsl #2] - lsls r3, #1 - bcs 4f - add r3, r0, r3 - and r12,r3, #3 - mov r0, r2 - cmp r12,#2 - strbgt r1, [r3] @ 3 - strheq r1, [r3] @ 2 - cmp r12,#1 - strlt r1, [r3] @ 0 - bxne lr - lsr r2, r1, #8 @ 1 - strb r1, [r3] - strh r2, [r3, #1] - bx lr -4: - mov r0, r2 -@ b abort - bx lr @ TODO? - - -.macro rcntx_read_mode0 num - /* r0 = address, r2 = cycles */ - ldr r3, [fp, #LO_rcnts+6*4+7*4*\num] @ cycleStart - mov r0, r2, lsl #16 - sub r0, r0, r3, lsl #16 - lsr r0, #16 - bx lr -.endm - -FUNCTION(rcnt0_read_count_m0): - rcntx_read_mode0 0 - -FUNCTION(rcnt1_read_count_m0): - rcntx_read_mode0 1 - -FUNCTION(rcnt2_read_count_m0): - rcntx_read_mode0 2 - -FUNCTION(rcnt0_read_count_m1): - /* r0 = address, r2 = cycles */ - ldr r3, [fp, #LO_rcnts+6*4+7*4*0] @ cycleStart - mov_16 r1, 0x3334 - sub r2, r2, r3 - mul r0, r1, r2 @ /= 5 - lsr r0, #16 - bx lr - -FUNCTION(rcnt1_read_count_m1): - /* r0 = address, r2 = cycles */ - ldr r3, [fp, #LO_rcnts+6*4+7*4*1] - mov_24 r1, 0x1e6cde - sub r2, r2, r3 - umull r3, r0, r1, r2 @ ~ /= hsync_cycles, max ~0x1e6cdd - bx lr - -FUNCTION(rcnt2_read_count_m1): - /* r0 = address, r2 = cycles */ - ldr r3, [fp, #LO_rcnts+6*4+7*4*2] - mov r0, r2, lsl #16-3 - sub r0, r0, r3, lsl #16-3 - lsr r0, #16 @ /= 8 - bx lr - -@ vim:filetype=armasm diff --git a/libpcsxcore/new_dynarec/linkage_offsets.h b/libpcsxcore/new_dynarec/linkage_offsets.h deleted file mode 100644 index f7e1911..0000000 --- a/libpcsxcore/new_dynarec/linkage_offsets.h +++ /dev/null @@ -1,41 +0,0 @@ - -#define LO_next_interupt 64 -#define LO_cycle_count (LO_next_interupt + 4) -#define LO_last_count (LO_cycle_count + 4) -#define LO_pending_exception (LO_last_count + 4) -#define LO_stop (LO_pending_exception + 4) -#define LO_invc_ptr (LO_stop + 4) -#define LO_address (LO_invc_ptr + 4) -#define LO_psxRegs (LO_address + 4) -#define LO_reg (LO_psxRegs) 
-#define LO_lo (LO_reg + 128) -#define LO_hi (LO_lo + 4) -#define LO_reg_cop0 (LO_hi + 4) -#define LO_reg_cop2d (LO_reg_cop0 + 128) -#define LO_reg_cop2c (LO_reg_cop2d + 128) -#define LO_PC (LO_reg_cop2c + 128) -#define LO_pcaddr (LO_PC) -#define LO_code (LO_PC + 4) -#define LO_cycle (LO_code + 4) -#define LO_interrupt (LO_cycle + 4) -#define LO_intCycle (LO_interrupt + 4) -#define LO_psxRegs_end (LO_intCycle + 256) -#define LO_rcnts (LO_psxRegs_end) -#define LO_rcnts_end (LO_rcnts + 7*4*4) -#define LO_mem_rtab (LO_rcnts_end) -#define LO_mem_wtab (LO_mem_rtab + 4) -#define LO_psxH_ptr (LO_mem_wtab + 4) -#define LO_zeromem_ptr (LO_psxH_ptr + 4) -#define LO_inv_code_start (LO_zeromem_ptr + 4) -#define LO_inv_code_end (LO_inv_code_start + 4) -#define LO_branch_target (LO_inv_code_end + 4) -#define LO_scratch_buf_ptr (LO_branch_target + 4) -#define LO_align0 (LO_scratch_buf_ptr + 4) -#define LO_mini_ht (LO_align0 + 12) -#define LO_restore_candidate (LO_mini_ht + 256) -#define LO_dynarec_local_size (LO_restore_candidate + 512) - -#define LO_FCR0 (LO_align0) -#define LO_FCR31 (LO_align0) - -#define LO_cop2_to_scratch_buf (LO_scratch_buf_ptr - LO_reg_cop2d) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index d8c2372..02f335c 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -36,7 +36,7 @@ int getVMBlock(); #endif #include "new_dynarec_config.h" -#include "emu_if.h" //emulator interface +#include "backends/psx/emu_if.h" //emulator interface //#define DISASM //#define assem_debug printf @@ -51,7 +51,7 @@ int getVMBlock(); #include "assem_x64.h" #endif #ifdef __arm__ -#include "assem_arm.h" +#include "arm/assem_arm.h" #endif #ifdef VITA @@ -774,7 +774,7 @@ void alloc_all(struct regstat *cur,int i) #include "assem_x64.c" #endif #ifdef __arm__ -#include "assem_arm.c" +#include "arm/assem_arm.c" #endif // Add virtual address mapping to linked list diff --git 
a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c deleted file mode 100644 index 9376ff4..0000000 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ /dev/null @@ -1,494 +0,0 @@ -/* - * (C) Gražvydas "notaz" Ignotas, 2010-2011 - * - * This work is licensed under the terms of GNU GPL version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include -#include "../psxhw.h" -#include "../cdrom.h" -#include "../mdec.h" -#include "../gpu.h" -#include "../psxmem_map.h" -#include "emu_if.h" -#include "pcsxmem.h" - -#ifdef __thumb__ -#error the dynarec is incompatible with Thumb functions, -#error please add -marm to compile flags -#endif - -//#define memprintf printf -#define memprintf(...) - -static u32 *mem_readtab; -static u32 *mem_writetab; -static u32 mem_iortab[(1+2+4) * 0x1000 / 4]; -static u32 mem_iowtab[(1+2+4) * 0x1000 / 4]; -static u32 mem_ffwtab[(1+2+4) * 0x1000 / 4]; -//static u32 mem_unmrtab[(1+2+4) * 0x1000 / 4]; -static u32 mem_unmwtab[(1+2+4) * 0x1000 / 4]; - -// When this is called in a loop, and 'h' is a function pointer, clang will crash. 
-#ifdef __clang__ -static __attribute__ ((noinline)) void map_item(u32 *out, const void *h, u32 flag) -#else -static void map_item(u32 *out, const void *h, u32 flag) -#endif -{ - u32 hv = (u32)h; - if (hv & 1) { - SysPrintf("FATAL: %p has LSB set\n", h); - abort(); - } - *out = (hv >> 1) | (flag << 31); -} - -// size must be power of 2, at least 4k -#define map_l1_mem(tab, i, addr, size, base) \ - map_item(&tab[((addr)>>12) + i], (u8 *)(base) - (u32)(addr) - ((i << 12) & ~(size - 1)), 0) - -#define IOMEM32(a) (((a) & 0xfff) / 4) -#define IOMEM16(a) (0x1000/4 + (((a) & 0xfff) / 2)) -#define IOMEM8(a) (0x1000/4 + 0x1000/2 + ((a) & 0xfff)) - -u8 zero_mem[0x1000]; - -u32 read_mem_dummy() -{ - return 0; -} - -static void write_mem_dummy(u32 data) -{ - memprintf("unmapped w %08x, %08x @%08x %u\n", address, data, psxRegs.pc, psxRegs.cycle); -} - -/* IO handlers */ -static u32 io_read_sio16() -{ - return sioRead8() | (sioRead8() << 8); -} - -static u32 io_read_sio32() -{ - return sioRead8() | (sioRead8() << 8) | (sioRead8() << 16) | (sioRead8() << 24); -} - -static void io_write_sio16(u32 value) -{ - sioWrite8((unsigned char)value); - sioWrite8((unsigned char)(value>>8)); -} - -static void io_write_sio32(u32 value) -{ - sioWrite8((unsigned char)value); - sioWrite8((unsigned char)(value >> 8)); - sioWrite8((unsigned char)(value >> 16)); - sioWrite8((unsigned char)(value >> 24)); -} - -#ifndef DRC_DBG - -static void map_rcnt_rcount0(u32 mode) -{ - if (mode & 0x100) { // pixel clock - map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m1, 1); - map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m1, 1); - } - else { - map_item(&mem_iortab[IOMEM32(0x1100)], rcnt0_read_count_m0, 1); - map_item(&mem_iortab[IOMEM16(0x1100)], rcnt0_read_count_m0, 1); - } -} - -static void map_rcnt_rcount1(u32 mode) -{ - if (mode & 0x100) { // hcnt - map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m1, 1); - map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m1, 1); - } - else 
{ - map_item(&mem_iortab[IOMEM32(0x1110)], rcnt1_read_count_m0, 1); - map_item(&mem_iortab[IOMEM16(0x1110)], rcnt1_read_count_m0, 1); - } -} - -static void map_rcnt_rcount2(u32 mode) -{ - if (mode & 0x01) { // gate - map_item(&mem_iortab[IOMEM32(0x1120)], &psxH[0x1000], 0); - map_item(&mem_iortab[IOMEM16(0x1120)], &psxH[0x1000], 0); - } - else if (mode & 0x200) { // clk/8 - map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m1, 1); - map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m1, 1); - } - else { - map_item(&mem_iortab[IOMEM32(0x1120)], rcnt2_read_count_m0, 1); - map_item(&mem_iortab[IOMEM16(0x1120)], rcnt2_read_count_m0, 1); - } -} - -#else -#define map_rcnt_rcount0(mode) -#define map_rcnt_rcount1(mode) -#define map_rcnt_rcount2(mode) -#endif - -#define make_rcnt_funcs(i) \ -static u32 io_rcnt_read_count##i() { return psxRcntRcount(i); } \ -static u32 io_rcnt_read_mode##i() { return psxRcntRmode(i); } \ -static u32 io_rcnt_read_target##i() { return psxRcntRtarget(i); } \ -static void io_rcnt_write_count##i(u32 val) { psxRcntWcount(i, val & 0xffff); } \ -static void io_rcnt_write_mode##i(u32 val) { psxRcntWmode(i, val); map_rcnt_rcount##i(val); } \ -static void io_rcnt_write_target##i(u32 val) { psxRcntWtarget(i, val & 0xffff); } - -make_rcnt_funcs(0) -make_rcnt_funcs(1) -make_rcnt_funcs(2) - -static void io_write_ireg16(u32 value) -{ - //if (Config.Sio) psxHu16ref(0x1070) |= 0x80; - if (Config.SpuIrq) psxHu16ref(0x1070) |= 0x200; - psxHu16ref(0x1070) &= value; -} - -static void io_write_imask16(u32 value) -{ - psxHu16ref(0x1074) = value; - if (psxHu16ref(0x1070) & value) - new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); -} - -static void io_write_ireg32(u32 value) -{ - //if (Config.Sio) psxHu32ref(0x1070) |= 0x80; - if (Config.SpuIrq) psxHu32ref(0x1070) |= 0x200; - psxHu32ref(0x1070) &= value; -} - -static void io_write_imask32(u32 value) -{ - psxHu32ref(0x1074) = value; - if (psxHu32ref(0x1070) & value) - new_dyna_set_event(PSXINT_NEWDRC_CHECK, 1); 
-} - -static void io_write_dma_icr32(u32 value) -{ - u32 tmp = value & 0x00ff803f; - tmp |= (SWAPu32(HW_DMA_ICR) & ~value) & 0x7f000000; - if ((tmp & HW_DMA_ICR_GLOBAL_ENABLE && tmp & 0x7f000000) - || tmp & HW_DMA_ICR_BUS_ERROR) { - if (!(SWAPu32(HW_DMA_ICR) & HW_DMA_ICR_IRQ_SENT)) - psxHu32ref(0x1070) |= SWAP32(8); - tmp |= HW_DMA_ICR_IRQ_SENT; - } - HW_DMA_ICR = SWAPu32(tmp); -} - -#define make_dma_func(n) \ -static void io_write_chcr##n(u32 value) \ -{ \ - HW_DMA##n##_CHCR = value; \ - if (value & 0x01000000 && HW_DMA_PCR & (8 << (n * 4))) { \ - psxDma##n(HW_DMA##n##_MADR, HW_DMA##n##_BCR, value); \ - } \ -} - -make_dma_func(0) -make_dma_func(1) -make_dma_func(2) -make_dma_func(3) -make_dma_func(4) -make_dma_func(6) - -static void io_spu_write16(u32 value) -{ - // meh - SPU_writeRegister(address, value, psxRegs.cycle); -} - -static void io_spu_write32(u32 value) -{ - SPUwriteRegister wfunc = SPU_writeRegister; - u32 a = address; - - wfunc(a, value & 0xffff, psxRegs.cycle); - wfunc(a + 2, value >> 16, psxRegs.cycle); -} - -static u32 io_gpu_read_status(void) -{ - u32 v; - - // meh2, syncing for img bit, might want to avoid it.. - gpuSyncPluginSR(); - v = HW_GPU_STATUS; - - // XXX: because of large timeslices can't use hSyncCount, using rough - // approximization instead. Perhaps better use hcounter code here or something. 
- if (hSyncCount < 240 && (HW_GPU_STATUS & PSXGPU_ILACE_BITS) != PSXGPU_ILACE_BITS) - v |= PSXGPU_LCF & (psxRegs.cycle << 20); - return v; -} - -static void io_gpu_write_status(u32 value) -{ - GPU_writeStatus(value); - gpuSyncPluginSR(); -} - -static void map_ram_write(void) -{ - int i; - - for (i = 0; i < (0x800000 >> 12); i++) { - map_l1_mem(mem_writetab, i, 0x80000000, 0x200000, psxM); - map_l1_mem(mem_writetab, i, 0x00000000, 0x200000, psxM); - map_l1_mem(mem_writetab, i, 0xa0000000, 0x200000, psxM); - } -} - -static void unmap_ram_write(void) -{ - int i; - - for (i = 0; i < (0x800000 >> 12); i++) { - map_item(&mem_writetab[0x80000|i], mem_unmwtab, 1); - map_item(&mem_writetab[0x00000|i], mem_unmwtab, 1); - map_item(&mem_writetab[0xa0000|i], mem_unmwtab, 1); - } -} - -static void write_biu(u32 value) -{ - memprintf("write_biu %08x, %08x @%08x %u\n", address, value, psxRegs.pc, psxRegs.cycle); - - if (address != 0xfffe0130) - return; - - switch (value) { - case 0x800: case 0x804: - unmap_ram_write(); - break; - case 0: case 0x1e988: - map_ram_write(); - break; - default: - printf("write_biu: unexpected val: %08x\n", value); - break; - } -} - -void new_dyna_pcsx_mem_load_state(void) -{ - map_rcnt_rcount0(rcnts[0].mode); - map_rcnt_rcount1(rcnts[1].mode); - map_rcnt_rcount2(rcnts[2].mode); -} - -int pcsxmem_is_handler_dynamic(unsigned int addr) -{ - if ((addr & 0xfffff000) != 0x1f801000) - return 0; - - addr &= 0xffff; - return addr == 0x1100 || addr == 0x1110 || addr == 0x1120; -} - -void new_dyna_pcsx_mem_init(void) -{ - int i; - - // have to map these further to keep tcache close to .text - mem_readtab = psxMap(0x08000000, 0x200000 * 4, 0, MAP_TAG_LUTS); - if (mem_readtab == NULL) { - SysPrintf("failed to map mem tables\n"); - exit(1); - } - mem_writetab = mem_readtab + 0x100000; - - // 1st level lookup: - // 0: direct mem - // 1: use 2nd lookup - // 2nd level lookup: - // 0: direct mem variable - // 1: memhandler - - // default/unmapped memhandlers - for (i = 
0; i < 0x100000; i++) { - //map_item(&mem_readtab[i], mem_unmrtab, 1); - map_l1_mem(mem_readtab, i, 0, 0x1000, zero_mem); - map_item(&mem_writetab[i], mem_unmwtab, 1); - } - - // RAM and it's mirrors - for (i = 0; i < (0x800000 >> 12); i++) { - map_l1_mem(mem_readtab, i, 0x80000000, 0x200000, psxM); - map_l1_mem(mem_readtab, i, 0x00000000, 0x200000, psxM); - map_l1_mem(mem_readtab, i, 0xa0000000, 0x200000, psxM); - } - map_ram_write(); - - // BIOS and it's mirrors - for (i = 0; i < (0x80000 >> 12); i++) { - map_l1_mem(mem_readtab, i, 0x1fc00000, 0x80000, psxR); - map_l1_mem(mem_readtab, i, 0xbfc00000, 0x80000, psxR); - } - - // scratchpad - map_l1_mem(mem_readtab, 0, 0x1f800000, 0x1000, psxH); - map_l1_mem(mem_readtab, 0, 0x9f800000, 0x1000, psxH); - map_l1_mem(mem_writetab, 0, 0x1f800000, 0x1000, psxH); - map_l1_mem(mem_writetab, 0, 0x9f800000, 0x1000, psxH); - - // I/O - map_item(&mem_readtab[0x1f801000 >> 12], mem_iortab, 1); - map_item(&mem_readtab[0x9f801000 >> 12], mem_iortab, 1); - map_item(&mem_readtab[0xbf801000 >> 12], mem_iortab, 1); - map_item(&mem_writetab[0x1f801000 >> 12], mem_iowtab, 1); - map_item(&mem_writetab[0x9f801000 >> 12], mem_iowtab, 1); - map_item(&mem_writetab[0xbf801000 >> 12], mem_iowtab, 1); - - // L2 - // unmapped tables - for (i = 0; i < (1+2+4) * 0x1000 / 4; i++) - map_item(&mem_unmwtab[i], write_mem_dummy, 1); - - // fill IO tables - for (i = 0; i < 0x1000/4; i++) { - map_item(&mem_iortab[i], &psxH[0x1000], 0); - map_item(&mem_iowtab[i], &psxH[0x1000], 0); - } - for (; i < 0x1000/4 + 0x1000/2; i++) { - map_item(&mem_iortab[i], &psxH[0x1000], 0); - map_item(&mem_iowtab[i], &psxH[0x1000], 0); - } - for (; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) { - map_item(&mem_iortab[i], &psxH[0x1000], 0); - map_item(&mem_iowtab[i], &psxH[0x1000], 0); - } - - map_item(&mem_iortab[IOMEM32(0x1040)], io_read_sio32, 1); - map_item(&mem_iortab[IOMEM32(0x1100)], io_rcnt_read_count0, 1); - map_item(&mem_iortab[IOMEM32(0x1104)], io_rcnt_read_mode0, 1); - 
map_item(&mem_iortab[IOMEM32(0x1108)], io_rcnt_read_target0, 1); - map_item(&mem_iortab[IOMEM32(0x1110)], io_rcnt_read_count1, 1); - map_item(&mem_iortab[IOMEM32(0x1114)], io_rcnt_read_mode1, 1); - map_item(&mem_iortab[IOMEM32(0x1118)], io_rcnt_read_target1, 1); - map_item(&mem_iortab[IOMEM32(0x1120)], io_rcnt_read_count2, 1); - map_item(&mem_iortab[IOMEM32(0x1124)], io_rcnt_read_mode2, 1); - map_item(&mem_iortab[IOMEM32(0x1128)], io_rcnt_read_target2, 1); -// map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); - map_item(&mem_iortab[IOMEM32(0x1814)], io_gpu_read_status, 1); - map_item(&mem_iortab[IOMEM32(0x1820)], mdecRead0, 1); - map_item(&mem_iortab[IOMEM32(0x1824)], mdecRead1, 1); - - map_item(&mem_iortab[IOMEM16(0x1040)], io_read_sio16, 1); - map_item(&mem_iortab[IOMEM16(0x1044)], sioReadStat16, 1); - map_item(&mem_iortab[IOMEM16(0x1048)], sioReadMode16, 1); - map_item(&mem_iortab[IOMEM16(0x104a)], sioReadCtrl16, 1); - map_item(&mem_iortab[IOMEM16(0x104e)], sioReadBaud16, 1); - map_item(&mem_iortab[IOMEM16(0x1100)], io_rcnt_read_count0, 1); - map_item(&mem_iortab[IOMEM16(0x1104)], io_rcnt_read_mode0, 1); - map_item(&mem_iortab[IOMEM16(0x1108)], io_rcnt_read_target0, 1); - map_item(&mem_iortab[IOMEM16(0x1110)], io_rcnt_read_count1, 1); - map_item(&mem_iortab[IOMEM16(0x1114)], io_rcnt_read_mode1, 1); - map_item(&mem_iortab[IOMEM16(0x1118)], io_rcnt_read_target1, 1); - map_item(&mem_iortab[IOMEM16(0x1120)], io_rcnt_read_count2, 1); - map_item(&mem_iortab[IOMEM16(0x1124)], io_rcnt_read_mode2, 1); - map_item(&mem_iortab[IOMEM16(0x1128)], io_rcnt_read_target2, 1); - - map_item(&mem_iortab[IOMEM8(0x1040)], sioRead8, 1); - map_item(&mem_iortab[IOMEM8(0x1800)], cdrRead0, 1); - map_item(&mem_iortab[IOMEM8(0x1801)], cdrRead1, 1); - map_item(&mem_iortab[IOMEM8(0x1802)], cdrRead2, 1); - map_item(&mem_iortab[IOMEM8(0x1803)], cdrRead3, 1); - - // write(u32 data) - map_item(&mem_iowtab[IOMEM32(0x1040)], io_write_sio32, 1); - map_item(&mem_iowtab[IOMEM32(0x1070)], 
io_write_ireg32, 1); - map_item(&mem_iowtab[IOMEM32(0x1074)], io_write_imask32, 1); - map_item(&mem_iowtab[IOMEM32(0x1088)], io_write_chcr0, 1); - map_item(&mem_iowtab[IOMEM32(0x1098)], io_write_chcr1, 1); - map_item(&mem_iowtab[IOMEM32(0x10a8)], io_write_chcr2, 1); - map_item(&mem_iowtab[IOMEM32(0x10b8)], io_write_chcr3, 1); - map_item(&mem_iowtab[IOMEM32(0x10c8)], io_write_chcr4, 1); - map_item(&mem_iowtab[IOMEM32(0x10e8)], io_write_chcr6, 1); - map_item(&mem_iowtab[IOMEM32(0x10f4)], io_write_dma_icr32, 1); - map_item(&mem_iowtab[IOMEM32(0x1100)], io_rcnt_write_count0, 1); - map_item(&mem_iowtab[IOMEM32(0x1104)], io_rcnt_write_mode0, 1); - map_item(&mem_iowtab[IOMEM32(0x1108)], io_rcnt_write_target0, 1); - map_item(&mem_iowtab[IOMEM32(0x1110)], io_rcnt_write_count1, 1); - map_item(&mem_iowtab[IOMEM32(0x1114)], io_rcnt_write_mode1, 1); - map_item(&mem_iowtab[IOMEM32(0x1118)], io_rcnt_write_target1, 1); - map_item(&mem_iowtab[IOMEM32(0x1120)], io_rcnt_write_count2, 1); - map_item(&mem_iowtab[IOMEM32(0x1124)], io_rcnt_write_mode2, 1); - map_item(&mem_iowtab[IOMEM32(0x1128)], io_rcnt_write_target2, 1); -// map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); - map_item(&mem_iowtab[IOMEM32(0x1814)], io_gpu_write_status, 1); - map_item(&mem_iowtab[IOMEM32(0x1820)], mdecWrite0, 1); - map_item(&mem_iowtab[IOMEM32(0x1824)], mdecWrite1, 1); - - map_item(&mem_iowtab[IOMEM16(0x1040)], io_write_sio16, 1); - map_item(&mem_iowtab[IOMEM16(0x1044)], sioWriteStat16, 1); - map_item(&mem_iowtab[IOMEM16(0x1048)], sioWriteMode16, 1); - map_item(&mem_iowtab[IOMEM16(0x104a)], sioWriteCtrl16, 1); - map_item(&mem_iowtab[IOMEM16(0x104e)], sioWriteBaud16, 1); - map_item(&mem_iowtab[IOMEM16(0x1070)], io_write_ireg16, 1); - map_item(&mem_iowtab[IOMEM16(0x1074)], io_write_imask16, 1); - map_item(&mem_iowtab[IOMEM16(0x1100)], io_rcnt_write_count0, 1); - map_item(&mem_iowtab[IOMEM16(0x1104)], io_rcnt_write_mode0, 1); - map_item(&mem_iowtab[IOMEM16(0x1108)], io_rcnt_write_target0, 1); - 
map_item(&mem_iowtab[IOMEM16(0x1110)], io_rcnt_write_count1, 1); - map_item(&mem_iowtab[IOMEM16(0x1114)], io_rcnt_write_mode1, 1); - map_item(&mem_iowtab[IOMEM16(0x1118)], io_rcnt_write_target1, 1); - map_item(&mem_iowtab[IOMEM16(0x1120)], io_rcnt_write_count2, 1); - map_item(&mem_iowtab[IOMEM16(0x1124)], io_rcnt_write_mode2, 1); - map_item(&mem_iowtab[IOMEM16(0x1128)], io_rcnt_write_target2, 1); - - map_item(&mem_iowtab[IOMEM8(0x1040)], sioWrite8, 1); - map_item(&mem_iowtab[IOMEM8(0x1800)], cdrWrite0, 1); - map_item(&mem_iowtab[IOMEM8(0x1801)], cdrWrite1, 1); - map_item(&mem_iowtab[IOMEM8(0x1802)], cdrWrite2, 1); - map_item(&mem_iowtab[IOMEM8(0x1803)], cdrWrite3, 1); - - for (i = 0x1c00; i < 0x1e00; i += 2) { - map_item(&mem_iowtab[IOMEM16(i)], io_spu_write16, 1); - map_item(&mem_iowtab[IOMEM32(i)], io_spu_write32, 1); - } - - // misc - map_item(&mem_writetab[0xfffe0130 >> 12], mem_ffwtab, 1); - for (i = 0; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) - map_item(&mem_ffwtab[i], write_biu, 1); - - mem_rtab = mem_readtab; - mem_wtab = mem_writetab; - - new_dyna_pcsx_mem_load_state(); -} - -void new_dyna_pcsx_mem_reset(void) -{ - int i; - - // plugins might change so update the pointers - map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); - - for (i = 0x1c00; i < 0x1e00; i += 2) - map_item(&mem_iortab[IOMEM16(i)], SPU_readRegister, 1); - - map_item(&mem_iowtab[IOMEM32(0x1810)], GPU_writeData, 1); -} - -void new_dyna_pcsx_mem_shutdown(void) -{ - psxUnmap(mem_readtab, 0x200000 * 4, MAP_TAG_LUTS); - mem_writetab = mem_readtab = NULL; -} diff --git a/libpcsxcore/new_dynarec/pcsxmem.h b/libpcsxcore/new_dynarec/pcsxmem.h deleted file mode 100644 index 72892a8..0000000 --- a/libpcsxcore/new_dynarec/pcsxmem.h +++ /dev/null @@ -1,9 +0,0 @@ - -extern u8 zero_mem[0x1000]; - -void new_dyna_pcsx_mem_init(void); -void new_dyna_pcsx_mem_reset(void); -void new_dyna_pcsx_mem_load_state(void); -void new_dyna_pcsx_mem_shutdown(void); - -int pcsxmem_is_handler_dynamic(unsigned int 
addr); diff --git a/libpcsxcore/new_dynarec/pcsxmem_inline.c b/libpcsxcore/new_dynarec/pcsxmem_inline.c deleted file mode 100644 index 305931a..0000000 --- a/libpcsxcore/new_dynarec/pcsxmem_inline.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * (C) Gražvydas "notaz" Ignotas, 2011 - * - * This work is licensed under the terms of GNU GPL version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#ifndef DRC_DBG - -static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, int rt) -{ - if ((addr & 0xfffff000) == 0x1f801000) { - u_int t; - switch (addr & 0xffff) { - case 0x1120: // rcnt2 count - if (rt < 0) goto dont_care; - if (cc < 0) return 0; - emit_readword((int)&rcnts[2].mode, HOST_TEMPREG); - emit_readword((int)&rcnts[2].cycleStart, rt); - emit_testimm(HOST_TEMPREG, 0x200); - emit_readword((int)&last_count, HOST_TEMPREG); - emit_sub(HOST_TEMPREG, rt, HOST_TEMPREG); - emit_add(HOST_TEMPREG, cc, HOST_TEMPREG); - if (cc_adj) - emit_addimm(HOST_TEMPREG, cc_adj, rt); - emit_shrne_imm(rt, 3, rt); - mov_loadtype_adj(type!=LOADW_STUB?type:LOADH_STUB, rt, rt); - goto hit; - case 0x1104: - case 0x1114: - case 0x1124: // rcnt mode - if (rt < 0) return 0; - t = (addr >> 4) & 3; - emit_readword((int)&rcnts[t].mode, rt); - emit_andimm(rt, ~0x1800, HOST_TEMPREG); - emit_writeword(HOST_TEMPREG, (int)&rcnts[t].mode); - mov_loadtype_adj(type, rt, rt); - goto hit; - } - } - else { - if (rt < 0) - goto dont_care; - } - - return 0; - -hit: - assem_debug("pcsx_direct_read %08x end\n", addr); - return 1; - -dont_care: - assem_debug("pcsx_direct_read %08x dummy\n", addr); - return 1; -} - -#else - -static int pcsx_direct_read(int type, u_int addr, int cc_adj, int cc, int rs, int rt) -{ - return 0; -} - -#endif - -// vim:shiftwidth=2:expandtab -- cgit v1.2.3 From 6f7b3633b990b77d940b14a7405affcf1d187b20 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Tue, 27 Sep 2016 05:20:36 +0200 Subject: Update makefile --- Makefile | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 46539c5..2a6be72 100644 --- a/Makefile +++ b/Makefile @@ -62,8 +62,8 @@ libpcsxcore/psxbios.o: CFLAGS += -Wno-nonnull # dynarec ifeq "$(USE_DYNAREC)" "1" -OBJS += libpcsxcore/new_dynarec/new_dynarec.o libpcsxcore/new_dynarec/linkage_arm.o -OBJS += libpcsxcore/new_dynarec/pcsxmem.o +OBJS += libpcsxcore/new_dynarec/new_dynarec.o libpcsxcore/new_dynarec/arm/linkage_arm.o +OBJS += libpcsxcore/new_dynarec/backends/psx/pcsxmem.o else libpcsxcore/new_dynarec/backends/psx/emu_if.o: CFLAGS += -DDRC_DISABLE frontend/libretro.o: CFLAGS += -DDRC_DISABLE -- cgit v1.2.3 From d404093f31d5cc0a60aa8d32389e4d10be303204 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Tue, 27 Sep 2016 22:54:15 +0200 Subject: Update new_dynarec - working towards a common shared ari64 codebase for both mupen64plus and pcsx rearmed - see libretro/ari64 --- libpcsxcore/new_dynarec/new_dynarec.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 02f335c..588eaaa 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -45,10 +45,10 @@ int getVMBlock(); #define inv_debug(...) 
#ifdef __i386__ -#include "assem_x86.h" +#include "x86/assem_x86.h" #endif #ifdef __x86_64__ -#include "assem_x64.h" +#include "x64/assem_x64.h" #endif #ifdef __arm__ #include "arm/assem_arm.h" @@ -768,10 +768,10 @@ void alloc_all(struct regstat *cur,int i) } #ifdef __i386__ -#include "assem_x86.c" +#include "x86/assem_x86.c" #endif #ifdef __x86_64__ -#include "assem_x64.c" +#include "x64/assem_x64.c" #endif #ifdef __arm__ #include "arm/assem_arm.c" @@ -1700,7 +1700,8 @@ void syscall_alloc(struct regstat *current,int i) void delayslot_alloc(struct regstat *current,int i) { - switch(itype[i]) { + switch(itype[i]) + { case UJUMP: case CJUMP: case SJUMP: @@ -1850,7 +1851,8 @@ void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32 } } -int mchecksum() +#if 0 +static int mchecksum(void) { //if(!tracedebug) return 0; int i; @@ -1863,7 +1865,8 @@ int mchecksum() } return sum; } -int rchecksum() + +static int rchecksum(void) { int i; int sum=0; @@ -1871,7 +1874,8 @@ int rchecksum() sum^=((u_int *)reg)[i]; return sum; } -void rlist() + +static void rlist(void) { int i; printf("TRACE: "); @@ -1880,12 +1884,12 @@ void rlist() printf("\n"); } -void enabletrace() +static void enabletrace(void) { tracedebug=1; } -void memdebug(int i) +static void memdebug(int i) { //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]); //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum()); @@ -1910,6 +1914,7 @@ void memdebug(int i) } //printf("TRACE: %x\n",(&i)[-1]); } +#endif void alu_assemble(int i,struct regstat *i_regs) { -- cgit v1.2.3 From 65a0505f57d57ddedb89e3c66734fb3b0a69e1e8 Mon Sep 17 00:00:00 2001 From: Francisco José García García Date: Wed, 28 Sep 2016 00:44:37 +0200 Subject: Add support for software framebuffer --- frontend/libretro.c | 28 +++++-- frontend/libretro.h | 215 ++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 198 
insertions(+), 45 deletions(-) diff --git a/frontend/libretro.c b/frontend/libretro.c index 81afc30..66175af 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -47,6 +47,7 @@ static retro_audio_sample_batch_t audio_batch_cb; static struct retro_rumble_interface rumble; static void *vout_buf; +static void * vout_buf_ptr; static int vout_width, vout_height; static int vout_doffs_old, vout_fb_dirty; static bool vout_can_dupe; @@ -133,14 +134,14 @@ static void convert(void *buf, size_t bytes) static void vout_flip(const void *vram, int stride, int bgr24, int w, int h) { - unsigned short *dest = vout_buf; + unsigned short *dest = vout_buf_ptr; const unsigned short *src = vram; int dstride = vout_width, h1 = h; int doffs; if (vram == NULL) { // blanking - memset(vout_buf, 0, dstride * h * 2); + memset(vout_buf_ptr, 0, dstride * h * 2); goto out; } @@ -148,7 +149,7 @@ static void vout_flip(const void *vram, int stride, int bgr24, int w, int h) doffs += (dstride - w) / 2 & ~1; if (doffs != vout_doffs_old) { // clear borders - memset(vout_buf, 0, dstride * h * 2); + memset(vout_buf_ptr, 0, dstride * h * 2); vout_doffs_old = doffs; } dest += doffs; @@ -171,7 +172,7 @@ static void vout_flip(const void *vram, int stride, int bgr24, int w, int h) out: #ifndef FRONTEND_SUPPORTS_RGB565 - convert(vout_buf, vout_width * vout_height * 2); + convert(vout_buf_ptr, vout_width * vout_height * 2); #endif vout_fb_dirty = 1; pl_rearmed_cbs.flip_cnt++; @@ -1483,8 +1484,21 @@ void retro_run(void) stop = 0; psxCpu->Execute(); + + struct retro_framebuffer fb = {0}; + + fb.width = vout_width; + fb.height = vout_height; + fb.access_flags = RETRO_MEMORY_ACCESS_WRITE; + + vout_buf_ptr = vout_buf; + + if (environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, &fb) && fb.format == RETRO_PIXEL_FORMAT_RGB565) + { + vout_buf_ptr = (uint16_t*)fb.data; + } - video_cb((vout_fb_dirty || !vout_can_dupe || !duping_enable) ? 
vout_buf : NULL, + video_cb((vout_fb_dirty || !vout_can_dupe || !duping_enable) ? vout_buf_ptr : NULL, vout_width, vout_height, vout_width * 2); vout_fb_dirty = 0; } @@ -1593,7 +1607,9 @@ void retro_init(void) #else vout_buf = malloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); #endif - + + vout_buf_ptr = vout_buf; + if (environ_cb(RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY, &dir) && dir) { snprintf(Config.BiosDir, sizeof(Config.BiosDir), "%s", dir); diff --git a/frontend/libretro.h b/frontend/libretro.h index 16c274a..a231548 100755 --- a/frontend/libretro.h +++ b/frontend/libretro.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2010-2014 The RetroArch team +/* Copyright (C) 2010-2016 The RetroArch team * * --------------------------------------------------------------------------------------- * The following license statement only applies to this libretro API header (libretro.h). @@ -43,6 +43,40 @@ extern "C" { #endif #endif +#ifndef RETRO_CALLCONV +# if defined(__GNUC__) && defined(__i386__) && !defined(__x86_64__) +# define RETRO_CALLCONV __attribute__((cdecl)) +# elif defined(_MSC_VER) && defined(_M_X86) && !defined(_M_X64) +# define RETRO_CALLCONV __cdecl +# else +# define RETRO_CALLCONV /* all other platforms only have one calling convention each */ +# endif +#endif + +#ifndef RETRO_API +# if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) +# ifdef RETRO_IMPORT_SYMBOLS +# ifdef __GNUC__ +# define RETRO_API RETRO_CALLCONV __attribute__((__dllimport__)) +# else +# define RETRO_API RETRO_CALLCONV __declspec(dllimport) +# endif +# else +# ifdef __GNUC__ +# define RETRO_API RETRO_CALLCONV __attribute__((__dllexport__)) +# else +# define RETRO_API RETRO_CALLCONV __declspec(dllexport) +# endif +# endif +# else +# if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__CELLOS_LV2__) +# define RETRO_API RETRO_CALLCONV __attribute__((__visibility__("default"))) +# else +# define RETRO_API RETRO_CALLCONV +# endif +# endif +#endif + /* Used for checking API/ABI mismatches that can 
break libretro * implementations. * It is not incremented for compatible changes to the API. @@ -165,13 +199,15 @@ extern "C" { #define RETRO_DEVICE_ID_ANALOG_Y 1 /* Id values for MOUSE. */ -#define RETRO_DEVICE_ID_MOUSE_X 0 -#define RETRO_DEVICE_ID_MOUSE_Y 1 -#define RETRO_DEVICE_ID_MOUSE_LEFT 2 -#define RETRO_DEVICE_ID_MOUSE_RIGHT 3 -#define RETRO_DEVICE_ID_MOUSE_WHEELUP 4 -#define RETRO_DEVICE_ID_MOUSE_WHEELDOWN 5 -#define RETRO_DEVICE_ID_MOUSE_MIDDLE 6 +#define RETRO_DEVICE_ID_MOUSE_X 0 +#define RETRO_DEVICE_ID_MOUSE_Y 1 +#define RETRO_DEVICE_ID_MOUSE_LEFT 2 +#define RETRO_DEVICE_ID_MOUSE_RIGHT 3 +#define RETRO_DEVICE_ID_MOUSE_WHEELUP 4 +#define RETRO_DEVICE_ID_MOUSE_WHEELDOWN 5 +#define RETRO_DEVICE_ID_MOUSE_MIDDLE 6 +#define RETRO_DEVICE_ID_MOUSE_HORIZ_WHEELUP 7 +#define RETRO_DEVICE_ID_MOUSE_HORIZ_WHEELDOWN 8 /* Id values for LIGHTGUN types. */ #define RETRO_DEVICE_ID_LIGHTGUN_X 0 @@ -206,6 +242,8 @@ enum retro_language RETRO_LANGUAGE_KOREAN = 9, RETRO_LANGUAGE_CHINESE_TRADITIONAL = 10, RETRO_LANGUAGE_CHINESE_SIMPLIFIED = 11, + RETRO_LANGUAGE_ESPERANTO = 12, + RETRO_LANGUAGE_POLISH = 13, RETRO_LANGUAGE_LAST, /* Ensure sizeof(enum) == sizeof(int) */ @@ -693,9 +731,10 @@ enum retro_mod * location-based information from the host device, * such as current latitude / longitude. */ -#define RETRO_ENVIRONMENT_GET_CONTENT_DIRECTORY 30 +#define RETRO_ENVIRONMENT_GET_CONTENT_DIRECTORY 30 /* Old name, kept for compatibility. */ +#define RETRO_ENVIRONMENT_GET_CORE_ASSETS_DIRECTORY 30 /* const char ** -- - * Returns the "content" directory of the frontend. + * Returns the "core assets" directory of the frontend. * This directory can be used to store specific assets that the * core relies upon, such as art assets, * input data, etc etc. @@ -851,6 +890,61 @@ enum retro_mod * Returns the specified language of the frontend, if specified by the user. * It can be used by the core for localization purposes. 
*/ +#define RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER (40 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_framebuffer * -- + * Returns a preallocated framebuffer which the core can use for rendering + * the frame into when not using SET_HW_RENDER. + * The framebuffer returned from this call must not be used + * after the current call to retro_run() returns. + * + * The goal of this call is to allow zero-copy behavior where a core + * can render directly into video memory, avoiding extra bandwidth cost by copying + * memory from core to video memory. + * + * If this call succeeds and the core renders into it, + * the framebuffer pointer and pitch can be passed to retro_video_refresh_t. + * If the buffer from GET_CURRENT_SOFTWARE_FRAMEBUFFER is to be used, + * the core must pass the exact + * same pointer as returned by GET_CURRENT_SOFTWARE_FRAMEBUFFER; + * i.e. passing a pointer which is offset from the + * buffer is undefined. The width, height and pitch parameters + * must also match exactly to the values obtained from GET_CURRENT_SOFTWARE_FRAMEBUFFER. + * + * It is possible for a frontend to return a different pixel format + * than the one used in SET_PIXEL_FORMAT. This can happen if the frontend + * needs to perform conversion. + * + * It is still valid for a core to render to a different buffer + * even if GET_CURRENT_SOFTWARE_FRAMEBUFFER succeeds. + * + * A frontend must make sure that the pointer obtained from this function is + * writeable (and readable). + */ + +enum retro_hw_render_interface_type +{ + RETRO_HW_RENDER_INTERFACE_VULKAN = 0, + RETRO_HW_RENDER_INTERFACE_DUMMY = INT_MAX +}; + +/* Base struct. All retro_hw_render_interface_* types + * contain at least these fields. 
*/ +struct retro_hw_render_interface +{ + enum retro_hw_render_interface_type interface_type; + unsigned interface_version; +}; +#define RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE (41 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const struct retro_hw_render_interface ** -- + * Returns an API specific rendering interface for accessing API specific data. + * Not all HW rendering APIs support or need this. + * The contents of the returned pointer is specific to the rendering API + * being used. See the various headers like libretro_vulkan.h, etc. + * + * GET_HW_RENDER_INTERFACE cannot be called before context_reset has been called. + * Similarly, after context_destroyed callback returns, + * the contents of the HW_RENDER_INTERFACE are invalidated. + */ #define RETRO_MEMDESC_CONST (1 << 0) /* The frontend will never change this memory area once retro_load_game has returned. */ #define RETRO_MEMDESC_BIGENDIAN (1 << 1) /* The memory area contains big endian data. Default is little endian. */ @@ -1125,6 +1219,10 @@ struct retro_log_callback #define RETRO_SIMD_VFPU (1 << 13) #define RETRO_SIMD_PS (1 << 14) #define RETRO_SIMD_AES (1 << 15) +#define RETRO_SIMD_VFPV3 (1 << 16) +#define RETRO_SIMD_VFPV4 (1 << 17) +#define RETRO_SIMD_POPCNT (1 << 18) +#define RETRO_SIMD_MOVBE (1 << 19) typedef uint64_t retro_perf_tick_t; typedef int64_t retro_time_t; @@ -1464,6 +1562,9 @@ enum retro_hw_context_type * use the corresponding enums directly. */ RETRO_HW_CONTEXT_OPENGLES_VERSION = 5, + /* Vulkan, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE. */ + RETRO_HW_CONTEXT_VULKAN = 6, + RETRO_HW_CONTEXT_DUMMY = INT_MAX }; @@ -1486,23 +1587,28 @@ struct retro_hw_render_callback */ retro_hw_context_reset_t context_reset; - /* Set by frontend. */ + /* Set by frontend. + * TODO: This is rather obsolete. The frontend should not + * be providing preallocated framebuffers. */ retro_hw_get_current_framebuffer_t get_current_framebuffer; /* Set by frontend. 
*/ retro_hw_get_proc_address_t get_proc_address; - /* Set if render buffers should have depth component attached. */ + /* Set if render buffers should have depth component attached. + * TODO: Obsolete. */ bool depth; - /* Set if stencil buffers should be attached. */ + /* Set if stencil buffers should be attached. + * TODO: Obsolete. */ bool stencil; /* If depth and stencil are true, a packed 24/8 buffer will be added. * Only attaching stencil is invalid and will be ignored. */ /* Use conventional bottom-left origin convention. If false, - * standard libretro top-left origin semantics are used. */ + * standard libretro top-left origin semantics are used. + * TODO: Move to GL specific interface. */ bool bottom_left_origin; /* Major version number for core GL context or GLES 3.1+. */ @@ -1513,6 +1619,7 @@ struct retro_hw_render_callback /* If this is true, the frontend will go very far to avoid * resetting context in scenarios like toggling fullscreen, etc. + * TODO: Obsolete? Maybe frontend should just always assume this ... */ bool cache_context; @@ -1779,6 +1886,36 @@ struct retro_game_info const char *meta; /* String of implementation specific meta-data. */ }; +#define RETRO_MEMORY_ACCESS_WRITE (1 << 0) + /* The core will write to the buffer provided by retro_framebuffer::data. */ +#define RETRO_MEMORY_ACCESS_READ (1 << 1) + /* The core will read from retro_framebuffer::data. */ +#define RETRO_MEMORY_TYPE_CACHED (1 << 0) + /* The memory in data is cached. + * If not cached, random writes and/or reading from the buffer is expected to be very slow. */ +struct retro_framebuffer +{ + void *data; /* The framebuffer which the core can render into. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. + The initial contents of data are unspecified. */ + unsigned width; /* The framebuffer width used by the core. Set by core. */ + unsigned height; /* The framebuffer height used by the core. Set by core. 
*/ + size_t pitch; /* The number of bytes between the beginning of a scanline, + and beginning of the next scanline. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */ + enum retro_pixel_format format; /* The pixel format the core must use to render into data. + This format could differ from the format used in + SET_PIXEL_FORMAT. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */ + + unsigned access_flags; /* How the core will access the memory in the framebuffer. + RETRO_MEMORY_ACCESS_* flags. + Set by core. */ + unsigned memory_flags; /* Flags telling core how the memory has been mapped. + RETRO_MEMORY_TYPE_* flags. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */ +}; + /* Callbacks */ /* Environment callback. Gives implementations a way of performing @@ -1832,25 +1969,25 @@ typedef int16_t (*retro_input_state_t)(unsigned port, unsigned device, * * The rest of the set_* functions are guaranteed to have been called * before the first call to retro_run() is made. */ -void retro_set_environment(retro_environment_t); -void retro_set_video_refresh(retro_video_refresh_t); -void retro_set_audio_sample(retro_audio_sample_t); -void retro_set_audio_sample_batch(retro_audio_sample_batch_t); -void retro_set_input_poll(retro_input_poll_t); -void retro_set_input_state(retro_input_state_t); +RETRO_API void retro_set_environment(retro_environment_t); +RETRO_API void retro_set_video_refresh(retro_video_refresh_t); +RETRO_API void retro_set_audio_sample(retro_audio_sample_t); +RETRO_API void retro_set_audio_sample_batch(retro_audio_sample_batch_t); +RETRO_API void retro_set_input_poll(retro_input_poll_t); +RETRO_API void retro_set_input_state(retro_input_state_t); /* Library global initialization/deinitialization. */ -void retro_init(void); -void retro_deinit(void); +RETRO_API void retro_init(void); +RETRO_API void retro_deinit(void); /* Must return RETRO_API_VERSION. Used to validate ABI compatibility * when the API is revised. 
*/ -unsigned retro_api_version(void); +RETRO_API unsigned retro_api_version(void); /* Gets statically known system info. Pointers provided in *info * must be statically allocated. * Can be called at any time, even before retro_init(). */ -void retro_get_system_info(struct retro_system_info *info); +RETRO_API void retro_get_system_info(struct retro_system_info *info); /* Gets information about system audio/video timings and geometry. * Can be called only after retro_load_game() has successfully completed. @@ -1858,7 +1995,7 @@ void retro_get_system_info(struct retro_system_info *info); * variable if needed. * E.g. geom.aspect_ratio might not be initialized if core doesn't * desire a particular aspect ratio. */ -void retro_get_system_av_info(struct retro_system_av_info *info); +RETRO_API void retro_get_system_av_info(struct retro_system_av_info *info); /* Sets device to be used for player 'port'. * By default, RETRO_DEVICE_JOYPAD is assumed to be plugged into all @@ -1868,10 +2005,10 @@ void retro_get_system_av_info(struct retro_system_av_info *info); * hint to the libretro core when a core cannot automatically detect the * appropriate input device type on its own. It is also relevant when a * core can change its behavior depending on device type. */ -void retro_set_controller_port_device(unsigned port, unsigned device); +RETRO_API void retro_set_controller_port_device(unsigned port, unsigned device); /* Resets the current game. */ -void retro_reset(void); +RETRO_API void retro_reset(void); /* Runs the game for one video frame. * During retro_run(), input_poll callback must be called at least once. @@ -1881,7 +2018,7 @@ void retro_reset(void); * a frame if GET_CAN_DUPE returns true. * In this case, the video callback can take a NULL argument for data. */ -void retro_run(void); +RETRO_API void retro_run(void); /* Returns the amount of data the implementation requires to serialize * internal state (save states). 
@@ -1889,35 +2026,35 @@ void retro_run(void); * returned size is never allowed to be larger than a previous returned * value, to ensure that the frontend can allocate a save state buffer once. */ -size_t retro_serialize_size(void); +RETRO_API size_t retro_serialize_size(void); /* Serializes internal state. If failed, or size is lower than * retro_serialize_size(), it should return false, true otherwise. */ -bool retro_serialize(void *data, size_t size); -bool retro_unserialize(const void *data, size_t size); +RETRO_API bool retro_serialize(void *data, size_t size); +RETRO_API bool retro_unserialize(const void *data, size_t size); -void retro_cheat_reset(void); -void retro_cheat_set(unsigned index, bool enabled, const char *code); +RETRO_API void retro_cheat_reset(void); +RETRO_API void retro_cheat_set(unsigned index, bool enabled, const char *code); /* Loads a game. */ -bool retro_load_game(const struct retro_game_info *game); +RETRO_API bool retro_load_game(const struct retro_game_info *game); /* Loads a "special" kind of game. Should not be used, * except in extreme cases. */ -bool retro_load_game_special( +RETRO_API bool retro_load_game_special( unsigned game_type, const struct retro_game_info *info, size_t num_info ); /* Unloads a currently loaded game. */ -void retro_unload_game(void); +RETRO_API void retro_unload_game(void); /* Gets region of game. */ -unsigned retro_get_region(void); +RETRO_API unsigned retro_get_region(void); /* Gets region of memory. 
*/ -void *retro_get_memory_data(unsigned id); -size_t retro_get_memory_size(unsigned id); +RETRO_API void *retro_get_memory_data(unsigned id); +RETRO_API size_t retro_get_memory_size(unsigned id); #ifdef __cplusplus } -- cgit v1.2.3 From 92d7982639bc07d3d6e9467427eb2db8c3fee29d Mon Sep 17 00:00:00 2001 From: twinaphex Date: Wed, 28 Sep 2016 02:15:37 +0200 Subject: (new_dynarec) Update --- libpcsxcore/new_dynarec/new_dynarec.c | 18 +++++++++++------- libpcsxcore/new_dynarec/new_dynarec.h | 10 +++++----- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 588eaaa..3083e83 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -1055,19 +1055,23 @@ void invalidate_addr(u_int addr) // This is called when loading a save state. // Anything could have changed, so invalidate everything. -void invalidate_all_pages() +void invalidate_all_pages(void) { u_int page; for(page=0;page<4096;page++) invalidate_page(page); for(page=0;page<1048576;page++) - if(!invalid_code[page]) { + { + if(!invalid_code[page]) + { restore_candidate[(page&2047)>>3]|=1<<(page&7); restore_candidate[((page&2047)>>3)+256]|=1<<(page&7); } - #ifdef USE_MINI_HT + } + +#ifdef USE_MINI_HT memset(mini_ht,-1,sizeof(mini_ht)); - #endif +#endif } // Add an entry to jump_out after making a link @@ -7026,7 +7030,7 @@ static int new_dynarec_test(void) // clear the state completely, instead of just marking // things invalid like invalidate_all_pages() does -void new_dynarec_clear_full() +void new_dynarec_clear_full(void) { int n; out=(u_char *)BASE_ADDR; @@ -7047,7 +7051,7 @@ void new_dynarec_clear_full() for(n=0;n<4096;n++) ll_clear(jump_dirty+n); } -void new_dynarec_init() +void new_dynarec_init(void) { SysPrintf("Init new dynarec\n"); @@ -7103,7 +7107,7 @@ void new_dynarec_init() SysPrintf("warning: RAM is not directly mapped, performance will suffer\n"); } -void 
new_dynarec_cleanup() +void new_dynarec_cleanup(void) { int n; #if defined(BASE_ADDR_FIXED) || defined(BASE_ADDR_DYNAMIC) diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index ddc84a5..e7eb247 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -11,12 +11,12 @@ extern int cycle_multiplier; // 100 for 1.0 #define NDHACK_GTE_NO_FLAGS (1<<2) extern int new_dynarec_hacks; -void new_dynarec_init(); -void new_dynarec_cleanup(); -void new_dynarec_clear_full(); -void new_dyna_start(); +void new_dynarec_init(void); +void new_dynarec_cleanup(void); +void new_dynarec_clear_full(void); +void new_dyna_start(void); int new_dynarec_save_blocks(void *save, int size); void new_dynarec_load_blocks(const void *save, int size); -void invalidate_all_pages(); +void invalidate_all_pages(void); void invalidate_block(unsigned int block); -- cgit v1.2.3 From 0bfdd1aadcf7674776186d7cb81d802296a4d96a Mon Sep 17 00:00:00 2001 From: twinaphex Date: Wed, 28 Sep 2016 03:03:19 +0200 Subject: (new_dynarec) Cleanups --- libpcsxcore/new_dynarec/new_dynarec.c | 191 ++++++++++++++++++++-------------- 1 file changed, 115 insertions(+), 76 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 3083e83..059730a 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -366,14 +366,16 @@ static u_int get_vpage(u_int vaddr) // This is called from the recompiled JR/JALR instructions void *get_addr(u_int vaddr) { - u_int page=get_page(vaddr); - u_int vpage=get_vpage(vaddr); - struct ll_entry *head; + struct ll_entry *head = NULL; + u_int page = get_page(vaddr); + u_int vpage = get_vpage(vaddr); //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page); head=jump_in[page]; - while(head!=NULL) { - if(head->vaddr==vaddr) { - //printf("TRACE: count=%d next=%d (get_addr match %x: 
%x)\n",Count,next_interupt,vaddr,(int)head->addr); + while(head!=NULL) + { + if(head->vaddr==vaddr) + { + //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; ht_bin[3]=ht_bin[1]; ht_bin[2]=ht_bin[0]; @@ -384,39 +386,47 @@ void *get_addr(u_int vaddr) head=head->next; } head=jump_dirty[vpage]; - while(head!=NULL) { - if(head->vaddr==vaddr) { + while(head!=NULL) + { + if(head->vaddr==vaddr) + { //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr); // Don't restore blocks which are about to expire from the cache if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) - if(verify_dirty(head->addr)) { - //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); - invalid_code[vaddr>>12]=0; - inv_code_start=inv_code_end=~0; - if(vpage<2048) { - restore_candidate[vpage>>3]|=1<<(vpage&7); - } - else restore_candidate[page>>3]|=1<<(page&7); - u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; - if(ht_bin[0]==vaddr) { - ht_bin[1]=(u_int)head->addr; // Replace existing entry - } - else + if(verify_dirty(head->addr)) { - ht_bin[3]=ht_bin[1]; - ht_bin[2]=ht_bin[0]; - ht_bin[1]=(int)head->addr; - ht_bin[0]=vaddr; + //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); + invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; + if(vpage<2048) + { + restore_candidate[vpage>>3]|=1<<(vpage&7); + } + else + { + restore_candidate[page>>3]|=1<<(page&7); + } + u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF]; + + if(ht_bin[0]==vaddr) + ht_bin[1]=(u_int)head->addr; // Replace existing entry + else + { + ht_bin[3]=ht_bin[1]; + ht_bin[2]=ht_bin[0]; + ht_bin[1]=(int)head->addr; + ht_bin[0]=vaddr; + } + return head->addr; } - return head->addr; - } } head=head->next; } //printf("TRACE: count=%d next=%d (get_addr 
no-match %x)\n",Count,next_interupt,vaddr); int r=new_recompile_block(vaddr); - if(r==0) return get_addr(vaddr); - // Execute in unmapped page, generate pagefault execption + if(r==0) + return get_addr(vaddr); + // Execute in unmapped page, generate pagefault exception Status|=2; Cause=(vaddr<<31)|0x8; EPC=(vaddr&1)?vaddr-5:vaddr; @@ -425,6 +435,7 @@ void *get_addr(u_int vaddr) EntryHi=BadVAddr&0xFFFFE000; return get_addr_ht(0x80000000); } + // Look up address in hash table first void *get_addr_ht(u_int vaddr) { @@ -948,23 +959,26 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages) assert(last2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision + if(vpage>2047||(head->vaddr>>12)==block) + { // Ignore vaddr hash collision get_bounds((int)head->addr,&start,&end); //printf("start: %x end: %x\n",start,end); - if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) { - if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { + if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) + { + if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) + { if((((start-(u_int)rdram)>>12)&2047)>12)&2047; if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; } @@ -1096,37 +1113,48 @@ void clean_blocks(u_int page) struct ll_entry *head; inv_debug("INV: clean_blocks page=%d\n",page); head=jump_dirty[page]; - while(head!=NULL) { - if(!invalid_code[head->vaddr>>12]) { + while(head!=NULL) + { + if(!invalid_code[head->vaddr>>12]) + { // Don't restore blocks which are about to expire from the cache - if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { + if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) + { u_int start,end; - if(verify_dirty(head->addr)) { + if(verify_dirty(head->addr)) + { 
//printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr); u_int i; u_int inv=0; get_bounds((int)head->addr,&start,&end); - if(start-(u_int)rdram>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) { + if(start-(u_int)rdram>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) + { inv|=invalid_code[i]; } } - else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) { + else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) + { inv=1; } - if(!inv) { + if(!inv) + { void * clean_addr=(void *)get_clean_addr((int)head->addr); - if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) { + if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) + { u_int ppage=page; inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr); //printf("page=%x, addr=%x\n",page,head->vaddr); //assert(head->vaddr>>12==(page|0x80000)); ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr); u_int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF]; - if(ht_bin[0]==head->vaddr) { + if(ht_bin[0]==head->vaddr) + { ht_bin[1]=(u_int)clean_addr; // Replace existing entry } - if(ht_bin[2]==head->vaddr) { + if(ht_bin[2]==head->vaddr) + { ht_bin[3]=(u_int)clean_addr; // Replace existing entry } } @@ -1138,15 +1166,17 @@ void clean_blocks(u_int page) } } - -void mov_alloc(struct regstat *current,int i) +static void mov_alloc(struct regstat *current,int i) { // Note: Don't need to actually alloc the source registers - if((~current->is32>>rs1[i])&1) { + if((~current->is32>>rs1[i])&1) + { //alloc_reg64(current,i,rs1[i]); alloc_reg64(current,i,rt1[i]); current->is32&=~(1LL<is32|=(1LL<Execute(); - - struct retro_framebuffer fb = {0}; - - fb.width = vout_width; - fb.height = vout_height; - fb.access_flags = RETRO_MEMORY_ACCESS_WRITE; - - vout_buf_ptr = vout_buf; - - if (environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, 
&fb) && fb.format == RETRO_PIXEL_FORMAT_RGB565) - { - vout_buf_ptr = (uint16_t*)fb.data; - } video_cb((vout_fb_dirty || !vout_can_dupe || !duping_enable) ? vout_buf_ptr : NULL, vout_width, vout_height, vout_width * 2); -- cgit v1.2.3 From f02594f5c7ec18b8a6c6fd3802e199f07478a744 Mon Sep 17 00:00:00 2001 From: twinaphex Date: Thu, 29 Sep 2016 02:45:28 +0200 Subject: Define NO_WRITE_EXEC for Android --- jni/Android.mk | 2 +- libpcsxcore/new_dynarec/new_dynarec_config.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jni/Android.mk b/jni/Android.mk index a0dbec3..43a98da 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -95,7 +95,7 @@ LOCAL_SRC_FILES += ../frontend/main.c ../frontend/plugin.c ../frontend/cspace.c # libretro LOCAL_SRC_FILES += ../frontend/libretro.c -LOCAL_CFLAGS += -O3 -ffast-math -funroll-loops -DNDEBUG -D_FILE_OFFSET_BITS=64 -DHAVE_LIBRETRO -DNO_FRONTEND -DFRONTEND_SUPPORTS_RGB565 +LOCAL_CFLAGS += -O3 -ffast-math -funroll-loops -DNDEBUG -D_FILE_OFFSET_BITS=64 -DHAVE_LIBRETRO -DNO_FRONTEND -DFRONTEND_SUPPORTS_RGB565 -DANDROID LOCAL_C_INCLUDES += $(LOCAL_PATH)/../include LOCAL_LDLIBS := -lz -llog diff --git a/libpcsxcore/new_dynarec/new_dynarec_config.h b/libpcsxcore/new_dynarec/new_dynarec_config.h index fbd08ac..601f1a2 100644 --- a/libpcsxcore/new_dynarec/new_dynarec_config.h +++ b/libpcsxcore/new_dynarec/new_dynarec_config.h @@ -4,7 +4,7 @@ #define USE_MINI_HT 1 //#define REG_PREFETCH 1 -#if defined(__MACH__) || defined(VITA) +#if defined(__MACH__) || defined(VITA) || defined(ANDROID) #define NO_WRITE_EXEC 1 #endif #ifdef VITA -- cgit v1.2.3 From fc7f8b604caf63053f960ca1944e8d2de9f1858e Mon Sep 17 00:00:00 2001 From: twinaphex Date: Fri, 30 Sep 2016 20:47:37 +0200 Subject: Backport drc: fix a mistake from w^x cahnge --- libpcsxcore/new_dynarec/new_dynarec.c | 2 +- libpcsxcore/new_dynarec/new_dynarec_config.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libpcsxcore/new_dynarec/new_dynarec.c 
b/libpcsxcore/new_dynarec/new_dynarec.c index 059730a..f81e991 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -7121,7 +7121,7 @@ void new_dynarec_init(void) #else #ifndef NO_WRITE_EXEC // not all systems allow execute in data segment by default - if (mprotect(out, 1<library_name = "PCSX-ReARMed"; - info->library_version = "r22"; +#ifndef GIT_VERSION +#define GIT_VERSION "" +#endif + info->library_version = "r22" GIT_VERSION; info->valid_extensions = "bin|cue|img|mdf|pbp|toc|cbn|m3u"; info->need_fullpath = true; } diff --git a/jni/Android.mk b/jni/Android.mk index 43a98da..9dd9e39 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -2,6 +2,11 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) +GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +ifneq ($(GIT_VERSION)," unknown") + LOCAL_CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" +endif + APP_DIR := ../../src #fix stupid change in ndk r11 that breaks compiling even when the exe would run fine -- cgit v1.2.3