diff options
Diffstat (limited to 'libpcsxcore/ix86_64')
-rw-r--r-- | libpcsxcore/ix86_64/README | 2
-rw-r--r-- | libpcsxcore/ix86_64/iGte.h | 79
-rw-r--r-- | libpcsxcore/ix86_64/iR3000A-64.c | 2963
-rw-r--r-- | libpcsxcore/ix86_64/ix86-64.c | 3139
-rw-r--r-- | libpcsxcore/ix86_64/ix86-64.h | 1776
-rw-r--r-- | libpcsxcore/ix86_64/ix86_3dnow.c | 178
-rw-r--r-- | libpcsxcore/ix86_64/ix86_cpudetect.c | 487
-rw-r--r-- | libpcsxcore/ix86_64/ix86_fpu.c | 248
-rw-r--r-- | libpcsxcore/ix86_64/ix86_mmx.c | 646
-rw-r--r-- | libpcsxcore/ix86_64/ix86_sse.c | 1455
10 files changed, 10973 insertions, 0 deletions
diff --git a/libpcsxcore/ix86_64/README b/libpcsxcore/ix86_64/README new file mode 100644 index 0000000..af24e37 --- /dev/null +++ b/libpcsxcore/ix86_64/README @@ -0,0 +1,2 @@ +This is the AMD64 dynamic recompiler. +Made from opcodes from PCSX2 0.9.3 and the x86 recompiler modified to fit. ie. currently no AMD64/SSE specific code advantages. diff --git a/libpcsxcore/ix86_64/iGte.h b/libpcsxcore/ix86_64/iGte.h new file mode 100644 index 0000000..bcb2f4b --- /dev/null +++ b/libpcsxcore/ix86_64/iGte.h @@ -0,0 +1,79 @@ +/*************************************************************************** + * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. 
* + ***************************************************************************/ + +#ifndef __IGTE_H__ +#define __IGTE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "../r3000a.h" +#include "../psxmem.h" + +#define CP2_FUNC(f) \ +void gte##f(); \ +static void rec##f() { \ + iFlushRegs(); \ + MOV32ItoM((uptr)&psxRegs.code, (u32)psxRegs.code); \ + CALLFunc((uptr)gte##f); \ +/* branch = 2; */\ +} + +#define CP2_FUNCNC(f) \ +void gte##f(); \ +static void rec##f() { \ + iFlushRegs(); \ + CALLFunc((uptr)gte##f); \ +/* branch = 2; */\ +} + +CP2_FUNC(MFC2); +CP2_FUNC(MTC2); +CP2_FUNC(CFC2); +CP2_FUNC(CTC2); +CP2_FUNC(LWC2); +CP2_FUNC(SWC2); +CP2_FUNCNC(RTPS); +CP2_FUNC(OP); +CP2_FUNCNC(NCLIP); +CP2_FUNC(DPCS); +CP2_FUNC(INTPL); +CP2_FUNC(MVMVA); +CP2_FUNCNC(NCDS); +CP2_FUNCNC(NCDT); +CP2_FUNCNC(CDP); +CP2_FUNCNC(NCCS); +CP2_FUNCNC(CC); +CP2_FUNCNC(NCS); +CP2_FUNCNC(NCT); +CP2_FUNC(SQR); +CP2_FUNC(DCPL); +CP2_FUNCNC(DPCT); +CP2_FUNCNC(AVSZ3); +CP2_FUNCNC(AVSZ4); +CP2_FUNCNC(RTPT); +CP2_FUNC(GPF); +CP2_FUNC(GPL); +CP2_FUNCNC(NCCT); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/libpcsxcore/ix86_64/iR3000A-64.c b/libpcsxcore/ix86_64/iR3000A-64.c new file mode 100644 index 0000000..4ca3e99 --- /dev/null +++ b/libpcsxcore/ix86_64/iR3000A-64.c @@ -0,0 +1,2963 @@ +/*************************************************************************** + * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * + ***************************************************************************/ + +/* +* i386 assembly functions for R3000A core. +*/ + +#include "ix86-64.h" +#include "../r3000a.h" +#include "../psxhle.h" + +#include <sys/mman.h> + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +uptr* psxRecLUT; + +#define PTRMULT (sizeof(uptr) / sizeof(u32)) + +#undef PC_REC +#undef PC_REC8 +#undef PC_REC16 +#undef PC_REC32 +#define PC_REC(x) (psxRecLUT[(x) >> 16] + PTRMULT * ((x) & 0xffff)) +#define PC_RECP(x) (*(uptr *)PC_REC(x)) + +#define RECMEM_SIZE (PTRMULT * 8 * 1024 * 1024) + +static char *recMem; /* the recompiled blocks will be here */ +static char *recRAM; /* and the ptr to the blocks here */ +static char *recROM; /* and here */ + +static u32 pc; /* recompiler pc */ +static u32 pcold; /* recompiler oldpc */ +static int count; /* recompiler intruction count */ +static int branch; /* set for branch */ +static u32 target; /* branch target */ +static u32 resp; + +typedef struct { + int state; + u32 k; + int reg; +} iRegisters; + +static iRegisters iRegs[32]; +static iRegisters iRegsS[32]; + +#define ST_UNK 0 +#define ST_CONST 1 +#define ST_MAPPED 2 + +#define IsConst(reg) (iRegs[reg].state == ST_CONST) +#define IsMapped(reg) (iRegs[reg].state == ST_MAPPED) + +static void (*recBSC[64])(); +static void (*recSPC[64])(); +static void (*recREG[32])(); +static void (*recCP0[32])(); +static void (*recCP2[64])(); +static void (*recCP2BSC[32])(); + +#define STACKSIZE 0x18 +static void StackRes() +{ +#ifdef __x86_64__ + ADD64ItoR(RSP, STACKSIZE); +#else + if (resp) ADD32ItoR(ESP, resp); +#endif +} + +static void MapConst(int reg, u32 _const) { + iRegs[reg].k = _const; + iRegs[reg].state = ST_CONST; +} + +static void iFlushReg(int reg) 
{ + if (IsConst(reg)) { + MOV32ItoM((uptr)&psxRegs.GPR.r[reg], iRegs[reg].k); + } + iRegs[reg].state = ST_UNK; +} + +static void iFlushRegs() { + int i; + + for (i=1; i<32; i++) { + iFlushReg(i); + } +} + +static void iRet() { + /* store cycle */ + count = ((pc - pcold) / 4) * BIAS; + ADD32ItoM((uptr)&psxRegs.cycle, count); + StackRes(); + RET(); +} + +static int iLoadTest() { + u32 tmp; + + // check for load delay + tmp = psxRegs.code >> 26; + switch (tmp) { + case 0x10: // COP0 + switch (_Rs_) { + case 0x00: // MFC0 + case 0x02: // CFC0 + return 1; + } + break; + case 0x12: // COP2 + switch (_Funct_) { + case 0x00: + switch (_Rs_) { + case 0x00: // MFC2 + case 0x02: // CFC2 + return 1; + } + break; + } + break; + case 0x32: // LWC2 + return 1; + default: + if (tmp >= 0x20 && tmp <= 0x26) { // LB/LH/LWL/LW/LBU/LHU/LWR + return 1; + } + break; + } + return 0; +} + +/* set a pending branch */ +static void SetBranch() { + branch = 1; + psxRegs.code = PSXMu32(pc); + pc+=4; + + if (iLoadTest() == 1) { + iFlushRegs(); + MOV32ItoM((uptr)&psxRegs.code, psxRegs.code); + /* store cycle */ + count = ((pc - pcold) / 4) * BIAS; + ADD32ItoM((uptr)&psxRegs.cycle, count); + + //PUSH64M((uptr)&target); + MOV32MtoR(X86ARG2, (uptr)&target); + //PUSHI(_Rt_); + MOV64ItoR(X86ARG1, _Rt_); + CALLFunc((uptr)psxDelayTest); + StackRes(); + RET(); + return; + } + + recBSC[psxRegs.code>>26](); + + iFlushRegs(); + MOV32MtoR(EAX, (uptr)&target); + MOV32RtoM((uptr)&psxRegs.pc, EAX); + CALLFunc((uptr)psxBranchTest); + + iRet(); +} + +static void iJump(u32 branchPC) { + branch = 1; + psxRegs.code = PSXMu32(pc); + pc+=4; + + if (iLoadTest() == 1) { + iFlushRegs(); + MOV32ItoM((uptr)&psxRegs.code, psxRegs.code); + /* store cycle */ + count = ((pc - pcold) / 4) * BIAS; + ADD32ItoM((uptr)&psxRegs.cycle, count); + + //PUSHI(branchPC); + MOV64ItoR(X86ARG2, branchPC); + //PUSHI(_Rt_); + MOV64ItoR(X86ARG1, _Rt_); + CALLFunc((uptr)psxDelayTest); + //ADD32ItoR(ESP, 2*8); + StackRes(); + RET(); + return; + } 
+ + recBSC[psxRegs.code>>26](); + + iFlushRegs(); + MOV32ItoM((uptr)&psxRegs.pc, branchPC); + CALLFunc((uptr)psxBranchTest); + /* store cycle */ + count = ((pc - pcold) / 4) * BIAS; + ADD32ItoM((uptr)&psxRegs.cycle, count); + StackRes(); + + RET(); + //* XXX? + // maybe just happened an interruption, check so + CMP32ItoM((uptr)&psxRegs.pc, branchPC); + j8Ptr[0] = JE8(0); + + RET(); + + x86SetJ8(j8Ptr[0]); + MOV64MtoR(RAX, PC_REC(branchPC)); + TEST64RtoR(RAX,RAX); + j8Ptr[1] = JNE8(0); + + RET(); + + x86SetJ8(j8Ptr[1]); + + RET(); + //JMP32R(EAX); + JMPR(EAX); + //*/ +} + +static void iBranch(u32 branchPC, int savectx) { + u32 respold=0; + + if (savectx) { + respold = resp; + memcpy(iRegsS, iRegs, sizeof(iRegs)); + } + + branch = 1; + psxRegs.code = PSXMu32(pc); + + // the delay test is only made when the branch is taken + // savectx == 0 will mean that :) + if (savectx == 0 && iLoadTest() == 1) { + iFlushRegs(); + MOV32ItoM((uptr)&psxRegs.code, psxRegs.code); + /* store cycle */ + count = (((pc+4) - pcold) / 4) * BIAS; + ADD32ItoM((uptr)&psxRegs.cycle, count); + //if (resp) ADD32ItoR(ESP, resp); + + //PUSHI(branchPC); + MOV64ItoR(X86ARG2, branchPC); + //PUSHI(_Rt_); + MOV64ItoR(X86ARG1,_Rt_); + CALLFunc((uptr)psxDelayTest); + StackRes(); + RET(); + return; + } + + pc+= 4; + recBSC[psxRegs.code>>26](); + + iFlushRegs(); + MOV32ItoM((uptr)&psxRegs.pc, branchPC); + CALLFunc((uptr)psxBranchTest); + /* store cycle */ + count = ((pc - pcold) / 4) * BIAS; + ADD32ItoM((uptr)&psxRegs.cycle, count); + + StackRes(); + + // maybe just happened an interruption, check so + CMP32ItoM((uptr)&psxRegs.pc, branchPC); + j8Ptr[1] = JE8(0); + + RET(); + + x86SetJ8(j8Ptr[1]); + MOV64MtoR(RAX, PC_REC(branchPC)); + TEST64RtoR(RAX, RAX); + j8Ptr[2] = JNE8(0); + + RET(); + + x86SetJ8(j8Ptr[2]); + //JMP32R(EAX); + JMPR(EAX); + + pc-= 4; + if (savectx) { + resp = respold; + memcpy(iRegs, iRegsS, sizeof(iRegs)); + } +} + + +char *txt0 = "EAX = %x : ECX = %x : EDX = %x\n"; +char *txt1 = "EAX = 
%x\n"; +char *txt2 = "M32 = %x\n"; + +/* +void iLogX86() { + PUSHA32(); + + PUSH32R (EDX); + PUSH32R (ECX); + PUSH32R (EAX); + PUSH32M ((uptr)&txt0); + CALLFunc((uptr)SysPrintf); + ADD32ItoR(ESP, 4*4); + + POPA32(); +} +*/ + +void iLogEAX() { + PUSH64R (EAX); + PUSH64M ((uptr)&txt1); + CALLFunc((uptr)SysPrintf); + ADD32ItoR(ESP, 8*2); +} + +void iLogM32(u32 mem) { + PUSH64M (mem); + PUSH64M ((uptr)&txt2); + CALLFunc((uptr)SysPrintf); + ADD32ItoR(ESP, 8*2); +} + +static void iDumpRegs() { + int i, j; + + printf("%x %x\n", psxRegs.pc, psxRegs.cycle); + for (i = 0; i < 4; i++) { + for (j = 0; j < 8; j++) + printf("%x ", psxRegs.GPR.r[j*i]); + printf("\n"); + } +} + +void iDumpBlock(char *ptr) { + FILE *f; + u32 i; + + SysPrintf("dump1 %x:%x, %x\n", psxRegs.pc, pc, psxRegs.cycle); + + for (i = psxRegs.pc; i < pc; i+=4) + SysPrintf("%s\n", disR3000AF(PSXMu32(i), i)); + + fflush(stdout); + f = fopen("dump1", "w"); + fwrite(ptr, 1, (uptr)x86Ptr - (uptr)ptr, f); + fclose(f); + //system("ndisasm -b64 dump1"); + fflush(stdout); +} + +#define REC_FUNC(f) \ +void psx##f(); \ +static void rec##f() { \ + iFlushRegs(); \ + MOV32ItoM((uptr)&psxRegs.code, (u32)psxRegs.code); \ + MOV32ItoM((uptr)&psxRegs.pc, (u32)pc); \ + CALLFunc((uptr)psx##f); \ +/* branch = 2; */\ +} + +#define REC_SYS(f) \ +void psx##f(); \ +static void rec##f() { \ + iFlushRegs(); \ + MOV32ItoM((uptr)&psxRegs.code, (u32)psxRegs.code); \ + MOV32ItoM((uptr)&psxRegs.pc, (u32)pc); \ + CALLFunc((uptr)psx##f); \ + branch = 2; \ + iRet(); \ +} + +#define REC_BRANCH(f) \ +void psx##f(); \ +static void rec##f() { \ + iFlushRegs(); \ + MOV32ItoM((uptr)&psxRegs.code, (u32)psxRegs.code); \ + MOV32ItoM((uptr)&psxRegs.pc, (u32)pc); \ + CALLFunc((uptr)psx##f); \ + branch = 2; \ + iRet(); \ +} + +static void recRecompile(); + +static int recInit() { + int i; + + psxRecLUT = (uptr*) malloc(0x010000 * sizeof(uptr)); + + recMem = mmap(0, + RECMEM_SIZE + PTRMULT*0x1000, + PROT_EXEC | PROT_WRITE | PROT_READ, MAP_PRIVATE | 
MAP_ANONYMOUS, -1, 0); + + recRAM = mmap(0, + 0x280000*PTRMULT, + PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + recROM = &recRAM[0x200000*PTRMULT]; + + if (recRAM == NULL || recROM == NULL || recMem == NULL || psxRecLUT == NULL) { + SysMessage("Error allocating memory"); return -1; + } + memset(recMem, 0, RECMEM_SIZE); + memset(recRAM, 0, 0x200000 * PTRMULT); + memset(recROM, 0, 0x080000 * PTRMULT); + + for (i=0; i<0x80; i++) psxRecLUT[i + 0x0000] = (uptr)&recRAM[PTRMULT*((i & 0x1f) << 16)]; + memcpy(psxRecLUT + 0x8000, psxRecLUT, 0x80 * sizeof(uptr)); + memcpy(psxRecLUT + 0xa000, psxRecLUT, 0x80 * sizeof(uptr)); + + for (i=0; i<0x08; i++) psxRecLUT[i + 0xbfc0] = (uptr)&recROM[PTRMULT*(i << 16)]; + + return 0; +} + +static void recReset() { + memset(recRAM, 0, 0x200000 * PTRMULT); + memset(recROM, 0, 0x080000 * PTRMULT); + + //x86Init(); + cpudetectInit(); + x86SetPtr(recMem); + + branch = 0; + memset(iRegs, 0, sizeof(iRegs)); + iRegs[0].state = ST_CONST; + iRegs[0].k = 0; +} + +static void recShutdown() { + if (recMem == NULL) return; + free(psxRecLUT); + munmap(recMem, RECMEM_SIZE + PTRMULT*0x1000); + munmap(recRAM, 0x280000*PTRMULT); + x86Shutdown(); +} + +static void recError() { + SysReset(); + ClosePlugins(); + SysMessage("Unrecoverable error while running recompiler\n"); + SysRunGui(); +} + +/*__inline*/ static void execute() { + void (*recFunc)(); + uptr *p; + + p = (uptr *)PC_REC(psxRegs.pc); + // if (!p) { recError(); return; } + + if (*p == 0) { + recRecompile(); + } + + if (*p < (uptr)recMem || *p >= (uptr)recMem + RECMEM_SIZE) + { + recError(); + return; + } + recFunc = (void (*)())*p; + (*recFunc)(); +} + +static void recExecute() { + for (;;) execute(); +} + +static void recExecuteBlock() { + execute(); +} + +static void recClear(u32 Addr, u32 Size) { + memset((void*)PC_REC(Addr), 0, Size * sizeof(uptr)); +} + +static void recNULL() { +// SysMessage("recUNK: %8.8x\n", psxRegs.code); +} + 
+/********************************************************* +* goes to opcodes tables... * +* Format: table[something....] * +*********************************************************/ + +//REC_SYS(SPECIAL); +#if 1 +static void recSPECIAL() { + recSPC[_Funct_](); +} +#endif + +static void recREGIMM() { + recREG[_Rt_](); +} + +static void recCOP0() { + recCP0[_Rs_](); +} + +//REC_SYS(COP2); +#if 1 +static void recCOP2() { + recCP2[_Funct_](); +} +#endif + +static void recBASIC() { + recCP2BSC[_Rs_](); +} + +//end of Tables opcodes... + +/********************************************************* +* Arithmetic with immediate operand * +* Format: OP rt, rs, immediate * +*********************************************************/ + +#if 0 +REC_FUNC(ADDI); +REC_FUNC(ADDIU); +REC_FUNC(ANDI); +REC_FUNC(ORI); +REC_FUNC(XORI); +REC_FUNC(SLTI); +REC_FUNC(SLTIU); +#endif + +#if 1 +static void recADDIU() { +// Rt = Rs + Im + if (!_Rt_) return; + +// iFlushRegs(); + + if (_Rs_ == _Rt_) { + if (IsConst(_Rt_)) { + iRegs[_Rt_].k+= _Imm_; + } else { + if (_Imm_ == 1) { + INC32M((uptr)&psxRegs.GPR.r[_Rt_]); + } else if (_Imm_ == -1) { + DEC32M((uptr)&psxRegs.GPR.r[_Rt_]); + } else if (_Imm_) { + ADD32ItoM((uptr)&psxRegs.GPR.r[_Rt_], _Imm_); + } + } + } else { + if (IsConst(_Rs_)) { + MapConst(_Rt_, iRegs[_Rs_].k + _Imm_); + } else { + iRegs[_Rt_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + if (_Imm_ == 1) { + INC32R(EAX); + } else if (_Imm_ == -1) { + DEC32R(EAX); + } else if (_Imm_) { + ADD32ItoR(EAX, _Imm_); + } + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX); + } + } +} + +static void recADDI() { +// Rt = Rs + Im + recADDIU(); +} + +static void recSLTI() { +// Rt = Rs < Im (signed) + if (!_Rt_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_)) { + MapConst(_Rt_, (s32)iRegs[_Rs_].k < _Imm_); + } else { + iRegs[_Rt_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + CMP32ItoR(EAX, _Imm_); + SETL8R (EAX); + AND32ItoR(EAX, 0xff); + 
MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX); + } +} + +static void recSLTIU() { +// Rt = Rs < Im (unsigned) + if (!_Rt_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_)) { + MapConst(_Rt_, iRegs[_Rs_].k < _ImmU_); + } else { + iRegs[_Rt_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + CMP32ItoR(EAX, _Imm_); + SETB8R (EAX); + AND32ItoR(EAX, 0xff); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX); + } +} + +static void recANDI() { +// Rt = Rs And Im + if (!_Rt_) return; + +// iFlushRegs(); + + if (_Rs_ == _Rt_) { + if (IsConst(_Rt_)) { + iRegs[_Rt_].k&= _ImmU_; + } else { + AND32ItoM((uptr)&psxRegs.GPR.r[_Rt_], _ImmU_); + } + } else { + if (IsConst(_Rs_)) { + MapConst(_Rt_, iRegs[_Rs_].k & _ImmU_); + } else { + iRegs[_Rt_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + AND32ItoR(EAX, _ImmU_); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX); + } + } +} + +static void recORI() { +// Rt = Rs Or Im + if (!_Rt_) return; + +// iFlushRegs(); + + if (_Rs_ == _Rt_) { + if (IsConst(_Rt_)) { + iRegs[_Rt_].k|= _ImmU_; + } else { + OR32ItoM((uptr)&psxRegs.GPR.r[_Rt_], _ImmU_); + } + } else { + if (IsConst(_Rs_)) { + MapConst(_Rt_, iRegs[_Rs_].k | _ImmU_); + } else { + iRegs[_Rt_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + if (_ImmU_) OR32ItoR (EAX, _ImmU_); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX); + } + } +} + +static void recXORI() { +// Rt = Rs Xor Im + if (!_Rt_) return; + +// iFlushRegs(); + + if (_Rs_ == _Rt_) { + if (IsConst(_Rt_)) { + iRegs[_Rt_].k^= _ImmU_; + } else { + XOR32ItoM((uptr)&psxRegs.GPR.r[_Rt_], _ImmU_); + } + } else { + if (IsConst(_Rs_)) { + MapConst(_Rt_, iRegs[_Rs_].k ^ _ImmU_); + } else { + iRegs[_Rt_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + XOR32ItoR(EAX, _ImmU_); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX); + } + } +} +#endif +//end of * Arithmetic with immediate operand + +/********************************************************* +* Load higher 16 bits of the first 
word in GPR with imm * +* Format: OP rt, immediate * +*********************************************************/ +//REC_FUNC(LUI); +#if 1 +static void recLUI() { +// Rt = Imm << 16 + if (!_Rt_) return; + + MapConst(_Rt_, psxRegs.code << 16); +} +#endif +//End of Load Higher ..... + + +/********************************************************* +* Register arithmetic * +* Format: OP rd, rs, rt * +*********************************************************/ + + +#if 0 +REC_FUNC(ADD); +REC_FUNC(ADDU); +REC_FUNC(SUB); +REC_FUNC(SUBU); +REC_FUNC(AND); +REC_FUNC(OR); +REC_FUNC(XOR); +REC_FUNC(NOR); +REC_FUNC(SLT); +REC_FUNC(SLTU); +#endif + +#if 1 +static void recADDU() { +// Rd = Rs + Rt + if (!_Rd_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_) && IsConst(_Rt_)) { + MapConst(_Rd_, iRegs[_Rs_].k + iRegs[_Rt_].k); + } else if (IsConst(_Rs_)) { + iRegs[_Rd_].state = ST_UNK; + + if (_Rt_ == _Rd_) { + if (iRegs[_Rs_].k == 1) { + INC32M((uptr)&psxRegs.GPR.r[_Rd_]); + } else if (iRegs[_Rs_].k == -1) { + DEC32M((uptr)&psxRegs.GPR.r[_Rd_]); + } else if (iRegs[_Rs_].k) { + ADD32ItoM((uptr)&psxRegs.GPR.r[_Rd_], iRegs[_Rs_].k); + } + } else { + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + if (iRegs[_Rs_].k == 1) { + INC32R(EAX); + } else if (iRegs[_Rs_].k == 0xffffffff) { + DEC32R(EAX); + } else if (iRegs[_Rs_].k) { + ADD32ItoR(EAX, iRegs[_Rs_].k); + } + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } + } else if (IsConst(_Rt_)) { + iRegs[_Rd_].state = ST_UNK; + + if (_Rs_ == _Rd_) { + if (iRegs[_Rt_].k == 1) { + INC32M((uptr)&psxRegs.GPR.r[_Rd_]); + } else if (iRegs[_Rt_].k == -1) { + DEC32M((uptr)&psxRegs.GPR.r[_Rd_]); + } else if (iRegs[_Rt_].k) { + ADD32ItoM((uptr)&psxRegs.GPR.r[_Rd_], iRegs[_Rt_].k); + } + } else { + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + if (iRegs[_Rt_].k == 1) { + INC32R(EAX); + } else if (iRegs[_Rt_].k == 0xffffffff) { + DEC32R(EAX); + } else if (iRegs[_Rt_].k) { + ADD32ItoR(EAX, iRegs[_Rt_].k); + } + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + 
} + } else { + iRegs[_Rd_].state = ST_UNK; + + if (_Rs_ == _Rd_) { // Rd+= Rt + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + ADD32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else if (_Rt_ == _Rd_) { // Rd+= Rs + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + ADD32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else { // Rd = Rs + Rt + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + ADD32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } + } +} + +static void recADD() { +// Rd = Rs + Rt + recADDU(); +} + +static void recSUBU() { +// Rd = Rs - Rt + if (!_Rd_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_) && IsConst(_Rt_)) { + MapConst(_Rd_, iRegs[_Rs_].k - iRegs[_Rt_].k); + } else if (IsConst(_Rs_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32ItoR(EAX, iRegs[_Rs_].k); + SUB32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else if (IsConst(_Rt_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + SUB32ItoR(EAX, iRegs[_Rt_].k); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + SUB32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } +} + +static void recSUB() { +// Rd = Rs - Rt + recSUBU(); +} + +static void recAND() { +// Rd = Rs And Rt + if (!_Rd_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_) && IsConst(_Rt_)) { + MapConst(_Rd_, iRegs[_Rs_].k & iRegs[_Rt_].k); + } else if (IsConst(_Rs_)) { + iRegs[_Rd_].state = ST_UNK; + + if (_Rd_ == _Rt_) { // Rd&= Rs + AND32ItoM((uptr)&psxRegs.GPR.r[_Rd_], iRegs[_Rs_].k); + } else { + MOV32ItoR(EAX, iRegs[_Rs_].k); + AND32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } + } else if (IsConst(_Rt_)) { + iRegs[_Rd_].state = ST_UNK; + + if (_Rd_ == _Rs_) { // Rd&= kRt + AND32ItoM((uptr)&psxRegs.GPR.r[_Rd_], iRegs[_Rt_].k); + } else { // Rd = Rs & kRt + MOV32MtoR(EAX, 
(uptr)&psxRegs.GPR.r[_Rs_]); + AND32ItoR(EAX, iRegs[_Rt_].k); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } + } else { + iRegs[_Rd_].state = ST_UNK; + + if (_Rs_ == _Rd_) { // Rd&= Rt + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + AND32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else if (_Rt_ == _Rd_) { // Rd&= Rs + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + AND32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else { // Rd = Rs & Rt + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + AND32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } + } +} + +static void recOR() { +// Rd = Rs Or Rt + if (!_Rd_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_) && IsConst(_Rt_)) { + MapConst(_Rd_, iRegs[_Rs_].k | iRegs[_Rt_].k); + } else if (IsConst(_Rs_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32ItoR(EAX, iRegs[_Rs_].k); + OR32MtoR (EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else if (IsConst(_Rt_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + OR32ItoR (EAX, iRegs[_Rt_].k); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + OR32MtoR (EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } +} + +static void recXOR() { +// Rd = Rs Xor Rt + if (!_Rd_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_) && IsConst(_Rt_)) { + MapConst(_Rd_, iRegs[_Rs_].k ^ iRegs[_Rt_].k); + } else if (IsConst(_Rs_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32ItoR(EAX, iRegs[_Rs_].k); + XOR32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else if (IsConst(_Rt_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + XOR32ItoR(EAX, iRegs[_Rt_].k); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + XOR32MtoR(EAX, 
(uptr)&psxRegs.GPR.r[_Rt_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } +} + +static void recNOR() { +// Rd = Rs Nor Rt + if (!_Rd_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_) && IsConst(_Rt_)) { + MapConst(_Rd_, ~(iRegs[_Rs_].k | iRegs[_Rt_].k)); + } else if (IsConst(_Rs_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32ItoR(EAX, iRegs[_Rs_].k); + OR32MtoR (EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + NOT32R (EAX); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else if (IsConst(_Rt_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + OR32ItoR (EAX, iRegs[_Rt_].k); + NOT32R (EAX); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + OR32MtoR (EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + NOT32R (EAX); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } +} + +static void recSLT() { +// Rd = Rs < Rt (signed) + if (!_Rd_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_) && IsConst(_Rt_)) { + MapConst(_Rd_, (s32)iRegs[_Rs_].k < (s32)iRegs[_Rt_].k); + } else if (IsConst(_Rs_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32ItoR(EAX, iRegs[_Rs_].k); + CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + SETL8R (EAX); + AND32ItoR(EAX, 0xff); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else if (IsConst(_Rt_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + CMP32ItoR(EAX, iRegs[_Rt_].k); + SETL8R (EAX); + AND32ItoR(EAX, 0xff); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + SETL8R (EAX); + AND32ItoR(EAX, 0xff); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } +} + +static void recSLTU() { +// Rd = Rs < Rt (unsigned) + if (!_Rd_) return; + +// iFlushRegs(); + + if (IsConst(_Rs_) && IsConst(_Rt_)) { + MapConst(_Rd_, iRegs[_Rs_].k < iRegs[_Rt_].k); + } else if (IsConst(_Rs_)) { + iRegs[_Rd_].state = ST_UNK; + + 
MOV32ItoR(EAX, iRegs[_Rs_].k); + CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + SBB32RtoR(EAX, EAX); + NEG32R (EAX); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else if (IsConst(_Rt_)) { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + CMP32ItoR(EAX, iRegs[_Rt_].k); + SBB32RtoR(EAX, EAX); + NEG32R (EAX); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } else { + iRegs[_Rd_].state = ST_UNK; + + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + SBB32RtoR(EAX, EAX); + NEG32R (EAX); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX); + } +} +#endif +//End of * Register arithmetic + +/********************************************************* +* Register mult/div & Register trap logic * +* Format: OP rs, rt * +*********************************************************/ + +#if 0 +REC_FUNC(MULT); +REC_FUNC(MULTU); +REC_FUNC(DIV); +REC_FUNC(DIVU); +#endif + +#if 1 +static void recMULT() { +// Lo/Hi = Rs * Rt (signed) + +// iFlushRegs(); + + if ((IsConst(_Rs_) && iRegs[_Rs_].k == 0) || + (IsConst(_Rt_) && iRegs[_Rt_].k == 0)) { + XOR32RtoR(EAX, EAX); + MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX); + MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EAX); + return; + } + + if (IsConst(_Rs_)) { + MOV32ItoR(EAX, iRegs[_Rs_].k);// printf("multrsk %x\n", iRegs[_Rs_].k); + } else { + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + } + if (IsConst(_Rt_)) { + MOV32ItoR(EDX, iRegs[_Rt_].k);// printf("multrtk %x\n", iRegs[_Rt_].k); + IMUL32R (EDX); + } else { + IMUL32M ((uptr)&psxRegs.GPR.r[_Rt_]); + } + MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX); + MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EDX); +} + +static void recMULTU() { +// Lo/Hi = Rs * Rt (unsigned) + +// iFlushRegs(); + + if ((IsConst(_Rs_) && iRegs[_Rs_].k == 0) || + (IsConst(_Rt_) && iRegs[_Rt_].k == 0)) { + XOR32RtoR(EAX, EAX); + MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX); + MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EAX); + return; + } + + if (IsConst(_Rs_)) { + MOV32ItoR(EAX, iRegs[_Rs_].k);// 
printf("multursk %x\n", iRegs[_Rs_].k); + } else { + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + } + if (IsConst(_Rt_)) { + MOV32ItoR(EDX, iRegs[_Rt_].k);// printf("multurtk %x\n", iRegs[_Rt_].k); + MUL32R (EDX); + } else { + MUL32M ((uptr)&psxRegs.GPR.r[_Rt_]); + } + MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX); + MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EDX); +} + +static void recDIV() { +// Lo/Hi = Rs / Rt (signed) + +// iFlushRegs(); + + if (IsConst(_Rt_)) { + if (iRegs[_Rt_].k == 0) return; + MOV32ItoR(ECX, iRegs[_Rt_].k);// printf("divrtk %x\n", iRegs[_Rt_].k); + } else { + MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]); + CMP32ItoR(ECX, 0); + j8Ptr[0] = JE8(0); + } + if (IsConst(_Rs_)) { + MOV32ItoR(EAX, iRegs[_Rs_].k);// printf("divrsk %x\n", iRegs[_Rs_].k); + } else { + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + } + CDQ(); + IDIV32R (ECX); + MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX); + MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EDX); + if (!IsConst(_Rt_)) { + x86SetJ8(j8Ptr[0]); + } +} + +static void recDIVU() { +// Lo/Hi = Rs / Rt (unsigned) + +// iFlushRegs(); + + if (IsConst(_Rt_)) { + if (iRegs[_Rt_].k == 0) return; + MOV32ItoR(ECX, iRegs[_Rt_].k);// printf("divurtk %x\n", iRegs[_Rt_].k); + } else { + MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]); + CMP32ItoR(ECX, 0); + j8Ptr[0] = JE8(0); + } + if (IsConst(_Rs_)) { + MOV32ItoR(EAX, iRegs[_Rs_].k);// printf("divursk %x\n", iRegs[_Rs_].k); + } else { + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]); + } + XOR32RtoR(EDX, EDX); + DIV32R (ECX); + MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX); + MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EDX); + if (!IsConst(_Rt_)) { + x86SetJ8(j8Ptr[0]); + } +} +#endif +//End of * Register mult/div & Register trap logic + +#if 0 +REC_FUNC(LB); +REC_FUNC(LBU); +REC_FUNC(LH); +REC_FUNC(LHU); +REC_FUNC(LW); + +REC_FUNC(SB); +REC_FUNC(SH); +REC_FUNC(SW); + +REC_FUNC(LWL); +REC_FUNC(LWR); +REC_FUNC(SWL); +REC_FUNC(SWR); +#endif + + +static void SetArg_OfB(x86IntRegType arg) { + if (IsConst(_Rs_)) +#ifdef __x86_64__ + 
/* NOTE(review): the lines below are the tail of SetArg_OfB(int arg) — its
 * opening (the `if (IsConst(_Rs_))` / `#ifdef __x86_64__` header) precedes
 * this chunk. The helper loads the effective address Rs + Imm into the
 * argument register `arg` (x86-64) or pushes it on the stack (32-bit x86,
 * tracked by `resp` for later stack cleanup). */
		MOV64ItoR(arg, iRegs[_Rs_].k + _Imm_);
#else
		PUSH32I  (iRegs[_Rs_].k + _Imm_);
#endif
	else {
#ifdef __x86_64__
		MOV32MtoR(arg, (uptr)&psxRegs.GPR.r[_Rs_]);
		if (_Imm_)
			ADD32ItoR(arg, _Imm_);
#else
		if (_Imm_) {
			MOV32MtoR(EAX, (u32)&psxRegs.GPR.r[_Rs_]);
			ADD32ItoR(EAX, _Imm_);
			PUSH32R  (EAX);
		} else {
			PUSH32M  ((u32)&psxRegs.GPR.r[_Rs_]);
		}
#endif
	}
#ifndef __x86_64__
	resp += 4;
#endif
}

#if 1
/* Emit code for LB: Rt = (s8)mem[Rs + Imm], sign-extended.
 * Known-constant addresses to BIOS/RAM/scratchpad are inlined; anything
 * else goes through the generic psxMemRead8 handler. */
static void recLB() {
// Rt = mem[Rs + Im] (signed)

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0xfff0) == 0xbfc0) {
			if (!_Rt_) return;
			// since bios is readonly it won't change
			MapConst(_Rt_, psxRs8(addr));
			return;
		}
		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			// main RAM: direct sign-extending load from psxM mirror
			MOVSX32M8toR(EAX, (uptr)&psxM[addr & 0x1fffff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			// scratchpad (0x1f800000..0x1f800fff) lives in psxH
			MOVSX32M8toR(EAX, (uptr)&psxH[addr & 0xfff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
//		SysPrintf("unhandled r8 %x\n", addr);
	}

	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemRead8);
	if (_Rt_) {
		iRegs[_Rt_].state = ST_UNK;
		MOVSX32R8toR(EAX, EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
	}
//	ADD32ItoR(ESP, 4);
}

/* Emit code for LBU: Rt = (u8)mem[Rs + Imm], zero-extended.
 * Same structure as recLB with MOVZX instead of MOVSX. */
static void recLBU() {
// Rt = mem[Rs + Im] (unsigned)

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0xfff0) == 0xbfc0) {
			if (!_Rt_) return;
			// since bios is readonly it won't change
			MapConst(_Rt_, psxRu8(addr));
			return;
		}
		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			MOVZX32M8toR(EAX, (uptr)&psxM[addr & 0x1fffff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			MOVZX32M8toR(EAX, (uptr)&psxH[addr & 0xfff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
//		SysPrintf("unhandled r8u %x\n", addr);
	}

	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemRead8);
	if (_Rt_) {
		iRegs[_Rt_].state = ST_UNK;
		MOVZX32R8toR(EAX, EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
	}
//	ADD32ItoR(ESP, 4);
}

/* Emit code for LH: Rt = (s16)mem[Rs + Imm], sign-extended. */
static void recLH() {
// Rt = mem[Rs + Im] (signed)

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0xfff0) == 0xbfc0) {
			if (!_Rt_) return;
			// since bios is readonly it won't change
			MapConst(_Rt_, psxRs16(addr));
			return;
		}
		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			MOVSX32M16toR(EAX, (uptr)&psxM[addr & 0x1fffff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			MOVSX32M16toR(EAX, (uptr)&psxH[addr & 0xfff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
//		SysPrintf("unhandled r16 %x\n", addr);
	}

	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemRead16);
	if (_Rt_) {
		iRegs[_Rt_].state = ST_UNK;
		MOVSX32R16toR(EAX, EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
	}
//	ADD32ItoR(ESP, 4);
}

/* Emit code for LHU: Rt = (u16)mem[Rs + Imm], zero-extended.
 * Additionally inlines calls for constant addresses that hit the SPU
 * register file and the three root-counter register sets. */
static void recLHU() {
// Rt = mem[Rs + Im] (unsigned)

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0xfff0) == 0xbfc0) {
			if (!_Rt_) return;
			// since bios is readonly it won't change
			MapConst(_Rt_, psxRu16(addr));
			return;
		}
		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			MOVZX32M16toR(EAX, (uptr)&psxM[addr & 0x1fffff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			MOVZX32M16toR(EAX, (uptr)&psxH[addr & 0xfff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
		if (t == 0x1f80) {
			// SPU registers 0x1f801c00..0x1f801dff: call the plugin directly
			if (addr >= 0x1f801c00 && addr < 0x1f801e00) {
				if (!_Rt_) return;
				iRegs[_Rt_].state = ST_UNK;

				//PUSHI  (addr);
				MOV64ItoR(X86ARG1, addr);
				//CALLFunc ((uptr)SPU_readRegister);
				// indirect call through RAX; SPU_readRegister is a plugin
				// pointer so its address may not fit a rel32 CALL
				MOV64ItoR(RAX, (uptr)SPU_readRegister);
				CALL64R(RAX);
				MOVZX32R16toR(EAX, EAX);
				MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
#ifndef __WIN32__
				// NOTE(review): resp bookkeeping is a leftover of the 32-bit
				// stack-args ABI; on x86-64 args travel in registers — verify
				// these adjustments are harmless here.
				resp+= 4;
#endif
				return;
			}
			switch (addr) {
				// root counter current-count registers
				case 0x1f801100: case 0x1f801110: case 0x1f801120:
					if (!_Rt_) return;
					iRegs[_Rt_].state = ST_UNK;

					//PUSHI((addr >> 4) & 0x3);
					MOV64ItoR(X86ARG1, (addr >> 4) & 0x3);
					CALLFunc((uptr)psxRcntRcount);
					MOVZX32R16toR(EAX, EAX);
					MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
					resp+= 4;
					return;

				// root counter mode registers
				case 0x1f801104: case 0x1f801114: case 0x1f801124:
					if (!_Rt_) return;
					iRegs[_Rt_].state = ST_UNK;

					MOV64ItoR(X86ARG1, (addr >> 4) & 0x3);
					CALLFunc((uptr)psxRcntRmode);
					MOVZX32R16toR(EAX, EAX);
					MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
					resp+= 4;
					return;

				// root counter target registers
				case 0x1f801108: case 0x1f801118: case 0x1f801128:
					if (!_Rt_) return;
					iRegs[_Rt_].state = ST_UNK;

					MOV64ItoR(X86ARG1, (addr >> 4) & 0x3);
					CALLFunc((uptr)psxRcntRtarget);
					MOVZX32R16toR(EAX, EAX);
					MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
					resp+= 4;
					return;
			}
		}
//		SysPrintf("unhandled r16u %x\n", addr);
	}

	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemRead16);
	if (_Rt_) {
		iRegs[_Rt_].state = ST_UNK;
		MOVZX32R16toR(EAX, EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
	}
//	ADD32ItoR(ESP, 4);
}

/* Emit code for LW: Rt = (u32)mem[Rs + Imm].
 * Constant addresses into DMA/interrupt I/O registers and the GPU data/
 * status ports are inlined; anything else goes through psxMemRead32. */
static void recLW() {
// Rt = mem[Rs + Im] (unsigned)

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0xfff0) == 0xbfc0) {
			if (!_Rt_) return;
			// since bios is readonly it won't change
			MapConst(_Rt_, psxRu32(addr));
			return;
		}
		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1fffff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			if (!_Rt_) return;
			iRegs[_Rt_].state = ST_UNK;

			MOV32MtoR(EAX, (uptr)&psxH[addr & 0xfff]);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
		if (t == 0x1f80) {
			switch (addr) {
				// DMA channel MADR/BCR/CHCR, DICR/DPCR, ISTAT/IMASK:
				// plain reads from the psxH-backed I/O mirror
				case 0x1f801080: case 0x1f801084: case 0x1f801088:
				case 0x1f801090: case 0x1f801094: case 0x1f801098:
				case 0x1f8010a0: case 0x1f8010a4: case 0x1f8010a8:
				case 0x1f8010b0: case 0x1f8010b4: case 0x1f8010b8:
				case 0x1f8010c0: case 0x1f8010c4: case 0x1f8010c8:
				case 0x1f8010d0: case 0x1f8010d4: case 0x1f8010d8:
				case 0x1f8010e0: case 0x1f8010e4: case 0x1f8010e8:
				case 0x1f801070: case 0x1f801074:
				case 0x1f8010f0: case 0x1f8010f4:
					if (!_Rt_) return;
					iRegs[_Rt_].state = ST_UNK;

					MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffff]);
					MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
					return;

				case 0x1f801810: // GPU data port
					if (!_Rt_) return;
					iRegs[_Rt_].state = ST_UNK;

					CALLFunc((uptr)GPU_readData);
					MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
					return;

				case 0x1f801814: // GPU status port
					if (!_Rt_) return;
					iRegs[_Rt_].state = ST_UNK;

					CALLFunc((uptr)GPU_readStatus);
					MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
					return;
			}
		}
//		SysPrintf("unhandled r32 %x\n", addr);
	}

	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemRead32);
	if (_Rt_) {
		iRegs[_Rt_].state = ST_UNK;
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
	}
//	ADD32ItoR(ESP, 4);
}

extern u32 LWL_MASK[4];
extern u32 LWL_SHIFT[4];

/* Merge helper for LWL with a known alignment `shift`:
 * EAX = (EAX << LWL_SHIFT[shift]) | (Rt & LWL_MASK[shift]).
 * Expects the loaded word in EAX; clobbers ECX. */
void iLWLk(u32 shift) {
	if (IsConst(_Rt_)) {
		MOV32ItoR(ECX, iRegs[_Rt_].k);
	} else {
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	AND32ItoR(ECX, LWL_MASK[shift]);
	SHL32ItoR(EAX, LWL_SHIFT[shift]);
	OR32RtoR (EAX, ECX);
}

/* Emit code for LWL (unaligned load, left part):
 * Rt = (Rt & LWL_MASK[addr&3]) | (mem[addr & ~3] << LWL_SHIFT[addr&3]). */
void recLWL() {
// Rt = Rt Merge mem[Rs + Im]

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1ffffc]);
			iLWLk(addr & 3);

			iRegs[_Rt_].state = ST_UNK;
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffc]);
			iLWLk(addr & 3);

			iRegs[_Rt_].state = ST_UNK;
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
	}

	// generic path: read the aligned word through psxMemRead32, then merge
	if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
	else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		if (_Imm_) ADD32ItoR(EAX, _Imm_);
	}
	//PUSH64R  (EAX);
	AND32ItoR(EAX, ~3);
	//PUSH64R  (EAX);
	MOV32RtoR(X86ARG1, EAX);
	CALLFunc((uptr)psxMemRead32);

	if (_Rt_) {
		//ADD32ItoR(ESP, 4);
		//POP64R  (EDX);
		// recompute the address (the call clobbered it) to get the shift
		if (IsConst(_Rs_)) MOV32ItoR(EDX, iRegs[_Rs_].k + _Imm_);
		else {
			MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rs_]);
			if (_Imm_) ADD32ItoR(EDX, _Imm_);
		}

		AND32ItoR(EDX, 0x3); // shift = addr & 3;

		MOV64ItoR(ECX, (uptr)LWL_SHIFT);
		MOV32RmStoR(ECX, ECX, EDX, 2);
		SHL32CLtoR(EAX); // mem(EAX) << LWL_SHIFT[shift]

		MOV64ItoR(ECX, (uptr)LWL_MASK);
		MOV32RmStoR(ECX, ECX, EDX, 2);
		if (IsConst(_Rt_)) {
			MOV32ItoR(EDX, iRegs[_Rt_].k);
		} else {
			MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rt_]);
		}
		AND32RtoR(EDX, ECX); // _rRt_ & LWL_MASK[shift]

		OR32RtoR(EAX, EDX);

		iRegs[_Rt_].state = ST_UNK;
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
	//} else {
	//	ADD64ItoR(RSP, 8);
	//	resp+= 8;
	}
}

/*
static void recLWBlock(int count) {
	u32 *code = PSXM(pc);
	int i, respsave;
// Rt = mem[Rs + Im] (unsigned)

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0xfff0) == 0xbfc0) {
			// since bios is readonly it won't change
			for (i=0; i<count; i++, code++, addr+=4) {
				if (_fRt_(*code)) {
					MapConst(_fRt_(*code), psxRu32(addr));
				}
			}
			return;
		}
		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			for (i=0; i<count; i++, code++, addr+=4) {
				if (!_fRt_(*code)) return;
				iRegs[_fRt_(*code)].state = ST_UNK;

				MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1fffff]);
				MOV32RtoM((uptr)&psxRegs.GPR.r[_fRt_(*code)], EAX);
			}
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			for (i=0; i<count; i++, code++, addr+=4) {
				if (!_fRt_(*code)) return;
				iRegs[_fRt_(*code)].state = ST_UNK;

				MOV32MtoR(EAX, (uptr)&psxH[addr & 0xfff]);
				MOV32RtoM((uptr)&psxRegs.GPR.r[_fRt_(*code)], EAX);
			}
			return;
		}
	}

	SysPrintf("recLWBlock %d: %d\n", count, IsConst(_Rs_));
	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemPointer);
//	ADD32ItoR(ESP, 4);

	respsave = resp; resp = 0;
	TEST64RtoR(RAX,RAX);
	j32Ptr[4] = JZ32(0);
	XOR32RtoR(ECX, ECX);
	for (i=0; i<count; i++, code++) {
		if (_fRt_(*code)) {
			iRegs[_fRt_(*code)].state = ST_UNK;

			MOV64RmStoR(EDX, EAX, ECX, 2);
			MOV32RtoM((uptr)&psxRegs.GPR.r[_fRt_(*code)], EDX);
		}
		if (i != (count-1)) INC32R(ECX);
	}
	j32Ptr[5] = JMP32(0);
	x86SetJ32(j32Ptr[4]);
	for (i=0, code = PSXM(pc); i<count; i++, code++) {
		psxRegs.code = *code;
		recLW();
	}
#ifndef __x86_64__
	ADD32ItoR(ESP, resp);
#endif
	x86SetJ32(j32Ptr[5]);
	resp = respsave;
}
*/

extern u32 LWR_MASK[4];
extern u32 LWR_SHIFT[4];

/* Merge helper for LWR with a known alignment `shift`:
 * EAX = (EAX >> LWR_SHIFT[shift]) | (Rt & LWR_MASK[shift]).
 * Expects the loaded word in EAX; clobbers ECX. */
void iLWRk(u32 shift) {
	if (IsConst(_Rt_)) {
		MOV32ItoR(ECX, iRegs[_Rt_].k);
	} else {
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	AND32ItoR(ECX, LWR_MASK[shift]);
	SHR32ItoR(EAX, LWR_SHIFT[shift]);
	OR32RtoR (EAX, ECX);
}

/* Emit code for LWR (unaligned load, right part):
 * Rt = (Rt & LWR_MASK[addr&3]) | (mem[addr & ~3] >> LWR_SHIFT[addr&3]). */
void recLWR() {
// Rt = Rt Merge mem[Rs + Im]

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1ffffc]);
			iLWRk(addr & 3);

			iRegs[_Rt_].state = ST_UNK;
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffc]);
			iLWRk(addr & 3);

			iRegs[_Rt_].state = ST_UNK;
			MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
			return;
		}
	}

	// generic path: preserve the raw address across the read via push/pop
	if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
	else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		if (_Imm_) ADD32ItoR(EAX, _Imm_);
	}
	PUSHR(EAX);
	AND32ItoR(EAX, ~3);
	MOV32RtoR(X86ARG1, EAX);
	CALLFunc((uptr)psxMemRead32);

	POPR (EDX);
	if (_Rt_) {
		AND32ItoR(EDX, 0x3); // shift = addr & 3;

		MOV64ItoR(ECX, (uptr)LWR_SHIFT);
		MOV32RmStoR(ECX, ECX, EDX, 2);
		SHR32CLtoR(EAX); // mem(EAX) >> LWR_SHIFT[shift]

		MOV64ItoR(ECX, (uptr)LWR_MASK);
		MOV32RmStoR(ECX, ECX, EDX, 2);

		if (IsConst(_Rt_)) {
			MOV32ItoR(EDX, iRegs[_Rt_].k);
		} else {
			MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rt_]);
		}
		AND32RtoR(EDX, ECX); // _rRt_ & LWR_MASK[shift]

		OR32RtoR(EAX, EDX);

		iRegs[_Rt_].state = ST_UNK;
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
	//} else {
	//	resp+= 8;
	}
}

/* Emit code for SB: mem[Rs + Imm] = (u8)Rt.
 * RAM/scratchpad constant addresses are written directly; everything
 * else goes through psxMemWrite8. */
static void recSB() {
// mem[Rs + Im] = Rt

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			if (IsConst(_Rt_)) {
				MOV8ItoM((uptr)&psxM[addr & 0x1fffff], (u8)iRegs[_Rt_].k);
			} else {
				MOV8MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
				MOV8RtoM((uptr)&psxM[addr & 0x1fffff], EAX);
			}
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			if (IsConst(_Rt_)) {
				MOV8ItoM((uptr)&psxH[addr & 0xfff], (u8)iRegs[_Rt_].k);
			} else {
				MOV8MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
				MOV8RtoM((uptr)&psxH[addr & 0xfff], EAX);
			}
			return;
		}
//		SysPrintf("unhandled w8 %x\n", addr);
	}

	// value in ARG2 first: SetArg_OfB clobbers only ARG1
	if (IsConst(_Rt_)) {
		MOV64ItoR(X86ARG2, iRegs[_Rt_].k);
	} else {
		MOV32MtoR(X86ARG2, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemWrite8);
//	ADD32ItoR(ESP, 8);
}

/* Emit code for SH: mem[Rs + Imm] = (u16)Rt.
 * Also inlines SPU register writes for constant addresses. */
static void recSH() {
// mem[Rs + Im] = Rt

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			if (IsConst(_Rt_)) {
				MOV16ItoM((uptr)&psxM[addr & 0x1fffff], (u16)iRegs[_Rt_].k);
			} else {
				MOV16MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
				MOV16RtoM((uptr)&psxM[addr & 0x1fffff], EAX);
			}
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			if (IsConst(_Rt_)) {
				MOV16ItoM((uptr)&psxH[addr & 0xfff], (u16)iRegs[_Rt_].k);
			} else {
				MOV16MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
				MOV16RtoM((uptr)&psxH[addr & 0xfff], EAX);
			}
			return;
		}
		if (t == 0x1f80) {
			if (addr >= 0x1f801c00 && addr < 0x1f801e00) {
				if (IsConst(_Rt_)) {
					MOV64ItoR(X86ARG2, iRegs[_Rt_].k);
				} else {
					MOV32MtoR(X86ARG2, (uptr)&psxRegs.GPR.r[_Rt_]);
				}
				MOV64ItoR(X86ARG1, addr);
				CALLFunc ((uptr)SPU_writeRegister);
#ifndef __WIN32__
				//resp+= 8;
#endif
				return;
			}
		}
//		SysPrintf("unhandled w16 %x\n", addr);
	}

	if (IsConst(_Rt_)) {
		MOV64ItoR(X86ARG2, iRegs[_Rt_].k);
	} else {
		MOV32MtoR(X86ARG2, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemWrite16);
//	ADD32ItoR(ESP, 8);
}

/* Emit code for SW: mem[Rs + Imm] = Rt.
 * Constant DMA/interrupt register addresses write the psxH mirror
 * directly; GPU data/status writes call the GPU plugin. */
static void recSW() {
// mem[Rs + Im] = Rt

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			if (IsConst(_Rt_)) {
				MOV32ItoM((uptr)&psxM[addr & 0x1fffff], iRegs[_Rt_].k);
			} else {
				MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
				MOV32RtoM((uptr)&psxM[addr & 0x1fffff], EAX);
			}
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			if (IsConst(_Rt_)) {
				MOV32ItoM((uptr)&psxH[addr & 0xfff], iRegs[_Rt_].k);
			} else {
				MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
				MOV32RtoM((uptr)&psxH[addr & 0xfff], EAX);
			}
			return;
		}
		if (t == 0x1f80) {
			switch (addr) {
				case 0x1f801080: case 0x1f801084:
				case 0x1f801090: case 0x1f801094:
				case 0x1f8010a0: case 0x1f8010a4:
				case 0x1f8010b0: case 0x1f8010b4:
				case 0x1f8010c0: case 0x1f8010c4:
				case 0x1f8010d0: case 0x1f8010d4:
				case 0x1f8010e0: case 0x1f8010e4:
				case 0x1f801074:
				case 0x1f8010f0:
					if (IsConst(_Rt_)) {
						MOV32ItoM((uptr)&psxH[addr & 0xffff], iRegs[_Rt_].k);
					} else {
						MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
						MOV32RtoM((uptr)&psxH[addr & 0xffff], EAX);
					}
					return;

				case 0x1f801810: // GPU data port
					if (IsConst(_Rt_)) {
						MOV64ItoR(X86ARG1, iRegs[_Rt_].k);
					} else {
						MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rt_]);
					}
					CALLFunc((uptr)GPU_writeData);
#ifndef __WIN32__
					//resp+= 4;
#endif
					return;

				case 0x1f801814: // GPU status port
					if (IsConst(_Rt_)) {
						MOV64ItoR(X86ARG1, iRegs[_Rt_].k);
					} else {
						MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rt_]);
					}
					CALLFunc((uptr)GPU_writeStatus);
#ifndef __WIN32__
					//resp+= 4;
#endif
					// NOTE(review): missing `return;` here — unlike the
					// 0x1f801810 case above, control falls out of the switch
					// and ALSO emits the generic psxMemWrite32 call below, so
					// the GPU status write is performed twice. Verify whether
					// this fall-through is intentional.
			}
		}
//		SysPrintf("unhandled w32 %x\n", addr);
	}

	if (IsConst(_Rt_)) {
		MOV64ItoR(X86ARG2, iRegs[_Rt_].k);
	} else {
		MOV32MtoR(X86ARG2, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemWrite32);
//	ADD32ItoR(ESP, 8);
	//resp+= 8;
}

/*
static void recSWBlock(int count) {
	u32 *code;
	int i, respsave;
// mem[Rs + Im] = Rt

//	iFlushRegs();

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;
		code = PSXM(pc);

		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			for (i=0; i<count; i++, code++, addr+=4) {
				if (IsConst(_fRt_(*code))) {
					MOV32ItoM((uptr)&psxM[addr & 0x1fffff], iRegs[_fRt_(*code)].k);
				} else {
					MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_fRt_(*code)]);
					MOV32RtoM((uptr)&psxM[addr & 0x1fffff], EAX);
				}
			}
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			for (i=0; i<count; i++, code++, addr+=4) {
				if (!_fRt_(*code)) return;
				iRegs[_fRt_(*code)].state = ST_UNK;

				MOV32MtoR(EAX, (uptr)&psxH[addr & 0xfff]);
				MOV32RtoM((uptr)&psxRegs.GPR.r[_fRt_(*code)], EAX);
			}
			return;
		}
	}

	SysPrintf("recSWBlock %d: %d\n", count, IsConst(_Rs_));
	SetArg_OfB(X86ARG1);
	CALLFunc((uptr)psxMemPointer);
//	ADD32ItoR(ESP, 4);
	//resp+= 4;

	respsave = resp; resp = 0;
	TEST64RtoR(RAX,RAX);
	j32Ptr[4] = JZ32(0);
	XOR32RtoR(ECX, ECX);
	for (i=0, code = PSXM(pc); i<count; i++, code++) {
		if (IsConst(_fRt_(*code))) {
			MOV32ItoR(EDX, iRegs[_fRt_(*code)].k);
		} else {
			MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_fRt_(*code)]);
		}
		MOV32RtoRmS(EAX, ECX, 2, EDX);
		if (i != (count-1)) INC32R(ECX);
	}
	j32Ptr[5] = JMP32(0);
	x86SetJ32(j32Ptr[4]);
	for (i=0, code = PSXM(pc); i<count; i++, code++) {
		psxRegs.code = *code;
		recSW();
	}
	//ADD32ItoR(ESP, resp);
	x86SetJ32(j32Ptr[5]);
	resp = respsave;
}
*/

extern u32 SWL_MASK[4];
extern u32 SWL_SHIFT[4];

/* Merge helper for SWL with a known alignment `shift`:
 * EAX = (EAX & SWL_MASK[shift]) | (Rt >> SWL_SHIFT[shift]).
 * Expects the memory word in EAX; clobbers ECX. */
void iSWLk(u32 shift) {
	if (IsConst(_Rt_)) {
		MOV32ItoR(ECX, iRegs[_Rt_].k);
	} else {
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	SHR32ItoR(ECX, SWL_SHIFT[shift]);
	AND32ItoR(EAX, SWL_MASK[shift]);
	OR32RtoR (EAX, ECX);
}

/* Emit code for SWL (unaligned store, left part): read-modify-write of
 * the aligned word at Rs + Imm. */
void recSWL() {
// mem[Rs + Im] = Rt Merge mem[Rs + Im]

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1ffffc]);
			iSWLk(addr & 3);
			MOV32RtoM((uptr)&psxM[addr & 0x1ffffc], EAX);
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffc]);
			iSWLk(addr & 3);
			MOV32RtoM((uptr)&psxH[addr & 0xffc], EAX);
			return;
		}
	}

	// generic path: read aligned word, merge with Rt, write it back
	if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
	else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		if (_Imm_) ADD32ItoR(EAX, _Imm_);
	}
	PUSHR (EAX);
	AND32ItoR(EAX, ~3);
	MOV32RtoR(X86ARG1, EAX);

	CALLFunc((uptr)psxMemRead32);

	POPR (EDX);
	AND32ItoR(EDX, 0x3); // shift = addr & 3;

	MOV64ItoR(ECX, (uptr)SWL_MASK);
	MOV32RmStoR(ECX, ECX, EDX, 2);
	AND32RtoR(EAX, ECX); // mem & SWL_MASK[shift]

	MOV64ItoR(ECX, (uptr)SWL_SHIFT);
	MOV32RmStoR(ECX, ECX, EDX, 2);
	if (IsConst(_Rt_)) {
		MOV32ItoR(EDX, iRegs[_Rt_].k);
	} else {
		MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	SHR32CLtoR(EDX); // _rRt_ >> SWL_SHIFT[shift]

	OR32RtoR (EAX, EDX);
	MOV32RtoR(X86ARG2, EAX);

	if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
	else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		if (_Imm_) ADD32ItoR(EAX, _Imm_);
	}
	AND32ItoR(EAX, ~3);
	MOV32RtoR(X86ARG1, EAX);

	CALLFunc((uptr)psxMemWrite32);
//	ADD32ItoR(ESP, 8);
	//resp+= 8;
}

extern u32 SWR_MASK[4];
extern u32 SWR_SHIFT[4];

/* Merge helper for SWR with a known alignment `shift`:
 * EAX = (EAX & SWR_MASK[shift]) | (Rt << SWR_SHIFT[shift]).
 * Expects the memory word in EAX; clobbers ECX. */
void iSWRk(u32 shift) {
	if (IsConst(_Rt_)) {
		MOV32ItoR(ECX, iRegs[_Rt_].k);
	} else {
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	SHL32ItoR(ECX, SWR_SHIFT[shift]);
	AND32ItoR(EAX, SWR_MASK[shift]);
	OR32RtoR (EAX, ECX);
}

/* Emit code for SWR (unaligned store, right part): read-modify-write of
 * the aligned word at Rs + Imm. */
void recSWR() {
// mem[Rs + Im] = Rt Merge mem[Rs + Im]

	if (IsConst(_Rs_)) {
		u32 addr = iRegs[_Rs_].k + _Imm_;
		int t = addr >> 16;

		if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
			MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1ffffc]);
			iSWRk(addr & 3);
			MOV32RtoM((uptr)&psxM[addr & 0x1ffffc], EAX);
			return;
		}
		if (t == 0x1f80 && addr < 0x1f801000) {
			MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffc]);
			iSWRk(addr & 3);
			MOV32RtoM((uptr)&psxH[addr & 0xffc], EAX);
			return;
		}
	}

	if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
	else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		if (_Imm_) ADD32ItoR(EAX, _Imm_);
	}
	PUSHR (EAX);

	AND32ItoR(EAX, ~3);
	MOV32RtoR(X86ARG1, EAX);

	CALLFunc((uptr)psxMemRead32);

	POPR (EDX);
	AND32ItoR(EDX, 0x3); // shift = addr & 3;

	MOV64ItoR(ECX, (uptr)SWR_MASK);
	MOV32RmStoR(ECX, ECX, EDX, 2);
	AND32RtoR(EAX, ECX); // mem & SWR_MASK[shift]

	MOV64ItoR(ECX, (uptr)SWR_SHIFT);
	MOV32RmStoR(ECX, ECX, EDX, 2);
	if (IsConst(_Rt_)) {
		MOV32ItoR(EDX, iRegs[_Rt_].k);
	} else {
		MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	SHL32CLtoR(EDX); // _rRt_ << SWR_SHIFT[shift]

	OR32RtoR (EAX, EDX);
	MOV32RtoR(X86ARG2, EAX);

	if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
	else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		if (_Imm_) ADD32ItoR(EAX, _Imm_);
	}
	AND32ItoR(EAX, ~3);
	MOV32RtoR(X86ARG1, EAX);
	CALLFunc((uptr)psxMemWrite32);
//	ADD32ItoR(ESP, 8);
	//resp+= 8;
}

#endif

#if 0
/* Interpreter fallbacks for the shift ops, disabled: the native
 * implementations below are active (#if 1). */
REC_FUNC(SLL);
REC_FUNC(SRL);
REC_FUNC(SRA);
#endif
#if 1
/* Emit code for SLL: Rd = Rt << Sa (logical). Constant Rt is folded. */
static void recSLL() {
// Rd = Rt << Sa
	if (!_Rd_) return;

//	iFlushRegs();

	if (IsConst(_Rt_)) {
		MapConst(_Rd_, iRegs[_Rt_].k << _Sa_);
	} else {
		iRegs[_Rd_].state = ST_UNK;

		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		if (_Sa_) SHL32ItoR(EAX, _Sa_);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	}
}

/* Emit code for SRL: Rd = Rt >> Sa (logical). */
static void recSRL() {
// Rd = Rt >> Sa
	if (!_Rd_) return;

//	iFlushRegs();

	if (IsConst(_Rt_)) {
		MapConst(_Rd_, iRegs[_Rt_].k >> _Sa_);
	} else {
		iRegs[_Rd_].state = ST_UNK;

		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		if (_Sa_) SHR32ItoR(EAX, _Sa_);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	}
}

/* Emit code for SRA: Rd = (s32)Rt >> Sa (arithmetic). */
static void recSRA() {
// Rd = Rt >> Sa
	if (!_Rd_) return;

//	iFlushRegs();

	if (IsConst(_Rt_)) {
		MapConst(_Rd_, (s32)iRegs[_Rt_].k >> _Sa_);
	} else {
		iRegs[_Rd_].state = ST_UNK;

		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		if (_Sa_) SAR32ItoR(EAX, _Sa_);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	}
}
#endif

#if 0
REC_FUNC(SLLV);
REC_FUNC(SRLV);
REC_FUNC(SRAV);
#endif

#if 1
/* Emit code for SLLV: Rd = Rt << (Rs & 31). Four const/var combinations
 * of Rt and Rs; the shift count goes in CL as x86 requires.
 * NOTE(review): the const/const fold does iRegs[_Rt_].k << iRegs[_Rs_].k
 * without masking the count to 5 bits — shifts >= 32 are UB in C while
 * x86 (and MIPS) masks the count; verify no game hits this. */
static void recSLLV() {
// Rd = Rt << Rs
	if (!_Rd_) return;

//	iFlushRegs();

	if (IsConst(_Rt_) && IsConst(_Rs_)) {
		MapConst(_Rd_, iRegs[_Rt_].k << iRegs[_Rs_].k);
	} else if (IsConst(_Rs_)) {
		iRegs[_Rd_].state = ST_UNK;

		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		MOV32ItoR(ECX, iRegs[_Rs_].k);
		SHL32CLtoR(EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	} else if (IsConst(_Rt_)) {
		iRegs[_Rd_].state = ST_UNK;

		MOV32ItoR(EAX, iRegs[_Rt_].k);
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
		SHL32CLtoR(EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	} else {
		iRegs[_Rd_].state = ST_UNK;

		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
		SHL32CLtoR(EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	}
}

/* Emit code for SRLV: Rd = Rt >> (Rs & 31), logical. */
static void recSRLV() {
// Rd = Rt >> Rs
	if (!_Rd_) return;

//	iFlushRegs();

	if (IsConst(_Rt_) && IsConst(_Rs_)) {
		MapConst(_Rd_, iRegs[_Rt_].k >> iRegs[_Rs_].k);
	} else if (IsConst(_Rs_)) {
		iRegs[_Rd_].state = ST_UNK;

		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		MOV32ItoR(ECX, iRegs[_Rs_].k);
		SHR32CLtoR(EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	} else if (IsConst(_Rt_)) {
		iRegs[_Rd_].state = ST_UNK;

		MOV32ItoR(EAX, iRegs[_Rt_].k);
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
		SHR32CLtoR(EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	} else {
		iRegs[_Rd_].state = ST_UNK;

		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
		SHR32CLtoR(EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	}
}

/* Emit code for SRAV: Rd = (s32)Rt >> (Rs & 31), arithmetic. */
static void recSRAV() {
// Rd = Rt >> Rs
	if (!_Rd_) return;

//	iFlushRegs();

	if (IsConst(_Rt_) && IsConst(_Rs_)) {
		MapConst(_Rd_, (s32)iRegs[_Rt_].k >> iRegs[_Rs_].k);
	} else if (IsConst(_Rs_)) {
		iRegs[_Rd_].state = ST_UNK;

		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		MOV32ItoR(ECX, iRegs[_Rs_].k);
		SAR32CLtoR(EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	} else if (IsConst(_Rt_)) {
		iRegs[_Rd_].state = ST_UNK;

		MOV32ItoR(EAX, iRegs[_Rt_].k);
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
		SAR32CLtoR(EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	} else {
		iRegs[_Rd_].state = ST_UNK;

		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
		SAR32CLtoR(EAX);
		MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
	}
}
#endif

#if 0
REC_SYS(SYSCALL);
REC_SYS(BREAK);
#endif

int dump = 0;

#if 1
/* Emit code for SYSCALL: flush cached registers, store the faulting pc,
 * then call psxException(0x20, in-branch-delay-flag) and end the block. */
static void recSYSCALL() {
//	dump=1;
	iFlushRegs();

	MOV32ItoR(EAX, pc - 4);
	MOV32RtoM((uptr)&psxRegs.pc, EAX);
	// ARG2 = branch-delay flag, ARG1 = exception code 0x20 (SYSCALL)
	MOV64ItoR(X86ARG2, branch == 1 ? 1 : 0);
	MOV64ItoR(X86ARG1, 0x20);
	CALLFunc((uptr)psxException);
	//ADD32ItoR(ESP, 8);

	branch = 2;
	iRet();
}

/* NOTE(review): BREAK is compiled to nothing — the BREAK exception is
 * never raised by the recompiler. Verify this matches the interpreter. */
static void recBREAK() {
}
#endif

#if 0
REC_FUNC(MFHI);
REC_FUNC(MTHI);
REC_FUNC(MFLO);
REC_FUNC(MTLO);
#endif
#if 1
/* Emit code for MFHI: Rd = HI. */
static void recMFHI() {
// Rd = Hi
	if (!_Rd_) return;

	iRegs[_Rd_].state = ST_UNK;
	MOV32MtoR(EAX, (uptr)&psxRegs.GPR.n.hi);
	MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
}

/* Emit code for MTHI: HI = Rs. */
static void recMTHI() {
// Hi = Rs

	if (IsConst(_Rs_)) {
		MOV32ItoM((uptr)&psxRegs.GPR.n.hi, iRegs[_Rs_].k);
	} else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EAX);
	}
}

/* Emit code for MFLO: Rd = LO. */
static void recMFLO() {
// Rd = Lo
	if (!_Rd_) return;

	iRegs[_Rd_].state = ST_UNK;
	MOV32MtoR(EAX, (uptr)&psxRegs.GPR.n.lo);
	MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
}

/* Emit code for MTLO: LO = Rs. */
static void recMTLO() {
// Lo = Rs

	if (IsConst(_Rs_)) {
		MOV32ItoM((uptr)&psxRegs.GPR.n.lo, iRegs[_Rs_].k);
	} else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX);
	}
}
#endif

#if 0
REC_BRANCH(J);
REC_BRANCH(JR);
REC_BRANCH(JAL);
REC_BRANCH(JALR);
REC_BRANCH(BLTZ);
REC_BRANCH(BGTZ);
REC_BRANCH(BLTZAL);
REC_BRANCH(BGEZAL);
REC_BRANCH(BNE);
REC_BRANCH(BEQ);
REC_BRANCH(BLEZ);
REC_BRANCH(BGEZ);
#endif
#if 1
/* Emit code for BLTZ: branch to pc + Imm*4 if (s32)Rs < 0.
 * A branch-to-next-instruction with no load-delay hazard is a no-op;
 * constant Rs resolves the branch at compile time; otherwise both the
 * not-taken (fallthrough) and taken paths are emitted via iBranch. */
static void recBLTZ() {
// Branch if Rs < 0
	u32 bpc = _Imm_ * 4 + pc;

//	iFlushRegs();

	if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
		return;
	}

	if (IsConst(_Rs_)) {
		if ((s32)iRegs[_Rs_].k < 0) {
			iJump(bpc); return;
		} else {
			iJump(pc+4); return;
		}
	}

	CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
	j32Ptr[4] = JL32(0);

	iBranch(pc+4, 1);

	x86SetJ32(j32Ptr[4]);

	iBranch(bpc, 0);
	pc+=4;
}

/* Emit code for BGTZ: branch if (s32)Rs > 0. Same structure as recBLTZ. */
static void recBGTZ() {
// Branch if Rs > 0
	u32 bpc = _Imm_ * 4 + pc;

//	iFlushRegs();
	if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
		return;
	}

	if (IsConst(_Rs_)) {
		if ((s32)iRegs[_Rs_].k > 0) {
			iJump(bpc); return;
		} else {
			iJump(pc+4); return;
		}
	}

	CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
	j32Ptr[4] = JG32(0);

	iBranch(pc+4, 1);

	x86SetJ32(j32Ptr[4]);

	iBranch(bpc, 0);
	pc+=4;
}

/* Emit code for BLTZAL: branch if (s32)Rs < 0, writing the return
 * address (pc of the instruction after the delay slot) to r31 only on
 * the taken path. */
static void recBLTZAL() {
// Branch if Rs < 0
	u32 bpc = _Imm_ * 4 + pc;

//	iFlushRegs();
	if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
		return;
	}

	if (IsConst(_Rs_)) {
		if ((s32)iRegs[_Rs_].k < 0) {
			MOV32ItoM((uptr)&psxRegs.GPR.r[31], pc + 4);
			iJump(bpc); return;
		} else {
			iJump(pc+4); return;
		}
	}

	CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
	j32Ptr[4] = JL32(0);

	iBranch(pc+4, 1);

	x86SetJ32(j32Ptr[4]);

	MOV32ItoM((uptr)&psxRegs.GPR.r[31], pc + 4);
	iBranch(bpc, 0);
	pc+=4;
}

/* Emit code for BGEZAL: branch if (s32)Rs >= 0, linking r31 on the
 * taken path. */
static void recBGEZAL() {
// Branch if Rs >= 0
	u32 bpc = _Imm_ * 4 + pc;

//	iFlushRegs();
	if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
		return;
	}

	if (IsConst(_Rs_)) {
		if ((s32)iRegs[_Rs_].k >= 0) {
			MOV32ItoM((uptr)&psxRegs.GPR.r[31], pc + 4);
			iJump(bpc); return;
		} else {
			iJump(pc+4); return;
		}
	}

	CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
	j32Ptr[4] = JGE32(0);

	iBranch(pc+4, 1);

	x86SetJ32(j32Ptr[4]);

	MOV32ItoM((uptr)&psxRegs.GPR.r[31], pc + 4);
	iBranch(bpc, 0);
	pc+=4;
}

/* Emit code for J: unconditional jump within the current 256MB region. */
static void recJ() {
// j target

	iJump(_Target_ * 4 + (pc & 0xf0000000));
}

/* Emit code for JAL: jump and link — r31 becomes a compile-time constant
 * (return address after the delay slot). */
static void recJAL() {
// jal target

	MapConst(31, pc + 4);

	iJump(_Target_ * 4 + (pc & 0xf0000000));
}

/* Emit code for JR: indirect jump; the target is stored into the global
 * `target` and SetBranch emits the block epilogue. */
static void recJR() {
// jr Rs

	if (IsConst(_Rs_)) {
		MOV32ItoM((uptr)&target, iRegs[_Rs_].k);
	} else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		MOV32RtoM((uptr)&target, EAX);
	}

	SetBranch();
}

/* Emit code for JALR: indirect jump with link into Rd (when Rd != r0). */
static void recJALR() {
// jalr Rs

	if (IsConst(_Rs_)) {
		MOV32ItoM((uptr)&target, iRegs[_Rs_].k);
	} else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		MOV32RtoM((uptr)&target, EAX);
	}

	if (_Rd_) {
		MapConst(_Rd_, pc + 4);
	}

	SetBranch();
}

/* Emit code for BEQ: branch if Rs == Rt. Rs == Rt (same register) is
 * always taken; const/const resolves at compile time; otherwise compare
 * and emit both paths. */
static void recBEQ() {
// Branch if Rs == Rt
	u32 bpc = _Imm_ * 4 + pc;

//	iFlushRegs();
	if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
		return;
	}

	if (_Rs_ == _Rt_) {
		iJump(bpc);
	} else {
		if (IsConst(_Rs_) && IsConst(_Rt_)) {
			if (iRegs[_Rs_].k == iRegs[_Rt_].k) {
				iJump(bpc); return;
			} else {
				iJump(pc+4); return;
			}
		} else if (IsConst(_Rs_)) {
			CMP32ItoM((uptr)&psxRegs.GPR.r[_Rt_], iRegs[_Rs_].k);
		} else if (IsConst(_Rt_)) {
			CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], iRegs[_Rt_].k);
		} else {
			MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
			CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
		}

		j32Ptr[4] = JE32(0);

		iBranch(pc+4, 1);

		x86SetJ32(j32Ptr[4]);

		iBranch(bpc, 0);
		pc+=4;
	}
}

/* Emit code for BNE: branch if Rs != Rt. */
static void recBNE() {
// Branch if Rs != Rt
	u32 bpc = _Imm_ * 4 + pc;

//	iFlushRegs();
	if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
		return;
	}

	if (IsConst(_Rs_) && IsConst(_Rt_)) {
		if (iRegs[_Rs_].k != iRegs[_Rt_].k) {
			iJump(bpc); return;
		} else {
			iJump(pc+4); return;
		}
	} else if (IsConst(_Rs_)) {
		CMP32ItoM((uptr)&psxRegs.GPR.r[_Rt_], iRegs[_Rs_].k);
	} else if (IsConst(_Rt_)) {
		CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], iRegs[_Rt_].k);
	} else {
		MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
		CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
	}
	j32Ptr[4] = JNE32(0);

	iBranch(pc+4, 1);

	x86SetJ32(j32Ptr[4]);

	iBranch(bpc, 0);
	pc+=4;
}

/* Emit code for BLEZ: branch if (s32)Rs <= 0. */
static void recBLEZ() {
// Branch if Rs <= 0
	u32 bpc = _Imm_ * 4 + pc;

//	iFlushRegs();
	if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
		return;
	}

	if (IsConst(_Rs_)) {
		if ((s32)iRegs[_Rs_].k <= 0) {
			iJump(bpc); return;
		} else {
			iJump(pc+4); return;
		}
	}

	CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
	j32Ptr[4] = JLE32(0);

	iBranch(pc+4, 1);

	x86SetJ32(j32Ptr[4]);

	iBranch(bpc, 0);
	pc+=4;
}

/* Emit code for BGEZ: branch if (s32)Rs >= 0.
 * NOTE(review): this chunk ends mid-function; the remainder of recBGEZ
 * follows in the next part of the file. */
static void recBGEZ() {
// Branch if Rs >= 0
	u32 bpc = _Imm_ * 4 + pc;

//	iFlushRegs();
	if
(bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) { + return; + } + + if (IsConst(_Rs_)) { + if ((s32)iRegs[_Rs_].k >= 0) { + iJump(bpc); return; + } else { + iJump(pc+4); return; + } + } + + CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); + j32Ptr[4] = JGE32(0); + + iBranch(pc+4, 1); + + x86SetJ32(j32Ptr[4]); + + iBranch(bpc, 0); + pc+=4; +} +#endif + +#if 0 +REC_FUNC(MFC0); +REC_SYS(MTC0); +REC_FUNC(CFC0); +REC_SYS(CTC0); +REC_FUNC(RFE); +#endif +//REC_SYS(MTC0); +#if 1 +static void recMFC0() { +// Rt = Cop0->Rd + if (!_Rt_) return; + + iRegs[_Rt_].state = ST_UNK; + MOV32MtoR(EAX, (uptr)&psxRegs.CP0.r[_Rd_]); + MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX); +} + +static void recCFC0() { +// Rt = Cop0->Rd + + recMFC0(); +} + +//* +void psxMTC0(); +static void recMTC0() { +// Cop0->Rd = Rt + + if (IsConst(_Rt_)) { + switch (_Rd_) { + case 12: + MOV32ItoM((uptr)&psxRegs.CP0.r[_Rd_], iRegs[_Rt_].k); + break; + case 13: + MOV32ItoM((uptr)&psxRegs.CP0.r[_Rd_], iRegs[_Rt_].k & ~(0xfc00)); + break; + default: + MOV32ItoM((uptr)&psxRegs.CP0.r[_Rd_], iRegs[_Rt_].k); + break; + } + } else { + MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]); + switch (_Rd_) { + case 13: + AND32ItoR(EAX, ~(0xfc00)); + break; + } + MOV32RtoM((uptr)&psxRegs.CP0.r[_Rd_], EAX); + } + + if (_Rd_ == 12 || _Rd_ == 13) { + iFlushRegs(); + MOV32ItoM((uptr)&psxRegs.pc, (u32)pc); + CALLFunc((uptr)psxTestSWInts); + if (branch == 0) { + branch = 2; + iRet(); + } + } +}//*/ + +static void recCTC0() { +// Cop0->Rd = Rt + + recMTC0(); +} + +static void recRFE() { + MOV32MtoR(EAX, (uptr)&psxRegs.CP0.n.Status); + MOV32RtoR(ECX, EAX); + AND32ItoR(EAX, 0xfffffff0); + AND32ItoR(ECX, 0x3c); + SHR32ItoR(ECX, 2); + OR32RtoR (EAX, ECX); + MOV32RtoM((uptr)&psxRegs.CP0.n.Status, EAX); + + iFlushRegs(); + MOV32ItoM((uptr)&psxRegs.pc, (u32)pc); + CALLFunc((uptr)psxTestSWInts); + if (branch == 0) { + branch = 2; + iRet(); + } +} +#endif + +#include "iGte.h" + +// + +static void recHLE() { + iFlushRegs(); + + 
CALLFunc((uptr)psxHLEt[psxRegs.code & 0xffff]); + branch = 2; + iRet(); +} + +// + +static void (*recBSC[64])() = { + recSPECIAL, recREGIMM, recJ , recJAL , recBEQ , recBNE , recBLEZ, recBGTZ, + recADDI , recADDIU , recSLTI, recSLTIU, recANDI, recORI , recXORI, recLUI , + recCOP0 , recNULL , recCOP2, recNULL , recNULL, recNULL, recNULL, recNULL, + recNULL , recNULL , recNULL, recNULL , recNULL, recNULL, recNULL, recNULL, + recLB , recLH , recLWL , recLW , recLBU , recLHU , recLWR , recNULL, + recSB , recSH , recSWL , recSW , recNULL, recNULL, recSWR , recNULL, + recNULL , recNULL , recLWC2, recNULL , recNULL, recNULL, recNULL, recNULL, + recNULL , recNULL , recSWC2, recHLE , recNULL, recNULL, recNULL, recNULL +}; + +static void (*recSPC[64])() = { + recSLL , recNULL, recSRL , recSRA , recSLLV , recNULL , recSRLV, recSRAV, + recJR , recJALR, recNULL, recNULL, recSYSCALL, recBREAK, recNULL, recNULL, + recMFHI, recMTHI, recMFLO, recMTLO, recNULL , recNULL , recNULL, recNULL, + recMULT, recMULTU, recDIV, recDIVU, recNULL , recNULL , recNULL, recNULL, + recADD , recADDU, recSUB , recSUBU, recAND , recOR , recXOR , recNOR , + recNULL, recNULL, recSLT , recSLTU, recNULL , recNULL , recNULL, recNULL, + recNULL, recNULL, recNULL, recNULL, recNULL , recNULL , recNULL, recNULL, + recNULL, recNULL, recNULL, recNULL, recNULL , recNULL , recNULL, recNULL +}; + +static void (*recREG[32])() = { + recBLTZ , recBGEZ , recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, + recNULL , recNULL , recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, + recBLTZAL, recBGEZAL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, + recNULL , recNULL , recNULL, recNULL, recNULL, recNULL, recNULL, recNULL +}; + +static void (*recCP0[32])() = { + recMFC0, recNULL, recCFC0, recNULL, recMTC0, recNULL, recCTC0, recNULL, + recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, + recRFE , recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, + recNULL, recNULL, recNULL, 
recNULL, recNULL, recNULL, recNULL, recNULL +}; + +static void (*recCP2[64])() = { + recBASIC, recRTPS , recNULL , recNULL, recNULL, recNULL , recNCLIP, recNULL, // 00 + recNULL , recNULL , recNULL , recNULL, recOP , recNULL , recNULL , recNULL, // 08 + recDPCS , recINTPL, recMVMVA, recNCDS, recCDP , recNULL , recNCDT , recNULL, // 10 + recNULL , recNULL , recNULL , recNCCS, recCC , recNULL , recNCS , recNULL, // 18 + recNCT , recNULL , recNULL , recNULL, recNULL, recNULL , recNULL , recNULL, // 20 + recSQR , recDCPL , recDPCT , recNULL, recNULL, recAVSZ3, recAVSZ4, recNULL, // 28 + recRTPT , recNULL , recNULL , recNULL, recNULL, recNULL , recNULL , recNULL, // 30 + recNULL , recNULL , recNULL , recNULL, recNULL, recGPF , recGPL , recNCCT // 38 +}; + +static void (*recCP2BSC[32])() = { + recMFC2, recNULL, recCFC2, recNULL, recMTC2, recNULL, recCTC2, recNULL, + recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, + recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, + recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL +}; + + +static void recRecompile() { + char *p; + char *ptr; + + dump = 0; + resp = 0; + + /* if x86Ptr reached the mem limit reset whole mem */ + if (((uptr)x86Ptr - (uptr)recMem) >= (RECMEM_SIZE - PTRMULT*0x10000)) + recReset(); + + x86Align(32); + ptr = x86Ptr; + + PC_RECP(psxRegs.pc) = x86Ptr; + pc = psxRegs.pc; + pcold = pc; + + //Make some stack space for function arguments spill (x86-64 calling conventions) + // 0x38 = 7 args, should be plenty... 
+ SUB64ItoR(RSP, STACKSIZE); + + for (count=0; count<500;) { + p = (char *)PSXM(pc); + if (p == NULL) recError(); + psxRegs.code = *(u32 *)p; +/* + if ((psxRegs.code >> 26) == 0x23) { // LW + int i; + u32 code; + + for (i=1;; i++) { + p = (char *)PSXM(pc+i*4); + if (p == NULL) recError(); + code = *(u32 *)p; + + if ((code >> 26) != 0x23 || + _fRs_(code) != _Rs_ || + _fImm_(code) != (_Imm_+i*4)) + break; + } + if (i > 1) { + recLWBlock(i); + pc = pc + i*4; continue; + } + } + + if ((psxRegs.code >> 26) == 0x2b) { // SW + int i; + u32 code; + + for (i=1;; i++) { + p = (char *)PSXM(pc+i*4); + if (p == NULL) recError(); + code = *(u32 *)p; + + if ((code >> 26) != 0x2b || + _fRs_(code) != _Rs_ || + _fImm_(code) != (_Imm_+i*4)) + break; + } + if (i > 1) { + recSWBlock(i); + pc = pc + i*4; continue; + } + }*/ + + pc+=4; count++; + recBSC[psxRegs.code>>26](); + + if (branch) { + branch = 0; + if (dump) iDumpBlock(ptr); + return; + } + } + + iFlushRegs(); + + MOV32ItoM((uptr)&psxRegs.pc, pc); + iRet(); +} + + +R3000Acpu psxRec = { + recInit, + recReset, + recExecute, + recExecuteBlock, + recClear, + recShutdown +}; diff --git a/libpcsxcore/ix86_64/ix86-64.c b/libpcsxcore/ix86_64/ix86-64.c new file mode 100644 index 0000000..0582f35 --- /dev/null +++ b/libpcsxcore/ix86_64/ix86-64.c @@ -0,0 +1,3139 @@ +/* + * ix86 core v0.6.2 + * Authors: linuzappz <linuzappz@pcsx.net> + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + */ + +// stop compiling if NORECBUILD build (only for Visual Studio) +#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "ix86-64.h" + +#ifdef __x86_64__ + +#ifdef _MSC_VER +// visual studio calling convention +x86IntRegType g_x86savedregs[] = { RBX, RBP, RSI, RDI, R12, R13, R14, R15 }; +x86IntRegType g_x86tempregs[] = { R8, R9, R10, R11, RDX, RCX }; + +// arranged in savedreg -> tempreg order +x86IntRegType g_x86allregs[14] = { RBX, RBP, RSI, RDI, R12, R13, R14, R15, R8, R9, 
R10, R11, RDX, RCX }; + +#else +// standard calling convention + +// registers saved by called functions (no need to flush them across calls) +x86IntRegType g_x86savedregs[] = { RBX, RBP, R12, R13, R14, R15 }; +// temp registers that need to be saved across calls +x86IntRegType g_x86tempregs[] = { RCX, RDX, R8, R9, R10, R11, RSI, RDI }; + +// arranged in savedreg -> tempreg order +x86IntRegType g_x86allregs[14] = { RBX, RBP, R12, R13, R14, R15, RCX, RDX, R8, R9, R10, R11, RSI, RDI }; + +#endif + +x86IntRegType g_x868bitregs[11] = { RBX, R12, R13, R14, R15, RCX, RDX, R8, R9, R10, R11 }; +x86IntRegType g_x86non8bitregs[3] = { RBP, RSI, RDI }; + +#endif // __x86_64__ + +s8 *x86Ptr; +u8 *j8Ptr[32]; +u32 *j32Ptr[32]; + +void WriteRmOffset(x86IntRegType to, int offset) +{ + if( (to&7) == ESP ) { + if( offset == 0 ) { + ModRM( 0, 0, 4 ); + ModRM( 0, ESP, 4 ); + } + else if( offset < 128 && offset >= -128 ) { + ModRM( 1, 0, 4 ); + ModRM( 0, ESP, 4 ); + write8(offset); + } + else { + ModRM( 2, 0, 4 ); + ModRM( 0, ESP, 4 ); + write32(offset); + } + } + else { + if( offset == 0 ) { + ModRM( 0, 0, to ); + } + else if( offset < 128 && offset >= -128 ) { + ModRM( 1, 0, to ); + write8(offset); + } + else { + ModRM( 2, 0, to ); + write32(offset); + } + } +} + +void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) +{ + if ((from&7) == ESP) { + if( offset == 0 ) { + ModRM( 0, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + } + else if( offset < 128 && offset >= -128 ) { + ModRM( 1, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + write8(offset); + } + else { + ModRM( 2, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + write32(offset); + } + } + else { + if( offset == 0 ) { + ModRM( 0, to, from ); + } + else if( offset < 128 && offset >= -128 ) { + ModRM( 1, to, from ); + write8(offset); + } + else { + ModRM( 2, to, from ); + write32(offset); + } + } +} + +// This function is just for rec debugging purposes +void CheckX86Ptr( void ) +{ +} + +void writeVAROP(unsigned opl, u64 op) +{ + while (opl--) + 
{ + write8(op & 0xFF); + op >>= 8; + } +} + +#define writeVARROP(REX, opl, op) ({ \ + if (opl > 1 && ((op & 0xFF) == 0x66 || (op & 0xFF) == 0xF3 || (op & 0xFF) == 0xF2)) { \ + write8(op & 0xFF); \ + opl --; \ + op >>= 8; \ + } \ + REX; \ + writeVAROP(opl, op); \ + }) + +void MEMADDR_OP(bool w, unsigned opl, u64 op, bool isreg, int reg, uptr p, sptr off) +{ +#ifdef __x86_64__ + sptr pr = MEMADDR_(p, 5 + opl + (w || reg >= 8) + off); + if (SPTR32(pr)) + { + writeVARROP(RexR(w, reg), opl, op); + ModRM(0, reg, DISP32); + write32(pr); + } + else if (UPTR32(p)) + { + writeVARROP(RexR(w, reg), opl, op); + ModRM(0, reg, SIB); + SibSB(0, SIB, DISP32); + write32(p); + } + else + { + assert(!isreg || reg != X86_TEMP); + MOV64ItoR(X86_TEMP, p); + writeVARROP(RexRB(w, reg, X86_TEMP), opl, op); + ModRM(0, reg, X86_TEMP); + } +#else + writeVARROP(RexR(w, reg), opl, op); + ModRM(0, reg, DISP32); + write32(p); +#endif +} + +void SET8R( int cc, int to ) +{ + RexB(0, to); + write8( 0x0F ); + write8( cc ); + write8( 0xC0 | ( to ) ); +} + +u8* J8Rel( int cc, int to ) +{ + write8( cc ); + write8( to ); + return x86Ptr - 1; +} + +u16* J16Rel( int cc, u32 to ) +{ + write16( 0x0F66 ); + write8( cc ); + write16( to ); + return (u16*)( x86Ptr - 2 ); +} + +u32* J32Rel( int cc, u32 to ) +{ + write8( 0x0F ); + write8( cc ); + write32( to ); + return (u32*)( x86Ptr - 4 ); +} + +void CMOV32RtoR( int cc, int to, int from ) +{ + RexRB(0,to, from); + write8( 0x0F ); + write8( cc ); + ModRM( 3, to, from ); +} + +void CMOV32MtoR( int cc, x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, cc), true, to, from, 0); +} + +//////////////////////////////////////////////////// +void x86SetPtr( char* ptr ) +{ + x86Ptr = ptr; +} + +//////////////////////////////////////////////////// +void x86Shutdown( void ) +{ +} + +//////////////////////////////////////////////////// +void x86SetJ8( u8* j8 ) +{ + u32 jump = ( x86Ptr - (s8*)j8 ) - 1; + + if ( jump > 0x7f ) { + assert(0); + SysPrintf( "j8 greater 
than 0x7f!!\n" ); + } + *j8 = (u8)jump; +} + +void x86SetJ8A( u8* j8 ) +{ + u32 jump = ( x86Ptr - (s8*)j8 ) - 1; + + if ( jump > 0x7f ) { + assert(0); + SysPrintf( "j8 greater than 0x7f!!\n" ); + } + + if( ((uptr)x86Ptr&0xf) > 4 ) { + + uptr newjump = jump + 16-((uptr)x86Ptr&0xf); + + if( newjump <= 0x7f ) { + jump = newjump; + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + } + } + *j8 = (u8)jump; +} + +void x86SetJ16( u16 *j16 ) +{ + // doesn't work + u32 jump = ( x86Ptr - (s8*)j16 ) - 2; + + if ( jump > 0x7fff ) { + assert(0); + SysPrintf( "j16 greater than 0x7fff!!\n" ); + } + *j16 = (u16)jump; +} + +void x86SetJ16A( u16 *j16 ) +{ + if( ((uptr)x86Ptr&0xf) > 4 ) { + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + } + x86SetJ16(j16); +} + +//////////////////////////////////////////////////// +void x86SetJ32( u32* j32 ) +{ + *j32 = ( x86Ptr - (s8*)j32 ) - 4; +} + +void x86SetJ32A( u32* j32 ) +{ + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + x86SetJ32(j32); +} + +//////////////////////////////////////////////////// +void x86Align( int bytes ) +{ + // fordward align + x86Ptr = (s8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); +} + +/********************/ +/* IX86 intructions */ +/********************/ + +void STC( void ) +{ + write8( 0xF9 ); +} + +void CLC( void ) +{ + write8( 0xF8 ); +} + +//////////////////////////////////// +// mov instructions / +//////////////////////////////////// + +/* mov r64 to r64 */ +void MOV64RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1, from, to); + write8( 0x89 ); + ModRM( 3, from, to ); +} + +/* mov r64 to m64 */ +void MOV64RtoM( uptr to, x86IntRegType from ) +{ + if (from == RAX) + { + RexR(1, 0); + write8(0xA3); + write64(to); + } + else + { + MEMADDR_OP(1, VAROP1(0x89), true, from, to, 0); + } +} + +/* mov m64 to r64 */ +void MOV64MtoR( x86IntRegType to, uptr from ) +{ + if (to == RAX) + { + RexR(1, 0); + write8(0xA1); + write64(from); + } + else + { + MEMADDR_OP(1, VAROP1(0x8B), true, to, from, 0); + } +} + +/* mov imm32 
to m64 */ +void MOV64I32toM(uptr to, u32 from ) +{ + MEMADDR_OP(1, VAROP1(0xC7), false, 0, to, 4); + write32(from); +} + +// mov imm64 to r64 +void MOV64ItoR( x86IntRegType to, u64 from) +{ + RexB(1, to); + write8( 0xB8 | (to & 0x7) ); + write64( from ); +} + +/* mov imm32 to r64 */ +void MOV64I32toR( x86IntRegType to, s32 from ) +{ + RexB(1, to); + write8( 0xC7 ); + ModRM( 0, 0, to ); + write32( from ); +} + +// mov imm64 to [r64+off] +void MOV64ItoRmOffset( x86IntRegType to, u32 from, int offset) +{ + RexB(1,to); + write8( 0xC7 ); + WriteRmOffset(to, offset); + write32(from); +} + +// mov [r64+offset] to r64 +void MOV64RmOffsettoR( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(1, to, from); + write8( 0x8B ); + WriteRmOffsetFrom(to, from, offset); +} + +/* mov [r64][r64*scale] to r64 */ +void MOV64RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { + RexRXB(1, to, from2, from); + write8( 0x8B ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); +} + +/* mov r64 to [r64+offset] */ +void MOV64RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(1,from,to); + write8( 0x89 ); + WriteRmOffsetFrom(from, to, offset); +} + +/* mov r64 to [r64][r64*scale] */ +void MOV64RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { + RexRXB(1, to, from2, from); + write8( 0x89 ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); +} + + +/* mov r32 to r32 */ +void MOV32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, from, to); + write8( 0x89 ); + ModRM( 3, from, to ); +} + +/* mov r32 to m32 */ +void MOV32RtoM( uptr to, x86IntRegType from ) +{ + if (from == EAX) + { + write8(0xA3); + write64(to); + } + else + { + MEMADDR_OP(0, VAROP1(0x89), true, from, to, 0); + } +} + +/* mov m32 to r32 */ +void MOV32MtoR( x86IntRegType to, uptr from ) +{ + if (to == RAX) + { + write8(0xA1); + write64(from); + } + else + { + MEMADDR_OP(0, VAROP1(0x8B), true, to, from, 0); + } +} + +/* mov 
[r32] to r32 */ +void MOV32RmtoR( x86IntRegType to, x86IntRegType from ) { + RexRB(0, to, from); + write8(0x8B); + WriteRmOffsetFrom(to, from, 0); +} + +void MOV32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) { + RexRB(0, to, from); + write8( 0x8B ); + WriteRmOffsetFrom(to, from, offset); +} + +/* mov [r32+r32*scale] to r32 */ +void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { + RexRXB(0,to,from2,from); + write8( 0x8B ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); +} + +// mov r32 to [r32<<scale+from2] +void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, int from2, int scale ) +{ + RexRXB(0,to,from1,0); + write8( 0x8B ); + ModRM( 0, to, 0x4 ); + ModRM( scale, from1, 5); + write32(from2); +} + +/* mov r32 to [r32] */ +void MOV32RtoRm( x86IntRegType to, x86IntRegType from ) { + RexRB(0, from, to); + if ((to&7) == ESP) { + write8( 0x89 ); + ModRM( 0, from, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + } else { + write8( 0x89 ); + ModRM( 0, from, to ); + } +} + +/* mov r32 to [r32][r32*scale] */ +void MOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { + RexRXB(0, to, from2, from); + write8( 0x89 ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); +} + +/* mov imm32 to r32 */ +void MOV32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0, to); + write8( 0xB8 | (to & 0x7) ); + write32( from ); +} + +/* mov imm32 to m32 */ +void MOV32ItoM(uptr to, u32 from ) +{ + MEMADDR_OP(0, VAROP1(0xC7), false, 0, to, 4); + write32(from); +} + +// mov imm32 to [r32+off] +void MOV32ItoRmOffset( x86IntRegType to, u32 from, int offset) +{ + RexB(0,to); + write8( 0xC7 ); + WriteRmOffset(to, offset); + write32(from); +} + +// mov r32 to [r32+off] +void MOV32RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) +{ + RexRB(0,from,to); + write8( 0x89 ); + WriteRmOffsetFrom(from, to, offset); +} + +/* mov r16 to m16 */ +void MOV16RtoM(uptr to, x86IntRegType from ) +{ + if (from == EAX) + { 
+ write8(0x66); + write8(0xA3); + write64(to); + } + else + { + MEMADDR_OP(0, VAROP2(0x66, 0x89), true, from, to, 0); + } +} + +/* mov m16 to r16 */ +void MOV16MtoR( x86IntRegType to, uptr from ) +{ + if (to == EAX) + { + write8(0x66); + write8(0xA1); + write64(from); + } + else + { + MEMADDR_OP(0, VAROP2(0x66, 0x8B), true, to, from, 0); + } +} + +void MOV16RmtoR( x86IntRegType to, x86IntRegType from) +{ + write8( 0x66 ); + RexRB(0,to,from); + write8( 0x8B ); + WriteRmOffsetFrom(to, from, 0); +} + +void MOV16RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) +{ + write8( 0x66 ); + RexRB(0,to,from); + write8( 0x8B ); + WriteRmOffsetFrom(to, from, offset); +} + +void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale ) +{ + write8(0x66); + RexRXB(0,to,from1,0); + write8( 0x8B ); + ModRM( 0, to, 0x4 ); + ModRM( scale, from1, 5); + write32(from2); +} + +void MOV16RtoRm(x86IntRegType to, x86IntRegType from) +{ + write8( 0x66 ); + RexRB(0,from,to); + write8( 0x89 ); + ModRM( 0, from, to ); +} + +/* mov imm16 to m16 */ +void MOV16ItoM( uptr to, u16 from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0xC7), false, 0, to, 2); + write16( from ); +} + +/* mov r16 to [r32][r32*scale] */ +void MOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { + write8( 0x66 ); + RexRXB(0,to,from2,from); + write8( 0x89 ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); +} + +void MOV16ItoR( x86IntRegType to, u16 from ) +{ + RexB(0, to); + write16( 0xB866 | ((to & 0x7)<<8) ); + write16( from ); +} + +// mov imm16 to [r16+off] +void MOV16ItoRmOffset( x86IntRegType to, u16 from, u32 offset) +{ + write8(0x66); + RexB(0,to); + write8( 0xC7 ); + WriteRmOffset(to, offset); + write16(from); +} + +// mov r16 to [r16+off] +void MOV16RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) +{ + write8(0x66); + RexRB(0,from,to); + write8( 0x89 ); + WriteRmOffsetFrom(from, to, offset); +} + +/* mov r8 to m8 */ +void MOV8RtoM( uptr to, 
x86IntRegType from ) +{ + if (from == EAX) + { + write8(0xA2); + write64(to); + } + else + { + MEMADDR_OP(0, VAROP1(0x88), true, from, to, 0); + } +} + +/* mov m8 to r8 */ +void MOV8MtoR( x86IntRegType to, uptr from ) +{ + if (to == EAX) + { + write8(0xA0); + write64(from); + } + else + { + MEMADDR_OP(0, VAROP1(0x8A), true, to, from, 0); + } +} + +/* mov [r32] to r8 */ +void MOV8RmtoR(x86IntRegType to, x86IntRegType from) +{ + RexRB(0,to,from); + write8( 0x8A ); + WriteRmOffsetFrom(to, from, 0); +} + +void MOV8RmtoROffset(x86IntRegType to, x86IntRegType from, int offset) +{ + RexRB(0,to,from); + write8( 0x8A ); + WriteRmOffsetFrom(to, from, offset); +} + +void MOV8RtoRm(x86IntRegType to, x86IntRegType from) +{ + RexRB(0,from,to); + write8( 0x88 ); + WriteRmOffsetFrom(from, to, 0); +} + +/* mov imm8 to m8 */ +void MOV8ItoM( uptr to, u8 from ) +{ + MEMADDR_OP(0, VAROP1(0xC6), false, 0, to, 1); + write8( from ); +} + +// mov imm8 to r8 +void MOV8ItoR( x86IntRegType to, u8 from ) +{ + RexB(0, to); + write8( 0xB0 | (to & 0x7) ); + write8( from ); +} + +// mov imm8 to [r8+off] +void MOV8ItoRmOffset( x86IntRegType to, u8 from, int offset) +{ + assert( to != ESP ); + RexB(0,to); + write8( 0xC6 ); + WriteRmOffset(to,offset); + write8(from); +} + +// mov r8 to [r8+off] +void MOV8RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) +{ + assert( to != ESP ); + RexRB(0,from,to); + write8( 0x88 ); + WriteRmOffsetFrom(from,to,offset); +} + +/* movsx r8 to r32 */ +void MOVSX32R8toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write16( 0xBE0F ); + ModRM( 3, to, from ); +} + +void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write16( 0xBE0F ); + ModRM( 0, to, from ); +} + +void MOVSX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(0,to,from); + write16( 0xBE0F ); + WriteRmOffsetFrom(to,from,offset); +} + +/* movsx m8 to r32 */ +void MOVSX32M8toR( x86IntRegType to, uptr from ) +{ + 
MEMADDR_OP(0, VAROP2(0x0F, 0xBE), true, to, from, 0); +} + +/* movsx r16 to r32 */ +void MOVSX32R16toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write16( 0xBF0F ); + ModRM( 3, to, from ); +} + +void MOVSX32Rm16toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write16( 0xBF0F ); + ModRM( 0, to, from ); +} + +void MOVSX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(0,to,from); + write16( 0xBF0F ); + WriteRmOffsetFrom(to,from,offset); +} + +/* movsx m16 to r32 */ +void MOVSX32M16toR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xBF), true, to, from, 0); +} + +/* movzx r8 to r32 */ +void MOVZX32R8toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write16( 0xB60F ); + ModRM( 3, to, from ); +} + +void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write16( 0xB60F ); + ModRM( 0, to, from ); +} + +void MOVZX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(0,to,from); + write16( 0xB60F ); + WriteRmOffsetFrom(to,from,offset); +} + +/* movzx m8 to r32 */ +void MOVZX32M8toR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xB6), true, to, from, 0); +} + +/* movzx r16 to r32 */ +void MOVZX32R16toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write16( 0xB70F ); + ModRM( 3, to, from ); +} + +void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write16( 0xB70F ); + ModRM( 0, to, from ); +} + +void MOVZX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(0,to,from); + write16( 0xB70F ); + WriteRmOffsetFrom(to,from,offset); +} + +/* movzx m16 to r32 */ +void MOVZX32M16toR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xB7), true, to, from, 0); +} + +#ifdef __x86_64__ + +/* movzx r8 to r64 */ +void MOVZX64R8toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1,to,from); + write16( 0xB60F ); + ModRM( 3, 
to, from ); +} + +void MOVZX64Rm8toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1,to,from); + write16( 0xB60F ); + ModRM( 0, to, from ); +} + +void MOVZX64Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(1,to,from); + write16( 0xB60F ); + WriteRmOffsetFrom(to,from,offset); +} + +/* movzx m8 to r64 */ +void MOVZX64M8toR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(1, VAROP2(0x0F, 0xB6), true, to, from, 0); +} + +/* movzx r16 to r64 */ +void MOVZX64R16toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1,to,from); + write16( 0xB70F ); + ModRM( 3, to, from ); +} + +void MOVZX64Rm16toR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1,to,from); + write16( 0xB70F ); + ModRM( 0, to, from ); +} + +void MOVZX64Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ) +{ + RexRB(1,to,from); + write16( 0xB70F ); + WriteRmOffsetFrom(to,from,offset); +} + +/* movzx m16 to r64 */ +void MOVZX64M16toR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(1, VAROP2(0x0F, 0xB7), true, to, from, 0); +} +#endif + +/* cmovbe r32 to r32 */ +void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x46, to, from ); +} + +/* cmovbe m32 to r32*/ +void CMOVBE32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x46, to, from ); +} + +/* cmovb r32 to r32 */ +void CMOVB32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x42, to, from ); +} + +/* cmovb m32 to r32*/ +void CMOVB32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x42, to, from ); +} + +/* cmovae r32 to r32 */ +void CMOVAE32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x43, to, from ); +} + +/* cmovae m32 to r32*/ +void CMOVAE32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x43, to, from ); +} + +/* cmova r32 to r32 */ +void CMOVA32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x47, to, from ); +} + +/* cmova m32 to r32*/ +void CMOVA32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x47, to, from ); +} + 
+/* cmovo r32 to r32 */ +void CMOVO32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x40, to, from ); +} + +/* cmovo m32 to r32 */ +void CMOVO32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x40, to, from ); +} + +/* cmovp r32 to r32 */ +void CMOVP32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x4A, to, from ); +} + +/* cmovp m32 to r32 */ +void CMOVP32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x4A, to, from ); +} + +/* cmovs r32 to r32 */ +void CMOVS32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x48, to, from ); +} + +/* cmovs m32 to r32 */ +void CMOVS32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x48, to, from ); +} + +/* cmovno r32 to r32 */ +void CMOVNO32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x41, to, from ); +} + +/* cmovno m32 to r32 */ +void CMOVNO32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x41, to, from ); +} + +/* cmovnp r32 to r32 */ +void CMOVNP32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x4B, to, from ); +} + +/* cmovnp m32 to r32 */ +void CMOVNP32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x4B, to, from ); +} + +/* cmovns r32 to r32 */ +void CMOVNS32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x49, to, from ); +} + +/* cmovns m32 to r32 */ +void CMOVNS32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x49, to, from ); +} + +/* cmovne r32 to r32 */ +void CMOVNE32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x45, to, from ); +} + +/* cmovne m32 to r32*/ +void CMOVNE32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x45, to, from ); +} + +/* cmove r32 to r32*/ +void CMOVE32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x44, to, from ); +} + +/* cmove m32 to r32*/ +void CMOVE32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x44, to, from ); +} + +/* cmovg r32 to r32*/ +void CMOVG32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x4F, 
to, from ); +} + +/* cmovg m32 to r32*/ +void CMOVG32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x4F, to, from ); +} + +/* cmovge r32 to r32*/ +void CMOVGE32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x4D, to, from ); +} + +/* cmovge m32 to r32*/ +void CMOVGE32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x4D, to, from ); +} + +/* cmovl r32 to r32*/ +void CMOVL32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x4C, to, from ); +} + +/* cmovl m32 to r32*/ +void CMOVL32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x4C, to, from ); +} + +/* cmovle r32 to r32*/ +void CMOVLE32RtoR( x86IntRegType to, x86IntRegType from ) +{ + CMOV32RtoR( 0x4E, to, from ); +} + +/* cmovle m32 to r32*/ +void CMOVLE32MtoR( x86IntRegType to, uptr from ) +{ + CMOV32MtoR( 0x4E, to, from ); +} + +//////////////////////////////////// +// arithmetic instructions / +//////////////////////////////////// + +/* add imm32 to r64 */ +void ADD64ItoR( x86IntRegType to, u32 from ) +{ + RexB(1, to); + if (from <= 0x7f) + { + write8(0x83); + ModRM( 3, 0, to ); + write8(from); + } + else + { + if (to == RAX) { + write8( 0x05 ); + } else { + write8( 0x81 ); + ModRM( 3, 0, to ); + } + write32( from ); + } +} + +/* add m64 to r64 */ +void ADD64MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(1, VAROP1(0x03), true, to, from, 0); +} + +/* add r64 to r64 */ +void ADD64RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1, from, to); + write8( 0x01 ); + ModRM( 3, from, to ); +} + +/* add imm32 to r32 */ +void ADD32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0, to); + if ( to == EAX) { + write8( 0x05 ); + } + else { + write8( 0x81 ); + ModRM( 3, 0, to ); + } + write32( from ); +} + +/* add imm32 to m32 */ +void ADD32ItoM( uptr to, u32 from ) +{ + MEMADDR_OP(0, VAROP1(0x81), false, 0, to, 4); + write32(from); +} + +// add imm32 to [r32+off] +void ADD32ItoRmOffset( x86IntRegType to, u32 from, int offset) +{ + RexB(0,to); + write8( 0x81 ); + 
WriteRmOffset(to,offset); + write32(from); +} + +/* add r32 to r32 */ +void ADD32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, from, to); + write8( 0x01 ); + ModRM( 3, from, to ); +} + +/* add r32 to m32 */ +void ADD32RtoM(uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP1(0x01), true, from, to, 0); +} + +/* add m32 to r32 */ +void ADD32MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x03), true, to, from, 0); +} + +// add r16 to r16 +void ADD16RtoR( x86IntRegType to , x86IntRegType from ) +{ + write8(0x66); + RexRB(0,to,from); + write8( 0x03 ); + ModRM( 3, to, from ); +} + +/* add imm16 to r16 */ +void ADD16ItoR( x86IntRegType to, u16 from ) +{ + write8( 0x66 ); + RexB(0,to); + if ( to == EAX) + { + write8( 0x05 ); + } + else + { + write8( 0x81 ); + ModRM( 3, 0, to ); + } + write16( from ); +} + +/* add imm16 to m16 */ +void ADD16ItoM( uptr to, u16 from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 0, to, 2); + write16( from ); +} + +/* add r16 to m16 */ +void ADD16RtoM(uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x01), true, from, to, 0); +} + +/* add m16 to r16 */ +void ADD16MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x03), true, to, from, 0); +} + +// add m8 to r8 +void ADD8MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x02), true, to, from, 0); +} + +/* adc imm32 to r32 */ +void ADC32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0,to); + if ( to == EAX ) { + write8( 0x15 ); + } + else { + write8( 0x81 ); + ModRM( 3, 2, to ); + } + write32( from ); +} + +/* adc imm32 to m32 */ +void ADC32ItoM( uptr to, u32 from ) +{ + MEMADDR_OP(0, VAROP1(0x81), false, 2, to, 4); + write32(from); +} + +/* adc r32 to r32 */ +void ADC32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,from,to); + write8( 0x11 ); + ModRM( 3, from, to ); +} + +/* adc m32 to r32 */ +void ADC32MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x13), true, to, from, 0); +} + +// adc r32 to 
m32 +void ADC32RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP1(0x11), true, from, to, 0); +} + + +#ifdef __x86_64__ +void INC32R( x86IntRegType to ) +{ + write8( 0xFF ); + ModRM(3,0,to); +} +#else +/* inc r32 */ +void INC32R( x86IntRegType to ) +{ + X86_64ASSERT(); + write8( 0x40 + to ); +} +#endif +/* inc m32 */ +void INC32M( uptr to ) +{ + MEMADDR_OP(0, VAROP1(0xFF), false, 0, to, 0); +} + +/* inc r16 */ +void INC16R( x86IntRegType to ) +{ + X86_64ASSERT(); + write8( 0x66 ); + write8( 0x40 + to ); +} + +/* inc m16 */ +void INC16M( uptr to ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0xFF), false, 0, to, 0); +} + + +/* sub imm32 to r64 */ +void SUB64ItoR( x86IntRegType to, u32 from ) +{ + RexB(1, to); + if (from <= 0x7f) + { + write8(0x83); + ModRM( 3, 5, to ); + write8(from); + } + else + { + if ( to == RAX ) { + write8( 0x2D ); + } + else { + write8( 0x81 ); + ModRM( 3, 5, to ); + } + write32( from ); + } +} + +/* sub r64 to r64 */ +void SUB64RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1, from, to); + write8( 0x29 ); + ModRM( 3, from, to ); +} + +/* sub m64 to r64 */ +void SUB64MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(1, VAROP1(0x2B), true, to, from, 0); +} + +/* sub imm32 to r32 */ +void SUB32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0,to); + if ( to == EAX ) { + write8( 0x2D ); + } + else { + write8( 0x81 ); + ModRM( 3, 5, to ); + } + write32( from ); +} + +/* sub imm32 to m32 */ +void SUB32ItoM( uptr to, u32 from ) +{ + MEMADDR_OP(0, VAROP1(0x81), false, 5, to, 4); + write32(from); +} + +/* sub r32 to r32 */ +void SUB32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, from, to); + write8( 0x29 ); + ModRM( 3, from, to ); +} + +/* sub m32 to r32 */ +void SUB32MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x2B), true, to, from, 0); +} + +// sub r32 to m32 +void SUB32RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP1(0x29), true, from, to, 0); +} + +// sub r16 to r16 +void SUB16RtoR( x86IntRegType 
to, u16 from ) +{ + write8(0x66); + RexRB(0,to,from); + write8( 0x2b ); + ModRM( 3, to, from ); +} + +/* sub imm16 to r16 */ +void SUB16ItoR( x86IntRegType to, u16 from ) { + write8( 0x66 ); + RexB(0,to); + if ( to == EAX ) { + write8( 0x2D ); + } else { + write8( 0x81 ); + ModRM( 3, 5, to ); + } + write16( from ); +} + +/* sub imm16 to m16 */ +void SUB16ItoM( uptr to, u16 from ) { + MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 5, to, 2); + write16( from ); +} + +/* sub m16 to r16 */ +void SUB16MtoR( x86IntRegType to, uptr from ) { + MEMADDR_OP(0, VAROP2(0x66, 0x2B), true, to, from, 0); +} + +/* sbb r64 to r64 */ +void SBB64RtoR( x86IntRegType to, x86IntRegType from ) { + RexRB(1, from,to); + write8( 0x19 ); + ModRM( 3, from, to ); +} + +/* sbb imm32 to r32 */ +void SBB32ItoR( x86IntRegType to, u32 from ) { + RexB(0,to); + if ( to == EAX ) { + write8( 0x1D ); + } else { + write8( 0x81 ); + ModRM( 3, 3, to ); + } + write32( from ); +} + +/* sbb imm32 to m32 */ +void SBB32ItoM( uptr to, u32 from ) { + MEMADDR_OP(0, VAROP1(0x81), false, 3, to, 4); + write32( from ); +} + +/* sbb r32 to r32 */ +void SBB32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,from,to); + write8( 0x19 ); + ModRM( 3, from, to ); +} + +/* sbb m32 to r32 */ +void SBB32MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x1B), true, to, from, 0); +} + +/* sbb r32 to m32 */ +void SBB32RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP1(0x19), true, from, to, 0); +} + +#ifdef __x86_64__ +void DEC32R( x86IntRegType to ) +{ + write8( 0xFF ); + ModRM(3,1,to); +} +#else +/* dec r32 */ +void DEC32R( x86IntRegType to ) +{ + X86_64ASSERT(); + write8( 0x48 + to ); +} +#endif + +/* dec m32 */ +void DEC32M( uptr to ) +{ + MEMADDR_OP(0, VAROP1(0xFF), false, 1, to, 0); +} + +/* dec r16 */ +void DEC16R( x86IntRegType to ) +{ + X86_64ASSERT(); + write8( 0x66 ); + write8( 0x48 + to ); +} + +/* dec m16 */ +void DEC16M( uptr to ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0xFF), false, 1, to, 0); +} + 
+/* mul eax by r32 to edx:eax */ +void MUL32R( x86IntRegType from ) +{ + RexB(0,from); + write8( 0xF7 ); + ModRM( 3, 4, from ); +} + +/* imul eax by r32 to edx:eax */ +void IMUL32R( x86IntRegType from ) +{ + RexB(0,from); + write8( 0xF7 ); + ModRM( 3, 5, from ); +} + +/* mul eax by m32 to edx:eax */ +void MUL32M( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xF7), false, 4, from, 0); +} + +/* imul eax by m32 to edx:eax */ +void IMUL32M( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xF7), false, 5, from, 0); +} + +/* imul r32 by r32 to r32 */ +void IMUL32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write16( 0xAF0F ); + ModRM( 3, to, from ); +} + +/* div eax by r32 to edx:eax */ +void DIV32R( x86IntRegType from ) +{ + RexB(0,from); + write8( 0xF7 ); + ModRM( 3, 6, from ); +} + +/* idiv eax by r32 to edx:eax */ +void IDIV32R( x86IntRegType from ) +{ + RexB(0,from); + write8( 0xF7 ); + ModRM( 3, 7, from ); +} + +/* div eax by m32 to edx:eax */ +void DIV32M( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xF7), false, 6, from, 0); +} + +/* idiv eax by m32 to edx:eax */ +void IDIV32M( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xF7), false, 7, from, 0); +} + +//////////////////////////////////// +// shifting instructions / +//////////////////////////////////// + +/* shl imm8 to r64 */ +void SHL64ItoR( x86IntRegType to, u8 from ) +{ + RexB(1, to); + if ( from == 1 ) + { + write8( 0xD1 ); + ModRM( 3, 4, to ); + return; + } + write8( 0xC1 ); + ModRM( 3, 4, to ); + write8( from ); +} + +/* shl cl to r64 */ +void SHL64CLtoR( x86IntRegType to ) +{ + RexB(1, to); + write8( 0xD3 ); + ModRM( 3, 4, to ); +} + +/* shr imm8 to r64 */ +void SHR64ItoR( x86IntRegType to, u8 from ) +{ + RexB(1,to); + if ( from == 1 ) { + write8( 0xD1 ); + ModRM( 3, 5, to ); + return; + } + write8( 0xC1 ); + ModRM( 3, 5, to ); + write8( from ); +} + +/* shr cl to r64 */ +void SHR64CLtoR( x86IntRegType to ) +{ + RexB(1, to); + write8( 0xD3 ); + ModRM( 3, 5, to ); +} + +/* shl imm8 to r32 */ +void SHL32ItoR( 
x86IntRegType to, u8 from ) +{ + RexB(0, to); + if ( from == 1 ) + { + write8( 0xD1 ); + write8( 0xE0 | (to & 0x7) ); + return; + } + write8( 0xC1 ); + ModRM( 3, 4, to ); + write8( from ); +} + +/* shl imm8 to m32 */ +void SHL32ItoM( uptr to, u8 from ) +{ + if ( from == 1 ) + { + MEMADDR_OP(0, VAROP1(0xD1), false, 4, to, 0); + } + else + { + MEMADDR_OP(0, VAROP1(0xC1), false, 4, to, 1); + write8( from ); + } +} + +/* shl cl to r32 */ +void SHL32CLtoR( x86IntRegType to ) +{ + RexB(0,to); + write8( 0xD3 ); + ModRM( 3, 4, to ); +} + +// shl imm8 to r16 +void SHL16ItoR( x86IntRegType to, u8 from ) +{ + write8(0x66); + RexB(0,to); + if ( from == 1 ) + { + write8( 0xD1 ); + write8( 0xE0 | (to & 0x7) ); + return; + } + write8( 0xC1 ); + ModRM( 3, 4, to ); + write8( from ); +} + +// shl imm8 to r8 +void SHL8ItoR( x86IntRegType to, u8 from ) +{ + RexB(0,to); + if ( from == 1 ) + { + write8( 0xD0 ); + write8( 0xE0 | (to & 0x7) ); + return; + } + write8( 0xC0 ); + ModRM( 3, 4, to ); + write8( from ); +} + +/* shr imm8 to r32 */ +void SHR32ItoR( x86IntRegType to, u8 from ) { + RexB(0,to); + if ( from == 1 ) + { + write8( 0xD1 ); + write8( 0xE8 | (to & 0x7) ); + } + else + { + write8( 0xC1 ); + ModRM( 3, 5, to ); + write8( from ); + } +} + +/* shr imm8 to m32 */ +void SHR32ItoM( uptr to, u8 from ) +{ + if ( from == 1 ) + { + MEMADDR_OP(0, VAROP1(0xD1), false, 5, to, 0); + } + else + { + MEMADDR_OP(0, VAROP1(0xC1), false, 5, to, 1); + write8( from ); + } +} + +/* shr cl to r32 */ +void SHR32CLtoR( x86IntRegType to ) +{ + RexB(0,to); + write8( 0xD3 ); + ModRM( 3, 5, to ); +} + +// shr imm8 to r8 +void SHR8ItoR( x86IntRegType to, u8 from ) +{ + RexB(0,to); + if ( from == 1 ) + { + write8( 0xD0 ); + write8( 0xE8 | (to & 0x7) ); + } + else + { + write8( 0xC0 ); + ModRM( 3, 5, to ); + write8( from ); + } +} + +/* sar imm8 to r64 */ +void SAR64ItoR( x86IntRegType to, u8 from ) +{ + RexB(1,to); + if ( from == 1 ) + { + write8( 0xD1 ); + ModRM( 3, 7, to ); + return; + } + write8( 0xC1 
); + ModRM( 3, 7, to ); + write8( from ); +} + +/* sar cl to r64 */ +void SAR64CLtoR( x86IntRegType to ) +{ + RexB(1, to); + write8( 0xD3 ); + ModRM( 3, 7, to ); +} + +/* sar imm8 to r32 */ +void SAR32ItoR( x86IntRegType to, u8 from ) +{ + RexB(0,to); + if ( from == 1 ) + { + write8( 0xD1 ); + ModRM( 3, 7, to ); + return; + } + write8( 0xC1 ); + ModRM( 3, 7, to ); + write8( from ); +} + +/* sar imm8 to m32 */ +void SAR32ItoM( uptr to, u8 from ) +{ + if (from == 1) + { + MEMADDR_OP(0, VAROP1(0xD1), false, 7, to, 0); + } + else + { + MEMADDR_OP(0, VAROP1(0xC1), false, 7, to, 1); + write8( from ); + } +} + +/* sar cl to r32 */ +void SAR32CLtoR( x86IntRegType to ) +{ + RexB(0,to); + write8( 0xD3 ); + ModRM( 3, 7, to ); +} + +// sar imm8 to r16 +void SAR16ItoR( x86IntRegType to, u8 from ) +{ + write8(0x66); + RexB(0,to); + if ( from == 1 ) + { + write8( 0xD1 ); + ModRM( 3, 7, to ); + return; + } + write8( 0xC1 ); + ModRM( 3, 7, to ); + write8( from ); +} + +void ROR32ItoR( x86IntRegType to,u8 from ) +{ + RexB(0,to); + if ( from == 1 ) { + write8( 0xd1 ); + write8( 0xc8 | to ); + } + else + { + write8( 0xc1 ); + write8( 0xc8 | to ); + write8( from ); + } +} + +void RCR32ItoR( x86IntRegType to, u8 from ) +{ + RexB(0,to); + if ( from == 1 ) { + write8( 0xd1 ); + write8( 0xd8 | to ); + } + else + { + write8( 0xc1 ); + write8( 0xd8 | to ); + write8( from ); + } +} + +// shld imm8 to r32 +void SHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) +{ + RexRB(0,from,to); + write8( 0x0F ); + write8( 0xA4 ); + ModRM( 3, from, to ); + write8( shift ); +} + +// shrd imm8 to r32 +void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) +{ + RexRB(0,from,to); + write8( 0x0F ); + write8( 0xAC ); + ModRM( 3, from, to ); + write8( shift ); +} + +//////////////////////////////////// +// logical instructions / +//////////////////////////////////// + +/* or imm32 to r32 */ +void OR64ItoR( x86IntRegType to, u32 from ) +{ + RexB(1, to); + if ( to == EAX ) { + write8( 0x0D ); 
+ } else { + write8( 0x81 ); + ModRM( 3, 1, to ); + } + write32( from ); +} + +/* or m64 to r64 */ +void OR64MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(1, VAROP1(0x0B), true, to, from, 0); +} + +/* or r64 to r64 */ +void OR64RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1, from, to); + write8( 0x09 ); + ModRM( 3, from, to ); +} + +// or r32 to m64 +void OR64RtoM(uptr to, x86IntRegType from ) +{ + MEMADDR_OP(1, VAROP1(0x09), true, from, to, 0); +} + +/* or imm32 to r32 */ +void OR32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0,to); + if ( to == EAX ) { + write8( 0x0D ); + } + else { + write8( 0x81 ); + ModRM( 3, 1, to ); + } + write32( from ); +} + +/* or imm32 to m32 */ +void OR32ItoM(uptr to, u32 from ) +{ + MEMADDR_OP(0, VAROP1(0x81), false, 1, to, 4); + write32(from); +} + +/* or r32 to r32 */ +void OR32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,from,to); + write8( 0x09 ); + ModRM( 3, from, to ); +} + +/* or r32 to m32 */ +void OR32RtoM(uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP1(0x09), true, from, to, 0); +} + +/* or m32 to r32 */ +void OR32MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x0B), true, to, from, 0); +} + +// or r16 to r16 +void OR16RtoR( x86IntRegType to, x86IntRegType from ) +{ + write8(0x66); + RexRB(0,from,to); + write8( 0x09 ); + ModRM( 3, from, to ); +} + +// or imm16 to r16 +void OR16ItoR( x86IntRegType to, u16 from ) +{ + write8(0x66); + RexB(0,to); + if ( to == EAX ) { + write8( 0x0D ); + } + else { + write8( 0x81 ); + ModRM( 3, 1, to ); + } + write16( from ); +} + +// or imm16 to m316 +void OR16ItoM( uptr to, u16 from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 1, to, 2); + write16( from ); +} + +/* or m16 to r16 */ +void OR16MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x0B), true, to, from, 0); +} + +// or r16 to m16 +void OR16RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x09), true, from, to, 0); +} + +// or r8 to r8 +void 
OR8RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,from,to); + write8( 0x08 ); + ModRM( 3, from, to ); +} + +// or r8 to m8 +void OR8RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP1(0x08), true, from, to, 0); +} + +// or imm8 to m8 +void OR8ItoM( uptr to, u8 from ) +{ + MEMADDR_OP(0, VAROP1(0x80), false, 1, to, 1); + write8( from ); +} + +// or m8 to r8 +void OR8MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x0A), true, to, from, 0); +} + +/* xor imm32 to r64 */ +void XOR64ItoR( x86IntRegType to, u32 from ) +{ + RexB(1,to); + if ( to == EAX ) { + write8( 0x35 ); + } else { + write8( 0x81 ); + ModRM( 3, 6, to ); + } + write32( from ); +} + +/* xor r64 to r64 */ +void XOR64RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1, from, to); + write8( 0x31 ); + ModRM( 3, from, to ); +} + +/* xor m64 to r64 */ +void XOR64MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(1, VAROP1(0x33), true, to, from, 0); +} + +/* xor r64 to m64 */ +void XOR64RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(1, VAROP1(0x31), true, from, to, 0); +} + +/* xor imm32 to r32 */ +void XOR32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0,to); + if ( to == EAX ) { + write8( 0x35 ); + } + else { + write8( 0x81 ); + ModRM( 3, 6, to ); + } + write32( from ); +} + +/* xor imm32 to m32 */ +void XOR32ItoM( uptr to, u32 from ) +{ + MEMADDR_OP(0, VAROP1(0x81), false, 6, to, 4); + write32( from ); +} + +/* xor r32 to r32 */ +void XOR32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,from,to); + write8( 0x31 ); + ModRM( 3, from, to ); +} + +/* xor r16 to r16 */ +void XOR16RtoR( x86IntRegType to, x86IntRegType from ) +{ + write8( 0x66 ); + RexRB(0,from,to); + write8( 0x31 ); + ModRM( 3, from, to ); +} + +/* xor r32 to m32 */ +void XOR32RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP1(0x31), true, from, to, 0); +} + +/* xor m32 to r32 */ +void XOR32MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x33), true, to, from, 0); +} + 
+// xor imm16 to r16 +void XOR16ItoR( x86IntRegType to, u16 from ) +{ + write8(0x66); + RexB(0,to); + if ( to == EAX ) { + write8( 0x35 ); + } + else { + write8( 0x81 ); + ModRM( 3, 6, to ); + } + write16( from ); +} + +// xor r16 to m16 +void XOR16RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x31), true, from, to, 0); +} + +/* and imm32 to r64 */ +void AND64I32toR( x86IntRegType to, u32 from ) +{ + RexB(1, to); + if ( to == EAX ) { + write8( 0x25 ); + } else { + write8( 0x81 ); + ModRM( 3, 0x4, to ); + } + write32( from ); +} + +/* and m64 to r64 */ +void AND64MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(1, VAROP1(0x23), true, to, from, 0); +} + +/* and r64 to m64 */ +void AND64RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(1, VAROP1(0x21), true, from, to, 0); +} + +/* and r64 to r64 */ +void AND64RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1, from, to); + write8( 0x21 ); + ModRM( 3, from, to ); +} + +/* and imm32 to m64 */ +void AND64I32toM( uptr to, u32 from ) +{ + MEMADDR_OP(1, VAROP1(0x81), false, 4, to, 4); + write32( from ); +} + +/* and imm32 to r32 */ +void AND32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0,to); + if ( to == EAX ) { + write8( 0x25 ); + } else { + write8( 0x81 ); + ModRM( 3, 0x4, to ); + } + write32( from ); +} + +/* and sign ext imm8 to r32 */ +void AND32I8toR( x86IntRegType to, u8 from ) +{ + RexB(0,to); + write8( 0x83 ); + ModRM( 3, 0x4, to ); + write8( from ); +} + +/* and imm32 to m32 */ +void AND32ItoM( uptr to, u32 from ) +{ + MEMADDR_OP(0, VAROP1(0x81), false, 4, to, 4); + write32(from); +} + +/* and sign ext imm8 to m32 */ +void AND32I8toM( uptr to, u8 from ) +{ + MEMADDR_OP(0, VAROP1(0x83), false, 4, to, 1); + write8( from ); +} + +/* and r32 to r32 */ +void AND32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,from,to); + write8( 0x21 ); + ModRM( 3, from, to ); +} + +/* and r32 to m32 */ +void AND32RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP1(0x21), true, 
from, to, 0); +} + +/* and m32 to r32 */ +void AND32MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x23), true, to, from, 0); +} + +// and r16 to r16 +void AND16RtoR( x86IntRegType to, x86IntRegType from ) +{ + write8(0x66); + RexRB(0,to,from); + write8( 0x23 ); + ModRM( 3, to, from ); +} + +/* and imm16 to r16 */ +void AND16ItoR( x86IntRegType to, u16 from ) +{ + write8(0x66); + RexB(0,to); + if ( to == EAX ) { + write8( 0x25 ); + } else { + write8( 0x81 ); + ModRM( 3, 0x4, to ); + } + write16( from ); +} + +/* and imm16 to m16 */ +void AND16ItoM( uptr to, u16 from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 4, to, 2); + write16( from ); +} + +/* and r16 to m16 */ +void AND16RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x21), true, from, to, 0); +} + +/* and m16 to r16 */ +void AND16MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x23), true, to, from, 0); +} + +/* and imm8 to r8 */ +void AND8ItoR( x86IntRegType to, u8 from ) +{ + RexB(0,to); + if ( to == EAX ) { + write8( 0x24 ); + } else { + write8( 0x80 ); + ModRM( 3, 0x4, to ); + } + write8( from ); +} + +/* and imm8 to m8 */ +void AND8ItoM( uptr to, u8 from ) +{ + MEMADDR_OP(0, VAROP1(0x80), false, 4, to, 1); + write8( from ); +} + +// and r8 to r8 +void AND8RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,to,from); + write8( 0x22 ); + ModRM( 3, to, from ); +} + +/* and r8 to m8 */ +void AND8RtoM( uptr to, x86IntRegType from ) +{ + MEMADDR_OP(0, VAROP1(0x20), true, from, to, 0); +} + +/* and m8 to r8 */ +void AND8MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x22), true, to, from, 0); +} + +/* not r64 */ +void NOT64R( x86IntRegType from ) +{ + RexB(1, from); + write8( 0xF7 ); + ModRM( 3, 2, from ); +} + +/* not r32 */ +void NOT32R( x86IntRegType from ) +{ + RexB(0,from); + write8( 0xF7 ); + ModRM( 3, 2, from ); +} + +// not m32 +void NOT32M( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xF7), false, 2, from, 0); +} + +/* neg r64 
*/ +void NEG64R( x86IntRegType from ) +{ + RexB(1, from); + write8( 0xF7 ); + ModRM( 3, 3, from ); +} + +/* neg r32 */ +void NEG32R( x86IntRegType from ) +{ + RexB(0,from); + write8( 0xF7 ); + ModRM( 3, 3, from ); +} + +void NEG32M( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xF7), false, 3, from, 0); +} + +/* neg r16 */ +void NEG16R( x86IntRegType from ) +{ + write8( 0x66 ); + RexB(0,from); + write8( 0xF7 ); + ModRM( 3, 3, from ); +} + +//////////////////////////////////// +// jump instructions / +//////////////////////////////////// + +u8* JMP( uptr to ) { + uptr jump = ( x86Ptr - (s8*)to ) - 1; + + if ( jump > 0x7f ) { + assert( to <= 0xffffffff ); + return (u8*)JMP32( to ); + } else { + return (u8*)JMP8( to ); + } +} + +/* jmp rel8 */ +u8* JMP8( u8 to ) +{ + write8( 0xEB ); + write8( to ); + return x86Ptr - 1; +} + +/* jmp rel32 */ +u32* JMP32( uptr to ) +{ + assert(SPTR32((sptr)to)); + write8( 0xE9 ); + write32( (sptr)to ); + return (u32*)(x86Ptr - 4 ); +} + +/* jmp r32/r64 */ +void JMPR( x86IntRegType to ) +{ + RexB(0, to); + write8( 0xFF ); + ModRM( 3, 4, to ); +} + +// jmp m32 +void JMP32M( uptr to ) +{ + /* FIXME */ + MEMADDR_OP(0, VAROP1(0xFF), false, 4, to, 0); +} + +/* jp rel8 */ +u8* JP8( u8 to ) { + return J8Rel( 0x7A, to ); +} + +/* jnp rel8 */ +u8* JNP8( u8 to ) { + return J8Rel( 0x7B, to ); +} + +/* je rel8 */ +u8* JE8( u8 to ) { + return J8Rel( 0x74, to ); +} + +/* jz rel8 */ +u8* JZ8( u8 to ) +{ + return J8Rel( 0x74, to ); +} + +/* js rel8 */ +u8* JS8( u8 to ) +{ + return J8Rel( 0x78, to ); +} + +/* jns rel8 */ +u8* JNS8( u8 to ) +{ + return J8Rel( 0x79, to ); +} + +/* jg rel8 */ +u8* JG8( u8 to ) +{ + return J8Rel( 0x7F, to ); +} + +/* jge rel8 */ +u8* JGE8( u8 to ) +{ + return J8Rel( 0x7D, to ); +} + +/* jl rel8 */ +u8* JL8( u8 to ) +{ + return J8Rel( 0x7C, to ); +} + +/* ja rel8 */ +u8* JA8( u8 to ) +{ + return J8Rel( 0x77, to ); +} + +u8* JAE8( u8 to ) +{ + return J8Rel( 0x73, to ); +} + +/* jb rel8 */ +u8* JB8( u8 to ) +{ + return J8Rel( 0x72, to 
); +} + +/* jbe rel8 */ +u8* JBE8( u8 to ) +{ + return J8Rel( 0x76, to ); +} + +/* jle rel8 */ +u8* JLE8( u8 to ) +{ + return J8Rel( 0x7E, to ); +} + +/* jne rel8 */ +u8* JNE8( u8 to ) +{ + return J8Rel( 0x75, to ); +} + +/* jnz rel8 */ +u8* JNZ8( u8 to ) +{ + return J8Rel( 0x75, to ); +} + +/* jng rel8 */ +u8* JNG8( u8 to ) +{ + return J8Rel( 0x7E, to ); +} + +/* jnge rel8 */ +u8* JNGE8( u8 to ) +{ + return J8Rel( 0x7C, to ); +} + +/* jnl rel8 */ +u8* JNL8( u8 to ) +{ + return J8Rel( 0x7D, to ); +} + +/* jnle rel8 */ +u8* JNLE8( u8 to ) +{ + return J8Rel( 0x7F, to ); +} + +/* jo rel8 */ +u8* JO8( u8 to ) +{ + return J8Rel( 0x70, to ); +} + +/* jno rel8 */ +u8* JNO8( u8 to ) +{ + return J8Rel( 0x71, to ); +} + +// jb rel8 +u16* JB16( u16 to ) +{ + return J16Rel( 0x82, to ); +} + +// jb rel32 +u32* JB32( u32 to ) +{ + return J32Rel( 0x82, to ); +} + +/* je rel32 */ +u32* JE32( u32 to ) +{ + return J32Rel( 0x84, to ); +} + +/* jz rel32 */ +u32* JZ32( u32 to ) +{ + return J32Rel( 0x84, to ); +} + +/* jg rel32 */ +u32* JG32( u32 to ) +{ + return J32Rel( 0x8F, to ); +} + +/* jge rel32 */ +u32* JGE32( u32 to ) +{ + return J32Rel( 0x8D, to ); +} + +/* jl rel32 */ +u32* JL32( u32 to ) +{ + return J32Rel( 0x8C, to ); +} + +/* jle rel32 */ +u32* JLE32( u32 to ) +{ + return J32Rel( 0x8E, to ); +} + +/* jae rel32 */ +u32* JAE32( u32 to ) +{ + return J32Rel( 0x83, to ); +} + +/* jne rel32 */ +u32* JNE32( u32 to ) +{ + return J32Rel( 0x85, to ); +} + +/* jnz rel32 */ +u32* JNZ32( u32 to ) +{ + return J32Rel( 0x85, to ); +} + +/* jng rel32 */ +u32* JNG32( u32 to ) +{ + return J32Rel( 0x8E, to ); +} + +/* jnge rel32 */ +u32* JNGE32( u32 to ) +{ + return J32Rel( 0x8C, to ); +} + +/* jnl rel32 */ +u32* JNL32( u32 to ) +{ + return J32Rel( 0x8D, to ); +} + +/* jnle rel32 */ +u32* JNLE32( u32 to ) +{ + return J32Rel( 0x8F, to ); +} + +/* jo rel32 */ +u32* JO32( u32 to ) +{ + return J32Rel( 0x80, to ); +} + +/* jno rel32 */ +u32* JNO32( u32 to ) +{ + return J32Rel( 0x81, to ); +} + +// 
js rel32 +u32* JS32( u32 to ) +{ + return J32Rel( 0x88, to ); +} + + +/* call func */ +void CALLFunc( uptr func ) +{ + sptr p = MEMADDR_(func, 5); + if (SPTR32(p)) + { + CALL32(p); + } + else + { + MOV64ItoR(X86_TEMP, func); + CALL64R(X86_TEMP); + } +} + +/* call rel32 */ +void CALL32( s32 to ) +{ + write8( 0xE8 ); + write32( to ); +} + +/* call r32 */ +void CALL32R( x86IntRegType to ) +{ + RexB(0, to); + write8( 0xFF ); + ModRM( 3, 2, to ); +} + +/* call r64 */ +void CALL64R( x86IntRegType to ) +{ + RexB(0, to); + write8( 0xFF ); + ModRM( 3, 2, to ); +} + +//////////////////////////////////// +// misc instructions / +//////////////////////////////////// + +/* cmp imm32 to r64 */ +void CMP64I32toR( x86IntRegType to, u32 from ) +{ + RexB(1, to); + if ( to == EAX ) { + write8( 0x3D ); + } + else { + write8( 0x81 ); + ModRM( 3, 7, to ); + } + write32( from ); +} + +/* cmp m64 to r64 */ +void CMP64MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(1, VAROP1(0x3B), true, 2, from, 0); +} + +// cmp r64 to r64 +void CMP64RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1,from,to); + write8( 0x39 ); + ModRM( 3, from, to ); +} + +/* cmp imm32 to r32 */ +void CMP32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0,to); + if ( to == EAX ) { + write8( 0x3D ); + } + else { + write8( 0x81 ); + ModRM( 3, 7, to ); + } + write32( from ); +} + +/* cmp imm32 to m32 */ +void CMP32ItoM( uptr to, u32 from ) +{ + MEMADDR_OP(0, VAROP1(0x81), false, 7, to, 4); + write32(from); +} + +/* cmp r32 to r32 */ +void CMP32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,from,to); + write8( 0x39 ); + ModRM( 3, from, to ); +} + +/* cmp m32 to r32 */ +void CMP32MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x3B), true, to, from, 0); +} + +// cmp imm8 to [r32] +void CMP32I8toRm( x86IntRegType to, u8 from) +{ + RexB(0,to); + write8( 0x83 ); + ModRM( 0, 7, to ); + write8(from); +} + +// cmp imm32 to [r32+off] +void CMP32I8toRmOffset8( x86IntRegType to, u8 from, u8 off) +{ + 
RexB(0,to); + write8( 0x83 ); + ModRM( 1, 7, to ); + write8(off); + write8(from); +} + +// cmp imm8 to [r32] +void CMP32I8toM( uptr to, u8 from) +{ + MEMADDR_OP(0, VAROP1(0x83), false, 7, to, 1); + write8( from ); +} + +/* cmp imm16 to r16 */ +void CMP16ItoR( x86IntRegType to, u16 from ) +{ + write8( 0x66 ); + RexB(0,to); + if ( to == EAX ) + { + write8( 0x3D ); + } + else + { + write8( 0x81 ); + ModRM( 3, 7, to ); + } + write16( from ); +} + +/* cmp imm16 to m16 */ +void CMP16ItoM( uptr to, u16 from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 7, to, 2); + write16( from ); +} + +/* cmp r16 to r16 */ +void CMP16RtoR( x86IntRegType to, x86IntRegType from ) +{ + write8( 0x66 ); + RexRB(0,from,to); + write8( 0x39 ); + ModRM( 3, from, to ); +} + +/* cmp m16 to r16 */ +void CMP16MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x66, 0x3B), true, to, from, 0); +} + +// cmp imm8 to r8 +void CMP8ItoR( x86IntRegType to, u8 from ) +{ + RexB(0,to); + if ( to == EAX ) + { + write8( 0x3C ); + } + else + { + write8( 0x80 ); + ModRM( 3, 7, to ); + } + write8( from ); +} + +// cmp m8 to r8 +void CMP8MtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP1(0x3A), true, to, from, 0); +} + +/* test r64 to r64 */ +void TEST64RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(1, from, to); + write8( 0x85 ); + ModRM( 3, from, to ); +} + +/* test imm32 to r32 */ +void TEST32ItoR( x86IntRegType to, u32 from ) +{ + RexB(0,to); + if ( to == EAX ) + { + write8( 0xA9 ); + } + else + { + write8( 0xF7 ); + ModRM( 3, 0, to ); + } + write32( from ); +} + +void TEST32ItoM( uptr to, u32 from ) +{ + MEMADDR_OP(0, VAROP1(0xF7), false, 0, to, 4); + write32( from ); +} + +/* test r32 to r32 */ +void TEST32RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0,from,to); + write8( 0x85 ); + ModRM( 3, from, to ); +} + +// test imm32 to [r32] +void TEST32ItoRm( x86IntRegType to, u32 from ) +{ + RexB(0,to); + write8( 0xF7 ); + ModRM( 0, 0, to ); + write32(from); +} + +// test 
imm16 to r16 +void TEST16ItoR( x86IntRegType to, u16 from ) +{ + write8(0x66); + RexB(0,to); + if ( to == EAX ) + { + write8( 0xA9 ); + } + else + { + write8( 0xF7 ); + ModRM( 3, 0, to ); + } + write16( from ); +} + +// test r16 to r16 +void TEST16RtoR( x86IntRegType to, x86IntRegType from ) +{ + write8(0x66); + RexRB(0,from,to); + write16( 0x85 ); + ModRM( 3, from, to ); +} + +// test imm8 to r8 +void TEST8ItoR( x86IntRegType to, u8 from ) +{ + RexB(0,to); + if ( to == EAX ) + { + write8( 0xA8 ); + } + else + { + write8( 0xF6 ); + ModRM( 3, 0, to ); + } + write8( from ); +} + +// test imm8 to r8 +void TEST8ItoM( uptr to, u8 from ) +{ + MEMADDR_OP(0, VAROP1(0xF6), false, 0, to, 1); + write8( from ); +} + +/* sets r8 */ +void SETS8R( x86IntRegType to ) +{ + SET8R( 0x98, to ); +} + +/* setl r8 */ +void SETL8R( x86IntRegType to ) +{ + SET8R( 0x9C, to ); +} + +// setge r8 +void SETGE8R( x86IntRegType to ) { SET8R(0x9d, to); } +// setg r8 +void SETG8R( x86IntRegType to ) { SET8R(0x9f, to); } +// seta r8 +void SETA8R( x86IntRegType to ) { SET8R(0x97, to); } +// setae r8 +void SETAE8R( x86IntRegType to ) { SET8R(0x99, to); } +/* setb r8 */ +void SETB8R( x86IntRegType to ) { SET8R( 0x92, to ); } +/* setb r8 */ +void SETNZ8R( x86IntRegType to ) { SET8R( 0x95, to ); } +// setz r8 +void SETZ8R( x86IntRegType to ) { SET8R(0x94, to); } +// sete r8 +void SETE8R( x86IntRegType to ) { SET8R(0x94, to); } + +/* push imm32 */ +void PUSH32I( u32 from ) +{ + //X86_64ASSERT(); //becomes sign extended in x86_64 + write8( 0x68 ); + write32( from ); +} + +#ifdef __x86_64__ + +/* push r64 */ +void PUSH64R( x86IntRegType from ) +{ + RexB(0,from); + //write8( 0x51 | from ); + write8( 0x50 | from ); +} + +/* push m64 */ +void PUSH64M( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xFF), false, 6, from, 0); +} + +/* pop r64 */ +void POP64R( x86IntRegType from ) { + RexB(0,from); + //write8( 0x59 | from ); + write8( 0x58 | from ); +} + +void PUSHR(x86IntRegType from) { PUSH64R(from); } +void 
POPR(x86IntRegType from) { POP64R(from); } + +#else + +/* push r32 */ +void PUSH32R( x86IntRegType from ) { write8( 0x50 | from ); } + +/* push m32 */ +void PUSH32M( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xFF), false, 6, from, 0); +} + +/* pop r32 */ +void POP32R( x86IntRegType from ) { write8( 0x58 | from ); } + +/* pushad */ +void PUSHA32( void ) { write8( 0x60 ); } + +/* popad */ +void POPA32( void ) { write8( 0x61 ); } + +void PUSHR(x86IntRegType from) { PUSH32R(from); } +void POPR(x86IntRegType from) { POP32R(from); } + +#endif + + +/* pushfd */ +void PUSHFD( void ) { write8( 0x9C ); } +/* popfd */ +void POPFD( void ) { write8( 0x9D ); } + +void RET( void ) { write8( 0xC3 ); } +void RET2( void ) { write16( 0xc3f3 ); } + +void CBW( void ) { write16( 0x9866 ); } +void CWD( void ) { write8( 0x98 ); } +void CDQ( void ) { write8( 0x99 ); } +void CWDE() { write8(0x98); } + +#ifdef __x86_64__ +void CDQE( void ) { RexR(1,0); write8( 0x98 ); } +#endif + +void LAHF() { write8(0x9f); } +void SAHF() { write8(0x9e); } + +void BT32ItoR( x86IntRegType to, x86IntRegType from ) +{ + write16( 0xBA0F ); + write8( 0xE0 | to ); + write8( from ); +} + +void BSRRtoR(x86IntRegType to, x86IntRegType from) +{ + write16( 0xBD0F ); + ModRM( 3, from, to ); +} + +void BSWAP32R( x86IntRegType to ) +{ + write8( 0x0F ); + write8( 0xC8 + to ); +} + +// to = from + offset +void LEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset) +{ + write8(0x66); + LEA32RtoR(to, from, offset); +} + +void LEA32RtoR(x86IntRegType to, x86IntRegType from, u32 offset) +{ + RexRB(0,to,from); + write8(0x8d); + + if( (from&7) == ESP ) { + if( offset == 0 ) { + ModRM(1, to, from); + write8(0x24); + } + else if( offset < 128 ) { + ModRM(1, to, from); + write8(0x24); + write8(offset); + } + else { + ModRM(2, to, from); + write8(0x24); + write32(offset); + } + } + else { + if( offset == 0 && from != EBP && from!=ESP ) { + ModRM(0, to, from); + } + else if( offset < 128 ) { + ModRM(1, to, from); + write8(offset); + 
} + else { + ModRM(2, to, from); + write32(offset); + } + } +} + +// to = from0 + from1 +void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) +{ + write8(0x66); + LEA32RRtoR(to, from0, from1); +} + +void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) +{ + RexRXB(0, to, from0, from1); + write8(0x8d); + + if( (from1&7) == EBP ) { + ModRM(1, to, 4); + ModRM(0, from0, from1); + write8(0); + } + else { + ModRM(0, to, 4); + ModRM(0, from0, from1); + } +} + +// to = from << scale (max is 3) +void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale) +{ + write8(0x66); + LEA32RStoR(to, from, scale); +} + +void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) +{ + if( to == from ) { + SHL32ItoR(to, scale); + return; + } + + if( from != ESP ) { + RexRXB(0,to,from,0); + write8(0x8d); + ModRM(0, to, 4); + ModRM(scale, from, 5); + write32(0); + } + else { + assert( to != ESP ); + MOV32RtoR(to, from); + LEA32RStoR(to, to, scale); + } +} + +#endif diff --git a/libpcsxcore/ix86_64/ix86-64.h b/libpcsxcore/ix86_64/ix86-64.h new file mode 100644 index 0000000..009fa5a --- /dev/null +++ b/libpcsxcore/ix86_64/ix86-64.h @@ -0,0 +1,1776 @@ +/* + * ix86 definitions v0.6.2 + * Authors: linuzappz <linuzappz@pcsx.net> + * alexey silinov + * goldfinger + * shadow < shadow@pcsx2.net > + */ + +#ifndef __IX86_H__ +#define __IX86_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "../psxcommon.h" // Basic types header +#include <assert.h> +#include <stdbool.h> + +#if defined(__MINGW32__) +#define PCSX2_ALIGNED16(x) __declspec(align(16)) x +#else +#define PCSX2_ALIGNED16(x) x __attribute((aligned(16))) +#endif + + +#ifdef __x86_64__ +#define XMMREGS 16 +#define X86REGS 16 +#else +#define XMMREGS 8 +#define X86REGS 8 +#endif + +#define MMXREGS 8 + +#define SIB 4 +#define DISP32 5 + +// general types +typedef int x86IntRegType; +#define EAX 0 +#define EBX 3 +#define ECX 1 +#define EDX 2 +#define ESI 6 +#define EDI 7 +#define 
EBP 5 +#define ESP 4 + +#ifdef __x86_64__ +#define RAX 0 +#define RBX 3 +#define RCX 1 +#define RDX 2 +#define RSI 6 +#define RDI 7 +#define RBP 5 +#define RSP 4 +#define R8 8 +#define R9 9 +#define R10 10 +#define R11 11 +#define R12 12 +#define R13 13 +#define R14 14 +#define R15 15 + +#define X86_TEMP RAX // don't allocate anything + +#ifdef _MSC_VER +extern x86IntRegType g_x86savedregs[8]; +extern x86IntRegType g_x86tempregs[6]; +#else +extern x86IntRegType g_x86savedregs[6]; +extern x86IntRegType g_x86tempregs[8]; +#endif + +extern x86IntRegType g_x86allregs[14]; // all registers that can be used by the recs +extern x86IntRegType g_x868bitregs[11]; +extern x86IntRegType g_x86non8bitregs[3]; + +#ifdef _MSC_VER +#define X86ARG1 RCX +#define X86ARG2 RDX +#define X86ARG3 R8 +#define X86ARG4 R9 +#else +#define X86ARG1 RDI +#define X86ARG2 RSI +#define X86ARG3 RDX +#define X86ARG4 RCX +#endif + +#else + +#define X86ARG1 EAX +#define X86ARG2 ECX +#define X86ARG3 EDX +#define X86ARG4 EBX + +#endif // __x86_64__ + +#define MM0 0 +#define MM1 1 +#define MM2 2 +#define MM3 3 +#define MM4 4 +#define MM5 5 +#define MM6 6 +#define MM7 7 + +typedef int x86MMXRegType; + +#define XMM0 0 +#define XMM1 1 +#define XMM2 2 +#define XMM3 3 +#define XMM4 4 +#define XMM5 5 +#define XMM6 6 +#define XMM7 7 +#define XMM8 8 +#define XMM9 9 +#define XMM10 10 +#define XMM11 11 +#define XMM12 12 +#define XMM13 13 +#define XMM14 14 +#define XMM15 15 + +typedef int x86SSERegType; + +typedef enum +{ + XMMT_INT = 0, // integer (sse2 only) + XMMT_FPS = 1, // floating point + //XMMT_FPD = 3, // double +} XMMSSEType; + +extern XMMSSEType g_xmmtypes[XMMREGS]; + +void cpudetectInit( void );//this is all that needs to be called and will fill up the below structs + +//cpu capabilities structure +typedef struct { + u32 hasFloatingPointUnit; + u32 hasVirtual8086ModeEnhancements; + u32 hasDebuggingExtensions; + u32 hasPageSizeExtensions; + u32 hasTimeStampCounter; + u32 hasModelSpecificRegisters; + u32 
hasPhysicalAddressExtension; + u32 hasCOMPXCHG8BInstruction; + u32 hasAdvancedProgrammableInterruptController; + u32 hasSEPFastSystemCall; + u32 hasMemoryTypeRangeRegisters; + u32 hasPTEGlobalFlag; + u32 hasMachineCheckArchitecture; + u32 hasConditionalMoveAndCompareInstructions; + u32 hasFGPageAttributeTable; + u32 has36bitPageSizeExtension; + u32 hasProcessorSerialNumber; + u32 hasCFLUSHInstruction; + u32 hasDebugStore; + u32 hasACPIThermalMonitorAndClockControl; + u32 hasMultimediaExtensions; + u32 hasFastStreamingSIMDExtensionsSaveRestore; + u32 hasStreamingSIMDExtensions; + u32 hasStreamingSIMD2Extensions; + u32 hasSelfSnoop; + u32 hasHyperThreading; + u32 hasThermalMonitor; + u32 hasIntel64BitArchitecture; + u32 hasStreamingSIMD3Extensions; + //that is only for AMDs + u32 hasMultimediaExtensionsExt; + u32 hasAMD64BitArchitecture; + u32 has3DNOWInstructionExtensionsExt; + u32 has3DNOWInstructionExtensions; +} CAPABILITIES; + +extern CAPABILITIES cpucaps; + +typedef struct { + + u32 x86Family; // Processor Family + u32 x86Model; // Processor Model + u32 x86PType; // Processor Type + u32 x86StepID; // Stepping ID + u32 x86Flags; // Feature Flags + u32 x86EFlags; // Extended Feature Flags + //all the above returns hex values + s8 x86ID[16]; // Vendor ID //the vendor creator (in %s) + s8 x86Type[20]; //cpu type in char format //the cpu type (in %s) + s8 x86Fam[50]; // family in char format //the original cpu name string (in %s) + u32 cpuspeed; // speed of cpu //this will give cpu speed (in %d) +} CPUINFO; + +extern CPUINFO cpuinfo; + +extern s8 *x86Ptr; +extern u8 *j8Ptr[32]; +extern u32 *j32Ptr[32]; + + +#ifdef __x86_64__ +#define X86_64ASSERT() assert(0) +#define MEMADDR_(addr, oplen) (sptr)((uptr)(addr) - ((uptr)x86Ptr + ((u64)(oplen)))) +#define SPTR32(addr) ((addr) < 0x80000000L && (addr) >= -0x80000000L) +#define UPTR32(addr) ((addr) < 0x100000000L) +#define MEMADDR(addr, oplen) ({ sptr _a = MEMADDR_(addr, oplen); assert(SPTR32(_a)); _a; }) +#else +#define 
X86_64ASSERT() +#define SPTR32(a) 1 +#define UPTR32(a) 1 +#define MEMADDR(addr, oplen) (addr) +#endif + +#ifdef __x86_64__ +#define Rex( w, r, x, b ) write8( 0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b) ) +#else +#define Rex(w,r,x,b) assert(0) +#endif +#define RexRXB(w, reg, index, base) if(w || (reg) >= 8 || (index) >= 8 || (base) >= 8 ) \ + Rex(w, (reg)>=8, (index)>=8, (base)>=8) +#define RexR(w, reg) RexRXB(w, reg, 0, 0) +#define RexB(w, base) RexRXB(w, 0, 0, base) +#define RexRB(w, reg, base) RexRXB(w, reg, 0, base) + +void x86SetPtr( char *ptr ); +void x86Shutdown( void ); + +void x86SetJ8( u8 *j8 ); +void x86SetJ8A( u8 *j8 ); +void x86SetJ16( u16 *j16 ); +void x86SetJ16A( u16 *j16 ); +void x86SetJ32( u32 *j32 ); +void x86SetJ32A( u32 *j32 ); + +void x86Align( int bytes ); +u64 GetCPUTick( void ); + +// General Helper functions +#define ModRM(mod, rm, reg) write8( ( mod << 6 ) | ( (rm & 7) << 3 ) | ( reg & 7 ) ) +#define SibSB(ss, rm, index) write8( ( ss << 6 ) | ( rm << 3 ) | ( index ) ) +void SET8R( int cc, int to ); +u8* J8Rel( int cc, int to ); +u32* J32Rel( int cc, u32 to ); +void CMOV32RtoR( int cc, int to, int from ); +void CMOV32MtoR( int cc, int to, uptr from ); + +void MEMADDR_OP(bool w, unsigned opl, u64 op, bool isreg, int reg, uptr p, sptr off); + +#define VAROP1(op) 1, op +#define VAROP2(op1, op2) 2, (op1) | ((op2) << 8) + +//****************** +// IX86 intructions +//****************** + +// +// * scale values: +// * 0 - *1 +// * 1 - *2 +// * 2 - *4 +// * 3 - *8 +// + +void STC( void ); +void CLC( void ); + +//////////////////////////////////// +// mov instructions // +//////////////////////////////////// + +// mov r64 to r64 +void MOV64RtoR( x86IntRegType to, x86IntRegType from ); +// mov r64 to m64 +void MOV64RtoM( uptr to, x86IntRegType from ); +// mov m64 to r64 +void MOV64MtoR( x86IntRegType to, uptr from ); +// mov sign ext imm32 to m64 +void MOV64I32toM( uptr to, u32 from ); +// mov sign ext imm32 to r64 +void MOV64I32toR( 
x86IntRegType to, s32 from); +// mov imm64 to r64 +void MOV64ItoR( x86IntRegType to, u64 from); +// mov imm64 to [r64+off] +void MOV64ItoRmOffset( x86IntRegType to, u32 from, int offset); +// mov [r64+offset] to r64 +void MOV64RmOffsettoR( x86IntRegType to, x86IntRegType from, int offset ); +// mov [r64][r64*scale] to r64 +void MOV64RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale); +// mov r64 to [r64+offset] +void MOV64RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset ); +// mov r64 to [r64][r64*scale] +void MOV64RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale); + +// mov r32 to r32 +void MOV32RtoR( x86IntRegType to, x86IntRegType from ); +// mov r32 to m32 +void MOV32RtoM( uptr to, x86IntRegType from ); +// mov m32 to r32 +void MOV32MtoR( x86IntRegType to, uptr from ); +// mov [r32] to r32 +void MOV32RmtoR( x86IntRegType to, x86IntRegType from ); +void MOV32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ); +// mov [r32][r32<<scale] to r32 +void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale ); +// mov [imm32(from2) + r32(from1)<<scale] to r32 +void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, int from2, int scale ); +// mov r32 to [r32] +void MOV32RtoRm( x86IntRegType to, x86IntRegType from ); +// mov r32 to [r32][r32*scale] +void MOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale ); +// mov imm32 to r32 +void MOV32ItoR( x86IntRegType to, u32 from ); +// mov imm32 to m32 +void MOV32ItoM( uptr to, u32 from ); +// mov imm32 to [r32+off] +void MOV32ItoRmOffset( x86IntRegType to, u32 from, int offset); +// mov r32 to [r32+off] +void MOV32RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset); + +// mov r16 to m16 +void MOV16RtoM( uptr to, x86IntRegType from ); +// mov m16 to r16 +void MOV16MtoR( x86IntRegType to, uptr from ); +// mov [r32] to r16 +void MOV16RmtoR( x86IntRegType to, x86IntRegType 
from ) ; +void MOV16RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ); +// mov [imm32(from2) + r32(from1)<<scale] to r16 +void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale ); +// mov r16 to [r32] +void MOV16RtoRm(x86IntRegType to, x86IntRegType from); +// mov imm16 to m16 +void MOV16ItoM( uptr to, u16 from ); +/* mov r16 to [r32][r32*scale] */ +void MOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale); +// mov imm16 to r16 +void MOV16ItoR( x86IntRegType to, u16 from ); +// mov imm16 to [r16+off] +void MOV16ItoRmOffset( x86IntRegType to, u16 from, u32 offset); +// mov r16 to [r16+off] +void MOV16RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset); + +// mov r8 to m8 +void MOV8RtoM( uptr to, x86IntRegType from ); +// mov m8 to r8 +void MOV8MtoR( x86IntRegType to, uptr from ); +// mov [r32] to r8 +void MOV8RmtoR(x86IntRegType to, x86IntRegType from); +void MOV8RmtoROffset(x86IntRegType to, x86IntRegType from, int offset); +// mov r8 to [r32] +void MOV8RtoRm(x86IntRegType to, x86IntRegType from); +// mov imm8 to m8 +void MOV8ItoM( uptr to, u8 from ); +// mov imm8 to r8 +void MOV8ItoR( x86IntRegType to, u8 from ); +// mov imm8 to [r8+off] +void MOV8ItoRmOffset( x86IntRegType to, u8 from, int offset); +// mov r8 to [r8+off] +void MOV8RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset); + +// movsx r8 to r32 +void MOVSX32R8toR( x86IntRegType to, x86IntRegType from ); +void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from ); +void MOVSX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ); +// movsx m8 to r32 +void MOVSX32M8toR( x86IntRegType to, uptr from ); +// movsx r16 to r32 +void MOVSX32R16toR( x86IntRegType to, x86IntRegType from ); +void MOVSX32Rm16toR( x86IntRegType to, x86IntRegType from ); +void MOVSX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ); +// movsx m16 to r32 +void MOVSX32M16toR( x86IntRegType to, uptr from ); + +// 
movzx r8 to r32 +void MOVZX32R8toR( x86IntRegType to, x86IntRegType from ); +void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from ); +void MOVZX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ); +// movzx m8 to r32 +void MOVZX32M8toR( x86IntRegType to, uptr from ); +// movzx r16 to r32 +void MOVZX32R16toR( x86IntRegType to, x86IntRegType from ); +void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from ); +void MOVZX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ); +// movzx m16 to r32 +void MOVZX32M16toR( x86IntRegType to, uptr from ); + +#ifdef __x86_64__ +void MOVZX64R8toR( x86IntRegType to, x86IntRegType from ); +void MOVZX64Rm8toR( x86IntRegType to, x86IntRegType from ); +void MOVZX64Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ); +// movzx m8 to r64 +void MOVZX64M8toR( x86IntRegType to, uptr from ); +// movzx r16 to r64 +void MOVZX64R16toR( x86IntRegType to, x86IntRegType from ); +void MOVZX64Rm16toR( x86IntRegType to, x86IntRegType from ); +void MOVZX64Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ); +// movzx m16 to r64 +void MOVZX64M16toR( x86IntRegType to, uptr from ); +#endif + +// cmovbe r32 to r32 +void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovbe m32 to r32 +void CMOVBE32MtoR( x86IntRegType to, uptr from ); +// cmovb r32 to r32 +void CMOVB32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovb m32 to r32 +void CMOVB32MtoR( x86IntRegType to, uptr from ); +// cmovae r32 to r32 +void CMOVAE32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovae m32 to r32 +void CMOVAE32MtoR( x86IntRegType to, uptr from ); +// cmova r32 to r32 +void CMOVA32RtoR( x86IntRegType to, x86IntRegType from ); +// cmova m32 to r32 +void CMOVA32MtoR( x86IntRegType to, uptr from ); + +// cmovo r32 to r32 +void CMOVO32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovo m32 to r32 +void CMOVO32MtoR( x86IntRegType to, uptr from ); +// cmovp r32 to r32 +void CMOVP32RtoR( 
x86IntRegType to, x86IntRegType from ); +// cmovp m32 to r32 +void CMOVP32MtoR( x86IntRegType to, uptr from ); +// cmovs r32 to r32 +void CMOVS32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovs m32 to r32 +void CMOVS32MtoR( x86IntRegType to, uptr from ); +// cmovno r32 to r32 +void CMOVNO32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovno m32 to r32 +void CMOVNO32MtoR( x86IntRegType to, uptr from ); +// cmovnp r32 to r32 +void CMOVNP32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovnp m32 to r32 +void CMOVNP32MtoR( x86IntRegType to, uptr from ); +// cmovns r32 to r32 +void CMOVNS32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovns m32 to r32 +void CMOVNS32MtoR( x86IntRegType to, uptr from ); + +// cmovne r32 to r32 +void CMOVNE32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovne m32 to r32 +void CMOVNE32MtoR( x86IntRegType to, uptr from ); +// cmove r32 to r32 +void CMOVE32RtoR( x86IntRegType to, x86IntRegType from ); +// cmove m32 to r32 +void CMOVE32MtoR( x86IntRegType to, uptr from ); +// cmovg r32 to r32 +void CMOVG32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovg m32 to r32 +void CMOVG32MtoR( x86IntRegType to, uptr from ); +// cmovge r32 to r32 +void CMOVGE32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovge m32 to r32 +void CMOVGE32MtoR( x86IntRegType to, uptr from ); +// cmovl r32 to r32 +void CMOVL32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovl m32 to r32 +void CMOVL32MtoR( x86IntRegType to, uptr from ); +// cmovle r32 to r32 +void CMOVLE32RtoR( x86IntRegType to, x86IntRegType from ); +// cmovle m32 to r32 +void CMOVLE32MtoR( x86IntRegType to, uptr from ); + +//////////////////////////////////// +// arithmetic instructions // +//////////////////////////////////// + +// add imm32 to r64 +void ADD64ItoR( x86IntRegType to, u32 from ); +// add m64 to r64 +void ADD64MtoR( x86IntRegType to, uptr from ); + +// add imm32 to r32 +void ADD32ItoR( x86IntRegType to, u32 from ); +// add imm32 to m32 +void 
ADD32ItoM( uptr to, u32 from ); +// add imm32 to [r32+off] +void ADD32ItoRmOffset( x86IntRegType to, u32 from, int offset); +// add r32 to r32 +void ADD32RtoR( x86IntRegType to, x86IntRegType from ); +// add r32 to m32 +void ADD32RtoM( uptr to, x86IntRegType from ); +// add m32 to r32 +void ADD32MtoR( x86IntRegType to, uptr from ); + +// add r16 to r16 +void ADD16RtoR( x86IntRegType to , x86IntRegType from ); +// add imm16 to r16 +void ADD16ItoR( x86IntRegType to, u16 from ); +// add imm16 to m16 +void ADD16ItoM( uptr to, u16 from ); +// add r16 to m16 +void ADD16RtoM( uptr to, x86IntRegType from ); +// add m16 to r16 +void ADD16MtoR( x86IntRegType to, uptr from ); + +// add m8 to r8 +void ADD8MtoR( x86IntRegType to, uptr from ); + +// adc imm32 to r32 +void ADC32ItoR( x86IntRegType to, u32 from ); +// adc imm32 to m32 +void ADC32ItoM( uptr to, u32 from ); +// adc r32 to r32 +void ADC32RtoR( x86IntRegType to, x86IntRegType from ); +// adc m32 to r32 +void ADC32MtoR( x86IntRegType to, uptr from ); +// adc r32 to m32 +void ADC32RtoM( uptr to, x86IntRegType from ); + +// inc r32 +void INC32R( x86IntRegType to ); +// inc m32 +void INC32M( uptr to ); +// inc r16 +void INC16R( x86IntRegType to ); +// inc m16 +void INC16M( uptr to ); + +// sub m64 to r64 +void SUB64MtoR( x86IntRegType to, uptr from ); +void SUB64ItoR( x86IntRegType to, u32 from ); + +// sub imm32 to r32 +void SUB32ItoR( x86IntRegType to, u32 from ); +// sub imm32 to m32 +void SUB32ItoM( uptr to, u32 from ) ; +// sub r32 to r32 +void SUB32RtoR( x86IntRegType to, x86IntRegType from ); +// sub m32 to r32 +void SUB32MtoR( x86IntRegType to, uptr from ) ; +// sub r32 to m32 +void SUB32RtoM( uptr to, x86IntRegType from ); +// sub r16 to r16 +void SUB16RtoR( x86IntRegType to, u16 from ); +// sub imm16 to r16 +void SUB16ItoR( x86IntRegType to, u16 from ); +// sub imm16 to m16 +void SUB16ItoM( uptr to, u16 from ) ; +// sub m16 to r16 +void SUB16MtoR( x86IntRegType to, uptr from ); + +// sbb r64 to r64 +void 
SBB64RtoR( x86IntRegType to, x86IntRegType from ); + +// sbb imm32 to r32 +void SBB32ItoR( x86IntRegType to, u32 from ); +// sbb imm32 to m32 +void SBB32ItoM( uptr to, u32 from ); +// sbb r32 to r32 +void SBB32RtoR( x86IntRegType to, x86IntRegType from ); +// sbb m32 to r32 +void SBB32MtoR( x86IntRegType to, uptr from ); +// sbb r32 to m32 +void SBB32RtoM( uptr to, x86IntRegType from ); + +// dec r32 +void DEC32R( x86IntRegType to ); +// dec m32 +void DEC32M( uptr to ); +// dec r16 +void DEC16R( x86IntRegType to ); +// dec m16 +void DEC16M( uptr to ); + +// mul eax by r32 to edx:eax +void MUL32R( x86IntRegType from ); +// mul eax by m32 to edx:eax +void MUL32M( uptr from ); + +// imul eax by r32 to edx:eax +void IMUL32R( x86IntRegType from ); +// imul eax by m32 to edx:eax +void IMUL32M( uptr from ); +// imul r32 by r32 to r32 +void IMUL32RtoR( x86IntRegType to, x86IntRegType from ); + +// div eax by r32 to edx:eax +void DIV32R( x86IntRegType from ); +// div eax by m32 to edx:eax +void DIV32M( uptr from ); + +// idiv eax by r32 to edx:eax +void IDIV32R( x86IntRegType from ); +// idiv eax by m32 to edx:eax +void IDIV32M( uptr from ); + +//////////////////////////////////// +// shifting instructions // +//////////////////////////////////// + +// shl imm8 to r64 +void SHL64ItoR( x86IntRegType to, u8 from ); +// shl cl to r64 +void SHL64CLtoR( x86IntRegType to ); +// shr imm8 to r64 +void SHR64ItoR( x86IntRegType to, u8 from ); +// shr cl to r64 +void SHR64CLtoR( x86IntRegType to ); +// sar imm8 to r64 +void SAR64ItoR( x86IntRegType to, u8 from ); +// sar cl to r64 +void SAR64CLtoR( x86IntRegType to ); + +// shl imm8 to r32 +void SHL32ItoR( x86IntRegType to, u8 from ); +/* shl imm8 to m32 */ +void SHL32ItoM( uptr to, u8 from ); +// shl cl to r32 +void SHL32CLtoR( x86IntRegType to ); + +// shl imm8 to r16 +void SHL16ItoR( x86IntRegType to, u8 from ); +// shl imm8 to r8 +void SHL8ItoR( x86IntRegType to, u8 from ); + +// shr imm8 to r32 +void SHR32ItoR( x86IntRegType to, 
u8 from ); +/* shr imm8 to m32 */ +void SHR32ItoM( uptr to, u8 from ); +// shr cl to r32 +void SHR32CLtoR( x86IntRegType to ); + +// shr imm8 to r8 +void SHR8ItoR( x86IntRegType to, u8 from ); + +// sar imm8 to r32 +void SAR32ItoR( x86IntRegType to, u8 from ); +// sar imm8 to m32 +void SAR32ItoM( uptr to, u8 from ); +// sar cl to r32 +void SAR32CLtoR( x86IntRegType to ); + +// sar imm8 to r16 +void SAR16ItoR( x86IntRegType to, u8 from ); + +// ror imm8 to r32 (rotate right) +void ROR32ItoR( x86IntRegType to,u8 from ); + +void RCR32ItoR( x86IntRegType to,u8 from ); +// shld imm8 to r32 +void SHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ); +// shrd imm8 to r32 +void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ); + +// sal imm8 to r32 +#define SAL32ItoR SHL32ItoR +// sal cl to r32 +#define SAL32CLtoR SHL32CLtoR + +// logical instructions + +// or imm32 to r64 +void OR64ItoR( x86IntRegType to, u32 from ); +// or m64 to r64 +void OR64MtoR( x86IntRegType to, uptr from ); +// or r64 to r64 +void OR64RtoR( x86IntRegType to, x86IntRegType from ); +// or r32 to m64 +void OR64RtoM( uptr to, x86IntRegType from ); + +// or imm32 to r32 +void OR32ItoR( x86IntRegType to, u32 from ); +// or imm32 to m32 +void OR32ItoM( uptr to, u32 from ); +// or r32 to r32 +void OR32RtoR( x86IntRegType to, x86IntRegType from ); +// or r32 to m32 +void OR32RtoM( uptr to, x86IntRegType from ); +// or m32 to r32 +void OR32MtoR( x86IntRegType to, uptr from ); +// or r16 to r16 +void OR16RtoR( x86IntRegType to, x86IntRegType from ); +// or imm16 to r16 +void OR16ItoR( x86IntRegType to, u16 from ); +// or imm16 to m16 +void OR16ItoM( uptr to, u16 from ); +// or m16 to r16 +void OR16MtoR( x86IntRegType to, uptr from ); +// or r16 to m16 +void OR16RtoM( uptr to, x86IntRegType from ); + +// or r8 to r8 +void OR8RtoR( x86IntRegType to, x86IntRegType from ); +// or r8 to m8 +void OR8RtoM( uptr to, x86IntRegType from ); +// or imm8 to m8 +void OR8ItoM( uptr to, u8 from ); +// or 
m8 to r8 +void OR8MtoR( x86IntRegType to, uptr from ); + +// xor imm32 to r64 +void XOR64ItoR( x86IntRegType to, u32 from ); +// xor r64 to r64 +void XOR64RtoR( x86IntRegType to, x86IntRegType from ); +// xor m64 to r64 +void XOR64MtoR( x86IntRegType to, uptr from ); +// xor r64 to r64 +void XOR64RtoR( x86IntRegType to, x86IntRegType from ); +// xor r64 to m64 +void XOR64RtoM( uptr to, x86IntRegType from ); +// xor imm32 to r32 +void XOR32ItoR( x86IntRegType to, u32 from ); +// xor imm32 to m32 +void XOR32ItoM( uptr to, u32 from ); +// xor r32 to r32 +void XOR32RtoR( x86IntRegType to, x86IntRegType from ); +// xor r16 to r16 +void XOR16RtoR( x86IntRegType to, x86IntRegType from ); +// xor r32 to m32 +void XOR32RtoM( uptr to, x86IntRegType from ); +// xor m32 to r32 +void XOR32MtoR( x86IntRegType to, uptr from ); +// xor r16 to m16 +void XOR16RtoM( uptr to, x86IntRegType from ); +// xor imm16 to r16 +void XOR16ItoR( x86IntRegType to, u16 from ); + +// and imm32 to r64 +void AND64I32toR( x86IntRegType to, u32 from ); +// and m64 to r64 +void AND64MtoR( x86IntRegType to, uptr from ); +// and r64 to m64 +void AND64RtoM( uptr to, x86IntRegType from ); +// and r64 to r64 +void AND64RtoR( x86IntRegType to, x86IntRegType from ); +// and imm32 to m64 +void AND64I32toM( uptr to, u32 from ); + +// and imm32 to r32 +void AND32ItoR( x86IntRegType to, u32 from ); +// and sign ext imm8 to r32 +void AND32I8toR( x86IntRegType to, u8 from ); +// and imm32 to m32 +void AND32ItoM( uptr to, u32 from ); +// and sign ext imm8 to m32 +void AND32I8toM( uptr to, u8 from ); +// and r32 to r32 +void AND32RtoR( x86IntRegType to, x86IntRegType from ); +// and r32 to m32 +void AND32RtoM( uptr to, x86IntRegType from ); +// and m32 to r32 +void AND32MtoR( x86IntRegType to, uptr from ); +// and r16 to r16 +void AND16RtoR( x86IntRegType to, x86IntRegType from ); +// and imm16 to r16 +void AND16ItoR( x86IntRegType to, u16 from ); +// and imm16 to m16 +void AND16ItoM( uptr to, u16 from ); +// and r16 
to m16 +void AND16RtoM( uptr to, x86IntRegType from ); +// and m16 to r16 +void AND16MtoR( x86IntRegType to, uptr from ); +// and imm8 to r8 +void AND8ItoR( x86IntRegType to, u8 from ); +// and imm8 to m32 +void AND8ItoM( uptr to, u8 from ); +// and r8 to m8 +void AND8RtoM( uptr to, x86IntRegType from ); +// and m8 to r8 +void AND8MtoR( x86IntRegType to, uptr from ); +// and r8 to r8 +void AND8RtoR( x86IntRegType to, x86IntRegType from ); + +// not r64 +void NOT64R( x86IntRegType from ); +// not r32 +void NOT32R( x86IntRegType from ); +// not m32 +void NOT32M( uptr from ); +// neg r64 +void NEG64R( x86IntRegType from ); +// neg r32 +void NEG32R( x86IntRegType from ); +// neg m32 +void NEG32M( uptr from ); +// neg r16 +void NEG16R( x86IntRegType from ); + +//////////////////////////////////// +// jump instructions // +//////////////////////////////////// + +// jmp rel8 +u8* JMP8( u8 to ); + +// jmp rel32 +u32* JMP32( uptr to ); +// jmp r32 (r64 if __x86_64__) +void JMPR( x86IntRegType to ); +// jmp m32 +void JMP32M( uptr to ); + +// jp rel8 +u8* JP8( u8 to ); +// jnp rel8 +u8* JNP8( u8 to ); +// je rel8 +u8* JE8( u8 to ); +// jz rel8 +u8* JZ8( u8 to ); +// jg rel8 +u8* JG8( u8 to ); +// jge rel8 +u8* JGE8( u8 to ); +// js rel8 +u8* JS8( u8 to ); +// jns rel8 +u8* JNS8( u8 to ); +// jl rel8 +u8* JL8( u8 to ); +// ja rel8 +u8* JA8( u8 to ); +// jae rel8 +u8* JAE8( u8 to ); +// jb rel8 +u8* JB8( u8 to ); +// jbe rel8 +u8* JBE8( u8 to ); +// jle rel8 +u8* JLE8( u8 to ); +// jne rel8 +u8* JNE8( u8 to ); +// jnz rel8 +u8* JNZ8( u8 to ); +// jng rel8 +u8* JNG8( u8 to ); +// jnge rel8 +u8* JNGE8( u8 to ); +// jnl rel8 +u8* JNL8( u8 to ); +// jnle rel8 +u8* JNLE8( u8 to ); +// jo rel8 +u8* JO8( u8 to ); +// jno rel8 +u8* JNO8( u8 to ); + +// jb rel8 +u16* JB16( u16 to ); + +// jb rel32 +u32* JB32( u32 to ); +// je rel32 +u32* JE32( u32 to ); +// jz rel32 +u32* JZ32( u32 to ); +// jg rel32 +u32* JG32( u32 to ); +// jge rel32 +u32* JGE32( u32 to ); +// jl rel32 +u32* JL32( u32 
to ); +// jle rel32 +u32* JLE32( u32 to ); +// jae rel32 +u32* JAE32( u32 to ); +// jne rel32 +u32* JNE32( u32 to ); +// jnz rel32 +u32* JNZ32( u32 to ); +// jng rel32 +u32* JNG32( u32 to ); +// jnge rel32 +u32* JNGE32( u32 to ); +// jnl rel32 +u32* JNL32( u32 to ); +// jnle rel32 +u32* JNLE32( u32 to ); +// jo rel32 +u32* JO32( u32 to ); +// jno rel32 +u32* JNO32( u32 to ); +// js rel32 +u32* JS32( u32 to ); + +// call func +void CALLFunc( uptr func); +// call rel32 +void CALL32( s32 to ); +// call r32 +void CALL32R( x86IntRegType to ); +// call m32 +void CALL64R( x86IntRegType to ); + + +//////////////////////////////////// +// misc instructions // +//////////////////////////////////// + +// cmp imm32 to r64 +void CMP64I32toR( x86IntRegType to, u32 from ); +// cmp m64 to r64 +void CMP64MtoR( x86IntRegType to, uptr from ); +// cmp r64 to r64 +void CMP64RtoR( x86IntRegType to, x86IntRegType from ); + +// cmp imm32 to r32 +void CMP32ItoR( x86IntRegType to, u32 from ); +// cmp imm32 to m32 +void CMP32ItoM( uptr to, u32 from ); +// cmp r32 to r32 +void CMP32RtoR( x86IntRegType to, x86IntRegType from ); +// cmp m32 to r32 +void CMP32MtoR( x86IntRegType to, uptr from ); +// cmp imm32 to [r32] +void CMP32I8toRm( x86IntRegType to, u8 from); +// cmp imm32 to [r32+off] +void CMP32I8toRmOffset8( x86IntRegType to, u8 from, u8 off); +// cmp imm8 to [r32] +void CMP32I8toM( uptr to, u8 from); + +// cmp imm16 to r16 +void CMP16ItoR( x86IntRegType to, u16 from ); +// cmp imm16 to m16 +void CMP16ItoM( uptr to, u16 from ); +// cmp r16 to r16 +void CMP16RtoR( x86IntRegType to, x86IntRegType from ); +// cmp m16 to r16 +void CMP16MtoR( x86IntRegType to, uptr from ); + +// cmp imm8 to r8 +void CMP8ItoR( x86IntRegType to, u8 from ); +// cmp m8 to r8 +void CMP8MtoR( x86IntRegType to, uptr from ); + +// test r64 to r64 +void TEST64RtoR( x86IntRegType to, x86IntRegType from ); +// test imm32 to r32 +void TEST32ItoR( x86IntRegType to, u32 from ); +// test imm32 to m32 +void TEST32ItoM( uptr 
to, u32 from ); +// test r32 to r32 +void TEST32RtoR( x86IntRegType to, x86IntRegType from ); +// test imm32 to [r32] +void TEST32ItoRm( x86IntRegType to, u32 from ); +// test imm16 to r16 +void TEST16ItoR( x86IntRegType to, u16 from ); +// test r16 to r16 +void TEST16RtoR( x86IntRegType to, x86IntRegType from ); +// test imm8 to r8 +void TEST8ItoR( x86IntRegType to, u8 from ); +// test imm8 to r8 +void TEST8ItoM( uptr to, u8 from ); + +// sets r8 +void SETS8R( x86IntRegType to ); +// setl r8 +void SETL8R( x86IntRegType to ); +// setge r8 +void SETGE8R( x86IntRegType to ); +// setge r8 +void SETG8R( x86IntRegType to ); +// seta r8 +void SETA8R( x86IntRegType to ); +// setae r8 +void SETAE8R( x86IntRegType to ); +// setb r8 +void SETB8R( x86IntRegType to ); +// setnz r8 +void SETNZ8R( x86IntRegType to ); +// setz r8 +void SETZ8R( x86IntRegType to ); +// sete r8 +void SETE8R( x86IntRegType to ); + +// push imm32 +void PUSH32I( u32 from ); + +#ifdef __x86_64__ +void PUSHI( u32 from ); +// push r64 +void PUSH64R( x86IntRegType from ); +// push m64 +void PUSH64M( uptr from ); +// pop r32 +void POP64R( x86IntRegType from ); +#else +// push r32 +void PUSH32R( x86IntRegType from ); +// push m32 +void PUSH32M( u32 from ); +// push imm32 +void PUSH32I( u32 from ); +// pop r32 +void POP32R( x86IntRegType from ); +// pushad +void PUSHA32( void ); +// popad +void POPA32( void ); +#endif + +void PUSHR(x86IntRegType from); +void POPR(x86IntRegType from); + +// pushfd +void PUSHFD( void ); +// popfd +void POPFD( void ); +// ret +void RET( void ); +// ret (2-byte code used for misprediction) +void RET2( void ); + +void CBW(); +void CWDE(); +// cwd +void CWD( void ); +// cdq +void CDQ( void ); +// cdqe +void CDQE( void ); + +void LAHF(); +void SAHF(); + +void BT32ItoR( x86IntRegType to, x86IntRegType from ); +void BSRRtoR(x86IntRegType to, x86IntRegType from); +void BSWAP32R( x86IntRegType to ); + +// to = from + offset +void LEA16RtoR(x86IntRegType to, x86IntRegType from, u16 
offset); +void LEA32RtoR(x86IntRegType to, x86IntRegType from, u32 offset); + +// to = from0 + from1 +void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); +void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); + +// to = from << scale (max is 3) +void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale); +void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); + +//****************** +// FPU instructions +//****************** + +// fild m32 to fpu reg stack +void FILD32( uptr from ); +// fistp m32 from fpu reg stack +void FISTP32( uptr from ); +// fld m32 to fpu reg stack +void FLD32( uptr from ); +// fld st(i) +void FLD(int st); +// fld1 (push +1.0f on the stack) +void FLD1(); +// fld1 (push log_2 e on the stack) +void FLDL2E(); +// fst m32 from fpu reg stack +void FST32( uptr to ); +// fstp m32 from fpu reg stack +void FSTP32( uptr to ); +// fstp st(i) +void FSTP(int st); + +// fldcw fpu control word from m16 +void FLDCW( uptr from ); +// fstcw fpu control word to m16 +void FNSTCW( uptr to ); +void FXAM(); +void FDECSTP(); +// frndint +void FRNDINT(); +void FXCH(int st); +void F2XM1(); +void FSCALE(); + +// fadd ST(src) to fpu reg stack ST(0) +void FADD32Rto0( x86IntRegType src ); +// fadd ST(0) to fpu reg stack ST(src) +void FADD320toR( x86IntRegType src ); +// fsub ST(src) to fpu reg stack ST(0) +void FSUB32Rto0( x86IntRegType src ); +// fsub ST(0) to fpu reg stack ST(src) +void FSUB320toR( x86IntRegType src ); +// fsubp -> subtract ST(0) from ST(1), store in ST(1) and POP stack +void FSUBP( void ); +// fmul ST(src) to fpu reg stack ST(0) +void FMUL32Rto0( x86IntRegType src ); +// fmul ST(0) to fpu reg stack ST(src) +void FMUL320toR( x86IntRegType src ); +// fdiv ST(src) to fpu reg stack ST(0) +void FDIV32Rto0( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src) +void FDIV320toR( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) +void FDIV320toRP( 
x86IntRegType src ); + +// fadd m32 to fpu reg stack +void FADD32( uptr from ); +// fsub m32 to fpu reg stack +void FSUB32( uptr from ); +// fmul m32 to fpu reg stack +void FMUL32( uptr from ); +// fdiv m32 to fpu reg stack +void FDIV32( uptr from ); +// fcomi st, st( i) +void FCOMI( x86IntRegType src ); +// fcomip st, st( i) +void FCOMIP( x86IntRegType src ); +// fucomi st, st( i) +void FUCOMI( x86IntRegType src ); +// fucomip st, st( i) +void FUCOMIP( x86IntRegType src ); +// fcom m32 to fpu reg stack +void FCOM32( uptr from ); +// fabs fpu reg stack +void FABS( void ); +// fsqrt fpu reg stack +void FSQRT( void ); +// ftan fpu reg stack +void FPATAN( void ); +// fsin fpu reg stack +void FSIN( void ); +// fchs fpu reg stack +void FCHS( void ); + +// fcmovb fpu reg to fpu reg stack +void FCMOVB32( x86IntRegType from ); +// fcmove fpu reg to fpu reg stack +void FCMOVE32( x86IntRegType from ); +// fcmovbe fpu reg to fpu reg stack +void FCMOVBE32( x86IntRegType from ); +// fcmovu fpu reg to fpu reg stack +void FCMOVU32( x86IntRegType from ); +// fcmovnb fpu reg to fpu reg stack +void FCMOVNB32( x86IntRegType from ); +// fcmovne fpu reg to fpu reg stack +void FCMOVNE32( x86IntRegType from ); +// fcmovnbe fpu reg to fpu reg stack +void FCMOVNBE32( x86IntRegType from ); +// fcmovnu fpu reg to fpu reg stack +void FCMOVNU32( x86IntRegType from ); +void FCOMP32( uptr from ); +void FNSTSWtoAX( void ); + +// probably a little extreme here, but x86-64 should NOT use MMX +#ifdef __x86_64__ + +#define MMXONLY(code) + +#else + +#define MMXONLY(code) code + +//****************** +// MMX instructions +//****************** + +// r64 = mm + +// movq m64 to r64 +void MOVQMtoR( x86MMXRegType to, uptr from ); +// movq r64 to m64 +void MOVQRtoM( uptr to, x86MMXRegType from ); + +// pand r64 to r64 +void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pand m64 to r64 ; +void PANDMtoR( x86MMXRegType to, uptr from ); +// pandn 
r64 to r64 +void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pandn r64 to r64 +void PANDNMtoR( x86MMXRegType to, uptr from ); +// por r64 to r64 +void PORRtoR( x86MMXRegType to, x86MMXRegType from ); +// por m64 to r64 +void PORMtoR( x86MMXRegType to, uptr from ); +// pxor r64 to r64 +void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); +// pxor m64 to r64 +void PXORMtoR( x86MMXRegType to, uptr from ); + +// psllq r64 to r64 +void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psllq m64 to r64 +void PSLLQMtoR( x86MMXRegType to, uptr from ); +// psllq imm8 to r64 +void PSLLQItoR( x86MMXRegType to, u8 from ); +// psrlq r64 to r64 +void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psrlq m64 to r64 +void PSRLQMtoR( x86MMXRegType to, uptr from ); +// psrlq imm8 to r64 +void PSRLQItoR( x86MMXRegType to, u8 from ); + +// paddusb r64 to r64 +void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusb m64 to r64 +void PADDUSBMtoR( x86MMXRegType to, uptr from ); +// paddusw r64 to r64 +void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusw m64 to r64 +void PADDUSWMtoR( x86MMXRegType to, uptr from ); + +// paddb r64 to r64 +void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddb m64 to r64 +void PADDBMtoR( x86MMXRegType to, uptr from ); +// paddw r64 to r64 +void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddw m64 to r64 +void PADDWMtoR( x86MMXRegType to, uptr from ); +// paddd r64 to r64 +void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddd m64 to r64 +void PADDDMtoR( x86MMXRegType to, uptr from ); +void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +// paddq m64 to r64 (sse2 only?) +void PADDQMtoR( x86MMXRegType to, uptr from ); +// paddq r64 to r64 (sse2 only?) 
+void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSUBDMtoR( x86MMXRegType to, uptr from ); + +// psubq m64 to r64 (sse2 only?) +void PSUBQMtoR( x86MMXRegType to, uptr from ); +// psubq r64 to r64 (sse2 only?) +void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); + +// pmuludq m64 to r64 (sse2 only?) +void PMULUDQMtoR( x86MMXRegType to, uptr from ); +// pmuludq r64 to r64 (sse2 only?) +void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPEQDMtoR( x86MMXRegType to, uptr from ); +void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPGTDMtoR( x86MMXRegType to, uptr from ); +void PSRLWItoR( x86MMXRegType to, u8 from ); +void PSRLDItoR( x86MMXRegType to, u8 from ); +void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSLLWItoR( x86MMXRegType to, u8 from ); +void PSLLDItoR( x86MMXRegType to, u8 from ); +void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSRAWItoR( x86MMXRegType to, u8 from ); +void PSRADItoR( x86MMXRegType to, u8 from ); +void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); +void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); +void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); +void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); +void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); +void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 +void 
MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); +void MOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset ); +void MOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ); +void MOVDMtoMMX( x86MMXRegType to, uptr from ); +void MOVDMMXtoM( uptr to, x86MMXRegType from ); +void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); +void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ); +void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ); +void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ); +void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ); +void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ); +void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); +void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); +void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); +void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); + +// emms +void EMMS( void ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits +//********************************************************************************** +void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); +void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); + +void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); + +void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); +void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); + +#endif // !__x86_64__ + +//********************* +// SSE instructions * +//********************* +void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); +void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); + +void SSE_MOVSS_M32_to_XMM( x86SSERegType to, 
uptr from ); +void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); +void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); +void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); + +void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); +void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); +void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); +void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); +void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); +void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); +void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ); + +void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); +void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); +void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ); 
+void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); +void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); +void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); +void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); + +void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ); + +void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ); +void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); + +void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); + +void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ); +void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +#ifndef __x86_64__ +void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ); +void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); +void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); +#endif +void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); +void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); +void 
SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); +void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); +void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); + +void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +void 
SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); +void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +// VectorPath +void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +void SSE_STMXCSR( uptr from ); +void SSE_LDMXCSR( uptr from ); + + +//********************* +// SSE 2 
Instructions* +//********************* +void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); +void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); +void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +void 
SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDUSW_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ); +void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word +//********************************************************************************** +void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); + +//**********************************************************************************/ +//PUNPCKHWD: Unpack 16bit high +//********************************************************************************** +void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); +void 
SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +// mult by half words +void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); + + +//**********************************************************************************/ +//PMOVMSKB: Create 16bit mask from signs of 8bit integers +//********************************************************************************** +void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); +void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +//**********************************************************************************/ +//PEXTRW,PINSRW: Packed Extract/Insert Word * +//********************************************************************************** +void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); +void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); + + +//**********************************************************************************/ +//PSUBx: Subtract Packed Integers * +//********************************************************************************** +void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); +void 
SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PCMPxx: Compare Packed Integers * +//********************************************************************************** +void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); +//**********************************************************************************/ +//MOVD: Move Dword(32bit) to /from XMM reg * +//********************************************************************************** +void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); +void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ); +void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE2_MOVD_XMM_to_M32( u32 
to, x86SSERegType from ); +void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); +void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +#ifdef __x86_64__ +void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); +void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); +#endif + +//**********************************************************************************/ +//POR : SSE Bitwise OR * +//********************************************************************************** +void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); + +void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); +void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); +//********************* +// SSE-X - uses both SSE,SSE2 code and tries to keep consistensies between the data +// Uses g_xmmtypes to infer the correct type. 
+//********************* +void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ); +void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); +void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ); +void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); +void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); + +void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +//********************* +// 3DNOW instructions * +//********************* +void FEMMS( void ); +void PFCMPEQMtoR( x86IntRegType to, uptr from ); +void PFCMPGTMtoR( x86IntRegType to, uptr from ); +void 
PFCMPGEMtoR( x86IntRegType to, uptr from ); +void PFADDMtoR( x86IntRegType to, uptr from ); +void PFADDRtoR( x86IntRegType to, x86IntRegType from ); +void PFSUBMtoR( x86IntRegType to, uptr from ); +void PFSUBRtoR( x86IntRegType to, x86IntRegType from ); +void PFMULMtoR( x86IntRegType to, uptr from ); +void PFMULRtoR( x86IntRegType to, x86IntRegType from ); +void PFRCPMtoR( x86IntRegType to, uptr from ); +void PFRCPRtoR( x86IntRegType to, x86IntRegType from ); +void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ); +void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ); +void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ); +void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ); +void PF2IDMtoR( x86IntRegType to, uptr from ); +void PF2IDRtoR( x86IntRegType to, x86IntRegType from ); +void PI2FDMtoR( x86IntRegType to, uptr from ); +void PI2FDRtoR( x86IntRegType to, x86IntRegType from ); +void PFMAXMtoR( x86IntRegType to, uptr from ); +void PFMAXRtoR( x86IntRegType to, x86IntRegType from ); +void PFMINMtoR( x86IntRegType to, uptr from ); +void PFMINRtoR( x86IntRegType to, x86IntRegType from ); + +void SSE2EMU_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from); +void SSE2EMU_MOVQ_M64_to_XMM( x86SSERegType to, uptr from); +void SSE2EMU_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from); +void SSE2EMU_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset ); + +#ifndef __x86_64__ +void SSE2EMU_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); +void SSE2EMU_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); +#endif + +/* SSE2 emulated functions for SSE CPU's by kekko*/ + +void SSE2EMU_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE2EMU_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); +void SSE2EMU_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2EMU_CVTDQ2PS_M128_to_XMM( 
x86SSERegType to, uptr from ); +void SSE2EMU_MOVD_XMM_to_M32( uptr to, x86SSERegType from ); +void SSE2EMU_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); + +//////////////////////////////////////////////////// +#ifdef _DEBUG +#define WRITECHECK() CheckX86Ptr() +#else +#define WRITECHECK() +#endif + +#define writeVAL(val) ({ \ + WRITECHECK(); \ + *(typeof(val)*)x86Ptr = (val); \ + x86Ptr += sizeof(val); \ + (void)0; \ + }) + +#define write8(val ) writeVAL((u8)(val)) +#define write16(val ) writeVAL((u16)(val)) +#define write32( val ) writeVAL((u32)(val)) +#define write64( val ) writeVAL((u64)(val)) + +#ifdef __cplusplus +} +#endif + +#endif // __IX86_H__ diff --git a/libpcsxcore/ix86_64/ix86_3dnow.c b/libpcsxcore/ix86_64/ix86_3dnow.c new file mode 100644 index 0000000..8fd4233 --- /dev/null +++ b/libpcsxcore/ix86_64/ix86_3dnow.c @@ -0,0 +1,178 @@ +// stop compiling if NORECBUILD build (only for Visual Studio) +#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) + +#include "ix86-64.h" + +/**********************/ +/* 3DNOW instructions */ +/**********************/ + +/* femms */ +void FEMMS( void ) +{ + write16( 0x0E0F ); +} + +void PFCMPEQMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0xB0 ); +} + +void PFCMPGTMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0xA0 ); +} + +void PFCMPGEMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0x90 ); +} + +void PFADDMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0x9E ); +} + +void PFADDRtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x9E ); +} + +void PFSUBMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0x9A ); +} + +void PFSUBRtoR( x86IntRegType to, 
x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x9A ); +} + +void PFMULMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0xB4 ); +} + +void PFMULRtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xB4 ); +} + +void PFRCPMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0x96 ); +} + +void PFRCPRtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x96 ); +} + +void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA6 ); +} + +void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xB6 ); +} + +void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x97 ); +} + +void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA7 ); +} + +void PF2IDMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0x1D ); +} + +void PF2IDRtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x1D ); +} + +void PI2FDMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0x0D ); +} + +void PI2FDRtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x0D ); +} + +void PFMAXMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0xA4 ); +} + +void PFMAXRtoR( x86IntRegType to, 
x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA4 ); +} + +void PFMINMtoR( x86IntRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1); + write8( 0x94 ); +} + +void PFMINRtoR( x86IntRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x94 ); +} + +#endif diff --git a/libpcsxcore/ix86_64/ix86_cpudetect.c b/libpcsxcore/ix86_64/ix86_cpudetect.c new file mode 100644 index 0000000..3c014d8 --- /dev/null +++ b/libpcsxcore/ix86_64/ix86_cpudetect.c @@ -0,0 +1,487 @@ +/* Cpudetection lib
+ * Copyright (C) 2002-2003 Pcsx2 Team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA
+ */
+#if defined (_WIN32)
+#include <windows.h>
+#endif
+
+#include <string.h>
+#include <stdio.h>
+
+#include "ix86-64.h"
+
+#if defined (_MSC_VER) && _MSC_VER >= 1400
+
+ void __cpuid(int* CPUInfo, int InfoType);
+ unsigned __int64 __rdtsc();
+
+ #pragma intrinsic(__cpuid)
+ #pragma intrinsic(__rdtsc)
+
+#endif
+
+/* Global CPU capability flags and identification strings; filled in by
+ * cpudetectInit() and read by the recompiler at startup. */
+CAPABILITIES cpucaps;
+CPUINFO cpuinfo;
+
+/* GCC/Clang helper: execute CPUID leaf `cmd`, writing EAX..EDX into a..d.
+ * NOTE(review): ECX is not seeded before CPUID ("0" ties only EAX) -- fine
+ * for the basic leaves used in this file, but sub-leaf queries would need
+ * an explicit "c" input as well. */
+#define cpuid(cmd,a,b,c,d) \
+ __asm__ __volatile__("cpuid" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (cmd))
+
+/* Execute CPUID leaf `cmd` and store EAX..EDX into regs[0..3].
+ * Returns 0 on success, -1 when the processor has no CPUID support
+ * (detected on 32-bit targets by toggling EFLAGS bit 21, the ID flag). */
+static s32 iCpuId( u32 cmd, u32 *regs )
+{
+ int flag=1; /* cleared below if EFLAGS.ID cannot be toggled (no CPUID) */
+
+#if defined (_MSC_VER) && _MSC_VER >= 1400
+
+ /* MSVC 2005+: use the compiler intrinsic.
+ * NOTE(review): __cpuid() is declared as taking int*; a u32* is passed
+ * here -- same representation, but confirm this compiles warning-free. */
+ __cpuid( regs, cmd );
+
+ return 0;
+
+#elif defined (_MSC_VER)
+
+#ifdef __x86_64__
+ assert(0); /* pre-2005 MSVC has no x64 inline asm path */
+#else // __x86_64__
+ /* Probe for CPUID support: try to toggle EFLAGS bit 21 (ID). */
+ __asm
+ {
+ push ebx;
+ push edi;
+
+ pushfd;
+ pop eax;
+ mov edx, eax;
+ xor eax, 1 << 21;
+ push eax;
+ popfd;
+ pushfd;
+ pop eax;
+ xor eax, edx;
+ mov flag, eax;
+ }
+ if ( ! flag )
+ {
+ return -1;
+ }
+
+ /* Run CPUID and copy EAX..EDX out through regs[0..3]. */
+ __asm
+ {
+ mov eax, cmd;
+ cpuid;
+ mov edi, [regs]
+ mov [edi], eax;
+ mov [edi+4], ebx;
+ mov [edi+8], ecx;
+ mov [edi+12], edx;
+
+ pop edi;
+ pop ebx;
+ }
+#endif // __x86_64__
+ return 0;
+
+
+#else
+
+ /* On x86-64, CPUID always exists, so the EFLAGS.ID probe is skipped
+ * and `flag` keeps its initial value of 1. */
+#ifndef __x86_64__
+ // see if we can use cpuid
+ __asm__ __volatile__ (
+ "sub $0x18, %%esp\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "mov %%eax, %%edx\n"
+ "xor $0x200000, %%eax\n"
+ "push %%eax\n"
+ "popf\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "xor %%edx, %%eax\n"
+ "mov %%eax, %0\n"
+ "add $0x18, %%esp\n"
+ : "=r"(flag) :
+ );
+#endif
+
+ if ( !flag )
+ return -1;
+
+ cpuid(cmd, regs[0], regs[1], regs[2], regs[3]); /* macro defined above */
+ return 0;
+#endif // _MSC_VER
+}
+
+/* Read the CPU time-stamp counter (RDTSC).  Used by CPUSpeedHz() to
+ * estimate the core clock frequency. */
+u64 GetCPUTick( void )
+{
+#if defined (_MSC_VER) && _MSC_VER >= 1400
+
+ return __rdtsc(); /* MSVC 2005+ intrinsic */
+
+#elif defined(__MSCW32__) && !defined(__x86_64__)
+
+ /* NOTE(review): "__MSCW32__" looks like a typo (no such standard macro;
+ * perhaps _MSC_VER was intended), so this branch is likely dead.  The
+ * bare RDTSC relies on the MSVC convention that a 64-bit return value
+ * is taken from EDX:EAX, hence no explicit return statement. */
+ __asm rdtsc;
+
+#else
+
+ /* GCC/Clang: RDTSC leaves the low 32 bits in EAX, high 32 in EDX. */
+ u32 _a, _d;
+ __asm__ __volatile__ ("rdtsc" : "=a"(_a), "=d"(_d));
+ return (u64)_a | ((u64)_d << 32);
+
+#endif
+}
+
+#if defined __LINUX__
+
+#include <sys/time.h>
+#include <errno.h>
+//*
+/* Millisecond wall-clock timer for non-Windows builds, mirroring the
+ * Win32 timeGetTime() interface.  Built on gettimeofday(); only the
+ * difference between two readings is meaningful. */
+unsigned long timeGetTime2()
+{
+ struct timeval now;
+ gettimeofday(&now, 0); /* well, maybe there are better ways */
+ return (unsigned long)now.tv_sec * 1000 + now.tv_usec / 1000; /* to do that, but at least it works */
+}
+//*/
+#endif
+
+/* Estimate CPU speed by counting TSC ticks across a wall-clock interval
+ * of `time` milliseconds (measured with timeGetTime2()).  Returns the
+ * tick delta plus the measured overhead of one GetCPUTick() call, or 0
+ * if the CPU has no time-stamp counter.  Callers scale the result to Hz. */
+s64 CPUSpeedHz( unsigned int time )
+{
+ s64 timeStart,
+ timeStop;
+ s64 startTick,
+ endTick;
+ s64 tickProbe;
+ s64 overhead;
+
+ if( ! cpucaps.hasTimeStampCounter )
+ {
+ return 0; //check if function is supported
+ }
+
+ /* Measure the cost of one GetCPUTick() call with a well-defined
+ * evaluation order.  (The original computed GetCPUTick()-GetCPUTick()
+ * in a single expression; C leaves the operand order unspecified, so
+ * even the sign of that result was indeterminate.) */
+ tickProbe = GetCPUTick();
+ overhead = GetCPUTick() - tickProbe;
+
+ /* Spin until a fresh millisecond begins, so timer granularity does not
+ * truncate the measured interval. */
+ timeStart = timeGetTime2( );
+ while( timeGetTime2( ) == timeStart )
+ {
+ timeStart = timeGetTime2( );
+ }
+ /* Latch the starting tick once at least 2 ms have elapsed. */
+ for(;;)
+ {
+ timeStop = timeGetTime2( );
+ if ( ( timeStop - timeStart ) > 1 )
+ {
+ startTick = GetCPUTick( );
+ break;
+ }
+ }
+
+ /* Latch the ending tick once the requested interval has passed. */
+ timeStart = timeStop;
+ for(;;)
+ {
+ timeStop = timeGetTime2( );
+ if ( ( timeStop - timeStart ) > time )
+ {
+ endTick = GetCPUTick( );
+ break;
+ }
+ }
+
+ return (s64)( ( endTick - startTick ) + ( overhead ) );
+}
+
+////////////////////////////////////////////////////
+void cpudetectInit( void )
+{
+ u32 regs[ 4 ];
+ u32 cmds;
+ u32 AMDspeed;
+ s8 AMDspeedString[10];
+ int cputype=0; // Cpu type
+ //AMD 64 STUFF
+ u32 x86_64_8BITBRANDID;
+ u32 x86_64_12BITBRANDID;
+ memset( cpuinfo.x86ID, 0, sizeof( cpuinfo.x86ID ) );
+ cpuinfo.x86Family = 0;
+ cpuinfo.x86Model = 0;
+ cpuinfo.x86PType = 0;
+ cpuinfo.x86StepID = 0;
+ cpuinfo.x86Flags = 0;
+ cpuinfo.x86EFlags = 0;
+
+ if ( iCpuId( 0, regs ) == -1 ) return;
+
+ cmds = regs[ 0 ];
+ ((u32*)cpuinfo.x86ID)[ 0 ] = regs[ 1 ];
+ ((u32*)cpuinfo.x86ID)[ 1 ] = regs[ 3 ];
+ ((u32*)cpuinfo.x86ID)[ 2 ] = regs[ 2 ];
+ if ( cmds >= 0x00000001 )
+ {
+ if ( iCpuId( 0x00000001, regs ) != -1 )
+ {
+ cpuinfo.x86StepID = regs[ 0 ] & 0xf;
+ cpuinfo.x86Model = (regs[ 0 ] >> 4) & 0xf;
+ cpuinfo.x86Family = (regs[ 0 ] >> 8) & 0xf;
+ cpuinfo.x86PType = (regs[ 0 ] >> 12) & 0x3;
+ x86_64_8BITBRANDID = regs[1] & 0xff;
+ cpuinfo.x86Flags = regs[ 3 ];
+ }
+ }
+ if ( iCpuId( 0x80000000, regs ) != -1 )
+ {
+ cmds = regs[ 0 ];
+ if ( cmds >= 0x80000001 )
+ {
+ if ( iCpuId( 0x80000001, regs ) != -1 )
+ {
+ x86_64_12BITBRANDID = regs[1] & 0xfff;
+ cpuinfo.x86EFlags = regs[ 3 ];
+
+ }
+ }
+ }
+ switch(cpuinfo.x86PType)
+ {
+ case 0:
+ strcpy( cpuinfo.x86Type, "Standard OEM");
+ break;
+ case 1:
+ strcpy( cpuinfo.x86Type, "Overdrive");
+ break;
+ case 2:
+ strcpy( cpuinfo.x86Type, "Dual");
+ break;
+ case 3:
+ strcpy( cpuinfo.x86Type, "Reserved");
+ break;
+ default:
+ strcpy( cpuinfo.x86Type, "Unknown");
+ break;
+ }
+ if ( cpuinfo.x86ID[ 0 ] == 'G' ){ cputype=0;}//trick lines but if you know a way better ;p
+ if ( cpuinfo.x86ID[ 0 ] == 'A' ){ cputype=1;}
+
+ if ( cputype == 0 ) //intel cpu
+ {
+ if( ( cpuinfo.x86Family >= 7 ) && ( cpuinfo.x86Family < 15 ) )
+ {
+ strcpy( cpuinfo.x86Fam, "Intel P6 family (Not PIV and Higher then PPro" );
+ }
+ else
+ {
+ switch( cpuinfo.x86Family )
+ {
+ // Start at 486 because if it's below 486 there is no cpuid instruction
+ case 4:
+ strcpy( cpuinfo.x86Fam, "Intel 486" );
+ break;
+ case 5:
+ switch( cpuinfo.x86Model )
+ {
+ case 4:
+ case 8: // 0.25 µm
+ strcpy( cpuinfo.x86Fam, "Intel Pentium (MMX)");
+ break;
+ default:
+ strcpy( cpuinfo.x86Fam, "Intel Pentium" );
+ }
+ break;
+ case 6:
+ switch( cpuinfo.x86Model )
+ {
+ case 0: // Pentium pro (P6 A-Step)
+ case 1: // Pentium pro
+ strcpy( cpuinfo.x86Fam, "Intel Pentium Pro" );
+ break;
+
+ case 2: // 66 MHz FSB
+ case 5: // Xeon/Celeron (0.25 µm)
+ case 6: // Internal L2 cache
+ strcpy( cpuinfo.x86Fam, "Intel Pentium II" );
+ break;
+
+ case 7: // Xeon external L2 cache
+ case 8: // Xeon/Celeron with 256 KB on-die L2 cache
+ case 10: // Xeon/Celeron with 1 or 2 MB on-die L2 cache
+ case 11: // Xeon/Celeron with Tualatin core, on-die cache
+ strcpy( cpuinfo.x86Fam, "Intel Pentium III" );
+ break;
+ case 15: // Core 2 Duo Allendale/Conroe
+ strcpy( cpuinfo.x86Fam, "Intel Core 2 Duo" );
+ break;
+
+ default:
+ strcpy( cpuinfo.x86Fam, "Intel Pentium Pro (Unknown)" );
+ }
+ break;
+ case 15:
+ switch( cpuinfo.x86Model )
+ {
+ case 0: // Willamette (A-Step)
+ case 1: // Willamette
+ strcpy( cpuinfo.x86Fam, "Willamette Intel Pentium IV" );
+ break;
+ case 2: // Northwood
+ strcpy( cpuinfo.x86Fam, "Northwood Intel Pentium IV" );
+ break;
+
+ default:
+ strcpy( cpuinfo.x86Fam, "Intel Pentium IV (Unknown)" );
+ break;
+ }
+ break;
+ default:
+ strcpy( cpuinfo.x86Fam, "Unknown Intel CPU" );
+ }
+ }
+ }
+ else if ( cputype == 1 ) //AMD cpu
+ {
+ if( cpuinfo.x86Family >= 7 )
+ {
+ if((x86_64_12BITBRANDID !=0) || (x86_64_8BITBRANDID !=0))
+ {
+ if(x86_64_8BITBRANDID == 0 )
+ {
+ switch((x86_64_12BITBRANDID >>6)& 0x3f)
+ {
+ case 4:
+ strcpy(cpuinfo.x86Fam,"AMD Athlon(tm) 64 Processor");
+ AMDspeed = 22 + (x86_64_12BITBRANDID & 0x1f);
+ //AMDspeedString = strtol(AMDspeed, (char**)NULL,10);
+ sprintf(AMDspeedString," %d",AMDspeed);
+ strcat(AMDspeedString,"00+");
+ strcat(cpuinfo.x86Fam,AMDspeedString);
+ break;
+ case 12:
+ strcpy(cpuinfo.x86Fam,"AMD Opteron(tm) Processor");
+ break;
+ case 5:
+ strcpy( cpuinfo.x86Fam, "AMD Athlon X2 Processor" );
+ AMDspeed = 22 + (x86_64_12BITBRANDID & 0x1f);
+ //AMDspeedString = strtol(AMDspeed, (char**)NULL,10);
+ sprintf(AMDspeedString," %d",AMDspeed);
+ strcat(AMDspeedString,"00+");
+ strcat(cpuinfo.x86Fam,AMDspeedString);
+ break;
+ case 44:
+ strcpy( cpuinfo.x86Fam, "AMD Opteron(tm) Dual Core Processor" );
+ break;
+ default:
+ strcpy(cpuinfo.x86Fam,"Unknown AMD 64 proccesor");
+
+ }
+ }
+ else //8bit brand id is non zero
+ {
+ strcpy(cpuinfo.x86Fam,"Unsupported yet AMD64 cpu");
+ }
+ }
+ else
+ {
+ strcpy( cpuinfo.x86Fam, "AMD K7+ Processor" );
+ }
+ }
+ else
+ {
+ switch ( cpuinfo.x86Family )
+ {
+ case 4:
+ switch( cpuinfo.x86Model )
+ {
+ case 14:
+ case 15: // Write-back enhanced
+ strcpy( cpuinfo.x86Fam, "AMD 5x86 Processor" );
+ break;
+
+ case 3: // DX2
+ case 7: // Write-back enhanced DX2
+ case 8: // DX4
+ case 9: // Write-back enhanced DX4
+ strcpy( cpuinfo.x86Fam, "AMD 486 Processor" );
+ break;
+
+
+ default:
+ strcpy( cpuinfo.x86Fam, "AMD Unknown Processor" );
+
+ }
+ break;
+
+ case 5:
+ switch( cpuinfo.x86Model)
+ {
+ case 0: // SSA 5 (75, 90 and 100 Mhz)
+ case 1: // 5k86 (PR 120 and 133 MHz)
+ case 2: // 5k86 (PR 166 MHz)
+ case 3: // K5 5k86 (PR 200 MHz)
+ strcpy( cpuinfo.x86Fam, "AMD K5 Processor" );
+ break;
+
+ case 6:
+ case 7: // (0.25 µm)
+ case 8: // K6-2
+ case 9: // K6-III
+ case 14: // K6-2+ / K6-III+
+ strcpy( cpuinfo.x86Fam, "AMD K6 Series Processor" );
+ break;
+
+ default:
+ strcpy( cpuinfo.x86Fam, "AMD Unknown Processor" );
+ }
+ break;
+ case 6:
+ strcpy( cpuinfo.x86Fam, "AMD Athlon XP Processor" );
+ break;
+ default:
+ strcpy( cpuinfo.x86Fam, "Unknown AMD CPU" );
+ }
+ }
+ }
+ //capabilities
+ cpucaps.hasFloatingPointUnit = ( cpuinfo.x86Flags >> 0 ) & 1;
+ cpucaps.hasVirtual8086ModeEnhancements = ( cpuinfo.x86Flags >> 1 ) & 1;
+ cpucaps.hasDebuggingExtensions = ( cpuinfo.x86Flags >> 2 ) & 1;
+ cpucaps.hasPageSizeExtensions = ( cpuinfo.x86Flags >> 3 ) & 1;
+ cpucaps.hasTimeStampCounter = ( cpuinfo.x86Flags >> 4 ) & 1;
+ cpucaps.hasModelSpecificRegisters = ( cpuinfo.x86Flags >> 5 ) & 1;
+ cpucaps.hasPhysicalAddressExtension = ( cpuinfo.x86Flags >> 6 ) & 1;
+ cpucaps.hasMachineCheckArchitecture = ( cpuinfo.x86Flags >> 7 ) & 1;
+ cpucaps.hasCOMPXCHG8BInstruction = ( cpuinfo.x86Flags >> 8 ) & 1;
+ cpucaps.hasAdvancedProgrammableInterruptController = ( cpuinfo.x86Flags >> 9 ) & 1;
+ cpucaps.hasSEPFastSystemCall = ( cpuinfo.x86Flags >> 11 ) & 1;
+ cpucaps.hasMemoryTypeRangeRegisters = ( cpuinfo.x86Flags >> 12 ) & 1;
+ cpucaps.hasPTEGlobalFlag = ( cpuinfo.x86Flags >> 13 ) & 1;
+ cpucaps.hasMachineCheckArchitecture = ( cpuinfo.x86Flags >> 14 ) & 1;
+ cpucaps.hasConditionalMoveAndCompareInstructions = ( cpuinfo.x86Flags >> 15 ) & 1;
+ cpucaps.hasFGPageAttributeTable = ( cpuinfo.x86Flags >> 16 ) & 1;
+ cpucaps.has36bitPageSizeExtension = ( cpuinfo.x86Flags >> 17 ) & 1;
+ cpucaps.hasProcessorSerialNumber = ( cpuinfo.x86Flags >> 18 ) & 1;
+ cpucaps.hasCFLUSHInstruction = ( cpuinfo.x86Flags >> 19 ) & 1;
+ cpucaps.hasDebugStore = ( cpuinfo.x86Flags >> 21 ) & 1;
+ cpucaps.hasACPIThermalMonitorAndClockControl = ( cpuinfo.x86Flags >> 22 ) & 1;
+ cpucaps.hasMultimediaExtensions = ( cpuinfo.x86Flags >> 23 ) & 1; //mmx
+ cpucaps.hasFastStreamingSIMDExtensionsSaveRestore = ( cpuinfo.x86Flags >> 24 ) & 1;
+ cpucaps.hasStreamingSIMDExtensions = ( cpuinfo.x86Flags >> 25 ) & 1; //sse
+ cpucaps.hasStreamingSIMD2Extensions = ( cpuinfo.x86Flags >> 26 ) & 1; //sse2
+ cpucaps.hasSelfSnoop = ( cpuinfo.x86Flags >> 27 ) & 1;
+ cpucaps.hasHyperThreading = ( cpuinfo.x86Flags >> 28 ) & 1;
+ cpucaps.hasThermalMonitor = ( cpuinfo.x86Flags >> 29 ) & 1;
+ cpucaps.hasIntel64BitArchitecture = ( cpuinfo.x86Flags >> 30 ) & 1;
+ //that is only for AMDs
+ cpucaps.hasMultimediaExtensionsExt = ( cpuinfo.x86EFlags >> 22 ) & 1; //mmx2
+ cpucaps.hasAMD64BitArchitecture = ( cpuinfo.x86EFlags >> 29 ) & 1; //64bit cpu
+ cpucaps.has3DNOWInstructionExtensionsExt = ( cpuinfo.x86EFlags >> 30 ) & 1; //3dnow+
+ cpucaps.has3DNOWInstructionExtensions = ( cpuinfo.x86EFlags >> 31 ) & 1; //3dnow
+ cpuinfo.cpuspeed = (u32 )(CPUSpeedHz( 1000 ) / 1000000);
+}
diff --git a/libpcsxcore/ix86_64/ix86_fpu.c b/libpcsxcore/ix86_64/ix86_fpu.c new file mode 100644 index 0000000..ca49eb7 --- /dev/null +++ b/libpcsxcore/ix86_64/ix86_fpu.c @@ -0,0 +1,248 @@ +// stop compiling if NORECBUILD build (only for Visual Studio) +#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) + +#include <stdio.h> +#include <string.h> +#include "ix86-64.h" + +/********************/ +/* FPU instructions */ +/********************/ + +/* fild m32 to fpu reg stack */ +void FILD32( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xDB), false, 0, from, 0); +} + +/* fistp m32 from fpu reg stack */ +void FISTP32( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xDB), false, 3, from, 0); +} + +/* fld m32 to fpu reg stack */ +void FLD32( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xD9), false, 0, from, 0); +} + +// fld st(i) +void FLD(int st) { write16(0xc0d9+(st<<8)); } + +void FLD1() { write16(0xe8d9); } +void FLDL2E() { write16(0xead9); } + +/* fst m32 from fpu reg stack */ +void FST32( uptr to ) +{ + MEMADDR_OP(0, VAROP1(0xD9), false, 2, to, 0); +} + +/* fstp m32 from fpu reg stack */ +void FSTP32( uptr to ) +{ + MEMADDR_OP(0, VAROP1(0xD9), false, 3, to, 0); +} + +// fstp st(i) +void FSTP(int st) { write16(0xd8dd+(st<<8)); } + +/* fldcw fpu control word from m16 */ +void FLDCW( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xD9), false, 5, from, 0); +} + +/* fnstcw fpu control word to m16 */ +void FNSTCW( uptr to ) +{ + MEMADDR_OP(0, VAROP1(0xD9), false, 7, to, 0); +} + +void FNSTSWtoAX( void ) +{ + write16( 0xE0DF ); +} + +void FXAM() +{ + write16(0xe5d9); +} + +void FDECSTP() { write16(0xf6d9); } +void FRNDINT() { write16(0xfcd9); } +void FXCH(int st) { write16(0xc8d9+(st<<8)); } +void F2XM1() { write16(0xf0d9); } +void FSCALE() { write16(0xfdd9); } + +/* fadd ST(src) to fpu reg stack ST(0) */ +void FADD32Rto0( x86IntRegType src ) +{ + write8( 0xD8 ); + write8( 0xC0 + src ); +} + +/* fadd ST(0) to fpu reg stack ST(src) */ +void FADD320toR( x86IntRegType src ) +{ + write8( 0xDC ); + 
write8( 0xC0 + src ); +} + +/* fsub ST(src) to fpu reg stack ST(0) */ +void FSUB32Rto0( x86IntRegType src ) +{ + write8( 0xD8 ); + write8( 0xE0 + src ); +} + +/* fsub ST(0) to fpu reg stack ST(src) */ +void FSUB320toR( x86IntRegType src ) +{ + write8( 0xDC ); + write8( 0xE8 + src ); +} + +/* fsubp -> substract ST(0) from ST(1), store in ST(1) and POP stack */ +void FSUBP( void ) +{ + write8( 0xDE ); + write8( 0xE9 ); +} + +/* fmul ST(src) to fpu reg stack ST(0) */ +void FMUL32Rto0( x86IntRegType src ) +{ + write8( 0xD8 ); + write8( 0xC8 + src ); +} + +/* fmul ST(0) to fpu reg stack ST(src) */ +void FMUL320toR( x86IntRegType src ) +{ + write8( 0xDC ); + write8( 0xC8 + src ); +} + +/* fdiv ST(src) to fpu reg stack ST(0) */ +void FDIV32Rto0( x86IntRegType src ) +{ + write8( 0xD8 ); + write8( 0xF0 + src ); +} + +/* fdiv ST(0) to fpu reg stack ST(src) */ +void FDIV320toR( x86IntRegType src ) +{ + write8( 0xDC ); + write8( 0xF8 + src ); +} + +void FDIV320toRP( x86IntRegType src ) +{ + write8( 0xDE ); + write8( 0xF8 + src ); +} + +/* fadd m32 to fpu reg stack */ +void FADD32( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xD8), false, 0, from, 0); +} + +/* fsub m32 to fpu reg stack */ +void FSUB32( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xD8), false, 4, from, 0); +} + +/* fmul m32 to fpu reg stack */ +void FMUL32( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xD8), false, 1, from, 0); +} + +/* fdiv m32 to fpu reg stack */ +void FDIV32( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xD8), false, 6, from, 0); +} + +/* fabs fpu reg stack */ +void FABS( void ) +{ + write16( 0xE1D9 ); +} + +/* fsqrt fpu reg stack */ +void FSQRT( void ) +{ + write16( 0xFAD9 ); +} + +void FPATAN(void) { write16(0xf3d9); } +void FSIN(void) { write16(0xfed9); } + +/* fchs fpu reg stack */ +void FCHS( void ) +{ + write16( 0xE0D9 ); +} + +/* fcomi st, st(i) */ +void FCOMI( x86IntRegType src ) +{ + write8( 0xDB ); + write8( 0xF0 + src ); +} + +/* fcomip st, st(i) */ +void FCOMIP( x86IntRegType src ) +{ + write8( 0xDF ); + 
write8( 0xF0 + src ); +} + +/* fucomi st, st(i) */ +void FUCOMI( x86IntRegType src ) +{ + write8( 0xDB ); + write8( 0xE8 + src ); +} + +/* fucomip st, st(i) */ +void FUCOMIP( x86IntRegType src ) +{ + write8( 0xDF ); + write8( 0xE8 + src ); +} + +/* fcom m32 to fpu reg stack */ +void FCOM32( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xD8), false, 2, from, 0); +} + +/* fcomp m32 to fpu reg stack */ +void FCOMP32( uptr from ) +{ + MEMADDR_OP(0, VAROP1(0xD8), false, 3, from, 0); +} + +#define FCMOV32( low, high ) \ + { \ + write8( low ); \ + write8( high + from ); \ + } + +void FCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); } +void FCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); } +void FCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); } +void FCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); } +void FCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); } +void FCMOVNE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC8 ); } +void FCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); } +void FCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); } + +#endif diff --git a/libpcsxcore/ix86_64/ix86_mmx.c b/libpcsxcore/ix86_64/ix86_mmx.c new file mode 100644 index 0000000..eddbbfc --- /dev/null +++ b/libpcsxcore/ix86_64/ix86_mmx.c @@ -0,0 +1,646 @@ +// stop compiling if NORECBUILD build (only for Visual Studio) +#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) + +#include "ix86-64.h" + +#include <assert.h> + +/********************/ +/* MMX instructions */ +/********************/ + +// r64 = mm + +/* movq m64 to r64 */ +void MOVQMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x6F), true, to, from, 0); +} + +/* movq r64 to m64 */ +void MOVQRtoM( uptr to, x86MMXRegType from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x7F), true, from, to, 0); +} + +/* pand r64 to r64 */ +void PANDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xDB0F ); + ModRM( 3, to, from ); +} + +void PANDNRtoR( x86MMXRegType to, 
x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xDF0F ); + ModRM( 3, to, from ); +} + +/* por r64 to r64 */ +void PORRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xEB0F ); + ModRM( 3, to, from ); +} + +/* pxor r64 to r64 */ +void PXORRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xEF0F ); + ModRM( 3, to, from ); +} + +/* psllq r64 to r64 */ +void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xF30F ); + ModRM( 3, to, from ); +} + +/* psllq m64 to r64 */ +void PSLLQMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xF3), true, to, from, 0); +} + +/* psllq imm8 to r64 */ +void PSLLQItoR( x86MMXRegType to, u8 from ) +{ + RexB(0, to); + write16( 0x730F ); + ModRM( 3, 6, to); + write8( from ); +} + +/* psrlq r64 to r64 */ +void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xD30F ); + ModRM( 3, to, from ); +} + +/* psrlq m64 to r64 */ +void PSRLQMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xD3), true, to, from, 0); +} + +/* psrlq imm8 to r64 */ +void PSRLQItoR( x86MMXRegType to, u8 from ) +{ + RexB(0, to); + write16( 0x730F ); + ModRM( 3, 2, to); + write8( from ); +} + +/* paddusb r64 to r64 */ +void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xDC0F ); + ModRM( 3, to, from ); +} + +/* paddusb m64 to r64 */ +void PADDUSBMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xDC), true, to, from, 0); +} + +/* paddusw r64 to r64 */ +void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xDD0F ); + ModRM( 3, to, from ); +} + +/* paddusw m64 to r64 */ +void PADDUSWMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xDD), true, to, from, 0); +} + +/* paddb r64 to r64 */ +void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 
0xFC0F ); + ModRM( 3, to, from ); +} + +/* paddb m64 to r64 */ +void PADDBMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xFC), true, to, from, 0); +} + +/* paddw r64 to r64 */ +void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xFD0F ); + ModRM( 3, to, from ); +} + +/* paddw m64 to r64 */ +void PADDWMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xFD), true, to, from, 0); +} + +/* paddd r64 to r64 */ +void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xFE0F ); + ModRM( 3, to, from ); +} + +/* paddd m64 to r64 */ +void PADDDMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xFE), true, to, from, 0); +} + +/* emms */ +void EMMS( void ) +{ + write16( 0x770F ); +} + +void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xEC0F ); + ModRM( 3, to, from ); +} + +void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xED0F ); + ModRM( 3, to, from ); +} + +// paddq m64 to r64 (sse2 only?) +void PADDQMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xD4), true, to, from, 0); +} + +// paddq r64 to r64 (sse2 only?) 
+void PADDQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+	RexRB(0, to, from);
+	write16( 0xD40F );
+	ModRM( 3, to, from );
+}
+
+/* psubsb r64 to r64 (packed subtract signed bytes, saturating) */
+void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+	RexRB(0, to, from);
+	write16( 0xE80F );
+	ModRM( 3, to, from );
+}
+
+/* psubsw r64 to r64 (packed subtract signed words, saturating) */
+void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+	RexRB(0, to, from);
+	write16( 0xE90F );
+	ModRM( 3, to, from );
+}
+
+
+/* psubb r64 to r64 (packed subtract bytes) */
+void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+	RexRB(0, to, from);
+	write16( 0xF80F );
+	ModRM( 3, to, from );
+}
+
+/* psubw r64 to r64 (packed subtract words) */
+void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+	RexRB(0, to, from);
+	write16( 0xF90F );
+	ModRM( 3, to, from );
+}
+
+/* psubd r64 to r64 (packed subtract dwords) */
+void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+	RexRB(0, to, from);
+	write16( 0xFA0F );
+	ModRM( 3, to, from );
+}
+
+/* psubd m64 to r64 */
+void PSUBDMtoR( x86MMXRegType to, uptr from )
+{
+	MEMADDR_OP(0, VAROP2(0x0F, 0xFA), true, to, from, 0);
+}
+
+/* psubusb r64 to r64 (packed subtract unsigned bytes, saturating) */
+void PSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+	RexRB(0, to, from);
+	write16( 0xD80F );
+	ModRM( 3, to, from );
+}
+
+/* psubusw r64 to r64 (packed subtract unsigned words, saturating) */
+void PSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+	RexRB(0, to, from);
+	write16( 0xD90F );
+	ModRM( 3, to, from );
+}
+
+// psubq m64 to r64 (sse2 only?)
+void PSUBQMtoR( x86MMXRegType to, uptr from )
+{
+	MEMADDR_OP(0, VAROP2(0x0F, 0xFB), true, to, from, 0);
+}
+
+// psubq r64 to r64 (sse2 only?)
+void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+	RexRB(0, to, from);
+	write16( 0xFB0F );
+	ModRM( 3, to, from );
+}
+
+// pmuludq m64 to r64 (sse2 only?)
+void PMULUDQMtoR( x86MMXRegType to, uptr from )
+{
+	MEMADDR_OP(0, VAROP2(0x0F, 0xF4), true, to, from, 0);
+}
+
+// pmuludq r64 to r64 (sse2 only?)
+void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xF40F ); + ModRM( 3, to, from ); +} + +void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0x740F ); + ModRM( 3, to, from ); +} + +void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0x750F ); + ModRM( 3, to, from ); +} + +void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0x760F ); + ModRM( 3, to, from ); +} + +void PCMPEQDMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x76), true, to, from, 0); +} + +void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0x640F ); + ModRM( 3, to, from ); +} + +void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0x650F ); + ModRM( 3, to, from ); +} + +void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0x660F ); + ModRM( 3, to, from ); +} + +void PCMPGTDMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x66), true, to, from, 0); +} + +void PSRLWItoR( x86MMXRegType to, u8 from ) +{ + RexB(0, to); + write16( 0x710F ); + ModRM( 3, 2 , to ); + write8( from ); +} + +void PSRLDItoR( x86MMXRegType to, u8 from ) +{ + RexB(0, to); + write16( 0x720F ); + ModRM( 3, 2 , to ); + write8( from ); +} + +void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xD20F ); + ModRM( 3, to, from ); +} + +void PSLLWItoR( x86MMXRegType to, u8 from ) +{ + RexB(0, to); + write16( 0x710F ); + ModRM( 3, 6 , to ); + write8( from ); +} + +void PSLLDItoR( x86MMXRegType to, u8 from ) +{ + RexB(0, to); + write16( 0x720F ); + ModRM( 3, 6 , to ); + write8( from ); +} + +void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xF20F ); + ModRM( 3, to, from ); +} + +void PSRAWItoR( x86MMXRegType to, u8 from ) +{ + RexB(0, to); + 
write16( 0x710F ); + ModRM( 3, 4 , to ); + write8( from ); +} + +void PSRADItoR( x86MMXRegType to, u8 from ) +{ + RexB(0, to); + write16( 0x720F ); + ModRM( 3, 4 , to ); + write8( from ); +} + +void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0xE20F ); + ModRM( 3, to, from ); +} + +/* por m64 to r64 */ +void PORMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xEB), true, to, from, 0); +} + +/* pxor m64 to r64 */ +void PXORMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xEF), true, to, from, 0); +} + +/* pand m64 to r64 */ +void PANDMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xDB), true, to, from, 0); +} + +void PANDNMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0xDF), true, to, from, 0); +} + +void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0x6A0F ); + ModRM( 3, to, from ); +} + +void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x6A), true, to, from, 0); +} + +void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0x620F ); + ModRM( 3, to, from ); +} + +void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x62), true, to, from, 0); +} + +void MOVQ64ItoR( x86MMXRegType reg, u64 i ) +{ + RexR(0, reg); + write16(0x6F0F); + ModRM(0, reg, DISP32); + write32(2); + JMP8( 8 ); + write64( i ); +} + +void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) +{ + RexRB(0, to, from); + write16( 0x6F0F ); + ModRM( 3, to, from ); +} + +void MOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset ) +{ + RexRB(0, to, from); + write16( 0x6F0F ); + + if( offset < 128 ) { + ModRM( 1, to, from ); + write8(offset); + } + else { + ModRM( 2, to, from ); + write32(offset); + } +} + +void MOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) +{ + RexRB(0, from, to); + write16( 0x7F0F ); + + if( 
offset < 128 ) { + ModRM( 1, from , to ); + write8(offset); + } + else { + ModRM( 2, from, to ); + write32(offset); + } +} + +/* movd m32 to r64 */ +void MOVDMtoMMX( x86MMXRegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x6E), true, to, from, 0); +} + +/* movd r64 to m32 */ +void MOVDMMXtoM( uptr to, x86MMXRegType from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x7E), true, from, to, 0); +} + +void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x6E0F ); + ModRM( 3, to, from ); +} + +void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ) +{ + RexRB(0, to, from); + write16( 0x6E0F ); + ModRM( 0, to, from ); +} + +void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ) +{ + RexRB(0, to, from); + write16( 0x6E0F ); + + if( offset < 128 ) { + ModRM( 1, to, from ); + write8(offset); + } + else { + ModRM( 2, to, from ); + write32(offset); + } +} + +void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) +{ + RexRB(0, from, to); + write16( 0x7E0F ); + ModRM( 3, from, to ); +} + +void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ) +{ + RexRB(0, from, to); + write16( 0x7E0F ); + ModRM( 0, from, to ); + if( to >= 4 ) { + // no idea why + assert( to == ESP ); + write8(0x24); + } + +} + +void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) +{ + RexRB(0, from, to); + write16( 0x7E0F ); + + if( offset < 128 ) { + ModRM( 1, from, to ); + write8(offset); + } + else { + ModRM( 2, from, to ); + write32(offset); + } +} + +///* movd r32 to r64 */ +//void MOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) +//{ +// write16( 0x6E0F ); +// ModRM( 3, to, from ); +//} +// +///* movq r64 to r32 */ +//void MOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) +//{ +// write16( 0x7E0F ); +// ModRM( 3, from, to ); +//} + +// untested +void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) +{ + RexRB(0, to, from); + write16( 0x630F ); + ModRM( 3, to, from ); +} + +void 
PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) +{ + RexRB(0, to, from); + write16( 0x6B0F ); + ModRM( 3, to, from ); +} + +void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) +{ + RexRB(0, to, from); + write16( 0xD70F ); + ModRM( 3, to, from ); +} + +void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) +{ + RexRB(0, to, from); + write16( 0xc40f ); + ModRM( 3, to, from ); + write8( imm8 ); +} + +void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) +{ + RexRB(0, to, from); + write16(0x700f); + ModRM( 3, to, from ); + write8(imm8); +} + +void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x70), true, to, from, 1 /* XXX was 0? */); + write8(imm8); +} + +void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) +{ + RexRB(0, to, from); + write16(0xf70f); + ModRM( 3, to, from ); +} + +#endif diff --git a/libpcsxcore/ix86_64/ix86_sse.c b/libpcsxcore/ix86_64/ix86_sse.c new file mode 100644 index 0000000..cb391dc --- /dev/null +++ b/libpcsxcore/ix86_64/ix86_sse.c @@ -0,0 +1,1455 @@ +// stop compiling if NORECBUILD build (only for Visual Studio) +#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) + +#include <assert.h> +#include "ix86-64.h" + +PCSX2_ALIGNED16(static unsigned int p[4]); +PCSX2_ALIGNED16(static unsigned int p2[4]); +PCSX2_ALIGNED16(static float f[4]); + + +XMMSSEType g_xmmtypes[XMMREGS] = {0}; + +/********************/ +/* SSE instructions */ +/********************/ + +#define SSEMtoRv( nc, code, overb ) \ + assert( cpucaps.hasStreamingSIMDExtensions ); \ + assert( to < XMMREGS ) ; \ + MEMADDR_OP(0, nc, code, true, to, from, overb) + +#define SSEMtoR( code, overb ) SSEMtoRv(2, code, overb) + +#define SSERtoMv( nc, code, overb ) \ + assert( cpucaps.hasStreamingSIMDExtensions ); \ + assert( from < XMMREGS) ; \ + MEMADDR_OP(0, nc, code, true, from, to, overb) + +#define SSERtoM( code, overb ) SSERtoMv( 2, code, overb ) \ + +#define SSE_SS_MtoR( code, overb ) \ + SSEMtoRv(3, (code << 8) | 
0xF3, overb) + +#define SSE_SS_RtoM( code, overb ) \ + SSERtoMv(3, (code << 8) | 0xF3, overb) + +#define SSERtoR( code ) \ + assert( cpucaps.hasStreamingSIMDExtensions ); \ + assert( to < XMMREGS && from < XMMREGS) ; \ + RexRB(0, to, from); \ + write16( code ); \ + ModRM( 3, to, from ); + +#define SSEMtoR66( code ) \ + SSEMtoRv( 3, (code << 8) | 0x66, 0 ) + +#define SSERtoM66( code ) \ + SSERtoMv( 3, (code << 8) | 0x66, 0 ) + +#define SSERtoR66( code ) \ + write8( 0x66 ); \ + SSERtoR( code ); + +#define _SSERtoR66( code ) \ + assert( cpucaps.hasStreamingSIMDExtensions ); \ + assert( to < XMMREGS && from < XMMREGS) ; \ + write8( 0x66 ); \ + RexRB(0, from, to); \ + write16( code ); \ + ModRM( 3, from, to ); + +#define SSE_SS_RtoR( code ) \ + assert( cpucaps.hasStreamingSIMDExtensions ); \ + assert( to < XMMREGS && from < XMMREGS) ; \ + write8( 0xf3 ); \ + RexRB(0, to, from); \ + write16( code ); \ + ModRM( 3, to, from ); + +#define CMPPSMtoR( op ) \ + SSEMtoR( 0xc20f, 1 ); \ + write8( op ); + +#define CMPPSRtoR( op ) \ + SSERtoR( 0xc20f ); \ + write8( op ); + +#define CMPSSMtoR( op ) \ + SSE_SS_MtoR( 0xc20f, 1 ); \ + write8( op ); + +#define CMPSSRtoR( op ) \ + SSE_SS_RtoR( 0xc20f ); \ + write8( op ); + + + +void WriteRmOffset(x86IntRegType to, int offset); +void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); + +/* movups [r32][r32*scale] to xmm1 */ +void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRXB(0, to, from2, from); + write16( 0x100f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); +} + +/* movups xmm1 to [r32][r32*scale] */ +void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRXB(1, to, from2, from); + write16( 0x110f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); +} + +/* movups [r32] to r32 */ +void SSE_MOVUPSRmtoR( 
x86IntRegType to, x86IntRegType from ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, to, from); + write16( 0x100f ); + ModRM( 0, to, from ); +} + +/* movups r32 to [r32] */ +void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, from, to); + write16( 0x110f ); + ModRM( 0, from, to ); +} + +/* movlps [r32] to r32 */ +void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(1, to, from); + write16( 0x120f ); + ModRM( 0, to, from ); +} + +void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, to, from); + write16( 0x120f ); + WriteRmOffsetFrom(to, from, offset); +} + +/* movaps r32 to [r32] */ +void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, from, to); + write16( 0x130f ); + ModRM( 0, from, to ); +} + +void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, from, to); + write16( 0x130f ); + WriteRmOffsetFrom(from, to, offset); +} + +/* movaps [r32][r32*scale] to xmm1 */ +void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +{ + assert( cpucaps.hasStreamingSIMDExtensions && from != EBP ); + RexRXB(0, to, from2, from); + write16( 0x280f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); +} + +/* movaps xmm1 to [r32][r32*scale] */ +void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +{ + assert( cpucaps.hasStreamingSIMDExtensions && from != EBP ); + RexRXB(0, to, from2, from); + write16( 0x290f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); +} + +// movaps [r32+offset] to r32 +void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +{ + assert( 
cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, to, from); + write16( 0x280f ); + WriteRmOffsetFrom(to, from, offset); +} + +// movaps r32 to [r32+offset] +void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, from, to); + write16( 0x290f ); + WriteRmOffsetFrom(from, to, offset); +} + +// movdqa [r32+offset] to r32 +void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + write8(0x66); + RexRB(0, to, from); + write16( 0x6f0f ); + WriteRmOffsetFrom(to, from, offset); +} + +// movdqa r32 to [r32+offset] +void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + write8(0x66); + RexRB(0, from, to); + write16( 0x7f0f ); + WriteRmOffsetFrom(from, to, offset); +} + +// movups [r32+offset] to r32 +void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +{ + RexRB(0, to, from); + write16( 0x100f ); + WriteRmOffsetFrom(to, from, offset); +} + +// movups r32 to [r32+offset] +void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, from, to); + write16( 0x110f ); + WriteRmOffsetFrom(from, to, offset); +} + +//**********************************************************************************/ +//MOVAPS: Move aligned Packed Single Precision FP values * +//********************************************************************************** +void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); } +void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); } +void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x280f ); } + +void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); } +void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) 
{ SSERtoM( 0x110f, 0 ); } + +void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVSD_XMM_to_XMM(to, from); + else { + write8(0xf2); + SSERtoR( 0x100f); + } +} + +void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_M64_to_XMM(to, from); + else { + SSE_SS_MtoR( 0x7e0f, 0); + } +} + +void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_XMM_to_XMM(to, from); + else { + SSE_SS_RtoR( 0x7e0f); + } +} + +void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) SSE_MOVLPS_XMM_to_M64(to, from); + else { + SSERtoM66(0xd60f); + } +} + +#ifndef __x86_64__ +void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVDQ2Q_XMM_to_MM(to, from); + else { + write8(0xf2); + SSERtoR( 0xd60f); + } +} +void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ2DQ_MM_to_XMM(to, from); + else { + SSE_SS_RtoR( 0xd60f); + } +} +#endif + +//**********************************************************************************/ +//MOVSS: Move Scalar Single-Precision FP value * +//********************************************************************************** +void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } +void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } +void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) +{ + write8(0xf3); + RexRB(0, from, to); + write16(0x110f); + ModRM(0, from, to); +} + +void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x100f ); } + +void SSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + write8(0xf3); + RexRB(0, to, from); + write16( 
0x100f ); + WriteRmOffsetFrom(to, from, offset); +} + +void SSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +{ + write8(0xf3); + RexRB(0, from, to); + write16(0x110f); + WriteRmOffsetFrom(from, to, offset); +} + +void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); } +//**********************************************************************************/ +//MOVLPS: Move low Packed Single-Precision FP * +//********************************************************************************** +void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } +void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } + +void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, to, from); + write16( 0x120f ); + WriteRmOffsetFrom(to, from, offset); +} + +void SSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +{ + RexRB(0, from, to); + write16(0x130f); + WriteRmOffsetFrom(from, to, offset); +} + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MOVHPS: Move High Packed Single-Precision FP * +//********************************************************************************** +void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } +void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } + +void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, to, from); + write16( 0x160f ); + WriteRmOffsetFrom(to, from, offset); +} + +void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +{ + assert( cpucaps.hasStreamingSIMDExtensions ); + RexRB(0, from, to); + 
write16(0x170f); + WriteRmOffsetFrom(from, to, offset); +} + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MOVLHPS: Moved packed Single-Precision FP low to high * +//********************************************************************************** +void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); } + +////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MOVHLPS: Moved packed Single-Precision FP High to Low * +//********************************************************************************** +void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); } + +/////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ANDPS: Logical Bit-wise AND for Single FP * +//********************************************************************************** +void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); } +void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); } + +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values * +//********************************************************************************** +void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); } +void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); } + +///////////////////////////////////////////////////////////////////////////////////// 
+//**********************************************************************************/ +//RCPPS : Packed Single-Precision FP Reciprocal * +//********************************************************************************** +void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); } +void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); } + +void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); } +void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); } + +////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ORPS : Bit-wise Logical OR of Single-Precision FP Data * +//********************************************************************************** +void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); } +void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); } + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//XORPS : Bitwise Logical XOR of Single-Precision FP Values * +//********************************************************************************** +void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } +void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } + +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ADDPS : ADD Packed Single-Precision FP Values * +//********************************************************************************** +void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); } +void 
SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); } + +//////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//ADDSS : ADD Scalar Single-Precision FP Values * +//********************************************************************************** +void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); } +void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); } + +///////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//SUBPS: Packed Single-Precision FP Subtract * +//********************************************************************************** +void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); } +void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); } + +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//SUBSS : Scalar Single-Precision FP Subtract * +//********************************************************************************** +void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); } +void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); } + +///////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MULPS : Packed Single-Precision FP Multiply * +//********************************************************************************** +void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); } +void SSE_MULPS_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); } + +//////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MULSS : Scalar Single-Precision FP Multiply * +//********************************************************************************** +void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); } +void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); } + +//////////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//Packed Single-Precission FP compare (CMPccPS) * +//********************************************************************************** +//missing SSE_CMPPS_I8_to_XMM +// SSE_CMPPS_M32_to_XMM +// SSE_CMPPS_XMM_to_XMM +void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 0 ); } +void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); } +void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 1 ); } +void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); } +void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 2 ); } +void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); } +void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 3 ); } +void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); } +void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 4 ); } +void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); } +void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 5 ); } +void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 5 ); } +void 
SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 6 ); } +void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); } +void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 7 ); } +void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); } + +/////////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//Scalar Single-Precission FP compare (CMPccSS) * +//********************************************************************************** +//missing SSE_CMPSS_I8_to_XMM +// SSE_CMPSS_M32_to_XMM +// SSE_CMPSS_XMM_to_XMM +void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 0 ); } +void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); } +void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 1 ); } +void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); } +void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 2 ); } +void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); } +void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 3 ); } +void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); } +void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 4 ); } +void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 4 ); } +void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 5 ); } +void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 5 ); } +void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 6 ); } +void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); } +void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { 
CMPSSMtoR( 7 ); } +void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); } + +void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ) +{ + MEMADDR_OP(0, VAROP2(0x0F, 0x2E), true, to, from, 0); +} + +void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + RexRB(0, to, from); + write16( 0x2e0f ); + ModRM( 3, to, from ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//RSQRTPS : Packed Single-Precision FP Square Root Reciprocal * +//********************************************************************************** +void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); } +void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x520f ); } + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal * +//********************************************************************************** +void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); } +void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x520f ); } + +//////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//SQRTPS : Packed Single-Precision FP Square Root * +//********************************************************************************** +void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); } +void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x510f ); } + +////////////////////////////////////////////////////////////////////////////////////// 
+//**********************************************************************************/ +//SQRTSS : Scalar Single-Precision FP Square Root * +//********************************************************************************** +void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); } +void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x510f ); } + +//////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MAXPS: Return Packed Single-Precision FP Maximum * +//********************************************************************************** +void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); } +void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); } + +///////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MAXSS: Return Scalar Single-Precision FP Maximum * +//********************************************************************************** +void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); } +void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); } + +#ifndef __x86_64__ +///////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion * +//********************************************************************************** +void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } +void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } + 
+/////////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion * +//********************************************************************************** +void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } +void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } +#endif + +void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { SSE_SS_MtoR(0x2c0f, 0); } +void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) +{ + write8(0xf3); + RexRB(0, to, from); + write16(0x2c0f); + ModRM(3, to, from); +} + +void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x2a0f, 0); } +void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) +{ + write8(0xf3); + RexRB(0, to, from); + write16(0x2a0f); + ModRM(3, to, from); +} + +/////////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion * +//********************************************************************************** +void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); } +void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); } + +//**********************************************************************************/ +//CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion * +//********************************************************************************** +void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); } +void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); } + +void SSE2_CVTTPS2DQ_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x5b0f); } +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MINPS: Return Packed Single-Precision FP Minimum * +//********************************************************************************** +void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); } +void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); } + +////////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MINSS: Return Scalar Single-Precision FP Minimum * +//********************************************************************************** +void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); } +void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); } + +#ifndef __x86_64__ +/////////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PMAXSW: Packed Signed Integer Word Maximum * +//********************************************************************************** +//missing + // SSE_PMAXSW_M64_to_MM +// SSE2_PMAXSW_M128_to_XMM +// SSE2_PMAXSW_XMM_to_XMM +void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } + +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PMINSW: Packed Signed Integer Word Minimum * +//********************************************************************************** +//missing + // SSE_PMINSW_M64_to_MM +// SSE2_PMINSW_M128_to_XMM +// SSE2_PMINSW_XMM_to_XMM +void 
SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } +#endif + +////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//SHUFPS: Shuffle Packed Single-Precision FP Values * +//********************************************************************************** +void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } +void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } + +void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) +{ + RexRB(0, to, from); + write16(0xc60f); + WriteRmOffsetFrom(to, from, offset); + write8(imm8); +} + +//////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PSHUFD: Shuffle Packed DoubleWords * +//********************************************************************************** +void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) { + SSE2EMU_PSHUFD_XMM_to_XMM(to, from, imm8); + } + else { + SSERtoR66( 0x700F ); + write8( imm8 ); + } +} +void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoRv( 3, 0x700F66, 1 ); write8( imm8 ); } + +void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); } +void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoRv(3, 0x700FF2, 1); write8(imm8); } +void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSE_SS_RtoR(0x700F); write8(imm8); } +void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSE_SS_MtoR(0x700F, 1); write8(imm8); } + 
+/////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data * +//********************************************************************************** +void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); } +void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); } + +//////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data * +//********************************************************************************** +void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); } +void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } + +//////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//DIVPS : Packed Single-Precision FP Divide * +//********************************************************************************** +void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); } +void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); } + +////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//DIVSS : Scalar Single-Precision FP Divide * +//********************************************************************************** +void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); } +void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 
0x5e0F ); } + +///////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//STMXCSR : Store Streaming SIMD Extension Control/Status * +//********************************************************************************** +void SSE_STMXCSR( uptr from ) { + MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 3, from, 0); +} + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//LDMXCSR : Load Streaming SIMD Extension Control/Status * +//********************************************************************************** +void SSE_LDMXCSR( uptr from ) { + MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 2, from, 0); +} + +///////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PADDB,PADDW,PADDD : Add Packed Integers * +//********************************************************************************** +void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); } +void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); } +void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); } +void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); } +void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); } +void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); } + +void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); } +void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); } + +/////////////////////////////////////////////////////////////////////////////////// 
+//**********************************************************************************/ +//PCMPxx: Compare Packed Integers * +//********************************************************************************** +void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); } +void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); } +void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); } +void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); } +void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); } +void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); } +void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); } +void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); } +void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); } +void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); } +void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) { + SSE_CMPEQPS_XMM_to_XMM(to, from); + } + else { + SSERtoR66( 0x760F ); + } +} + +void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) { + SSE_CMPEQPS_M128_to_XMM(to, from); + } + else { + SSEMtoR66( 0x760F ); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PEXTRW,PINSRW: Packed Extract/Insert Word * +//********************************************************************************** +void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } +void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ 
SSERtoR66(0xC40F); write8( imm8 ); } + +//////////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PSUBx: Subtract Packed Integers * +//********************************************************************************** +void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); } +void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); } +void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); } +void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); } +void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); } +void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); } +void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); } +void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } + +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//MOVD: Move Dword(32bit) to /from XMM reg * +//********************************************************************************** +void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); } +void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) { + SSE2EMU_MOVD_R_to_XMM(to, from); + } + else { + SSERtoR66(0x6E0F); + } +} + +void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) +{ + write8(0x66); + RexRB(0, to, from); + write16( 0x6e0f ); + ModRM( 0, to, from); +} + +void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + write8(0x66); + RexRB(0, to, from); + write16( 0x6e0f ); + WriteRmOffsetFrom(to, from, offset); +} + +void SSE2_MOVD_XMM_to_M32( 
u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } +void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { + if( !cpucaps.hasStreamingSIMD2Extensions ) { + SSE2EMU_MOVD_XMM_to_R(to, from); + } + else { + _SSERtoR66(0x7E0F); + } +} + +void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) +{ + write8(0x66); + RexRB(0, from, to); + write16( 0x7e0f ); + ModRM( 0, from, to ); +} + +void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +{ + if( !cpucaps.hasStreamingSIMD2Extensions ) { + SSE2EMU_MOVD_XMM_to_RmOffset(to, from, offset); + } + else { + write8(0x66); + RexRB(0, from, to); + write16( 0x7e0f ); + WriteRmOffsetFrom(from, to, offset); + } +} + +#ifdef __x86_64__ +void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ) +{ + assert( from < XMMREGS); + write8( 0x66 ); + RexRB(1, from, to); + write16( 0x7e0f ); + ModRM( 3, from, to ); +} + +void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ) +{ + assert( to < XMMREGS); + write8(0x66); + RexRB(1, to, from); + write16( 0x6e0f ); + ModRM( 3, to, from ); +} + +#endif + +//////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//POR : SSE Bitwise OR * +//********************************************************************************** +void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEB0F ); } +void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); } + +// logical and to &= from +void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); } +void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); } + +// to = (~to) & from +void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); } +void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F ); } + 
/////////////////////////////////////////////////////////////////////////////////////
//**********************************************************************************/
//PXOR : SSE Bitwise XOR  (66 0F EF)                                               *
//**********************************************************************************
void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEF0F ); }
void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEF0F ); }
///////////////////////////////////////////////////////////////////////////////////////

/* MOVDQA (aligned, 66 0F 6F/7F) and MOVDQU (unaligned, F3 0F 6F/7F).
 * The SSE_SS_* helpers supply the F3 prefix; opcode words are stored
 * byte-swapped because write16 emits little-endian. */
void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) {SSEMtoR66(0x6F0F); }
void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ){SSERtoM66(0x7F0F);}
void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSERtoR66(0x6F0F); }

void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x6F0F, 0); }
void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) { SSE_SS_RtoM(0x7F0F, 0); }
void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x6F0F); }

// shift right logical
// Immediate forms use the shared 0F 71/72/73 group opcodes; the ModRM /reg
// field (second ModRM argument) selects the operation, per the Intel SDM.

void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); }
void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); }
void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8)    // 66 0F 71 /2 ib
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x710F );
    ModRM( 3, 2 , to );
    write8( imm8 );
}

void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); }
void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD20F); }
void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8)    // 66 0F 72 /2 ib
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x720F );
    ModRM( 3, 2 , to );
    write8( imm8 );
}

void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); }
void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); }
void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8)    // 66 0F 73 /2 ib
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x730F );
    ModRM( 3, 2 , to );
    write8( imm8 );
}

void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)   // 66 0F 73 /3 ib (byte shift)
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x730F );
    ModRM( 3, 3 , to );
    write8( imm8 );
}

// shift right arithmetic

void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); }
void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); }
void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8)    // 66 0F 71 /4 ib
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x710F );
    ModRM( 3, 4 , to );
    write8( imm8 );
}

void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); }
void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); }
void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8)    // 66 0F 72 /4 ib
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x720F );
    ModRM( 3, 4 , to );
    write8( imm8 );
}

// shift left logical

void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); }
void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); }
void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8)    // 66 0F 71 /6 ib
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x710F );
    ModRM( 3, 6 , to );
    write8( imm8 );
}

void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); }
void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); }
void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8)    // 66 0F 72 /6 ib
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x720F );
    ModRM( 3, 6 , to );
    write8( imm8 );
}

void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); }
void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); }
void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8)    // 66 0F 73 /6 ib
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x730F );
    ModRM( 3, 6 , to );
    write8( imm8 );
}

void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)   // 66 0F 73 /7 ib (byte shift)
{
    write8( 0x66 );
    RexB(0, to);
    write16( 0x730F );
    ModRM( 3, 7 , to );
    write8( imm8 );
}


/* Packed integer min/max. */
void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEE0F ); }
void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEE0F ); }

void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDE0F ); }
void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xDE0F ); }

void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEA0F ); }
void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEA0F ); }

void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDA0F ); }
void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xDA0F ); }

// Saturating packed add/subtract (signed then unsigned variants).

void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEC0F ); }
void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEC0F ); }

void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xED0F ); }
void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xED0F ); }

void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE80F ); }
void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xE80F ); }

void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE90F ); }
void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xE90F ); }

void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); }
void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); }
void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); }
void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); }

void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); }
void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); }
void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); }
void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); }

//**********************************************************************************/
//PACKSSWB,PACKSSDW: Pack Saturate Signed Word
//**********************************************************************************
void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); }
void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); }
void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); }
void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); }

void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); }
void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); }

//**********************************************************************************/
//PUNPCK*: interleave low (L) / high (H) elements of `to` and `from`
//**********************************************************************************
void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); }
void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); }

void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); }
void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); }

void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); }
void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); }
void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); }
void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); }

void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); }
void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); }
void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); }
void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); }

void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); }
void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); }

void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); }
void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); }

/* Packed 16-bit multiplies (low/high halves of products) and 32x32->64. */
void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); }
void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); }
void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); }
void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); }

void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); }
void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); }

/* Move sign-bit masks into a GPR. */
void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); }

void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); }
void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); }

/* SSE3 horizontal add (F2 0F 7C).  The M128 form packs the F2 prefix into
 * the third opcode byte of SSEMtoRv -- presumably emitted little-endian as
 * F2 0F 7C; verify against the SSEMtoRv definition in ix86-64.c. */
void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); }
void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from){ SSEMtoRv( 3, 0x7c0fF2, 0 ); }

/* SSE3 duplicate even (MOVSLDUP, F3 0F 12) / odd (MOVSHDUP, F3 0F 16) lanes. */
void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
    write8(0xf3);
    RexRB(0, to, from);
    write16( 0x120f);
    ModRM( 3, to, from );
}

void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x120f, 0); }
void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x160f); }
void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x160f, 0); }

// SSE-X
// Type-aware dispatch: use the SSE2 integer-domain instruction when the CPU
// has SSE2 and the register is currently tagged as integer data in
// g_xmmtypes[]; otherwise fall back to the bit-equivalent SSE1 float form.
void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
    else SSE_MOVAPS_M128_to_XMM(to, from);
}

void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
    else SSE_MOVAPS_XMM_to_M128(to, from);
}

void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
    else SSE_MOVAPS_XMM_to_XMM(to, from);
}

void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
    else SSE_MOVAPSRmtoROffset(to, from, offset);
}

void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
    else SSE_MOVAPSRtoRmOffset(to, from, offset);
}

/* NOTE(review): the non-SSE2 fallback for the MOVDQU (unaligned) wrappers is
 * MOVAPS, which faults on unaligned addresses -- safe only if every caller
 * passes 16-byte-aligned memory; confirm against call sites. */
void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
    else SSE_MOVAPS_M128_to_XMM(to, from);
}

void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
    else SSE_MOVAPS_XMM_to_M128(to, from);
}

void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
    else SSE_MOVAPS_XMM_to_XMM(to, from);
}

void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
    else SSE_MOVSS_M32_to_XMM(to, from);
}

void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from);
    else SSE_MOVSS_XMM_to_M32(to, from);
}

void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from);
    else SSE_MOVSS_XMM_to_Rm(to, from);
}

void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
    else SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
}

void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
    else SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
}

void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from);
    else SSE_ORPS_M128_to_XMM(to, from);
}

void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from);
    else SSE_ORPS_XMM_to_XMM(to, from);
}

void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from);
    else SSE_XORPS_M128_to_XMM(to, from);
}

void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from);
    else SSE_XORPS_XMM_to_XMM(to, from);
}

void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from);
    else SSE_ANDPS_M128_to_XMM(to, from);
}

void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from);
    else SSE_ANDPS_XMM_to_XMM(to, from);
}

void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from);
    else SSE_ANDNPS_M128_to_XMM(to, from);
}

void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from);
    else SSE_ANDNPS_XMM_to_XMM(to, from);
}

void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
    else SSE_UNPCKLPS_M128_to_XMM(to, from);
}

void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
    else SSE_UNPCKLPS_XMM_to_XMM(to, from);
}

void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
    else SSE_UNPCKHPS_M128_to_XMM(to, from);
}

void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
    else SSE_UNPCKHPS_XMM_to_XMM(to, from);
}

/* MOVHLPS equivalent for integer-typed registers: PUNPCKHQDQ leaves
 * [to.hi, from.hi]; when to != from, the PSHUFD with 0x4e swaps the two
 * qwords so the low qword ends up as from.hi, matching MOVHLPS. */
void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) {
        SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from);
        if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e);
    }
    else {
        SSE_MOVHLPS_XMM_to_XMM(to, from);
    }
}

// SSE2 emulation
// These helpers synthesize SSE2 behavior from SSE1 instructions, using the
// file-scope scratch buffers p/p2/f (declared earlier in this file --
// presumably 16-byte aligned, since MOVAPS is used on them; TODO confirm).
// NOTE(review): several of them clobber EAX/EBX/ECX without saving them.

/* Emulated MOVSD: copy from's low qword into to, preserving to's high qword. */
void SSE2EMU_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from)
{
    SSE_SHUFPS_XMM_to_XMM(to, from, 0x4e);
    SSE_SHUFPS_XMM_to_XMM(to, to, 0x4e);
}

/* Emulated MOVQ m64 -> xmm: zero the register, then load the low qword. */
void SSE2EMU_MOVQ_M64_to_XMM( x86SSERegType to, uptr from)
{
    SSE_XORPS_XMM_to_XMM(to, to);
    SSE_MOVLPS_M64_to_XMM(to, from);
}

void SSE2EMU_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from)
{
    SSE_XORPS_XMM_to_XMM(to, to);
    SSE2EMU_MOVSD_XMM_to_XMM(to, from);
}

/* Emulated MOVD [reg+offset] -> xmm: build a zero-extended 128-bit value in
 * the scratch buffer p, then load it whole. */
void SSE2EMU_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
    MOV32RmtoROffset(EAX, from, offset);
    MOV32ItoM((uptr)p+4, 0);
    MOV32ItoM((uptr)p+8, 0);
    MOV32RtoM((uptr)p, EAX);
    MOV32ItoM((uptr)p+12, 0);
    SSE_MOVAPS_M128_to_XMM(to, (uptr)p);
}

void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset )
{
    SSE_MOVSS_XMM_to_M32((uptr)p, from);
    MOV32MtoR(EAX, (uptr)p);
    MOV32RtoRmOffset(to, EAX, offset);
}

#ifndef __x86_64__
extern void SetMMXstate();

/* Emulated MOVDQ2Q / MOVQ2DQ: bounce the low qword through scratch memory;
 * SetMMXstate() records that MMX state is now live (EMMS bookkeeping). */
void SSE2EMU_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
{
    SSE_MOVLPS_XMM_to_M64(p, from);
    MOVQMtoR(to, p);
    SetMMXstate();
}

void SSE2EMU_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
{
    MOVQRtoM(p, from);
    SSE_MOVLPS_M64_to_XMM(to, p);
    SetMMXstate();
}
#endif

/****************************************************************************/
/*  SSE2 Emulated functions for SSE CPU's by kekko                          */
/****************************************************************************/
/* Emulated PSHUFD: spill `from` to buffer p, then for each of the four
 * destination dwords pick source dword ((imm8 >> 2*i) & 3) and store it into
 * buffer p2, finally reload p2 into `to`.  Clobbers EAX, EBX, ECX. */
void SSE2EMU_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) {
    MOV64ItoR(EAX, (uptr)&p);
    MOV64ItoR(EBX, (uptr)&p2);
    SSE_MOVUPSRtoRm(EAX, from);

    /* dword 0: select (imm8 & 3) */
    MOV32ItoR(ECX, (u32)imm8);
    AND32ItoR(ECX, 3);
    SHL32ItoR(ECX, 2);
    ADD32RtoR(ECX, EAX);
    MOV32RmtoR(ECX, ECX);
    MOV32RtoRm(EBX, ECX);

    /* dword 1: select ((imm8 >> 2) & 3) */
    ADD32ItoR(EBX, 4);
    MOV32ItoR(ECX, (u32)imm8);
    SHR32ItoR(ECX, 2);
    AND32ItoR(ECX, 3);
    SHL32ItoR(ECX, 2);
    ADD32RtoR(ECX, EAX);
    MOV32RmtoR(ECX, ECX);
    MOV32RtoRm(EBX, ECX);

    /* dword 2: select ((imm8 >> 4) & 3) */
    ADD32ItoR(EBX, 4);
    MOV32ItoR(ECX, (u32)imm8);
    SHR32ItoR(ECX, 4);
    AND32ItoR(ECX, 3);
    SHL32ItoR(ECX, 2);
    ADD32RtoR(ECX, EAX);
    MOV32RmtoR(ECX, ECX);
    MOV32RtoRm(EBX, ECX);

    /* dword 3: select ((imm8 >> 6) & 3) */
    ADD32ItoR(EBX, 4);
    MOV32ItoR(ECX, (u32)imm8);
    SHR32ItoR(ECX, 6);
    AND32ItoR(ECX, 3);
    SHL32ItoR(ECX, 2);
    ADD32RtoR(ECX, EAX);
    MOV32RmtoR(ECX, ECX);
    MOV32RtoRm(EBX, ECX);

    /* rewind EBX to the start of p2 and load the shuffled result */
    SUB32ItoR(EBX, 12);

    SSE_MOVUPSRmtoR(to, EBX);
}

/* Emulated MOVD xmm -> r32 via the scratch buffer; clobbers `to` as a
 * temporary pointer before the final load. */
void SSE2EMU_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) {
    /* XXX? */
    MOV64ItoR(to, (uptr)&p);
    SSE_MOVUPSRtoRm(to, from);
    MOV32RmtoR(to, to);
}

#ifndef __x86_64__
extern void SetFPUstate();
extern void _freeMMXreg(int mmxreg);
#endif

/* Emulated CVTPS2DQ: spill, convert each float via x87 FLD/FISTP, reload.
 * Rounding therefore follows the current x87 control word, which may differ
 * from SSE2 CVTPS2DQ semantics -- longstanding caveat of this fallback. */
void SSE2EMU_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) {
#ifndef __x86_64__
    SetFPUstate();
    _freeMMXreg(7);
#endif
    SSE_MOVAPS_XMM_to_M128((uptr)f, from);

    FLD32((uptr)&f[0]);
    FISTP32((uptr)&p2[0]);
    FLD32((uptr)&f[1]);
    FISTP32((uptr)&p2[1]);
    FLD32((uptr)&f[2]);
    FISTP32((uptr)&p2[2]);
    FLD32((uptr)&f[3]);
    FISTP32((uptr)&p2[3]);

    SSE_MOVAPS_M128_to_XMM(to, (uptr)p2);
}

/* Emulated CVTDQ2PS: int->float element-wise via x87 FILD/FSTP. */
void SSE2EMU_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) {
#ifndef __x86_64__
    SetFPUstate();
    _freeMMXreg(7);
#endif
    FILD32(from);
    FSTP32((uptr)&f[0]);
    FILD32(from+4);
    FSTP32((uptr)&f[1]);
    FILD32(from+8);
    FSTP32((uptr)&f[2]);
    FILD32(from+12);
    FSTP32((uptr)&f[3]);

    SSE_MOVAPS_M128_to_XMM(to, (uptr)f);
}

/* Emulated MOVD xmm -> m32; clobbers EAX. */
void SSE2EMU_MOVD_XMM_to_M32( uptr to, x86SSERegType from ) {
    /* XXX? */
    MOV64ItoR(EAX, (uptr)&p);
    SSE_MOVUPSRtoRm(EAX, from);
    MOV32RmtoR(EAX, EAX);
    MOV32RtoM(to, EAX);
}

/* Emulated MOVD r32 -> xmm: zero-extend through the scratch buffer p. */
void SSE2EMU_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) {
    MOV32ItoM((uptr)p+4, 0);
    MOV32ItoM((uptr)p+8, 0);
    MOV32RtoM((uptr)p, from);
    MOV32ItoM((uptr)p+12, 0);
    SSE_MOVAPS_M128_to_XMM(to, (uptr)p);
}

#endif