Diffstat (limited to 'libpcsxcore/ix86_64')
-rw-r--r--  libpcsxcore/ix86_64/README            |    2
-rw-r--r--  libpcsxcore/ix86_64/iGte.h            |   79
-rw-r--r--  libpcsxcore/ix86_64/iR3000A-64.c      | 2963
-rw-r--r--  libpcsxcore/ix86_64/ix86-64.c         | 3139
-rw-r--r--  libpcsxcore/ix86_64/ix86-64.h         | 1776
-rw-r--r--  libpcsxcore/ix86_64/ix86_3dnow.c      |  178
-rw-r--r--  libpcsxcore/ix86_64/ix86_cpudetect.c  |  487
-rw-r--r--  libpcsxcore/ix86_64/ix86_fpu.c        |  248
-rw-r--r--  libpcsxcore/ix86_64/ix86_mmx.c        |  646
-rw-r--r--  libpcsxcore/ix86_64/ix86_sse.c        | 1455
10 files changed, 10973 insertions, 0 deletions
diff --git a/libpcsxcore/ix86_64/README b/libpcsxcore/ix86_64/README
new file mode 100644
index 0000000..af24e37
--- /dev/null
+++ b/libpcsxcore/ix86_64/README
@@ -0,0 +1,2 @@
+This is the AMD64 dynamic recompiler.
+Made from the opcode emitters of PCSX2 0.9.3 and the x86 recompiler, modified to fit; i.e. it currently takes no AMD64/SSE-specific code advantages.
diff --git a/libpcsxcore/ix86_64/iGte.h b/libpcsxcore/ix86_64/iGte.h
new file mode 100644
index 0000000..bcb2f4b
--- /dev/null
+++ b/libpcsxcore/ix86_64/iGte.h
@@ -0,0 +1,79 @@
+/***************************************************************************
+ * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.            *
+ ***************************************************************************/
+
+#ifndef __IGTE_H__
+#define __IGTE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "../r3000a.h"
+#include "../psxmem.h"
+
+#define CP2_FUNC(f) \
+void gte##f(); \
+static void rec##f() { \
+ iFlushRegs(); \
+ MOV32ItoM((uptr)&psxRegs.code, (u32)psxRegs.code); \
+ CALLFunc((uptr)gte##f); \
+/* branch = 2; */\
+}
+
+#define CP2_FUNCNC(f) \
+void gte##f(); \
+static void rec##f() { \
+ iFlushRegs(); \
+ CALLFunc((uptr)gte##f); \
+/* branch = 2; */\
+}
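+
+/* Each CP2_FUNC(f) expands to a recompiler stub rec##f() that flushes the
+ * constant cache, stores the current opcode into psxRegs.code and calls
+ * the C implementation gte##f(); CP2_FUNCNC omits the opcode store for GTE
+ * ops that do not need to decode it. CP2_FUNC(MFC2), for example, yields:
+ *
+ *   static void recMFC2() {
+ *       iFlushRegs();
+ *       MOV32ItoM((uptr)&psxRegs.code, (u32)psxRegs.code);
+ *       CALLFunc((uptr)gteMFC2);
+ *   }
+ */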
+
+CP2_FUNC(MFC2);
+CP2_FUNC(MTC2);
+CP2_FUNC(CFC2);
+CP2_FUNC(CTC2);
+CP2_FUNC(LWC2);
+CP2_FUNC(SWC2);
+CP2_FUNCNC(RTPS);
+CP2_FUNC(OP);
+CP2_FUNCNC(NCLIP);
+CP2_FUNC(DPCS);
+CP2_FUNC(INTPL);
+CP2_FUNC(MVMVA);
+CP2_FUNCNC(NCDS);
+CP2_FUNCNC(NCDT);
+CP2_FUNCNC(CDP);
+CP2_FUNCNC(NCCS);
+CP2_FUNCNC(CC);
+CP2_FUNCNC(NCS);
+CP2_FUNCNC(NCT);
+CP2_FUNC(SQR);
+CP2_FUNC(DCPL);
+CP2_FUNCNC(DPCT);
+CP2_FUNCNC(AVSZ3);
+CP2_FUNCNC(AVSZ4);
+CP2_FUNCNC(RTPT);
+CP2_FUNC(GPF);
+CP2_FUNC(GPL);
+CP2_FUNCNC(NCCT);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/libpcsxcore/ix86_64/iR3000A-64.c b/libpcsxcore/ix86_64/iR3000A-64.c
new file mode 100644
index 0000000..4ca3e99
--- /dev/null
+++ b/libpcsxcore/ix86_64/iR3000A-64.c
@@ -0,0 +1,2963 @@
+/***************************************************************************
+ * Copyright (C) 2007 Ryan Schultz, PCSX-df Team, PCSX team *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.            *
+ ***************************************************************************/
+
+/*
+* i386 assembly functions for R3000A core.
+*/
+
+#include "ix86-64.h"
+#include "../r3000a.h"
+#include "../psxhle.h"
+
+#include <sys/mman.h>
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+uptr* psxRecLUT;
+
+#define PTRMULT (sizeof(uptr) / sizeof(u32))
+
+#undef PC_REC
+#undef PC_REC8
+#undef PC_REC16
+#undef PC_REC32
+#define PC_REC(x) (psxRecLUT[(x) >> 16] + PTRMULT * ((x) & 0xffff))
+#define PC_RECP(x) (*(uptr *)PC_REC(x))
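+
+/* A sketch of the two-level lookup: for a block at, say, 0x80010000,
+ * psxRecLUT[0x8001] holds the base of that page's table of block pointers,
+ * and the low 16 bits of the address (scaled by PTRMULT, so each 4-byte
+ * MIPS word gets a pointer-sized slot) select the entry:
+ *
+ *   uptr *block = (uptr *)PC_REC(0x80010000);
+ *   if (*block != 0)
+ *       ((void (*)())*block)();   // jump to the recompiled code
+ */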
+
+#define RECMEM_SIZE (PTRMULT * 8 * 1024 * 1024)
+
+static char *recMem;		/* the recompiled blocks live here */
+static char *recRAM;		/* pointers to the blocks for RAM live here */
+static char *recROM;		/* and pointers to the BIOS ROM blocks here */
+
+static u32 pc; /* recompiler pc */
+static u32 pcold; /* recompiler oldpc */
+static int count;		/* recompiler instruction count */
+static int branch; /* set for branch */
+static u32 target; /* branch target */
+static u32 resp;
+
+typedef struct {
+ int state;
+ u32 k;
+ int reg;
+} iRegisters;
+
+static iRegisters iRegs[32];
+static iRegisters iRegsS[32];
+
+#define ST_UNK 0
+#define ST_CONST 1
+#define ST_MAPPED 2
+
+#define IsConst(reg) (iRegs[reg].state == ST_CONST)
+#define IsMapped(reg) (iRegs[reg].state == ST_MAPPED)
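+
+/* A minimal sketch of how the constant tracker folds code: LUI maps rt to
+ * a compile-time constant, and a following ADDIU with a constant source
+ * folds into another MapConst() instead of emitting any x86:
+ *
+ *   MapConst(_Rt_, psxRegs.code << 16);       // recLUI: rt is now ST_CONST
+ *   if (IsConst(_Rs_))                        // recADDIU: fold, emit nothing
+ *       MapConst(_Rt_, iRegs[_Rs_].k + _Imm_);
+ */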
+
+static void (*recBSC[64])();
+static void (*recSPC[64])();
+static void (*recREG[32])();
+static void (*recCP0[32])();
+static void (*recCP2[64])();
+static void (*recCP2BSC[32])();
+
+#define STACKSIZE 0x18
+static void StackRes()
+{
+#ifdef __x86_64__
+ ADD64ItoR(RSP, STACKSIZE);
+#else
+ if (resp) ADD32ItoR(ESP, resp);
+#endif
+}
+
+static void MapConst(int reg, u32 _const) {
+ iRegs[reg].k = _const;
+ iRegs[reg].state = ST_CONST;
+}
+
+static void iFlushReg(int reg) {
+ if (IsConst(reg)) {
+ MOV32ItoM((uptr)&psxRegs.GPR.r[reg], iRegs[reg].k);
+ }
+ iRegs[reg].state = ST_UNK;
+}
+
+static void iFlushRegs() {
+ int i;
+
+ for (i=1; i<32; i++) {
+ iFlushReg(i);
+ }
+}
+
+static void iRet() {
+ /* store cycle */
+ count = ((pc - pcold) / 4) * BIAS;
+ ADD32ItoM((uptr)&psxRegs.cycle, count);
+ StackRes();
+ RET();
+}
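+
+/* (pc - pcold) / 4 is the number of MIPS instructions in the block (each
+ * is 4 bytes long), so iRet() charges BIAS cycles per instruction to
+ * psxRegs.cycle before returning to the dispatch loop. */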
+
+static int iLoadTest() {
+ u32 tmp;
+
+ // check for load delay
+ tmp = psxRegs.code >> 26;
+ switch (tmp) {
+ case 0x10: // COP0
+ switch (_Rs_) {
+ case 0x00: // MFC0
+ case 0x02: // CFC0
+ return 1;
+ }
+ break;
+ case 0x12: // COP2
+ switch (_Funct_) {
+ case 0x00:
+ switch (_Rs_) {
+ case 0x00: // MFC2
+ case 0x02: // CFC2
+ return 1;
+ }
+ break;
+ }
+ break;
+ case 0x32: // LWC2
+ return 1;
+ default:
+ if (tmp >= 0x20 && tmp <= 0x26) { // LB/LH/LWL/LW/LBU/LHU/LWR
+ return 1;
+ }
+ break;
+ }
+ return 0;
+}
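+
+/* iLoadTest() flags a load (or MFC0/CFC0/MFC2/CFC2/LWC2) sitting in a
+ * branch delay slot, e.g.
+ *
+ *     bne  v0, zero, target
+ *     lw   v1, 0(a0)          # delay slot: the load delay must be honoured
+ *
+ * Such slots are not inlined; the callers hand them to psxDelayTest at run
+ * time instead. */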
+
+/* set a pending branch */
+static void SetBranch() {
+ branch = 1;
+ psxRegs.code = PSXMu32(pc);
+ pc+=4;
+
+ if (iLoadTest() == 1) {
+ iFlushRegs();
+ MOV32ItoM((uptr)&psxRegs.code, psxRegs.code);
+ /* store cycle */
+ count = ((pc - pcold) / 4) * BIAS;
+ ADD32ItoM((uptr)&psxRegs.cycle, count);
+
+ //PUSH64M((uptr)&target);
+ MOV32MtoR(X86ARG2, (uptr)&target);
+ //PUSHI(_Rt_);
+ MOV64ItoR(X86ARG1, _Rt_);
+ CALLFunc((uptr)psxDelayTest);
+ StackRes();
+ RET();
+ return;
+ }
+
+ recBSC[psxRegs.code>>26]();
+
+ iFlushRegs();
+ MOV32MtoR(EAX, (uptr)&target);
+ MOV32RtoM((uptr)&psxRegs.pc, EAX);
+ CALLFunc((uptr)psxBranchTest);
+
+ iRet();
+}
+
+static void iJump(u32 branchPC) {
+ branch = 1;
+ psxRegs.code = PSXMu32(pc);
+ pc+=4;
+
+ if (iLoadTest() == 1) {
+ iFlushRegs();
+ MOV32ItoM((uptr)&psxRegs.code, psxRegs.code);
+ /* store cycle */
+ count = ((pc - pcold) / 4) * BIAS;
+ ADD32ItoM((uptr)&psxRegs.cycle, count);
+
+ //PUSHI(branchPC);
+ MOV64ItoR(X86ARG2, branchPC);
+ //PUSHI(_Rt_);
+ MOV64ItoR(X86ARG1, _Rt_);
+ CALLFunc((uptr)psxDelayTest);
+ //ADD32ItoR(ESP, 2*8);
+ StackRes();
+ RET();
+ return;
+ }
+
+ recBSC[psxRegs.code>>26]();
+
+ iFlushRegs();
+ MOV32ItoM((uptr)&psxRegs.pc, branchPC);
+ CALLFunc((uptr)psxBranchTest);
+ /* store cycle */
+ count = ((pc - pcold) / 4) * BIAS;
+ ADD32ItoM((uptr)&psxRegs.cycle, count);
+ StackRes();
+
+ RET();
+ //* XXX?
+	// an interrupt may just have occurred, so check for it
+ CMP32ItoM((uptr)&psxRegs.pc, branchPC);
+ j8Ptr[0] = JE8(0);
+
+ RET();
+
+ x86SetJ8(j8Ptr[0]);
+ MOV64MtoR(RAX, PC_REC(branchPC));
+ TEST64RtoR(RAX,RAX);
+ j8Ptr[1] = JNE8(0);
+
+ RET();
+
+ x86SetJ8(j8Ptr[1]);
+
+ RET();
+ //JMP32R(EAX);
+ JMPR(EAX);
+ //*/
+}
+
+static void iBranch(u32 branchPC, int savectx) {
+ u32 respold=0;
+
+ if (savectx) {
+ respold = resp;
+ memcpy(iRegsS, iRegs, sizeof(iRegs));
+ }
+
+ branch = 1;
+ psxRegs.code = PSXMu32(pc);
+
+	// the delay test is only done when the branch is taken;
+	// savectx == 0 indicates that :)
+ if (savectx == 0 && iLoadTest() == 1) {
+ iFlushRegs();
+ MOV32ItoM((uptr)&psxRegs.code, psxRegs.code);
+ /* store cycle */
+ count = (((pc+4) - pcold) / 4) * BIAS;
+ ADD32ItoM((uptr)&psxRegs.cycle, count);
+ //if (resp) ADD32ItoR(ESP, resp);
+
+ //PUSHI(branchPC);
+ MOV64ItoR(X86ARG2, branchPC);
+ //PUSHI(_Rt_);
+ MOV64ItoR(X86ARG1,_Rt_);
+ CALLFunc((uptr)psxDelayTest);
+ StackRes();
+ RET();
+ return;
+ }
+
+ pc+= 4;
+ recBSC[psxRegs.code>>26]();
+
+ iFlushRegs();
+ MOV32ItoM((uptr)&psxRegs.pc, branchPC);
+ CALLFunc((uptr)psxBranchTest);
+ /* store cycle */
+ count = ((pc - pcold) / 4) * BIAS;
+ ADD32ItoM((uptr)&psxRegs.cycle, count);
+
+ StackRes();
+
+	// an interrupt may just have occurred, so check for it
+ CMP32ItoM((uptr)&psxRegs.pc, branchPC);
+ j8Ptr[1] = JE8(0);
+
+ RET();
+
+ x86SetJ8(j8Ptr[1]);
+ MOV64MtoR(RAX, PC_REC(branchPC));
+ TEST64RtoR(RAX, RAX);
+ j8Ptr[2] = JNE8(0);
+
+ RET();
+
+ x86SetJ8(j8Ptr[2]);
+ //JMP32R(EAX);
+ JMPR(EAX);
+
+ pc-= 4;
+ if (savectx) {
+ resp = respold;
+ memcpy(iRegs, iRegsS, sizeof(iRegs));
+ }
+}
+
+
+char *txt0 = "EAX = %x : ECX = %x : EDX = %x\n";
+char *txt1 = "EAX = %x\n";
+char *txt2 = "M32 = %x\n";
+
+/*
+void iLogX86() {
+ PUSHA32();
+
+ PUSH32R (EDX);
+ PUSH32R (ECX);
+ PUSH32R (EAX);
+ PUSH32M ((uptr)&txt0);
+ CALLFunc((uptr)SysPrintf);
+ ADD32ItoR(ESP, 4*4);
+
+ POPA32();
+}
+*/
+
+void iLogEAX() {
+ PUSH64R (EAX);
+ PUSH64M ((uptr)&txt1);
+ CALLFunc((uptr)SysPrintf);
+ ADD32ItoR(ESP, 8*2);
+}
+
+void iLogM32(u32 mem) {
+ PUSH64M (mem);
+ PUSH64M ((uptr)&txt2);
+ CALLFunc((uptr)SysPrintf);
+ ADD32ItoR(ESP, 8*2);
+}
+
+static void iDumpRegs() {
+ int i, j;
+
+ printf("%x %x\n", psxRegs.pc, psxRegs.cycle);
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 8; j++)
+			printf("%x ", psxRegs.GPR.r[i * 8 + j]);
+ printf("\n");
+ }
+}
+
+void iDumpBlock(char *ptr) {
+ FILE *f;
+ u32 i;
+
+ SysPrintf("dump1 %x:%x, %x\n", psxRegs.pc, pc, psxRegs.cycle);
+
+ for (i = psxRegs.pc; i < pc; i+=4)
+ SysPrintf("%s\n", disR3000AF(PSXMu32(i), i));
+
+ fflush(stdout);
+ f = fopen("dump1", "w");
+ fwrite(ptr, 1, (uptr)x86Ptr - (uptr)ptr, f);
+ fclose(f);
+ //system("ndisasm -b64 dump1");
+ fflush(stdout);
+}
+
+#define REC_FUNC(f) \
+void psx##f(); \
+static void rec##f() { \
+ iFlushRegs(); \
+ MOV32ItoM((uptr)&psxRegs.code, (u32)psxRegs.code); \
+ MOV32ItoM((uptr)&psxRegs.pc, (u32)pc); \
+ CALLFunc((uptr)psx##f); \
+/* branch = 2; */\
+}
+
+#define REC_SYS(f) \
+void psx##f(); \
+static void rec##f() { \
+ iFlushRegs(); \
+ MOV32ItoM((uptr)&psxRegs.code, (u32)psxRegs.code); \
+ MOV32ItoM((uptr)&psxRegs.pc, (u32)pc); \
+ CALLFunc((uptr)psx##f); \
+ branch = 2; \
+ iRet(); \
+}
+
+#define REC_BRANCH(f) \
+void psx##f(); \
+static void rec##f() { \
+ iFlushRegs(); \
+ MOV32ItoM((uptr)&psxRegs.code, (u32)psxRegs.code); \
+ MOV32ItoM((uptr)&psxRegs.pc, (u32)pc); \
+ CALLFunc((uptr)psx##f); \
+ branch = 2; \
+ iRet(); \
+}
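+
+/* The three macros above are interpreter fallbacks: they flush the
+ * constant cache, make psxRegs.code/psxRegs.pc current and call the C
+ * implementation psx##f(); REC_SYS and REC_BRANCH additionally set
+ * branch = 2 and close the block with iRet(), since the handler may
+ * redirect control flow. */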
+
+static void recRecompile();
+
+static int recInit() {
+ int i;
+
+ psxRecLUT = (uptr*) malloc(0x010000 * sizeof(uptr));
+
+ recMem = mmap(0,
+ RECMEM_SIZE + PTRMULT*0x1000,
+ PROT_EXEC | PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ recRAM = mmap(0,
+ 0x280000*PTRMULT,
+ PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ recROM = &recRAM[0x200000*PTRMULT];
+
+	if (recRAM == MAP_FAILED || recMem == MAP_FAILED || psxRecLUT == NULL) {
+ SysMessage("Error allocating memory"); return -1;
+ }
+ memset(recMem, 0, RECMEM_SIZE);
+ memset(recRAM, 0, 0x200000 * PTRMULT);
+ memset(recROM, 0, 0x080000 * PTRMULT);
+
+ for (i=0; i<0x80; i++) psxRecLUT[i + 0x0000] = (uptr)&recRAM[PTRMULT*((i & 0x1f) << 16)];
+ memcpy(psxRecLUT + 0x8000, psxRecLUT, 0x80 * sizeof(uptr));
+ memcpy(psxRecLUT + 0xa000, psxRecLUT, 0x80 * sizeof(uptr));
+
+ for (i=0; i<0x08; i++) psxRecLUT[i + 0xbfc0] = (uptr)&recROM[PTRMULT*(i << 16)];
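+	/* The RAM loop maps the 2MB of RAM (mirrored, hence the & 0x1f) into
+	 * KUSEG at 0x0000xxxx; the two memcpy()s clone that mapping into
+	 * KSEG0 (0x8000xxxx) and KSEG1 (0xa000xxxx), and the loop above maps
+	 * the 512KB BIOS ROM at 0xbfc00000. */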
+
+ return 0;
+}
+
+static void recReset() {
+ memset(recRAM, 0, 0x200000 * PTRMULT);
+ memset(recROM, 0, 0x080000 * PTRMULT);
+
+ //x86Init();
+ cpudetectInit();
+ x86SetPtr(recMem);
+
+ branch = 0;
+ memset(iRegs, 0, sizeof(iRegs));
+ iRegs[0].state = ST_CONST;
+ iRegs[0].k = 0;
+}
+
+static void recShutdown() {
+ if (recMem == NULL) return;
+ free(psxRecLUT);
+ munmap(recMem, RECMEM_SIZE + PTRMULT*0x1000);
+ munmap(recRAM, 0x280000*PTRMULT);
+ x86Shutdown();
+}
+
+static void recError() {
+ SysReset();
+ ClosePlugins();
+ SysMessage("Unrecoverable error while running recompiler\n");
+ SysRunGui();
+}
+
+/*__inline*/ static void execute() {
+ void (*recFunc)();
+ uptr *p;
+
+ p = (uptr *)PC_REC(psxRegs.pc);
+ // if (!p) { recError(); return; }
+
+ if (*p == 0) {
+ recRecompile();
+ }
+
+ if (*p < (uptr)recMem || *p >= (uptr)recMem + RECMEM_SIZE)
+ {
+ recError();
+ return;
+ }
+ recFunc = (void (*)())*p;
+ (*recFunc)();
+}
+
+static void recExecute() {
+ for (;;) execute();
+}
+
+static void recExecuteBlock() {
+ execute();
+}
+
+static void recClear(u32 Addr, u32 Size) {
+ memset((void*)PC_REC(Addr), 0, Size * sizeof(uptr));
+}
+
+static void recNULL() {
+// SysMessage("recUNK: %8.8x\n", psxRegs.code);
+}
+
+/*********************************************************
+* dispatches to the opcode tables...                     *
+* Format: table[opcode field]                            *
+*********************************************************/
+
+//REC_SYS(SPECIAL);
+#if 1
+static void recSPECIAL() {
+ recSPC[_Funct_]();
+}
+#endif
+
+static void recREGIMM() {
+ recREG[_Rt_]();
+}
+
+static void recCOP0() {
+ recCP0[_Rs_]();
+}
+
+//REC_SYS(COP2);
+#if 1
+static void recCOP2() {
+ recCP2[_Funct_]();
+}
+#endif
+
+static void recBASIC() {
+ recCP2BSC[_Rs_]();
+}
+
+//end of the opcode tables...
+
+/*********************************************************
+* Arithmetic with immediate operand *
+* Format: OP rt, rs, immediate *
+*********************************************************/
+
+#if 0
+REC_FUNC(ADDI);
+REC_FUNC(ADDIU);
+REC_FUNC(ANDI);
+REC_FUNC(ORI);
+REC_FUNC(XORI);
+REC_FUNC(SLTI);
+REC_FUNC(SLTIU);
+#endif
+
+#if 1
+static void recADDIU() {
+// Rt = Rs + Im
+ if (!_Rt_) return;
+
+// iFlushRegs();
+
+ if (_Rs_ == _Rt_) {
+ if (IsConst(_Rt_)) {
+ iRegs[_Rt_].k+= _Imm_;
+ } else {
+ if (_Imm_ == 1) {
+ INC32M((uptr)&psxRegs.GPR.r[_Rt_]);
+ } else if (_Imm_ == -1) {
+ DEC32M((uptr)&psxRegs.GPR.r[_Rt_]);
+ } else if (_Imm_) {
+ ADD32ItoM((uptr)&psxRegs.GPR.r[_Rt_], _Imm_);
+ }
+ }
+ } else {
+ if (IsConst(_Rs_)) {
+ MapConst(_Rt_, iRegs[_Rs_].k + _Imm_);
+ } else {
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_Imm_ == 1) {
+ INC32R(EAX);
+ } else if (_Imm_ == -1) {
+ DEC32R(EAX);
+ } else if (_Imm_) {
+ ADD32ItoR(EAX, _Imm_);
+ }
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+ }
+}
+
+static void recADDI() {
+// Rt = Rs + Im
+ recADDIU();
+}
+
+static void recSLTI() {
+// Rt = Rs < Im (signed)
+ if (!_Rt_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ MapConst(_Rt_, (s32)iRegs[_Rs_].k < _Imm_);
+ } else {
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ CMP32ItoR(EAX, _Imm_);
+ SETL8R (EAX);
+ AND32ItoR(EAX, 0xff);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+}
+
+static void recSLTIU() {
+// Rt = Rs < Im (unsigned)
+ if (!_Rt_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ MapConst(_Rt_, iRegs[_Rs_].k < _ImmU_);
+ } else {
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ CMP32ItoR(EAX, _Imm_);
+ SETB8R (EAX);
+ AND32ItoR(EAX, 0xff);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+}
+
+static void recANDI() {
+// Rt = Rs And Im
+ if (!_Rt_) return;
+
+// iFlushRegs();
+
+ if (_Rs_ == _Rt_) {
+ if (IsConst(_Rt_)) {
+ iRegs[_Rt_].k&= _ImmU_;
+ } else {
+ AND32ItoM((uptr)&psxRegs.GPR.r[_Rt_], _ImmU_);
+ }
+ } else {
+ if (IsConst(_Rs_)) {
+ MapConst(_Rt_, iRegs[_Rs_].k & _ImmU_);
+ } else {
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ AND32ItoR(EAX, _ImmU_);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+ }
+}
+
+static void recORI() {
+// Rt = Rs Or Im
+ if (!_Rt_) return;
+
+// iFlushRegs();
+
+ if (_Rs_ == _Rt_) {
+ if (IsConst(_Rt_)) {
+ iRegs[_Rt_].k|= _ImmU_;
+ } else {
+ OR32ItoM((uptr)&psxRegs.GPR.r[_Rt_], _ImmU_);
+ }
+ } else {
+ if (IsConst(_Rs_)) {
+ MapConst(_Rt_, iRegs[_Rs_].k | _ImmU_);
+ } else {
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_ImmU_) OR32ItoR (EAX, _ImmU_);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+ }
+}
+
+static void recXORI() {
+// Rt = Rs Xor Im
+ if (!_Rt_) return;
+
+// iFlushRegs();
+
+ if (_Rs_ == _Rt_) {
+ if (IsConst(_Rt_)) {
+ iRegs[_Rt_].k^= _ImmU_;
+ } else {
+ XOR32ItoM((uptr)&psxRegs.GPR.r[_Rt_], _ImmU_);
+ }
+ } else {
+ if (IsConst(_Rs_)) {
+ MapConst(_Rt_, iRegs[_Rs_].k ^ _ImmU_);
+ } else {
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ XOR32ItoR(EAX, _ImmU_);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+ }
+}
+#endif
+//end of * Arithmetic with immediate operand
+
+/*********************************************************
+* Load the immediate into the upper 16 bits of rt        *
+* Format: OP rt, immediate                               *
+*********************************************************/
+//REC_FUNC(LUI);
+#if 1
+static void recLUI() {
+// Rt = Imm << 16
+ if (!_Rt_) return;
+
+ MapConst(_Rt_, psxRegs.code << 16);
+}
+#endif
+//End of Load Upper Immediate...
+
+
+/*********************************************************
+* Register arithmetic *
+* Format: OP rd, rs, rt *
+*********************************************************/
+
+
+#if 0
+REC_FUNC(ADD);
+REC_FUNC(ADDU);
+REC_FUNC(SUB);
+REC_FUNC(SUBU);
+REC_FUNC(AND);
+REC_FUNC(OR);
+REC_FUNC(XOR);
+REC_FUNC(NOR);
+REC_FUNC(SLT);
+REC_FUNC(SLTU);
+#endif
+
+#if 1
+static void recADDU() {
+// Rd = Rs + Rt
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ MapConst(_Rd_, iRegs[_Rs_].k + iRegs[_Rt_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ if (_Rt_ == _Rd_) {
+ if (iRegs[_Rs_].k == 1) {
+ INC32M((uptr)&psxRegs.GPR.r[_Rd_]);
+ } else if (iRegs[_Rs_].k == -1) {
+ DEC32M((uptr)&psxRegs.GPR.r[_Rd_]);
+ } else if (iRegs[_Rs_].k) {
+ ADD32ItoM((uptr)&psxRegs.GPR.r[_Rd_], iRegs[_Rs_].k);
+ }
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ if (iRegs[_Rs_].k == 1) {
+ INC32R(EAX);
+ } else if (iRegs[_Rs_].k == 0xffffffff) {
+ DEC32R(EAX);
+ } else if (iRegs[_Rs_].k) {
+ ADD32ItoR(EAX, iRegs[_Rs_].k);
+ }
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ if (_Rs_ == _Rd_) {
+ if (iRegs[_Rt_].k == 1) {
+ INC32M((uptr)&psxRegs.GPR.r[_Rd_]);
+ } else if (iRegs[_Rt_].k == -1) {
+ DEC32M((uptr)&psxRegs.GPR.r[_Rd_]);
+ } else if (iRegs[_Rt_].k) {
+ ADD32ItoM((uptr)&psxRegs.GPR.r[_Rd_], iRegs[_Rt_].k);
+ }
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (iRegs[_Rt_].k == 1) {
+ INC32R(EAX);
+ } else if (iRegs[_Rt_].k == 0xffffffff) {
+ DEC32R(EAX);
+ } else if (iRegs[_Rt_].k) {
+ ADD32ItoR(EAX, iRegs[_Rt_].k);
+ }
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ if (_Rs_ == _Rd_) { // Rd+= Rt
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ ADD32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (_Rt_ == _Rd_) { // Rd+= Rs
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ ADD32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else { // Rd = Rs + Rt
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ ADD32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+ }
+}
+
+static void recADD() {
+// Rd = Rs + Rt
+ recADDU();
+}
+
+static void recSUBU() {
+// Rd = Rs - Rt
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ MapConst(_Rd_, iRegs[_Rs_].k - iRegs[_Rt_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32ItoR(EAX, iRegs[_Rs_].k);
+ SUB32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ SUB32ItoR(EAX, iRegs[_Rt_].k);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ SUB32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+
+static void recSUB() {
+// Rd = Rs - Rt
+ recSUBU();
+}
+
+static void recAND() {
+// Rd = Rs And Rt
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ MapConst(_Rd_, iRegs[_Rs_].k & iRegs[_Rt_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ if (_Rd_ == _Rt_) { // Rd&= Rs
+ AND32ItoM((uptr)&psxRegs.GPR.r[_Rd_], iRegs[_Rs_].k);
+ } else {
+ MOV32ItoR(EAX, iRegs[_Rs_].k);
+ AND32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ if (_Rd_ == _Rs_) { // Rd&= kRt
+ AND32ItoM((uptr)&psxRegs.GPR.r[_Rd_], iRegs[_Rt_].k);
+ } else { // Rd = Rs & kRt
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ AND32ItoR(EAX, iRegs[_Rt_].k);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ if (_Rs_ == _Rd_) { // Rd&= Rt
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ AND32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (_Rt_ == _Rd_) { // Rd&= Rs
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ AND32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else { // Rd = Rs & Rt
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ AND32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+ }
+}
+
+static void recOR() {
+// Rd = Rs Or Rt
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ MapConst(_Rd_, iRegs[_Rs_].k | iRegs[_Rt_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32ItoR(EAX, iRegs[_Rs_].k);
+ OR32MtoR (EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ OR32ItoR (EAX, iRegs[_Rt_].k);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ OR32MtoR (EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+
+static void recXOR() {
+// Rd = Rs Xor Rt
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ MapConst(_Rd_, iRegs[_Rs_].k ^ iRegs[_Rt_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32ItoR(EAX, iRegs[_Rs_].k);
+ XOR32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ XOR32ItoR(EAX, iRegs[_Rt_].k);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ XOR32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+
+static void recNOR() {
+// Rd = Rs Nor Rt
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ MapConst(_Rd_, ~(iRegs[_Rs_].k | iRegs[_Rt_].k));
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32ItoR(EAX, iRegs[_Rs_].k);
+ OR32MtoR (EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ NOT32R (EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ OR32ItoR (EAX, iRegs[_Rt_].k);
+ NOT32R (EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ OR32MtoR (EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ NOT32R (EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+
+static void recSLT() {
+// Rd = Rs < Rt (signed)
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ MapConst(_Rd_, (s32)iRegs[_Rs_].k < (s32)iRegs[_Rt_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32ItoR(EAX, iRegs[_Rs_].k);
+ CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ SETL8R (EAX);
+ AND32ItoR(EAX, 0xff);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ CMP32ItoR(EAX, iRegs[_Rt_].k);
+ SETL8R (EAX);
+ AND32ItoR(EAX, 0xff);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ SETL8R (EAX);
+ AND32ItoR(EAX, 0xff);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+
+static void recSLTU() {
+// Rd = Rs < Rt (unsigned)
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ MapConst(_Rd_, iRegs[_Rs_].k < iRegs[_Rt_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32ItoR(EAX, iRegs[_Rs_].k);
+ CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ SBB32RtoR(EAX, EAX);
+ NEG32R (EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ CMP32ItoR(EAX, iRegs[_Rt_].k);
+ SBB32RtoR(EAX, EAX);
+ NEG32R (EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ SBB32RtoR(EAX, EAX);
+ NEG32R (EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
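+
+/* recSLTU uses a branchless unsigned compare: CMP sets the carry flag
+ * exactly when the first operand is below the second, SBB EAX,EAX smears
+ * that carry into 0 or 0xffffffff, and NEG turns it into the 0/1 value
+ * MIPS expects in Rd. */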
+#endif
+//End of * Register arithmetic
+
+/*********************************************************
+* Register mult/div & Register trap logic *
+* Format: OP rs, rt *
+*********************************************************/
+
+#if 0
+REC_FUNC(MULT);
+REC_FUNC(MULTU);
+REC_FUNC(DIV);
+REC_FUNC(DIVU);
+#endif
+
+#if 1
+static void recMULT() {
+// Lo/Hi = Rs * Rt (signed)
+
+// iFlushRegs();
+
+ if ((IsConst(_Rs_) && iRegs[_Rs_].k == 0) ||
+ (IsConst(_Rt_) && iRegs[_Rt_].k == 0)) {
+ XOR32RtoR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EAX);
+ return;
+ }
+
+ if (IsConst(_Rs_)) {
+ MOV32ItoR(EAX, iRegs[_Rs_].k);// printf("multrsk %x\n", iRegs[_Rs_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ }
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(EDX, iRegs[_Rt_].k);// printf("multrtk %x\n", iRegs[_Rt_].k);
+ IMUL32R (EDX);
+ } else {
+ IMUL32M ((uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EDX);
+}
+
+static void recMULTU() {
+// Lo/Hi = Rs * Rt (unsigned)
+
+// iFlushRegs();
+
+ if ((IsConst(_Rs_) && iRegs[_Rs_].k == 0) ||
+ (IsConst(_Rt_) && iRegs[_Rt_].k == 0)) {
+ XOR32RtoR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EAX);
+ return;
+ }
+
+ if (IsConst(_Rs_)) {
+ MOV32ItoR(EAX, iRegs[_Rs_].k);// printf("multursk %x\n", iRegs[_Rs_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ }
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(EDX, iRegs[_Rt_].k);// printf("multurtk %x\n", iRegs[_Rt_].k);
+ MUL32R (EDX);
+ } else {
+ MUL32M ((uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EDX);
+}
+
+static void recDIV() {
+// Lo/Hi = Rs / Rt (signed)
+
+// iFlushRegs();
+
+ if (IsConst(_Rt_)) {
+ if (iRegs[_Rt_].k == 0) return;
+ MOV32ItoR(ECX, iRegs[_Rt_].k);// printf("divrtk %x\n", iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ CMP32ItoR(ECX, 0);
+ j8Ptr[0] = JE8(0);
+ }
+ if (IsConst(_Rs_)) {
+ MOV32ItoR(EAX, iRegs[_Rs_].k);// printf("divrsk %x\n", iRegs[_Rs_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ }
+ CDQ();
+ IDIV32R (ECX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EDX);
+ if (!IsConst(_Rt_)) {
+ x86SetJ8(j8Ptr[0]);
+ }
+}
+
+static void recDIVU() {
+// Lo/Hi = Rs / Rt (unsigned)
+
+// iFlushRegs();
+
+ if (IsConst(_Rt_)) {
+ if (iRegs[_Rt_].k == 0) return;
+ MOV32ItoR(ECX, iRegs[_Rt_].k);// printf("divurtk %x\n", iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ CMP32ItoR(ECX, 0);
+ j8Ptr[0] = JE8(0);
+ }
+ if (IsConst(_Rs_)) {
+ MOV32ItoR(EAX, iRegs[_Rs_].k);// printf("divursk %x\n", iRegs[_Rs_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ }
+ XOR32RtoR(EDX, EDX);
+ DIV32R (ECX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EDX);
+ if (!IsConst(_Rt_)) {
+ x86SetJ8(j8Ptr[0]);
+ }
+}
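+
+/* The JE8 guard in recDIV/recDIVU skips the x86 divide when Rt is zero at
+ * run time: IDIV/DIV would raise #DE, whereas the R3000A raises no
+ * exception for a divide by zero (Lo/Hi are simply left stale here). */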
+#endif
+//End of * Register mult/div & Register trap logic
+
+#if 0
+REC_FUNC(LB);
+REC_FUNC(LBU);
+REC_FUNC(LH);
+REC_FUNC(LHU);
+REC_FUNC(LW);
+
+REC_FUNC(SB);
+REC_FUNC(SH);
+REC_FUNC(SW);
+
+REC_FUNC(LWL);
+REC_FUNC(LWR);
+REC_FUNC(SWL);
+REC_FUNC(SWR);
+#endif
+
+
+static void SetArg_OfB(x86IntRegType arg) {
+ if (IsConst(_Rs_))
+#ifdef __x86_64__
+ MOV64ItoR(arg, iRegs[_Rs_].k + _Imm_);
+#else
+ PUSH32I (iRegs[_Rs_].k + _Imm_);
+#endif
+ else {
+#ifdef __x86_64__
+ MOV32MtoR(arg, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_Imm_)
+ ADD32ItoR(arg, _Imm_);
+#else
+ if (_Imm_) {
+ MOV32MtoR(EAX, (u32)&psxRegs.GPR.r[_Rs_]);
+ ADD32ItoR(EAX, _Imm_);
+ PUSH32R (EAX);
+ } else {
+ PUSH32M ((u32)&psxRegs.GPR.r[_Rs_]);
+ }
+#endif
+ }
+#ifndef __x86_64__
+ resp += 4;
+#endif
+}
+
+#if 1
+static void recLB() {
+// Rt = mem[Rs + Im] (signed)
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0xfff0) == 0xbfc0) {
+ if (!_Rt_) return;
+			// since the BIOS is read-only it won't change
+ MapConst(_Rt_, psxRs8(addr));
+ return;
+ }
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOVSX32M8toR(EAX, (uptr)&psxM[addr & 0x1fffff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOVSX32M8toR(EAX, (uptr)&psxH[addr & 0xfff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+// SysPrintf("unhandled r8 %x\n", addr);
+ }
+
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemRead8);
+ if (_Rt_) {
+ iRegs[_Rt_].state = ST_UNK;
+ MOVSX32R8toR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+// ADD32ItoR(ESP, 4);
+}
+
+static void recLBU() {
+// Rt = mem[Rs + Im] (unsigned)
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0xfff0) == 0xbfc0) {
+ if (!_Rt_) return;
+			// since the BIOS is read-only it won't change
+ MapConst(_Rt_, psxRu8(addr));
+ return;
+ }
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOVZX32M8toR(EAX, (uptr)&psxM[addr & 0x1fffff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOVZX32M8toR(EAX, (uptr)&psxH[addr & 0xfff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+// SysPrintf("unhandled r8u %x\n", addr);
+ }
+
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemRead8);
+ if (_Rt_) {
+ iRegs[_Rt_].state = ST_UNK;
+ MOVZX32R8toR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+// ADD32ItoR(ESP, 4);
+}
+
+static void recLH() {
+// Rt = mem[Rs + Im] (signed)
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0xfff0) == 0xbfc0) {
+ if (!_Rt_) return;
+			// since the BIOS is read-only it won't change
+ MapConst(_Rt_, psxRs16(addr));
+ return;
+ }
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOVSX32M16toR(EAX, (uptr)&psxM[addr & 0x1fffff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOVSX32M16toR(EAX, (uptr)&psxH[addr & 0xfff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+// SysPrintf("unhandled r16 %x\n", addr);
+ }
+
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemRead16);
+ if (_Rt_) {
+ iRegs[_Rt_].state = ST_UNK;
+ MOVSX32R16toR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+// ADD32ItoR(ESP, 4);
+}
+
+static void recLHU() {
+// Rt = mem[Rs + Im] (unsigned)
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0xfff0) == 0xbfc0) {
+ if (!_Rt_) return;
+			// since the BIOS is read-only it won't change
+ MapConst(_Rt_, psxRu16(addr));
+ return;
+ }
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOVZX32M16toR(EAX, (uptr)&psxM[addr & 0x1fffff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOVZX32M16toR(EAX, (uptr)&psxH[addr & 0xfff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ if (t == 0x1f80) {
+ if (addr >= 0x1f801c00 && addr < 0x1f801e00) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ //PUSHI (addr);
+ MOV64ItoR(X86ARG1, addr);
+ //CALLFunc ((uptr)SPU_readRegister);
+ MOV64ItoR(RAX, (uptr)SPU_readRegister);
+ CALL64R(RAX);
+ MOVZX32R16toR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+#ifndef __WIN32__
+ resp+= 4;
+#endif
+ return;
+ }
+ switch (addr) {
+ case 0x1f801100: case 0x1f801110: case 0x1f801120:
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ //PUSHI((addr >> 4) & 0x3);
+ MOV64ItoR(X86ARG1, (addr >> 4) & 0x3);
+ CALLFunc((uptr)psxRcntRcount);
+ MOVZX32R16toR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ resp+= 4;
+ return;
+
+ case 0x1f801104: case 0x1f801114: case 0x1f801124:
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV64ItoR(X86ARG1, (addr >> 4) & 0x3);
+ CALLFunc((uptr)psxRcntRmode);
+ MOVZX32R16toR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ resp+= 4;
+ return;
+
+ case 0x1f801108: case 0x1f801118: case 0x1f801128:
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV64ItoR(X86ARG1, (addr >> 4) & 0x3);
+ CALLFunc((uptr)psxRcntRtarget);
+ MOVZX32R16toR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ resp+= 4;
+ return;
+ }
+ }
+// SysPrintf("unhandled r16u %x\n", addr);
+ }
+
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemRead16);
+ if (_Rt_) {
+ iRegs[_Rt_].state = ST_UNK;
+ MOVZX32R16toR(EAX, EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+// ADD32ItoR(ESP, 4);
+}
+
+static void recLW() {
+// Rt = mem[Rs + Im]
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0xfff0) == 0xbfc0) {
+ if (!_Rt_) return;
+			// since the BIOS is read-only it won't change
+ MapConst(_Rt_, psxRu32(addr));
+ return;
+ }
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1fffff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxH[addr & 0xfff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ if (t == 0x1f80) {
+ switch (addr) {
+ case 0x1f801080: case 0x1f801084: case 0x1f801088:
+ case 0x1f801090: case 0x1f801094: case 0x1f801098:
+ case 0x1f8010a0: case 0x1f8010a4: case 0x1f8010a8:
+ case 0x1f8010b0: case 0x1f8010b4: case 0x1f8010b8:
+ case 0x1f8010c0: case 0x1f8010c4: case 0x1f8010c8:
+ case 0x1f8010d0: case 0x1f8010d4: case 0x1f8010d8:
+ case 0x1f8010e0: case 0x1f8010e4: case 0x1f8010e8:
+ case 0x1f801070: case 0x1f801074:
+ case 0x1f8010f0: case 0x1f8010f4:
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+
+ case 0x1f801810:
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ CALLFunc((uptr)GPU_readData);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+
+ case 0x1f801814:
+ if (!_Rt_) return;
+ iRegs[_Rt_].state = ST_UNK;
+
+ CALLFunc((uptr)GPU_readStatus);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ }
+// SysPrintf("unhandled r32 %x\n", addr);
+ }
+
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemRead32);
+ if (_Rt_) {
+ iRegs[_Rt_].state = ST_UNK;
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ }
+// ADD32ItoR(ESP, 4);
+}
+
+extern u32 LWL_MASK[4];
+extern u32 LWL_SHIFT[4];
+
+void iLWLk(u32 shift) {
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(ECX, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ AND32ItoR(ECX, LWL_MASK[shift]);
+ SHL32ItoR(EAX, LWL_SHIFT[shift]);
+ OR32RtoR (EAX, ECX);
+}
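+
+/* Worked example, assuming the interpreter's tables LWL_SHIFT[] =
+ * {24, 16, 8, 0} and LWL_MASK[] = {0x00ffffff, 0x0000ffff, 0x000000ff, 0}:
+ * with shift = addr & 3 = 1 the emitted code computes
+ *
+ *   rt = (rt & 0x0000ffff) | (mem << 16);
+ *
+ * i.e. the unaligned-left load keeps the low half of rt and fills the high
+ * half from the aligned word. */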
+
+void recLWL() {
+// Rt = Rt Merge mem[Rs + Im]
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1ffffc]);
+ iLWLk(addr & 3);
+
+ iRegs[_Rt_].state = ST_UNK;
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffc]);
+ iLWLk(addr & 3);
+
+ iRegs[_Rt_].state = ST_UNK;
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ }
+
+ if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
+ else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_Imm_) ADD32ItoR(EAX, _Imm_);
+ }
+ //PUSH64R (EAX);
+ AND32ItoR(EAX, ~3);
+ //PUSH64R (EAX);
+ MOV32RtoR(X86ARG1, EAX);
+ CALLFunc((uptr)psxMemRead32);
+
+ if (_Rt_) {
+ //ADD32ItoR(ESP, 4);
+ //POP64R (EDX);
+ if (IsConst(_Rs_)) MOV32ItoR(EDX, iRegs[_Rs_].k + _Imm_);
+ else {
+ MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_Imm_) ADD32ItoR(EDX, _Imm_);
+ }
+
+ AND32ItoR(EDX, 0x3); // shift = addr & 3;
+
+ MOV64ItoR(ECX, (uptr)LWL_SHIFT);
+ MOV32RmStoR(ECX, ECX, EDX, 2);
+ SHL32CLtoR(EAX); // mem(EAX) << LWL_SHIFT[shift]
+
+ MOV64ItoR(ECX, (uptr)LWL_MASK);
+ MOV32RmStoR(ECX, ECX, EDX, 2);
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(EDX, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ AND32RtoR(EDX, ECX); // _rRt_ & LWL_MASK[shift]
+
+ OR32RtoR(EAX, EDX);
+
+ iRegs[_Rt_].state = ST_UNK;
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ //} else {
+ //ADD64ItoR(RSP, 8);
+ //resp+= 8;
+ }
+}
+
+/*
+static void recLWBlock(int count) {
+ u32 *code = PSXM(pc);
+ int i, respsave;
+// Rt = mem[Rs + Im] (unsigned)
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0xfff0) == 0xbfc0) {
+			// since the BIOS is read-only it won't change
+ for (i=0; i<count; i++, code++, addr+=4) {
+ if (_fRt_(*code)) {
+ MapConst(_fRt_(*code), psxRu32(addr));
+ }
+ }
+ return;
+ }
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ for (i=0; i<count; i++, code++, addr+=4) {
+ if (!_fRt_(*code)) return;
+ iRegs[_fRt_(*code)].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1fffff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_fRt_(*code)], EAX);
+ }
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ for (i=0; i<count; i++, code++, addr+=4) {
+ if (!_fRt_(*code)) return;
+ iRegs[_fRt_(*code)].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxH[addr & 0xfff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_fRt_(*code)], EAX);
+ }
+ return;
+ }
+ }
+
+ SysPrintf("recLWBlock %d: %d\n", count, IsConst(_Rs_));
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemPointer);
+// ADD32ItoR(ESP, 4);
+
+ respsave = resp; resp = 0;
+ TEST64RtoR(RAX,RAX);
+ j32Ptr[4] = JZ32(0);
+ XOR32RtoR(ECX, ECX);
+ for (i=0; i<count; i++, code++) {
+ if (_fRt_(*code)) {
+ iRegs[_fRt_(*code)].state = ST_UNK;
+
+ MOV64RmStoR(EDX, EAX, ECX, 2);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_fRt_(*code)], EDX);
+ }
+ if (i != (count-1)) INC32R(ECX);
+ }
+ j32Ptr[5] = JMP32(0);
+ x86SetJ32(j32Ptr[4]);
+ for (i=0, code = PSXM(pc); i<count; i++, code++) {
+ psxRegs.code = *code;
+ recLW();
+ }
+#ifndef __x86_64__
+ ADD32ItoR(ESP, resp);
+#endif
+ x86SetJ32(j32Ptr[5]);
+ resp = respsave;
+}
+*/
+
+extern u32 LWR_MASK[4];
+extern u32 LWR_SHIFT[4];
+
+void iLWRk(u32 shift) {
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(ECX, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ AND32ItoR(ECX, LWR_MASK[shift]);
+ SHR32ItoR(EAX, LWR_SHIFT[shift]);
+ OR32RtoR (EAX, ECX);
+}
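+
+/* iLWRk mirrors iLWLk for the right-hand unaligned load: assuming the
+ * interpreter's LWR_SHIFT[] = {0, 8, 16, 24} and LWR_MASK[] =
+ * {0, 0xff000000, 0xffff0000, 0xffffff00}, shift = 2 computes
+ *
+ *   rt = (rt & 0xffff0000) | (mem >> 16);
+ */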
+
+void recLWR() {
+// Rt = Rt Merge mem[Rs + Im]
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1ffffc]);
+ iLWRk(addr & 3);
+
+ iRegs[_Rt_].state = ST_UNK;
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffc]);
+ iLWRk(addr & 3);
+
+ iRegs[_Rt_].state = ST_UNK;
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ return;
+ }
+ }
+
+ if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
+ else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_Imm_) ADD32ItoR(EAX, _Imm_);
+ }
+ PUSHR(EAX);
+ AND32ItoR(EAX, ~3);
+ MOV32RtoR(X86ARG1, EAX);
+ CALLFunc((uptr)psxMemRead32);
+
+ POPR (EDX);
+ if (_Rt_) {
+ AND32ItoR(EDX, 0x3); // shift = addr & 3;
+
+ MOV64ItoR(ECX, (uptr)LWR_SHIFT);
+ MOV32RmStoR(ECX, ECX, EDX, 2);
+ SHR32CLtoR(EAX); // mem(EAX) >> LWR_SHIFT[shift]
+
+ MOV64ItoR(ECX, (uptr)LWR_MASK);
+ MOV32RmStoR(ECX, ECX, EDX, 2);
+
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(EDX, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ AND32RtoR(EDX, ECX); // _rRt_ & LWR_MASK[shift]
+
+ OR32RtoR(EAX, EDX);
+
+ iRegs[_Rt_].state = ST_UNK;
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+ //} else {
+ //resp+= 8;
+ }
+}
+
+static void recSB() {
+// mem[Rs + Im] = Rt
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ if (IsConst(_Rt_)) {
+ MOV8ItoM((uptr)&psxM[addr & 0x1fffff], (u8)iRegs[_Rt_].k);
+ } else {
+ MOV8MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV8RtoM((uptr)&psxM[addr & 0x1fffff], EAX);
+ }
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ if (IsConst(_Rt_)) {
+ MOV8ItoM((uptr)&psxH[addr & 0xfff], (u8)iRegs[_Rt_].k);
+ } else {
+ MOV8MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV8RtoM((uptr)&psxH[addr & 0xfff], EAX);
+ }
+ return;
+ }
+// SysPrintf("unhandled w8 %x\n", addr);
+ }
+
+ if (IsConst(_Rt_)) {
+ MOV64ItoR(X86ARG2, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(X86ARG2, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemWrite8);
+// ADD32ItoR(ESP, 8);
+}
+
+static void recSH() {
+// mem[Rs + Im] = Rt
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ if (IsConst(_Rt_)) {
+ MOV16ItoM((uptr)&psxM[addr & 0x1fffff], (u16)iRegs[_Rt_].k);
+ } else {
+ MOV16MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV16RtoM((uptr)&psxM[addr & 0x1fffff], EAX);
+ }
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ if (IsConst(_Rt_)) {
+ MOV16ItoM((uptr)&psxH[addr & 0xfff], (u16)iRegs[_Rt_].k);
+ } else {
+ MOV16MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV16RtoM((uptr)&psxH[addr & 0xfff], EAX);
+ }
+ return;
+ }
+ if (t == 0x1f80) {
+ if (addr >= 0x1f801c00 && addr < 0x1f801e00) {
+ if (IsConst(_Rt_)) {
+ MOV64ItoR(X86ARG2, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(X86ARG2, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ MOV64ItoR(X86ARG1, addr);
+ CALLFunc ((uptr)SPU_writeRegister);
+#ifndef __WIN32__
+ //resp+= 8;
+#endif
+ return;
+ }
+ }
+// SysPrintf("unhandled w16 %x\n", addr);
+ }
+
+ if (IsConst(_Rt_)) {
+ MOV64ItoR(X86ARG2, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(X86ARG2, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemWrite16);
+// ADD32ItoR(ESP, 8);
+}
+
+static void recSW() {
+// mem[Rs + Im] = Rt
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ if (IsConst(_Rt_)) {
+ MOV32ItoM((uptr)&psxM[addr & 0x1fffff], iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxM[addr & 0x1fffff], EAX);
+ }
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ if (IsConst(_Rt_)) {
+ MOV32ItoM((uptr)&psxH[addr & 0xfff], iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxH[addr & 0xfff], EAX);
+ }
+ return;
+ }
+ if (t == 0x1f80) {
+ switch (addr) {
+ case 0x1f801080: case 0x1f801084:
+ case 0x1f801090: case 0x1f801094:
+ case 0x1f8010a0: case 0x1f8010a4:
+ case 0x1f8010b0: case 0x1f8010b4:
+ case 0x1f8010c0: case 0x1f8010c4:
+ case 0x1f8010d0: case 0x1f8010d4:
+ case 0x1f8010e0: case 0x1f8010e4:
+ case 0x1f801074:
+ case 0x1f8010f0:
+ if (IsConst(_Rt_)) {
+ MOV32ItoM((uptr)&psxH[addr & 0xffff], iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32RtoM((uptr)&psxH[addr & 0xffff], EAX);
+ }
+ return;
+
+ case 0x1f801810:
+ if (IsConst(_Rt_)) {
+ MOV64ItoR(X86ARG1, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ CALLFunc((uptr)GPU_writeData);
+#ifndef __WIN32__
+ //resp+= 4;
+#endif
+ return;
+
+ case 0x1f801814:
+ if (IsConst(_Rt_)) {
+ MOV64ItoR(X86ARG1, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(X86ARG1, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ CALLFunc((uptr)GPU_writeStatus);
+#ifndef __WIN32__
+ //resp+= 4;
+#endif
+				return;
+			}
+ }
+// SysPrintf("unhandled w32 %x\n", addr);
+ }
+
+ if (IsConst(_Rt_)) {
+ MOV64ItoR(X86ARG2, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(X86ARG2, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemWrite32);
+// ADD32ItoR(ESP, 8);
+ //resp+= 8;
+}
+
+/*
+static void recSWBlock(int count) {
+ u32 *code;
+ int i, respsave;
+// mem[Rs + Im] = Rt
+
+// iFlushRegs();
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+ code = PSXM(pc);
+
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ for (i=0; i<count; i++, code++, addr+=4) {
+ if (IsConst(_fRt_(*code))) {
+ MOV32ItoM((uptr)&psxM[addr & 0x1fffff], iRegs[_fRt_(*code)].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_fRt_(*code)]);
+ MOV32RtoM((uptr)&psxM[addr & 0x1fffff], EAX);
+ }
+ }
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ for (i=0; i<count; i++, code++, addr+=4) {
+ if (!_fRt_(*code)) return;
+ iRegs[_fRt_(*code)].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxH[addr & 0xfff]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_fRt_(*code)], EAX);
+ }
+ return;
+ }
+ }
+
+ SysPrintf("recSWBlock %d: %d\n", count, IsConst(_Rs_));
+ SetArg_OfB(X86ARG1);
+ CALLFunc((uptr)psxMemPointer);
+// ADD32ItoR(ESP, 4);
+ //resp+= 4;
+
+ respsave = resp; resp = 0;
+ TEST64RtoR(RAX,RAX);
+ j32Ptr[4] = JZ32(0);
+ XOR32RtoR(ECX, ECX);
+ for (i=0, code = PSXM(pc); i<count; i++, code++) {
+ if (IsConst(_fRt_(*code))) {
+ MOV32ItoR(EDX, iRegs[_fRt_(*code)].k);
+ } else {
+ MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_fRt_(*code)]);
+ }
+ MOV32RtoRmS(EAX, ECX, 2, EDX);
+ if (i != (count-1)) INC32R(ECX);
+ }
+ j32Ptr[5] = JMP32(0);
+ x86SetJ32(j32Ptr[4]);
+ for (i=0, code = PSXM(pc); i<count; i++, code++) {
+ psxRegs.code = *code;
+ recSW();
+ }
+ //ADD32ItoR(ESP, resp);
+ x86SetJ32(j32Ptr[5]);
+ resp = respsave;
+}
+*/
+
+extern u32 SWL_MASK[4];
+extern u32 SWL_SHIFT[4];
+
+void iSWLk(u32 shift) {
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(ECX, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ SHR32ItoR(ECX, SWL_SHIFT[shift]);
+ AND32ItoR(EAX, SWL_MASK[shift]);
+ OR32RtoR (EAX, ECX);
+}
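+
+/* iSWLk is the store-side counterpart of iLWLk: assuming SWL_SHIFT[] =
+ * {24, 16, 8, 0} and SWL_MASK[] = {0xffffff00, 0xffff0000, 0xff000000, 0},
+ * shift = 1 makes the emitted code compute
+ *
+ *   mem = (mem & 0xffff0000) | (rt >> 16);
+ *
+ * which recSWL then writes back to the aligned word. */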
+
+void recSWL() {
+// mem[Rs + Im] = Rt Merge mem[Rs + Im]
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1ffffc]);
+ iSWLk(addr & 3);
+ MOV32RtoM((uptr)&psxM[addr & 0x1ffffc], EAX);
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffc]);
+ iSWLk(addr & 3);
+ MOV32RtoM((uptr)&psxH[addr & 0xffc], EAX);
+ return;
+ }
+ }
+
+ if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
+ else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_Imm_) ADD32ItoR(EAX, _Imm_);
+ }
+ PUSHR (EAX);
+ AND32ItoR(EAX, ~3);
+ MOV32RtoR(X86ARG1, EAX);
+
+ CALLFunc((uptr)psxMemRead32);
+
+ POPR (EDX);
+ AND32ItoR(EDX, 0x3); // shift = addr & 3;
+
+ MOV64ItoR(ECX, (uptr)SWL_MASK);
+ MOV32RmStoR(ECX, ECX, EDX, 2);
+ AND32RtoR(EAX, ECX); // mem & SWL_MASK[shift]
+
+ MOV64ItoR(ECX, (uptr)SWL_SHIFT);
+ MOV32RmStoR(ECX, ECX, EDX, 2);
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(EDX, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ SHR32CLtoR(EDX); // _rRt_ >> SWL_SHIFT[shift]
+
+ OR32RtoR (EAX, EDX);
+ MOV32RtoR(X86ARG2, EAX);
+
+ if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
+ else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_Imm_) ADD32ItoR(EAX, _Imm_);
+ }
+ AND32ItoR(EAX, ~3);
+ MOV32RtoR(X86ARG1, EAX);
+
+ CALLFunc((uptr)psxMemWrite32);
+// ADD32ItoR(ESP, 8);
+ //resp+= 8;
+}
+
+extern u32 SWR_MASK[4];
+extern u32 SWR_SHIFT[4];
+
+void iSWRk(u32 shift) {
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(ECX, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ SHL32ItoR(ECX, SWR_SHIFT[shift]);
+ AND32ItoR(EAX, SWR_MASK[shift]);
+ OR32RtoR (EAX, ECX);
+}
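+
+/* iSWRk mirrors iSWLk: assuming SWR_SHIFT[] = {0, 8, 16, 24} and
+ * SWR_MASK[] = {0, 0x000000ff, 0x0000ffff, 0x00ffffff}, shift = 2 computes
+ *
+ *   mem = (mem & 0x0000ffff) | (rt << 16);
+ */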
+
+void recSWR() {
+// mem[Rs + Im] = Rt Merge mem[Rs + Im]
+
+ if (IsConst(_Rs_)) {
+ u32 addr = iRegs[_Rs_].k + _Imm_;
+ int t = addr >> 16;
+
+ if ((t & 0x1fe0) == 0 && (t & 0x1fff) != 0) {
+ MOV32MtoR(EAX, (uptr)&psxM[addr & 0x1ffffc]);
+ iSWRk(addr & 3);
+ MOV32RtoM((uptr)&psxM[addr & 0x1ffffc], EAX);
+ return;
+ }
+ if (t == 0x1f80 && addr < 0x1f801000) {
+ MOV32MtoR(EAX, (uptr)&psxH[addr & 0xffc]);
+ iSWRk(addr & 3);
+ MOV32RtoM((uptr)&psxH[addr & 0xffc], EAX);
+ return;
+ }
+ }
+
+ if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
+ else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_Imm_) ADD32ItoR(EAX, _Imm_);
+ }
+ PUSHR (EAX);
+
+ AND32ItoR(EAX, ~3);
+ MOV32RtoR(X86ARG1, EAX);
+
+ CALLFunc((uptr)psxMemRead32);
+
+ POPR (EDX);
+ AND32ItoR(EDX, 0x3); // shift = addr & 3;
+
+ MOV64ItoR(ECX, (uptr)SWR_MASK);
+ MOV32RmStoR(ECX, ECX, EDX, 2);
+ AND32RtoR(EAX, ECX); // mem & SWR_MASK[shift]
+
+ MOV64ItoR(ECX, (uptr)SWR_SHIFT);
+ MOV32RmStoR(ECX, ECX, EDX, 2);
+ if (IsConst(_Rt_)) {
+ MOV32ItoR(EDX, iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(EDX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ SHL32CLtoR(EDX); // _rRt_ << SWR_SHIFT[shift]
+
+ OR32RtoR (EAX, EDX);
+ MOV32RtoR(X86ARG2, EAX);
+
+ if (IsConst(_Rs_)) MOV32ItoR(EAX, iRegs[_Rs_].k + _Imm_);
+ else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ if (_Imm_) ADD32ItoR(EAX, _Imm_);
+ }
+ AND32ItoR(EAX, ~3);
+ MOV32RtoR(X86ARG1, EAX);
+ CALLFunc((uptr)psxMemWrite32);
+// ADD32ItoR(ESP, 8);
+ //resp+= 8;
+}
+
+#endif
+
+#if 0
+REC_FUNC(SLL);
+REC_FUNC(SRL);
+REC_FUNC(SRA);
+#endif
+#if 1
+static void recSLL() {
+// Rd = Rt << Sa
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rt_)) {
+ MapConst(_Rd_, iRegs[_Rt_].k << _Sa_);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ if (_Sa_) SHL32ItoR(EAX, _Sa_);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+
+static void recSRL() {
+// Rd = Rt >> Sa
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rt_)) {
+ MapConst(_Rd_, iRegs[_Rt_].k >> _Sa_);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ if (_Sa_) SHR32ItoR(EAX, _Sa_);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+
+static void recSRA() {
+// Rd = Rt >> Sa
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rt_)) {
+ MapConst(_Rd_, (s32)iRegs[_Rt_].k >> _Sa_);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ if (_Sa_) SAR32ItoR(EAX, _Sa_);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+#endif
+
+#if 0
+REC_FUNC(SLLV);
+REC_FUNC(SRLV);
+REC_FUNC(SRAV);
+#endif
+
+#if 1
+static void recSLLV() {
+// Rd = Rt << Rs
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rt_) && IsConst(_Rs_)) {
+ MapConst(_Rd_, iRegs[_Rt_].k << iRegs[_Rs_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32ItoR(ECX, iRegs[_Rs_].k);
+ SHL32CLtoR(EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32ItoR(EAX, iRegs[_Rt_].k);
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ SHL32CLtoR(EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ SHL32CLtoR(EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+
+static void recSRLV() {
+// Rd = Rt >> Rs
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rt_) && IsConst(_Rs_)) {
+ MapConst(_Rd_, iRegs[_Rt_].k >> iRegs[_Rs_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32ItoR(ECX, iRegs[_Rs_].k);
+ SHR32CLtoR(EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32ItoR(EAX, iRegs[_Rt_].k);
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ SHR32CLtoR(EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ SHR32CLtoR(EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+
+static void recSRAV() {
+// Rd = Rt >> Rs
+ if (!_Rd_) return;
+
+// iFlushRegs();
+
+ if (IsConst(_Rt_) && IsConst(_Rs_)) {
+ MapConst(_Rd_, (s32)iRegs[_Rt_].k >> iRegs[_Rs_].k);
+ } else if (IsConst(_Rs_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32ItoR(ECX, iRegs[_Rs_].k);
+ SAR32CLtoR(EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else if (IsConst(_Rt_)) {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32ItoR(EAX, iRegs[_Rt_].k);
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ SAR32CLtoR(EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ } else {
+ iRegs[_Rd_].state = ST_UNK;
+
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ MOV32MtoR(ECX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ SAR32CLtoR(EAX);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+ }
+}
+#endif
+
+#if 0
+REC_SYS(SYSCALL);
+REC_SYS(BREAK);
+#endif
+
+int dump = 0;
+
+#if 1
+static void recSYSCALL() {
+// dump=1;
+ iFlushRegs();
+
+ MOV32ItoR(EAX, pc - 4);
+ MOV32RtoM((uptr)&psxRegs.pc, EAX);
+ MOV64ItoR(X86ARG2, branch == 1 ? 1 : 0);
+ MOV64ItoR(X86ARG1, 0x20);
+ CALLFunc((uptr)psxException);
+ //ADD32ItoR(ESP, 8);
+
+ branch = 2;
+ iRet();
+}
+
+static void recBREAK() {
+}
+#endif
+
+#if 0
+REC_FUNC(MFHI);
+REC_FUNC(MTHI);
+REC_FUNC(MFLO);
+REC_FUNC(MTLO);
+#endif
+#if 1
+static void recMFHI() {
+// Rd = Hi
+ if (!_Rd_) return;
+
+ iRegs[_Rd_].state = ST_UNK;
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.n.hi);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+}
+
+static void recMTHI() {
+// Hi = Rs
+
+ if (IsConst(_Rs_)) {
+ MOV32ItoM((uptr)&psxRegs.GPR.n.hi, iRegs[_Rs_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.hi, EAX);
+ }
+}
+
+static void recMFLO() {
+// Rd = Lo
+ if (!_Rd_) return;
+
+ iRegs[_Rd_].state = ST_UNK;
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.n.lo);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rd_], EAX);
+}
+
+static void recMTLO() {
+// Lo = Rs
+
+ if (IsConst(_Rs_)) {
+ MOV32ItoM((uptr)&psxRegs.GPR.n.lo, iRegs[_Rs_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.n.lo, EAX);
+ }
+}
+#endif
+
+#if 0
+REC_BRANCH(J);
+REC_BRANCH(JR);
+REC_BRANCH(JAL);
+REC_BRANCH(JALR);
+REC_BRANCH(BLTZ);
+REC_BRANCH(BGTZ);
+REC_BRANCH(BLTZAL);
+REC_BRANCH(BGEZAL);
+REC_BRANCH(BNE);
+REC_BRANCH(BEQ);
+REC_BRANCH(BLEZ);
+REC_BRANCH(BGEZ);
+#endif
+#if 1
+static void recBLTZ() {
+// Branch if Rs < 0
+ u32 bpc = _Imm_ * 4 + pc;
+
+// iFlushRegs();
+
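+	/* branching to the very next instruction emits nothing unless the
+	   delay slot introduces a load-delay hazard */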
+ if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
+ return;
+ }
+
+ if (IsConst(_Rs_)) {
+ if ((s32)iRegs[_Rs_].k < 0) {
+ iJump(bpc); return;
+ } else {
+ iJump(pc+4); return;
+ }
+ }
+
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
+ j32Ptr[4] = JL32(0);
+
+ iBranch(pc+4, 1);
+
+ x86SetJ32(j32Ptr[4]);
+
+ iBranch(bpc, 0);
+ pc+=4;
+}
+
+static void recBGTZ() {
+// Branch if Rs > 0
+ u32 bpc = _Imm_ * 4 + pc;
+
+// iFlushRegs();
+ if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
+ return;
+ }
+
+ if (IsConst(_Rs_)) {
+ if ((s32)iRegs[_Rs_].k > 0) {
+ iJump(bpc); return;
+ } else {
+ iJump(pc+4); return;
+ }
+ }
+
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
+ j32Ptr[4] = JG32(0);
+
+ iBranch(pc+4, 1);
+
+ x86SetJ32(j32Ptr[4]);
+
+ iBranch(bpc, 0);
+ pc+=4;
+}
+
+static void recBLTZAL() {
+// Branch if Rs < 0 (and link)
+ u32 bpc = _Imm_ * 4 + pc;
+
+// iFlushRegs();
+ if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
+ return;
+ }
+
+ if (IsConst(_Rs_)) {
+ if ((s32)iRegs[_Rs_].k < 0) {
+ MOV32ItoM((uptr)&psxRegs.GPR.r[31], pc + 4);
+ iJump(bpc); return;
+ } else {
+ iJump(pc+4); return;
+ }
+ }
+
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
+ j32Ptr[4] = JL32(0);
+
+ iBranch(pc+4, 1);
+
+ x86SetJ32(j32Ptr[4]);
+
+ MOV32ItoM((uptr)&psxRegs.GPR.r[31], pc + 4);
+ iBranch(bpc, 0);
+ pc+=4;
+}
+
+static void recBGEZAL() {
+// Branch if Rs >= 0 (and link)
+ u32 bpc = _Imm_ * 4 + pc;
+
+// iFlushRegs();
+ if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
+ return;
+ }
+
+ if (IsConst(_Rs_)) {
+ if ((s32)iRegs[_Rs_].k >= 0) {
+ MOV32ItoM((uptr)&psxRegs.GPR.r[31], pc + 4);
+ iJump(bpc); return;
+ } else {
+ iJump(pc+4); return;
+ }
+ }
+
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
+ j32Ptr[4] = JGE32(0);
+
+ iBranch(pc+4, 1);
+
+ x86SetJ32(j32Ptr[4]);
+
+ MOV32ItoM((uptr)&psxRegs.GPR.r[31], pc + 4);
+ iBranch(bpc, 0);
+ pc+=4;
+}
+
+static void recJ() {
+// j target
+
+ iJump(_Target_ * 4 + (pc & 0xf0000000));
+}
+
+static void recJAL() {
+// jal target
+
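+	/* the return address is known at compile time, so $ra stays a constant */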
+ MapConst(31, pc + 4);
+
+ iJump(_Target_ * 4 + (pc & 0xf0000000));
+}
+
+static void recJR() {
+// jr Rs
+
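+	/* store the jump target for SetBranch() to consume when it closes the block */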
+ if (IsConst(_Rs_)) {
+ MOV32ItoM((uptr)&target, iRegs[_Rs_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ MOV32RtoM((uptr)&target, EAX);
+ }
+
+ SetBranch();
+}
+
+static void recJALR() {
+// jalr Rs
+
+ if (IsConst(_Rs_)) {
+ MOV32ItoM((uptr)&target, iRegs[_Rs_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ MOV32RtoM((uptr)&target, EAX);
+ }
+
+ if (_Rd_) {
+ MapConst(_Rd_, pc + 4);
+ }
+
+ SetBranch();
+}
+
+static void recBEQ() {
+// Branch if Rs == Rt
+ u32 bpc = _Imm_ * 4 + pc;
+
+// iFlushRegs();
+ if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
+ return;
+ }
+
+ if (_Rs_ == _Rt_) {
+ iJump(bpc);
+ } else {
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ if (iRegs[_Rs_].k == iRegs[_Rt_].k) {
+ iJump(bpc); return;
+ } else {
+ iJump(pc+4); return;
+ }
+ } else if (IsConst(_Rs_)) {
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rt_], iRegs[_Rs_].k);
+ } else if (IsConst(_Rt_)) {
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+
+ j32Ptr[4] = JE32(0);
+
+ iBranch(pc+4, 1);
+
+ x86SetJ32(j32Ptr[4]);
+
+ iBranch(bpc, 0);
+ pc+=4;
+ }
+}
+
+static void recBNE() {
+// Branch if Rs != Rt
+ u32 bpc = _Imm_ * 4 + pc;
+
+// iFlushRegs();
+ if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
+ return;
+ }
+
+ if (IsConst(_Rs_) && IsConst(_Rt_)) {
+ if (iRegs[_Rs_].k != iRegs[_Rt_].k) {
+ iJump(bpc); return;
+ } else {
+ iJump(pc+4); return;
+ }
+ } else if (IsConst(_Rs_)) {
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rt_], iRegs[_Rs_].k);
+ } else if (IsConst(_Rt_)) {
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], iRegs[_Rt_].k);
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rs_]);
+ CMP32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ }
+ j32Ptr[4] = JNE32(0);
+
+ iBranch(pc+4, 1);
+
+ x86SetJ32(j32Ptr[4]);
+
+ iBranch(bpc, 0);
+ pc+=4;
+}
+
+static void recBLEZ() {
+// Branch if Rs <= 0
+ u32 bpc = _Imm_ * 4 + pc;
+
+// iFlushRegs();
+ if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
+ return;
+ }
+
+ if (IsConst(_Rs_)) {
+ if ((s32)iRegs[_Rs_].k <= 0) {
+ iJump(bpc); return;
+ } else {
+ iJump(pc+4); return;
+ }
+ }
+
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
+ j32Ptr[4] = JLE32(0);
+
+ iBranch(pc+4, 1);
+
+ x86SetJ32(j32Ptr[4]);
+
+ iBranch(bpc, 0);
+ pc+=4;
+}
+
+static void recBGEZ() {
+// Branch if Rs >= 0
+ u32 bpc = _Imm_ * 4 + pc;
+
+// iFlushRegs();
+ if (bpc == pc+4 && psxTestLoadDelay(_Rs_, PSXMu32(bpc)) == 0) {
+ return;
+ }
+
+ if (IsConst(_Rs_)) {
+ if ((s32)iRegs[_Rs_].k >= 0) {
+ iJump(bpc); return;
+ } else {
+ iJump(pc+4); return;
+ }
+ }
+
+ CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0);
+ j32Ptr[4] = JGE32(0);
+
+ iBranch(pc+4, 1);
+
+ x86SetJ32(j32Ptr[4]);
+
+ iBranch(bpc, 0);
+ pc+=4;
+}
+#endif
+
+#if 0
+REC_FUNC(MFC0);
+REC_SYS(MTC0);
+REC_FUNC(CFC0);
+REC_SYS(CTC0);
+REC_FUNC(RFE);
+#endif
+//REC_SYS(MTC0);
+#if 1
+static void recMFC0() {
+// Rt = Cop0->Rd
+ if (!_Rt_) return;
+
+ iRegs[_Rt_].state = ST_UNK;
+ MOV32MtoR(EAX, (uptr)&psxRegs.CP0.r[_Rd_]);
+ MOV32RtoM((uptr)&psxRegs.GPR.r[_Rt_], EAX);
+}
+
+static void recCFC0() {
+// Rt = Cop0->Rd
+
+ recMFC0();
+}
+
+//*
+void psxMTC0();
+static void recMTC0() {
+// Cop0->Rd = Rt
+
+ if (IsConst(_Rt_)) {
+ switch (_Rd_) {
+ case 12:
+ MOV32ItoM((uptr)&psxRegs.CP0.r[_Rd_], iRegs[_Rt_].k);
+ break;
+ case 13:
+ MOV32ItoM((uptr)&psxRegs.CP0.r[_Rd_], iRegs[_Rt_].k & ~(0xfc00));
+ break;
+ default:
+ MOV32ItoM((uptr)&psxRegs.CP0.r[_Rd_], iRegs[_Rt_].k);
+ break;
+ }
+ } else {
+ MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[_Rt_]);
+ switch (_Rd_) {
+ case 13:
+ AND32ItoR(EAX, ~(0xfc00));
+ break;
+ }
+ MOV32RtoM((uptr)&psxRegs.CP0.r[_Rd_], EAX);
+ }
+
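+	/* writes to Status (12) or Cause (13) can unmask a pending software
+	   interrupt, so check for one right away */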
+ if (_Rd_ == 12 || _Rd_ == 13) {
+ iFlushRegs();
+ MOV32ItoM((uptr)&psxRegs.pc, (u32)pc);
+ CALLFunc((uptr)psxTestSWInts);
+ if (branch == 0) {
+ branch = 2;
+ iRet();
+ }
+ }
+}//*/
+
+static void recCTC0() {
+// Cop0->Rd = Rt
+
+ recMTC0();
+}
+
+static void recRFE() {
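+	/* pop the Status mode stack: copy bits 5:2 (the previous/old KU/IE
+	   pairs) down into bits 3:0 while keeping bits 31:4 intact */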
+ MOV32MtoR(EAX, (uptr)&psxRegs.CP0.n.Status);
+ MOV32RtoR(ECX, EAX);
+ AND32ItoR(EAX, 0xfffffff0);
+ AND32ItoR(ECX, 0x3c);
+ SHR32ItoR(ECX, 2);
+ OR32RtoR (EAX, ECX);
+ MOV32RtoM((uptr)&psxRegs.CP0.n.Status, EAX);
+
+ iFlushRegs();
+ MOV32ItoM((uptr)&psxRegs.pc, (u32)pc);
+ CALLFunc((uptr)psxTestSWInts);
+ if (branch == 0) {
+ branch = 2;
+ iRet();
+ }
+}
+#endif
+
+#include "iGte.h"
+
+//
+
+static void recHLE() {
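+	/* call the HLE BIOS handler selected by the low bits of the opcode */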
+ iFlushRegs();
+
+ CALLFunc((uptr)psxHLEt[psxRegs.code & 0xffff]);
+ branch = 2;
+ iRet();
+}
+
+//
+
+static void (*recBSC[64])() = {
+ recSPECIAL, recREGIMM, recJ , recJAL , recBEQ , recBNE , recBLEZ, recBGTZ,
+ recADDI , recADDIU , recSLTI, recSLTIU, recANDI, recORI , recXORI, recLUI ,
+ recCOP0 , recNULL , recCOP2, recNULL , recNULL, recNULL, recNULL, recNULL,
+ recNULL , recNULL , recNULL, recNULL , recNULL, recNULL, recNULL, recNULL,
+ recLB , recLH , recLWL , recLW , recLBU , recLHU , recLWR , recNULL,
+ recSB , recSH , recSWL , recSW , recNULL, recNULL, recSWR , recNULL,
+ recNULL , recNULL , recLWC2, recNULL , recNULL, recNULL, recNULL, recNULL,
+ recNULL , recNULL , recSWC2, recHLE , recNULL, recNULL, recNULL, recNULL
+};
+
+static void (*recSPC[64])() = {
+ recSLL , recNULL, recSRL , recSRA , recSLLV , recNULL , recSRLV, recSRAV,
+ recJR , recJALR, recNULL, recNULL, recSYSCALL, recBREAK, recNULL, recNULL,
+ recMFHI, recMTHI, recMFLO, recMTLO, recNULL , recNULL , recNULL, recNULL,
+ recMULT, recMULTU, recDIV, recDIVU, recNULL , recNULL , recNULL, recNULL,
+ recADD , recADDU, recSUB , recSUBU, recAND , recOR , recXOR , recNOR ,
+ recNULL, recNULL, recSLT , recSLTU, recNULL , recNULL , recNULL, recNULL,
+ recNULL, recNULL, recNULL, recNULL, recNULL , recNULL , recNULL, recNULL,
+ recNULL, recNULL, recNULL, recNULL, recNULL , recNULL , recNULL, recNULL
+};
+
+static void (*recREG[32])() = {
+ recBLTZ , recBGEZ , recNULL, recNULL, recNULL, recNULL, recNULL, recNULL,
+ recNULL , recNULL , recNULL, recNULL, recNULL, recNULL, recNULL, recNULL,
+ recBLTZAL, recBGEZAL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL,
+ recNULL , recNULL , recNULL, recNULL, recNULL, recNULL, recNULL, recNULL
+};
+
+static void (*recCP0[32])() = {
+ recMFC0, recNULL, recCFC0, recNULL, recMTC0, recNULL, recCTC0, recNULL,
+ recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL,
+ recRFE , recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL,
+ recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL
+};
+
+static void (*recCP2[64])() = {
+ recBASIC, recRTPS , recNULL , recNULL, recNULL, recNULL , recNCLIP, recNULL, // 00
+ recNULL , recNULL , recNULL , recNULL, recOP , recNULL , recNULL , recNULL, // 08
+ recDPCS , recINTPL, recMVMVA, recNCDS, recCDP , recNULL , recNCDT , recNULL, // 10
+ recNULL , recNULL , recNULL , recNCCS, recCC , recNULL , recNCS , recNULL, // 18
+ recNCT , recNULL , recNULL , recNULL, recNULL, recNULL , recNULL , recNULL, // 20
+ recSQR , recDCPL , recDPCT , recNULL, recNULL, recAVSZ3, recAVSZ4, recNULL, // 28
+ recRTPT , recNULL , recNULL , recNULL, recNULL, recNULL , recNULL , recNULL, // 30
+ recNULL , recNULL , recNULL , recNULL, recNULL, recGPF , recGPL , recNCCT // 38
+};
+
+static void (*recCP2BSC[32])() = {
+ recMFC2, recNULL, recCFC2, recNULL, recMTC2, recNULL, recCTC2, recNULL,
+ recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL,
+ recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL,
+ recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL, recNULL
+};
+
+
+static void recRecompile() {
+ char *p;
+ char *ptr;
+
+ dump = 0;
+ resp = 0;
+
+ /* if x86Ptr reached the mem limit reset whole mem */
+ if (((uptr)x86Ptr - (uptr)recMem) >= (RECMEM_SIZE - PTRMULT*0x10000))
+ recReset();
+
+ x86Align(32);
+ ptr = x86Ptr;
+
+ PC_RECP(psxRegs.pc) = x86Ptr;
+ pc = psxRegs.pc;
+ pcold = pc;
+
+	// Reserve stack space for function-argument spill (x86-64 calling convention).
+	// 0x38 bytes covers 7 arguments, which should be plenty.
+ SUB64ItoR(RSP, STACKSIZE);
+
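+	/* compile until a branch closes the block, capped at 500 instructions */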
+ for (count=0; count<500;) {
+ p = (char *)PSXM(pc);
+ if (p == NULL) recError();
+ psxRegs.code = *(u32 *)p;
+/*
+ if ((psxRegs.code >> 26) == 0x23) { // LW
+ int i;
+ u32 code;
+
+ for (i=1;; i++) {
+ p = (char *)PSXM(pc+i*4);
+ if (p == NULL) recError();
+ code = *(u32 *)p;
+
+ if ((code >> 26) != 0x23 ||
+ _fRs_(code) != _Rs_ ||
+ _fImm_(code) != (_Imm_+i*4))
+ break;
+ }
+ if (i > 1) {
+ recLWBlock(i);
+ pc = pc + i*4; continue;
+ }
+ }
+
+ if ((psxRegs.code >> 26) == 0x2b) { // SW
+ int i;
+ u32 code;
+
+ for (i=1;; i++) {
+ p = (char *)PSXM(pc+i*4);
+ if (p == NULL) recError();
+ code = *(u32 *)p;
+
+ if ((code >> 26) != 0x2b ||
+ _fRs_(code) != _Rs_ ||
+ _fImm_(code) != (_Imm_+i*4))
+ break;
+ }
+ if (i > 1) {
+ recSWBlock(i);
+ pc = pc + i*4; continue;
+ }
+ }*/
+
+ pc+=4; count++;
+ recBSC[psxRegs.code>>26]();
+
+ if (branch) {
+ branch = 0;
+ if (dump) iDumpBlock(ptr);
+ return;
+ }
+ }
+
+ iFlushRegs();
+
+ MOV32ItoM((uptr)&psxRegs.pc, pc);
+ iRet();
+}
+
+
+R3000Acpu psxRec = {
+ recInit,
+ recReset,
+ recExecute,
+ recExecuteBlock,
+ recClear,
+ recShutdown
+};
diff --git a/libpcsxcore/ix86_64/ix86-64.c b/libpcsxcore/ix86_64/ix86-64.c
new file mode 100644
index 0000000..0582f35
--- /dev/null
+++ b/libpcsxcore/ix86_64/ix86-64.c
@@ -0,0 +1,3139 @@
+/*
+ * ix86 core v0.6.2
+ * Authors: linuzappz <linuzappz@pcsx.net>
+ * alexey silinov
+ * goldfinger
+ * zerofrog(@gmail.com)
+ */
+
+// skip compiling this file for NORECBUILD builds (Visual Studio only)
+#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD))
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include "ix86-64.h"
+
+#ifdef __x86_64__
+
+#ifdef _MSC_VER
+// visual studio calling convention
+x86IntRegType g_x86savedregs[] = { RBX, RBP, RSI, RDI, R12, R13, R14, R15 };
+x86IntRegType g_x86tempregs[] = { R8, R9, R10, R11, RDX, RCX };
+
+// arranged in savedreg -> tempreg order
+x86IntRegType g_x86allregs[14] = { RBX, RBP, RSI, RDI, R12, R13, R14, R15, R8, R9, R10, R11, RDX, RCX };
+
+#else
+// standard calling convention
+
+// registers saved by called functions (no need to flush them across calls)
+x86IntRegType g_x86savedregs[] = { RBX, RBP, R12, R13, R14, R15 };
+// temp registers that need to be saved across calls
+x86IntRegType g_x86tempregs[] = { RCX, RDX, R8, R9, R10, R11, RSI, RDI };
+
+// arranged in savedreg -> tempreg order
+x86IntRegType g_x86allregs[14] = { RBX, RBP, R12, R13, R14, R15, RCX, RDX, R8, R9, R10, R11, RSI, RDI };
+
+#endif
+
+x86IntRegType g_x868bitregs[11] = { RBX, R12, R13, R14, R15, RCX, RDX, R8, R9, R10, R11 };
+x86IntRegType g_x86non8bitregs[3] = { RBP, RSI, RDI };
+
+#endif // __x86_64__
+
+s8 *x86Ptr;
+u8 *j8Ptr[32];
+u32 *j32Ptr[32];
+
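+/* Emit the ModRM byte (plus a SIB byte when the base is ESP/RSP) for a
+   [reg+offset] operand, picking disp0/disp8/disp32 as the offset allows. */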
+void WriteRmOffset(x86IntRegType to, int offset)
+{
+ if( (to&7) == ESP ) {
+ if( offset == 0 ) {
+ ModRM( 0, 0, 4 );
+ ModRM( 0, ESP, 4 );
+ }
+ else if( offset < 128 && offset >= -128 ) {
+ ModRM( 1, 0, 4 );
+ ModRM( 0, ESP, 4 );
+ write8(offset);
+ }
+ else {
+ ModRM( 2, 0, 4 );
+ ModRM( 0, ESP, 4 );
+ write32(offset);
+ }
+ }
+ else {
+ if( offset == 0 ) {
+ ModRM( 0, 0, to );
+ }
+ else if( offset < 128 && offset >= -128 ) {
+ ModRM( 1, 0, to );
+ write8(offset);
+ }
+ else {
+ ModRM( 2, 0, to );
+ write32(offset);
+ }
+ }
+}
+
+void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset)
+{
+ if ((from&7) == ESP) {
+ if( offset == 0 ) {
+ ModRM( 0, to, 0x4 );
+ SibSB( 0, 0x4, 0x4 );
+ }
+ else if( offset < 128 && offset >= -128 ) {
+ ModRM( 1, to, 0x4 );
+ SibSB( 0, 0x4, 0x4 );
+ write8(offset);
+ }
+ else {
+ ModRM( 2, to, 0x4 );
+ SibSB( 0, 0x4, 0x4 );
+ write32(offset);
+ }
+ }
+ else {
+ if( offset == 0 ) {
+ ModRM( 0, to, from );
+ }
+ else if( offset < 128 && offset >= -128 ) {
+ ModRM( 1, to, from );
+ write8(offset);
+ }
+ else {
+ ModRM( 2, to, from );
+ write32(offset);
+ }
+ }
+}
+
+// This function is just for rec debugging purposes
+void CheckX86Ptr( void )
+{
+}
+
+void writeVAROP(unsigned opl, u64 op)
+{
+ while (opl--)
+ {
+ write8(op & 0xFF);
+ op >>= 8;
+ }
+}
+
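+/* Emit a multi-byte opcode preceded by a REX prefix. Legacy prefixes
+   (0x66/0xF3/0xF2) must come before REX, so one is peeled off the front
+   of the opcode first. */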
+#define writeVARROP(REX, opl, op) ({ \
+ if (opl > 1 && ((op & 0xFF) == 0x66 || (op & 0xFF) == 0xF3 || (op & 0xFF) == 0xF2)) { \
+ write8(op & 0xFF); \
+ opl --; \
+ op >>= 8; \
+ } \
+ REX; \
+ writeVAROP(opl, op); \
+ })
+
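+/* Emit an instruction with a memory operand at absolute address p.
+   On x86-64: use RIP-relative disp32 when the target is within +/-2GB
+   of the next instruction, else a 32-bit absolute address via SIB,
+   else load the address into a scratch register. */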
+void MEMADDR_OP(bool w, unsigned opl, u64 op, bool isreg, int reg, uptr p, sptr off)
+{
+#ifdef __x86_64__
+ sptr pr = MEMADDR_(p, 5 + opl + (w || reg >= 8) + off);
+ if (SPTR32(pr))
+ {
+ writeVARROP(RexR(w, reg), opl, op);
+ ModRM(0, reg, DISP32);
+ write32(pr);
+ }
+ else if (UPTR32(p))
+ {
+ writeVARROP(RexR(w, reg), opl, op);
+ ModRM(0, reg, SIB);
+ SibSB(0, SIB, DISP32);
+ write32(p);
+ }
+ else
+ {
+ assert(!isreg || reg != X86_TEMP);
+ MOV64ItoR(X86_TEMP, p);
+ writeVARROP(RexRB(w, reg, X86_TEMP), opl, op);
+ ModRM(0, reg, X86_TEMP);
+ }
+#else
+ writeVARROP(RexR(w, reg), opl, op);
+ ModRM(0, reg, DISP32);
+ write32(p);
+#endif
+}
+
+void SET8R( int cc, int to )
+{
+	RexB(0, to);
+	write8( 0x0F );
+	write8( cc );
+	write8( 0xC0 | ( to & 0x7 ) );
+}
+
+u8* J8Rel( int cc, int to )
+{
+ write8( cc );
+ write8( to );
+ return x86Ptr - 1;
+}
+
+u16* J16Rel( int cc, u32 to )
+{
+ write16( 0x0F66 );
+ write8( cc );
+ write16( to );
+ return (u16*)( x86Ptr - 2 );
+}
+
+u32* J32Rel( int cc, u32 to )
+{
+ write8( 0x0F );
+ write8( cc );
+ write32( to );
+ return (u32*)( x86Ptr - 4 );
+}
+
+void CMOV32RtoR( int cc, int to, int from )
+{
+ RexRB(0,to, from);
+ write8( 0x0F );
+ write8( cc );
+ ModRM( 3, to, from );
+}
+
+void CMOV32MtoR( int cc, x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, cc), true, to, from, 0);
+}
+
+////////////////////////////////////////////////////
+void x86SetPtr( char* ptr )
+{
+ x86Ptr = ptr;
+}
+
+////////////////////////////////////////////////////
+void x86Shutdown( void )
+{
+}
+
+////////////////////////////////////////////////////
+void x86SetJ8( u8* j8 )
+{
+ u32 jump = ( x86Ptr - (s8*)j8 ) - 1;
+
+ if ( jump > 0x7f ) {
+ assert(0);
+ SysPrintf( "j8 greater than 0x7f!!\n" );
+ }
+ *j8 = (u8)jump;
+}
+
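+/* Same as x86SetJ8, but when near a 16-byte boundary pads the target
+   with NOPs so the code that follows starts aligned. */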
+void x86SetJ8A( u8* j8 )
+{
+ u32 jump = ( x86Ptr - (s8*)j8 ) - 1;
+
+ if ( jump > 0x7f ) {
+ assert(0);
+ SysPrintf( "j8 greater than 0x7f!!\n" );
+ }
+
+ if( ((uptr)x86Ptr&0xf) > 4 ) {
+
+ uptr newjump = jump + 16-((uptr)x86Ptr&0xf);
+
+ if( newjump <= 0x7f ) {
+ jump = newjump;
+ while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
+ }
+ }
+ *j8 = (u8)jump;
+}
+
+void x86SetJ16( u16 *j16 )
+{
+	// FIXME: this rel16 fixup is known not to work
+ u32 jump = ( x86Ptr - (s8*)j16 ) - 2;
+
+ if ( jump > 0x7fff ) {
+ assert(0);
+ SysPrintf( "j16 greater than 0x7fff!!\n" );
+ }
+ *j16 = (u16)jump;
+}
+
+void x86SetJ16A( u16 *j16 )
+{
+ if( ((uptr)x86Ptr&0xf) > 4 ) {
+ while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
+ }
+ x86SetJ16(j16);
+}
+
+////////////////////////////////////////////////////
+void x86SetJ32( u32* j32 )
+{
+ *j32 = ( x86Ptr - (s8*)j32 ) - 4;
+}
+
+void x86SetJ32A( u32* j32 )
+{
+ while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90;
+ x86SetJ32(j32);
+}
+
+////////////////////////////////////////////////////
+void x86Align( int bytes )
+{
+	// forward-align to the requested boundary
+ x86Ptr = (s8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) );
+}
+
+/********************/
+/* IX86 instructions */
+/********************/
+
+void STC( void )
+{
+ write8( 0xF9 );
+}
+
+void CLC( void )
+{
+ write8( 0xF8 );
+}
+
+////////////////////////////////////
+// mov instructions /
+////////////////////////////////////
+
+/* mov r64 to r64 */
+void MOV64RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1, from, to);
+ write8( 0x89 );
+ ModRM( 3, from, to );
+}
+
+/* mov r64 to m64 */
+void MOV64RtoM( uptr to, x86IntRegType from )
+{
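+	/* moves between the accumulator and an absolute address have a
+	   dedicated short moffs encoding (0xA0-0xA3) */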
+ if (from == RAX)
+ {
+ RexR(1, 0);
+ write8(0xA3);
+ write64(to);
+ }
+ else
+ {
+ MEMADDR_OP(1, VAROP1(0x89), true, from, to, 0);
+ }
+}
+
+/* mov m64 to r64 */
+void MOV64MtoR( x86IntRegType to, uptr from )
+{
+ if (to == RAX)
+ {
+ RexR(1, 0);
+ write8(0xA1);
+ write64(from);
+ }
+ else
+ {
+ MEMADDR_OP(1, VAROP1(0x8B), true, to, from, 0);
+ }
+}
+
+/* mov imm32 to m64 */
+void MOV64I32toM(uptr to, u32 from )
+{
+ MEMADDR_OP(1, VAROP1(0xC7), false, 0, to, 4);
+ write32(from);
+}
+
+// mov imm64 to r64
+void MOV64ItoR( x86IntRegType to, u64 from)
+{
+ RexB(1, to);
+ write8( 0xB8 | (to & 0x7) );
+ write64( from );
+}
+
+/* mov imm32 to r64 */
+void MOV64I32toR( x86IntRegType to, s32 from )
+{
+ RexB(1, to);
+ write8( 0xC7 );
+ ModRM( 0, 0, to );
+ write32( from );
+}
+
+// mov imm64 to [r64+off]
+void MOV64ItoRmOffset( x86IntRegType to, u32 from, int offset)
+{
+ RexB(1,to);
+ write8( 0xC7 );
+ WriteRmOffset(to, offset);
+ write32(from);
+}
+
+// mov [r64+offset] to r64
+void MOV64RmOffsettoR( x86IntRegType to, x86IntRegType from, int offset )
+{
+ RexRB(1, to, from);
+ write8( 0x8B );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+/* mov [r64][r64*scale] to r64 */
+void MOV64RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) {
+ RexRXB(1, to, from2, from);
+ write8( 0x8B );
+ ModRM( 0, to, 0x4 );
+ SibSB(scale, from2, from );
+}
+
+/* mov r64 to [r64+offset] */
+void MOV64RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset )
+{
+ RexRB(1,from,to);
+ write8( 0x89 );
+ WriteRmOffsetFrom(from, to, offset);
+}
+
+/* mov r64 to [r64][r64*scale] */
+void MOV64RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) {
+ RexRXB(1, to, from2, from);
+ write8( 0x89 );
+ ModRM( 0, to, 0x4 );
+ SibSB(scale, from2, from );
+}
+
+
+/* mov r32 to r32 */
+void MOV32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, from, to);
+ write8( 0x89 );
+ ModRM( 3, from, to );
+}
+
+/* mov r32 to m32 */
+void MOV32RtoM( uptr to, x86IntRegType from )
+{
+ if (from == EAX)
+ {
+ write8(0xA3);
+ write64(to);
+ }
+ else
+ {
+ MEMADDR_OP(0, VAROP1(0x89), true, from, to, 0);
+ }
+}
+
+/* mov m32 to r32 */
+void MOV32MtoR( x86IntRegType to, uptr from )
+{
+ if (to == RAX)
+ {
+ write8(0xA1);
+ write64(from);
+ }
+ else
+ {
+ MEMADDR_OP(0, VAROP1(0x8B), true, to, from, 0);
+ }
+}
+
+/* mov [r32] to r32 */
+void MOV32RmtoR( x86IntRegType to, x86IntRegType from ) {
+ RexRB(0, to, from);
+ write8(0x8B);
+ WriteRmOffsetFrom(to, from, 0);
+}
+
+void MOV32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) {
+ RexRB(0, to, from);
+ write8( 0x8B );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+/* mov [r32+r32*scale] to r32 */
+void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) {
+ RexRXB(0,to,from2,from);
+ write8( 0x8B );
+ ModRM( 0, to, 0x4 );
+ SibSB(scale, from2, from );
+}
+
+// mov [from2 + r32*scale] to r32
+void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, int from2, int scale )
+{
+ RexRXB(0,to,from1,0);
+ write8( 0x8B );
+ ModRM( 0, to, 0x4 );
+ ModRM( scale, from1, 5);
+ write32(from2);
+}
+
+/* mov r32 to [r32] */
+void MOV32RtoRm( x86IntRegType to, x86IntRegType from ) {
+ RexRB(0, from, to);
+ if ((to&7) == ESP) {
+ write8( 0x89 );
+ ModRM( 0, from, 0x4 );
+ SibSB( 0, 0x4, 0x4 );
+ } else {
+ write8( 0x89 );
+ ModRM( 0, from, to );
+ }
+}
+
+/* mov r32 to [r32][r32*scale] */
+void MOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) {
+ RexRXB(0, to, from2, from);
+ write8( 0x89 );
+ ModRM( 0, to, 0x4 );
+ SibSB(scale, from2, from );
+}
+
+/* mov imm32 to r32 */
+void MOV32ItoR( x86IntRegType to, u32 from )
+{
+ RexB(0, to);
+ write8( 0xB8 | (to & 0x7) );
+ write32( from );
+}
+
+/* mov imm32 to m32 */
+void MOV32ItoM(uptr to, u32 from )
+{
+ MEMADDR_OP(0, VAROP1(0xC7), false, 0, to, 4);
+ write32(from);
+}
+
+// mov imm32 to [r32+off]
+void MOV32ItoRmOffset( x86IntRegType to, u32 from, int offset)
+{
+ RexB(0,to);
+ write8( 0xC7 );
+ WriteRmOffset(to, offset);
+ write32(from);
+}
+
+// mov r32 to [r32+off]
+void MOV32RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset)
+{
+ RexRB(0,from,to);
+ write8( 0x89 );
+ WriteRmOffsetFrom(from, to, offset);
+}
+
+/* mov r16 to m16 */
+void MOV16RtoM(uptr to, x86IntRegType from )
+{
+ if (from == EAX)
+ {
+ write8(0x66);
+ write8(0xA3);
+ write64(to);
+ }
+ else
+ {
+ MEMADDR_OP(0, VAROP2(0x66, 0x89), true, from, to, 0);
+ }
+}
+
+/* mov m16 to r16 */
+void MOV16MtoR( x86IntRegType to, uptr from )
+{
+ if (to == EAX)
+ {
+ write8(0x66);
+ write8(0xA1);
+ write64(from);
+ }
+ else
+ {
+ MEMADDR_OP(0, VAROP2(0x66, 0x8B), true, to, from, 0);
+ }
+}
+
+void MOV16RmtoR( x86IntRegType to, x86IntRegType from)
+{
+ write8( 0x66 );
+ RexRB(0,to,from);
+ write8( 0x8B );
+ WriteRmOffsetFrom(to, from, 0);
+}
+
+void MOV16RmtoROffset( x86IntRegType to, x86IntRegType from, int offset )
+{
+ write8( 0x66 );
+ RexRB(0,to,from);
+ write8( 0x8B );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale )
+{
+ write8(0x66);
+ RexRXB(0,to,from1,0);
+ write8( 0x8B );
+ ModRM( 0, to, 0x4 );
+ ModRM( scale, from1, 5);
+ write32(from2);
+}
+
+void MOV16RtoRm(x86IntRegType to, x86IntRegType from)
+{
+ write8( 0x66 );
+ RexRB(0,from,to);
+ write8( 0x89 );
+ ModRM( 0, from, to );
+}
+
+/* mov imm16 to m16 */
+void MOV16ItoM( uptr to, u16 from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0xC7), false, 0, to, 2);
+ write16( from );
+}
+
+/* mov r16 to [r32][r32*scale] */
+void MOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) {
+ write8( 0x66 );
+ RexRXB(0,to,from2,from);
+ write8( 0x89 );
+ ModRM( 0, to, 0x4 );
+ SibSB(scale, from2, from );
+}
+
+void MOV16ItoR( x86IntRegType to, u16 from )
+{
+	/* the 0x66 operand-size prefix must precede any REX prefix */
+	write8( 0x66 );
+	RexB(0, to);
+	write8( 0xB8 | (to & 0x7) );
+	write16( from );
+}
+
+// mov imm16 to [r32+off]
+void MOV16ItoRmOffset( x86IntRegType to, u16 from, u32 offset)
+{
+ write8(0x66);
+ RexB(0,to);
+ write8( 0xC7 );
+ WriteRmOffset(to, offset);
+ write16(from);
+}
+
+// mov r16 to [r32+off]
+void MOV16RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset)
+{
+ write8(0x66);
+ RexRB(0,from,to);
+ write8( 0x89 );
+ WriteRmOffsetFrom(from, to, offset);
+}
+
+/* mov r8 to m8 */
+void MOV8RtoM( uptr to, x86IntRegType from )
+{
+ if (from == EAX)
+ {
+ write8(0xA2);
+ write64(to);
+ }
+ else
+ {
+ MEMADDR_OP(0, VAROP1(0x88), true, from, to, 0);
+ }
+}
+
+/* mov m8 to r8 */
+void MOV8MtoR( x86IntRegType to, uptr from )
+{
+ if (to == EAX)
+ {
+ write8(0xA0);
+ write64(from);
+ }
+ else
+ {
+ MEMADDR_OP(0, VAROP1(0x8A), true, to, from, 0);
+ }
+}
+
+/* mov [r32] to r8 */
+void MOV8RmtoR(x86IntRegType to, x86IntRegType from)
+{
+ RexRB(0,to,from);
+ write8( 0x8A );
+ WriteRmOffsetFrom(to, from, 0);
+}
+
+void MOV8RmtoROffset(x86IntRegType to, x86IntRegType from, int offset)
+{
+ RexRB(0,to,from);
+ write8( 0x8A );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+void MOV8RtoRm(x86IntRegType to, x86IntRegType from)
+{
+ RexRB(0,from,to);
+ write8( 0x88 );
+ WriteRmOffsetFrom(from, to, 0);
+}
+
+/* mov imm8 to m8 */
+void MOV8ItoM( uptr to, u8 from )
+{
+ MEMADDR_OP(0, VAROP1(0xC6), false, 0, to, 1);
+ write8( from );
+}
+
+// mov imm8 to r8
+void MOV8ItoR( x86IntRegType to, u8 from )
+{
+ RexB(0, to);
+ write8( 0xB0 | (to & 0x7) );
+ write8( from );
+}
+
+// mov imm8 to [r32+off]
+void MOV8ItoRmOffset( x86IntRegType to, u8 from, int offset)
+{
+ assert( to != ESP );
+ RexB(0,to);
+ write8( 0xC6 );
+ WriteRmOffset(to,offset);
+ write8(from);
+}
+
+// mov r8 to [r32+off]
+void MOV8RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset)
+{
+ assert( to != ESP );
+ RexRB(0,from,to);
+ write8( 0x88 );
+ WriteRmOffsetFrom(from,to,offset);
+}
+
+/* movsx r8 to r32 */
+void MOVSX32R8toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write16( 0xBE0F );
+ ModRM( 3, to, from );
+}
+
+void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write16( 0xBE0F );
+ ModRM( 0, to, from );
+}
+
+void MOVSX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset )
+{
+ RexRB(0,to,from);
+ write16( 0xBE0F );
+ WriteRmOffsetFrom(to,from,offset);
+}
+
+/* movsx m8 to r32 */
+void MOVSX32M8toR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xBE), true, to, from, 0);
+}
+
+/* movsx r16 to r32 */
+void MOVSX32R16toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write16( 0xBF0F );
+ ModRM( 3, to, from );
+}
+
+void MOVSX32Rm16toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write16( 0xBF0F );
+ ModRM( 0, to, from );
+}
+
+void MOVSX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset )
+{
+ RexRB(0,to,from);
+ write16( 0xBF0F );
+ WriteRmOffsetFrom(to,from,offset);
+}
+
+/* movsx m16 to r32 */
+void MOVSX32M16toR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xBF), true, to, from, 0);
+}
+
+/* movzx r8 to r32 */
+void MOVZX32R8toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write16( 0xB60F );
+ ModRM( 3, to, from );
+}
+
+void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write16( 0xB60F );
+ ModRM( 0, to, from );
+}
+
+void MOVZX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset )
+{
+ RexRB(0,to,from);
+ write16( 0xB60F );
+ WriteRmOffsetFrom(to,from,offset);
+}
+
+/* movzx m8 to r32 */
+void MOVZX32M8toR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xB6), true, to, from, 0);
+}
+
+/* movzx r16 to r32 */
+void MOVZX32R16toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write16( 0xB70F );
+ ModRM( 3, to, from );
+}
+
+void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write16( 0xB70F );
+ ModRM( 0, to, from );
+}
+
+void MOVZX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset )
+{
+ RexRB(0,to,from);
+ write16( 0xB70F );
+ WriteRmOffsetFrom(to,from,offset);
+}
+
+/* movzx m16 to r32 */
+void MOVZX32M16toR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xB7), true, to, from, 0);
+}
+
+#ifdef __x86_64__
+
+/* movzx r8 to r64 */
+void MOVZX64R8toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1,to,from);
+ write16( 0xB60F );
+ ModRM( 3, to, from );
+}
+
+void MOVZX64Rm8toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1,to,from);
+ write16( 0xB60F );
+ ModRM( 0, to, from );
+}
+
+void MOVZX64Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset )
+{
+ RexRB(1,to,from);
+ write16( 0xB60F );
+ WriteRmOffsetFrom(to,from,offset);
+}
+
+/* movzx m8 to r64 */
+void MOVZX64M8toR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(1, VAROP2(0x0F, 0xB6), true, to, from, 0);
+}
+
+/* movzx r16 to r64 */
+void MOVZX64R16toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1,to,from);
+ write16( 0xB70F );
+ ModRM( 3, to, from );
+}
+
+void MOVZX64Rm16toR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1,to,from);
+ write16( 0xB70F );
+ ModRM( 0, to, from );
+}
+
+void MOVZX64Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset )
+{
+ RexRB(1,to,from);
+ write16( 0xB70F );
+ WriteRmOffsetFrom(to,from,offset);
+}
+
+/* movzx m16 to r64 */
+void MOVZX64M16toR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(1, VAROP2(0x0F, 0xB7), true, to, from, 0);
+}
+#endif
+
+/* cmovbe r32 to r32 */
+void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x46, to, from );
+}
+
+/* cmovbe m32 to r32*/
+void CMOVBE32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x46, to, from );
+}
+
+/* cmovb r32 to r32 */
+void CMOVB32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x42, to, from );
+}
+
+/* cmovb m32 to r32*/
+void CMOVB32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x42, to, from );
+}
+
+/* cmovae r32 to r32 */
+void CMOVAE32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x43, to, from );
+}
+
+/* cmovae m32 to r32*/
+void CMOVAE32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x43, to, from );
+}
+
+/* cmova r32 to r32 */
+void CMOVA32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x47, to, from );
+}
+
+/* cmova m32 to r32*/
+void CMOVA32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x47, to, from );
+}
+
+/* cmovo r32 to r32 */
+void CMOVO32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x40, to, from );
+}
+
+/* cmovo m32 to r32 */
+void CMOVO32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x40, to, from );
+}
+
+/* cmovp r32 to r32 */
+void CMOVP32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x4A, to, from );
+}
+
+/* cmovp m32 to r32 */
+void CMOVP32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x4A, to, from );
+}
+
+/* cmovs r32 to r32 */
+void CMOVS32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x48, to, from );
+}
+
+/* cmovs m32 to r32 */
+void CMOVS32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x48, to, from );
+}
+
+/* cmovno r32 to r32 */
+void CMOVNO32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x41, to, from );
+}
+
+/* cmovno m32 to r32 */
+void CMOVNO32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x41, to, from );
+}
+
+/* cmovnp r32 to r32 */
+void CMOVNP32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x4B, to, from );
+}
+
+/* cmovnp m32 to r32 */
+void CMOVNP32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x4B, to, from );
+}
+
+/* cmovns r32 to r32 */
+void CMOVNS32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x49, to, from );
+}
+
+/* cmovns m32 to r32 */
+void CMOVNS32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x49, to, from );
+}
+
+/* cmovne r32 to r32 */
+void CMOVNE32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x45, to, from );
+}
+
+/* cmovne m32 to r32*/
+void CMOVNE32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x45, to, from );
+}
+
+/* cmove r32 to r32*/
+void CMOVE32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x44, to, from );
+}
+
+/* cmove m32 to r32*/
+void CMOVE32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x44, to, from );
+}
+
+/* cmovg r32 to r32*/
+void CMOVG32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x4F, to, from );
+}
+
+/* cmovg m32 to r32*/
+void CMOVG32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x4F, to, from );
+}
+
+/* cmovge r32 to r32*/
+void CMOVGE32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x4D, to, from );
+}
+
+/* cmovge m32 to r32*/
+void CMOVGE32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x4D, to, from );
+}
+
+/* cmovl r32 to r32*/
+void CMOVL32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x4C, to, from );
+}
+
+/* cmovl m32 to r32*/
+void CMOVL32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x4C, to, from );
+}
+
+/* cmovle r32 to r32*/
+void CMOVLE32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ CMOV32RtoR( 0x4E, to, from );
+}
+
+/* cmovle m32 to r32*/
+void CMOVLE32MtoR( x86IntRegType to, uptr from )
+{
+ CMOV32MtoR( 0x4E, to, from );
+}
+
+////////////////////////////////////
+// arithmetic instructions /
+////////////////////////////////////
+
+/* add imm32 to r64 */
+void ADD64ItoR( x86IntRegType to, u32 from )
+{
+ RexB(1, to);
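+	/* values <= 0x7f sign-extend unchanged, so the shorter imm8 form (0x83 /0) works */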
+ if (from <= 0x7f)
+ {
+ write8(0x83);
+ ModRM( 3, 0, to );
+ write8(from);
+ }
+ else
+ {
+ if (to == RAX) {
+ write8( 0x05 );
+ } else {
+ write8( 0x81 );
+ ModRM( 3, 0, to );
+ }
+ write32( from );
+ }
+}
+
+/* add m64 to r64 */
+void ADD64MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(1, VAROP1(0x03), true, to, from, 0);
+}
+
+/* add r64 to r64 */
+void ADD64RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1, from, to);
+ write8( 0x01 );
+ ModRM( 3, from, to );
+}
+
+/* add imm32 to r32 */
+void ADD32ItoR( x86IntRegType to, u32 from )
+{
+ RexB(0, to);
+ if ( to == EAX) {
+ write8( 0x05 );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 0, to );
+ }
+ write32( from );
+}
+
+/* add imm32 to m32 */
+void ADD32ItoM( uptr to, u32 from )
+{
+ MEMADDR_OP(0, VAROP1(0x81), false, 0, to, 4);
+ write32(from);
+}
+
+// add imm32 to [r32+off]
+void ADD32ItoRmOffset( x86IntRegType to, u32 from, int offset)
+{
+ RexB(0,to);
+ write8( 0x81 );
+ WriteRmOffset(to,offset);
+ write32(from);
+}
+
+/* add r32 to r32 */
+void ADD32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, from, to);
+ write8( 0x01 );
+ ModRM( 3, from, to );
+}
+
+/* add r32 to m32 */
+void ADD32RtoM(uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP1(0x01), true, from, to, 0);
+}
+
+/* add m32 to r32 */
+void ADD32MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x03), true, to, from, 0);
+}
+
+// add r16 to r16
+void ADD16RtoR( x86IntRegType to , x86IntRegType from )
+{
+ write8(0x66);
+ RexRB(0,to,from);
+ write8( 0x03 );
+ ModRM( 3, to, from );
+}
+
+/* add imm16 to r16 */
+void ADD16ItoR( x86IntRegType to, u16 from )
+{
+ write8( 0x66 );
+ RexB(0,to);
+ if ( to == EAX)
+ {
+ write8( 0x05 );
+ }
+ else
+ {
+ write8( 0x81 );
+ ModRM( 3, 0, to );
+ }
+ write16( from );
+}
+
+/* add imm16 to m16 */
+void ADD16ItoM( uptr to, u16 from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 0, to, 2);
+ write16( from );
+}
+
+/* add r16 to m16 */
+void ADD16RtoM(uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x01), true, from, to, 0);
+}
+
+/* add m16 to r16 */
+void ADD16MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x03), true, to, from, 0);
+}
+
+// add m8 to r8
+void ADD8MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x02), true, to, from, 0);
+}
+
+/* adc imm32 to r32 */
+void ADC32ItoR( x86IntRegType to, u32 from )
+{
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x15 );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 2, to );
+ }
+ write32( from );
+}
+
+/* adc imm32 to m32 */
+void ADC32ItoM( uptr to, u32 from )
+{
+ MEMADDR_OP(0, VAROP1(0x81), false, 2, to, 4);
+ write32(from);
+}
+
+/* adc r32 to r32 */
+void ADC32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,from,to);
+ write8( 0x11 );
+ ModRM( 3, from, to );
+}
+
+/* adc m32 to r32 */
+void ADC32MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x13), true, to, from, 0);
+}
+
+// adc r32 to m32
+void ADC32RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP1(0x11), true, from, to, 0);
+}
+
+
+#ifdef __x86_64__
+void INC32R( x86IntRegType to )
+{
+ write8( 0xFF );
+ ModRM(3,0,to);
+}
+#else
+/* inc r32 */
+void INC32R( x86IntRegType to )
+{
+ X86_64ASSERT();
+ write8( 0x40 + to );
+}
+#endif
+/* inc m32 */
+void INC32M( uptr to )
+{
+ MEMADDR_OP(0, VAROP1(0xFF), false, 0, to, 0);
+}
+
+/* inc r16 */
+void INC16R( x86IntRegType to )
+{
+ X86_64ASSERT();
+ write8( 0x66 );
+ write8( 0x40 + to );
+}
+
+/* inc m16 */
+void INC16M( uptr to )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0xFF), false, 0, to, 0);
+}
+
+
+/* sub imm32 to r64 */
+void SUB64ItoR( x86IntRegType to, u32 from )
+{
+ RexB(1, to);
+ if (from <= 0x7f)
+ {
+ write8(0x83);
+ ModRM( 3, 5, to );
+ write8(from);
+ }
+ else
+ {
+ if ( to == RAX ) {
+ write8( 0x2D );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 5, to );
+ }
+ write32( from );
+ }
+}
+
+/* sub r64 to r64 */
+void SUB64RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1, from, to);
+ write8( 0x29 );
+ ModRM( 3, from, to );
+}
+
+/* sub m64 to r64 */
+void SUB64MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(1, VAROP1(0x2B), true, to, from, 0);
+}
+
+/* sub imm32 to r32 */
+void SUB32ItoR( x86IntRegType to, u32 from )
+{
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x2D );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 5, to );
+ }
+ write32( from );
+}
+
+/* sub imm32 to m32 */
+void SUB32ItoM( uptr to, u32 from )
+{
+ MEMADDR_OP(0, VAROP1(0x81), false, 5, to, 4);
+ write32(from);
+}
+
+/* sub r32 to r32 */
+void SUB32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, from, to);
+ write8( 0x29 );
+ ModRM( 3, from, to );
+}
+
+/* sub m32 to r32 */
+void SUB32MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x2B), true, to, from, 0);
+}
+
+// sub r32 to m32
+void SUB32RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP1(0x29), true, from, to, 0);
+}
+
+// sub r16 to r16
+void SUB16RtoR( x86IntRegType to, u16 from )
+{
+ write8(0x66);
+ RexRB(0,to,from);
+ write8( 0x2b );
+ ModRM( 3, to, from );
+}
+
+/* sub imm16 to r16 */
+void SUB16ItoR( x86IntRegType to, u16 from ) {
+ write8( 0x66 );
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x2D );
+ } else {
+ write8( 0x81 );
+ ModRM( 3, 5, to );
+ }
+ write16( from );
+}
+
+/* sub imm16 to m16 */
+void SUB16ItoM( uptr to, u16 from ) {
+ MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 5, to, 2);
+ write16( from );
+}
+
+/* sub m16 to r16 */
+void SUB16MtoR( x86IntRegType to, uptr from ) {
+ MEMADDR_OP(0, VAROP2(0x66, 0x2B), true, to, from, 0);
+}
+
+/* sbb r64 to r64 */
+void SBB64RtoR( x86IntRegType to, x86IntRegType from ) {
+ RexRB(1, from,to);
+ write8( 0x19 );
+ ModRM( 3, from, to );
+}
+
+/* sbb imm32 to r32 */
+void SBB32ItoR( x86IntRegType to, u32 from ) {
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x1D );
+ } else {
+ write8( 0x81 );
+ ModRM( 3, 3, to );
+ }
+ write32( from );
+}
+
+/* sbb imm32 to m32 */
+void SBB32ItoM( uptr to, u32 from ) {
+ MEMADDR_OP(0, VAROP1(0x81), false, 3, to, 4);
+ write32( from );
+}
+
+/* sbb r32 to r32 */
+void SBB32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,from,to);
+ write8( 0x19 );
+ ModRM( 3, from, to );
+}
+
+/* sbb m32 to r32 */
+void SBB32MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x1B), true, to, from, 0);
+}
+
+/* sbb r32 to m32 */
+void SBB32RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP1(0x19), true, from, to, 0);
+}
+
+#ifdef __x86_64__
+void DEC32R( x86IntRegType to )
+{
+ write8( 0xFF );
+ ModRM(3,1,to);
+}
+#else
+/* dec r32 */
+void DEC32R( x86IntRegType to )
+{
+ X86_64ASSERT();
+ write8( 0x48 + to );
+}
+#endif
+
+/* dec m32 */
+void DEC32M( uptr to )
+{
+ MEMADDR_OP(0, VAROP1(0xFF), false, 1, to, 0);
+}
+
+/* dec r16 */
+void DEC16R( x86IntRegType to )
+{
+ X86_64ASSERT();
+ write8( 0x66 );
+ write8( 0x48 + to );
+}
+
+/* dec m16 */
+void DEC16M( uptr to )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0xFF), false, 1, to, 0);
+}
+
+/* mul eax by r32 to edx:eax */
+void MUL32R( x86IntRegType from )
+{
+ RexB(0,from);
+ write8( 0xF7 );
+ ModRM( 3, 4, from );
+}
+
+/* imul eax by r32 to edx:eax */
+void IMUL32R( x86IntRegType from )
+{
+ RexB(0,from);
+ write8( 0xF7 );
+ ModRM( 3, 5, from );
+}
+
+/* mul eax by m32 to edx:eax */
+void MUL32M( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xF7), false, 4, from, 0);
+}
+
+/* imul eax by m32 to edx:eax */
+void IMUL32M( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xF7), false, 5, from, 0);
+}
+
+/* imul r32 by r32 to r32 */
+void IMUL32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write16( 0xAF0F );
+ ModRM( 3, to, from );
+}
+
+/* div eax by r32 to edx:eax */
+void DIV32R( x86IntRegType from )
+{
+ RexB(0,from);
+ write8( 0xF7 );
+ ModRM( 3, 6, from );
+}
+
+/* idiv eax by r32 to edx:eax */
+void IDIV32R( x86IntRegType from )
+{
+ RexB(0,from);
+ write8( 0xF7 );
+ ModRM( 3, 7, from );
+}
+
+/* div eax by m32 to edx:eax */
+void DIV32M( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xF7), false, 6, from, 0);
+}
+
+/* idiv eax by m32 to edx:eax */
+void IDIV32M( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xF7), false, 7, from, 0);
+}
+
+////////////////////////////////////
+// shifting instructions /
+////////////////////////////////////
+
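+/* For the shifts below, a count of 1 uses the dedicated 0xD0/0xD1 forms,
+   saving the immediate byte. */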
+/* shl imm8 to r64 */
+void SHL64ItoR( x86IntRegType to, u8 from )
+{
+ RexB(1, to);
+ if ( from == 1 )
+ {
+ write8( 0xD1 );
+ ModRM( 3, 4, to );
+ return;
+ }
+ write8( 0xC1 );
+ ModRM( 3, 4, to );
+ write8( from );
+}
+
+/* shl cl to r64 */
+void SHL64CLtoR( x86IntRegType to )
+{
+ RexB(1, to);
+ write8( 0xD3 );
+ ModRM( 3, 4, to );
+}
+
+/* shr imm8 to r64 */
+void SHR64ItoR( x86IntRegType to, u8 from )
+{
+ RexB(1,to);
+ if ( from == 1 ) {
+ write8( 0xD1 );
+ ModRM( 3, 5, to );
+ return;
+ }
+ write8( 0xC1 );
+ ModRM( 3, 5, to );
+ write8( from );
+}
+
+/* shr cl to r64 */
+void SHR64CLtoR( x86IntRegType to )
+{
+ RexB(1, to);
+ write8( 0xD3 );
+ ModRM( 3, 5, to );
+}
+
+/* shl imm8 to r32 */
+void SHL32ItoR( x86IntRegType to, u8 from )
+{
+ RexB(0, to);
+ if ( from == 1 )
+ {
+ write8( 0xD1 );
+ write8( 0xE0 | (to & 0x7) );
+ return;
+ }
+ write8( 0xC1 );
+ ModRM( 3, 4, to );
+ write8( from );
+}
+
+/* shl imm8 to m32 */
+void SHL32ItoM( uptr to, u8 from )
+{
+ if ( from == 1 )
+ {
+ MEMADDR_OP(0, VAROP1(0xD1), false, 4, to, 0);
+ }
+ else
+ {
+ MEMADDR_OP(0, VAROP1(0xC1), false, 4, to, 1);
+ write8( from );
+ }
+}
+
+/* shl cl to r32 */
+void SHL32CLtoR( x86IntRegType to )
+{
+ RexB(0,to);
+ write8( 0xD3 );
+ ModRM( 3, 4, to );
+}
+
+// shl imm8 to r16
+void SHL16ItoR( x86IntRegType to, u8 from )
+{
+ write8(0x66);
+ RexB(0,to);
+ if ( from == 1 )
+ {
+ write8( 0xD1 );
+ write8( 0xE0 | (to & 0x7) );
+ return;
+ }
+ write8( 0xC1 );
+ ModRM( 3, 4, to );
+ write8( from );
+}
+
+// shl imm8 to r8
+void SHL8ItoR( x86IntRegType to, u8 from )
+{
+ RexB(0,to);
+ if ( from == 1 )
+ {
+ write8( 0xD0 );
+ write8( 0xE0 | (to & 0x7) );
+ return;
+ }
+ write8( 0xC0 );
+ ModRM( 3, 4, to );
+ write8( from );
+}
+
+/* shr imm8 to r32 */
+void SHR32ItoR( x86IntRegType to, u8 from ) {
+ RexB(0,to);
+ if ( from == 1 )
+ {
+ write8( 0xD1 );
+ write8( 0xE8 | (to & 0x7) );
+ }
+ else
+ {
+ write8( 0xC1 );
+ ModRM( 3, 5, to );
+ write8( from );
+ }
+}
+
+/* shr imm8 to m32 */
+void SHR32ItoM( uptr to, u8 from )
+{
+ if ( from == 1 )
+ {
+ MEMADDR_OP(0, VAROP1(0xD1), false, 5, to, 0);
+ }
+ else
+ {
+ MEMADDR_OP(0, VAROP1(0xC1), false, 5, to, 1);
+ write8( from );
+ }
+}
+
+/* shr cl to r32 */
+void SHR32CLtoR( x86IntRegType to )
+{
+ RexB(0,to);
+ write8( 0xD3 );
+ ModRM( 3, 5, to );
+}
+
+// shr imm8 to r8
+void SHR8ItoR( x86IntRegType to, u8 from )
+{
+ RexB(0,to);
+ if ( from == 1 )
+ {
+ write8( 0xD0 );
+ write8( 0xE8 | (to & 0x7) );
+ }
+ else
+ {
+ write8( 0xC0 );
+ ModRM( 3, 5, to );
+ write8( from );
+ }
+}
+
+/* sar imm8 to r64 */
+void SAR64ItoR( x86IntRegType to, u8 from )
+{
+ RexB(1,to);
+ if ( from == 1 )
+ {
+ write8( 0xD1 );
+ ModRM( 3, 7, to );
+ return;
+ }
+ write8( 0xC1 );
+ ModRM( 3, 7, to );
+ write8( from );
+}
+
+/* sar cl to r64 */
+void SAR64CLtoR( x86IntRegType to )
+{
+ RexB(1, to);
+ write8( 0xD3 );
+ ModRM( 3, 7, to );
+}
+
+/* sar imm8 to r32 */
+void SAR32ItoR( x86IntRegType to, u8 from )
+{
+ RexB(0,to);
+ if ( from == 1 )
+ {
+ write8( 0xD1 );
+ ModRM( 3, 7, to );
+ return;
+ }
+ write8( 0xC1 );
+ ModRM( 3, 7, to );
+ write8( from );
+}
+
+/* sar imm8 to m32 */
+void SAR32ItoM( uptr to, u8 from )
+{
+ if (from == 1)
+ {
+ MEMADDR_OP(0, VAROP1(0xD1), false, 7, to, 0);
+ }
+ else
+ {
+ MEMADDR_OP(0, VAROP1(0xC1), false, 7, to, 1);
+ write8( from );
+ }
+}
+
+/* sar cl to r32 */
+void SAR32CLtoR( x86IntRegType to )
+{
+ RexB(0,to);
+ write8( 0xD3 );
+ ModRM( 3, 7, to );
+}
+
+// sar imm8 to r16
+void SAR16ItoR( x86IntRegType to, u8 from )
+{
+ write8(0x66);
+ RexB(0,to);
+ if ( from == 1 )
+ {
+ write8( 0xD1 );
+ ModRM( 3, 7, to );
+ return;
+ }
+ write8( 0xC1 );
+ ModRM( 3, 7, to );
+ write8( from );
+}
+
+void ROR32ItoR( x86IntRegType to, u8 from )
+{
+	RexB(0,to);
+	if ( from == 1 ) {
+		write8( 0xd1 );
+		write8( 0xc8 | (to & 0x7) );
+	}
+	else
+	{
+		write8( 0xc1 );
+		write8( 0xc8 | (to & 0x7) );
+		write8( from );
+	}
+}
+
+void RCR32ItoR( x86IntRegType to, u8 from )
+{
+	RexB(0,to);
+	if ( from == 1 ) {
+		write8( 0xd1 );
+		write8( 0xd8 | (to & 0x7) );
+	}
+	else
+	{
+		write8( 0xc1 );
+		write8( 0xd8 | (to & 0x7) );
+		write8( from );
+	}
+}
+
+// shld imm8 to r32
+void SHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift )
+{
+ RexRB(0,from,to);
+ write8( 0x0F );
+ write8( 0xA4 );
+ ModRM( 3, from, to );
+ write8( shift );
+}
+
+// shrd imm8 to r32
+void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift )
+{
+ RexRB(0,from,to);
+ write8( 0x0F );
+ write8( 0xAC );
+ ModRM( 3, from, to );
+ write8( shift );
+}
+
+////////////////////////////////////
+// logical instructions /
+////////////////////////////////////
+
+/* or imm32 to r64 */
+void OR64ItoR( x86IntRegType to, u32 from )
+{
+ RexB(1, to);
+ if ( to == EAX ) {
+ write8( 0x0D );
+ } else {
+ write8( 0x81 );
+ ModRM( 3, 1, to );
+ }
+ write32( from );
+}
+
+/* or m64 to r64 */
+void OR64MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(1, VAROP1(0x0B), true, to, from, 0);
+}
+
+/* or r64 to r64 */
+void OR64RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1, from, to);
+ write8( 0x09 );
+ ModRM( 3, from, to );
+}
+
+// or r64 to m64
+void OR64RtoM(uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(1, VAROP1(0x09), true, from, to, 0);
+}
+
+/* or imm32 to r32 */
+void OR32ItoR( x86IntRegType to, u32 from )
+{
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x0D );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 1, to );
+ }
+ write32( from );
+}
+
+/* or imm32 to m32 */
+void OR32ItoM(uptr to, u32 from )
+{
+ MEMADDR_OP(0, VAROP1(0x81), false, 1, to, 4);
+ write32(from);
+}
+
+/* or r32 to r32 */
+void OR32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,from,to);
+ write8( 0x09 );
+ ModRM( 3, from, to );
+}
+
+/* or r32 to m32 */
+void OR32RtoM(uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP1(0x09), true, from, to, 0);
+}
+
+/* or m32 to r32 */
+void OR32MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x0B), true, to, from, 0);
+}
+
+// or r16 to r16
+void OR16RtoR( x86IntRegType to, x86IntRegType from )
+{
+ write8(0x66);
+ RexRB(0,from,to);
+ write8( 0x09 );
+ ModRM( 3, from, to );
+}
+
+// or imm16 to r16
+void OR16ItoR( x86IntRegType to, u16 from )
+{
+ write8(0x66);
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x0D );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 1, to );
+ }
+ write16( from );
+}
+
+// or imm16 to m16
+void OR16ItoM( uptr to, u16 from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 1, to, 2);
+ write16( from );
+}
+
+/* or m16 to r16 */
+void OR16MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x0B), true, to, from, 0);
+}
+
+// or r16 to m16
+void OR16RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x09), true, from, to, 0);
+}
+
+// or r8 to r8
+void OR8RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,from,to);
+ write8( 0x08 );
+ ModRM( 3, from, to );
+}
+
+// or r8 to m8
+void OR8RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP1(0x08), true, from, to, 0);
+}
+
+// or imm8 to m8
+void OR8ItoM( uptr to, u8 from )
+{
+ MEMADDR_OP(0, VAROP1(0x80), false, 1, to, 1);
+ write8( from );
+}
+
+// or m8 to r8
+void OR8MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x0A), true, to, from, 0);
+}
+
+/* xor imm32 to r64 */
+void XOR64ItoR( x86IntRegType to, u32 from )
+{
+ RexB(1,to);
+ if ( to == EAX ) {
+ write8( 0x35 );
+ } else {
+ write8( 0x81 );
+ ModRM( 3, 6, to );
+ }
+ write32( from );
+}
+
+/* xor r64 to r64 */
+void XOR64RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1, from, to);
+ write8( 0x31 );
+ ModRM( 3, from, to );
+}
+
+/* xor m64 to r64 */
+void XOR64MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(1, VAROP1(0x33), true, to, from, 0);
+}
+
+/* xor r64 to m64 */
+void XOR64RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(1, VAROP1(0x31), true, from, to, 0);
+}
+
+/* xor imm32 to r32 */
+void XOR32ItoR( x86IntRegType to, u32 from )
+{
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x35 );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 6, to );
+ }
+ write32( from );
+}
+
+/* xor imm32 to m32 */
+void XOR32ItoM( uptr to, u32 from )
+{
+ MEMADDR_OP(0, VAROP1(0x81), false, 6, to, 4);
+ write32( from );
+}
+
+/* xor r32 to r32 */
+void XOR32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,from,to);
+ write8( 0x31 );
+ ModRM( 3, from, to );
+}
+
+/* xor r16 to r16 */
+void XOR16RtoR( x86IntRegType to, x86IntRegType from )
+{
+ write8( 0x66 );
+ RexRB(0,from,to);
+ write8( 0x31 );
+ ModRM( 3, from, to );
+}
+
+/* xor r32 to m32 */
+void XOR32RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP1(0x31), true, from, to, 0);
+}
+
+/* xor m32 to r32 */
+void XOR32MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x33), true, to, from, 0);
+}
+
+// xor imm16 to r16
+void XOR16ItoR( x86IntRegType to, u16 from )
+{
+ write8(0x66);
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x35 );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 6, to );
+ }
+ write16( from );
+}
+
+// xor r16 to m16
+void XOR16RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x31), true, from, to, 0);
+}
+
+/* and imm32 to r64 */
+void AND64I32toR( x86IntRegType to, u32 from )
+{
+ RexB(1, to);
+ if ( to == EAX ) {
+ write8( 0x25 );
+ } else {
+ write8( 0x81 );
+ ModRM( 3, 0x4, to );
+ }
+ write32( from );
+}
+
+/* and m64 to r64 */
+void AND64MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(1, VAROP1(0x23), true, to, from, 0);
+}
+
+/* and r64 to m64 */
+void AND64RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(1, VAROP1(0x21), true, from, to, 0);
+}
+
+/* and r64 to r64 */
+void AND64RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1, from, to);
+ write8( 0x21 );
+ ModRM( 3, from, to );
+}
+
+/* and imm32 to m64 */
+void AND64I32toM( uptr to, u32 from )
+{
+ MEMADDR_OP(1, VAROP1(0x81), false, 4, to, 4);
+ write32( from );
+}
+
+/* and imm32 to r32 */
+void AND32ItoR( x86IntRegType to, u32 from )
+{
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x25 );
+ } else {
+ write8( 0x81 );
+ ModRM( 3, 0x4, to );
+ }
+ write32( from );
+}
+
+/* and sign ext imm8 to r32 */
+void AND32I8toR( x86IntRegType to, u8 from )
+{
+ RexB(0,to);
+ write8( 0x83 );
+ ModRM( 3, 0x4, to );
+ write8( from );
+}
+
+/* and imm32 to m32 */
+void AND32ItoM( uptr to, u32 from )
+{
+ MEMADDR_OP(0, VAROP1(0x81), false, 4, to, 4);
+ write32(from);
+}
+
+/* and sign ext imm8 to m32 */
+void AND32I8toM( uptr to, u8 from )
+{
+ MEMADDR_OP(0, VAROP1(0x83), false, 4, to, 1);
+ write8( from );
+}
+
+/* and r32 to r32 */
+void AND32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,from,to);
+ write8( 0x21 );
+ ModRM( 3, from, to );
+}
+
+/* and r32 to m32 */
+void AND32RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP1(0x21), true, from, to, 0);
+}
+
+/* and m32 to r32 */
+void AND32MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x23), true, to, from, 0);
+}
+
+// and r16 to r16
+void AND16RtoR( x86IntRegType to, x86IntRegType from )
+{
+ write8(0x66);
+ RexRB(0,to,from);
+ write8( 0x23 );
+ ModRM( 3, to, from );
+}
+
+/* and imm16 to r16 */
+void AND16ItoR( x86IntRegType to, u16 from )
+{
+ write8(0x66);
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x25 );
+ } else {
+ write8( 0x81 );
+ ModRM( 3, 0x4, to );
+ }
+ write16( from );
+}
+
+/* and imm16 to m16 */
+void AND16ItoM( uptr to, u16 from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 4, to, 2);
+ write16( from );
+}
+
+/* and r16 to m16 */
+void AND16RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x21), true, from, to, 0);
+}
+
+/* and m16 to r16 */
+void AND16MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x23), true, to, from, 0);
+}
+
+/* and imm8 to r8 */
+void AND8ItoR( x86IntRegType to, u8 from )
+{
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x24 );
+ } else {
+ write8( 0x80 );
+ ModRM( 3, 0x4, to );
+ }
+ write8( from );
+}
+
+/* and imm8 to m8 */
+void AND8ItoM( uptr to, u8 from )
+{
+ MEMADDR_OP(0, VAROP1(0x80), false, 4, to, 1);
+ write8( from );
+}
+
+// and r8 to r8
+void AND8RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,to,from);
+ write8( 0x22 );
+ ModRM( 3, to, from );
+}
+
+/* and r8 to m8 */
+void AND8RtoM( uptr to, x86IntRegType from )
+{
+ MEMADDR_OP(0, VAROP1(0x20), true, from, to, 0);
+}
+
+/* and m8 to r8 */
+void AND8MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x22), true, to, from, 0);
+}
+
+/* not r64 */
+void NOT64R( x86IntRegType from )
+{
+ RexB(1, from);
+ write8( 0xF7 );
+ ModRM( 3, 2, from );
+}
+
+/* not r32 */
+void NOT32R( x86IntRegType from )
+{
+ RexB(0,from);
+ write8( 0xF7 );
+ ModRM( 3, 2, from );
+}
+
+// not m32
+void NOT32M( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xF7), false, 2, from, 0);
+}
+
+/* neg r64 */
+void NEG64R( x86IntRegType from )
+{
+ RexB(1, from);
+ write8( 0xF7 );
+ ModRM( 3, 3, from );
+}
+
+/* neg r32 */
+void NEG32R( x86IntRegType from )
+{
+ RexB(0,from);
+ write8( 0xF7 );
+ ModRM( 3, 3, from );
+}
+
+void NEG32M( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xF7), false, 3, from, 0);
+}
+
+/* neg r16 */
+void NEG16R( x86IntRegType from )
+{
+ write8( 0x66 );
+ RexB(0,from);
+ write8( 0xF7 );
+ ModRM( 3, 3, from );
+}
+
+////////////////////////////////////
+// jump instructions /
+////////////////////////////////////
+
+u8* JMP( uptr to ) {
+ uptr jump = ( x86Ptr - (s8*)to ) - 1;
+
+ if ( jump > 0x7f ) {
+ assert( to <= 0xffffffff );
+ return (u8*)JMP32( to );
+ } else {
+ return (u8*)JMP8( to );
+ }
+}
+
+/* jmp rel8 */
+u8* JMP8( u8 to )
+{
+ write8( 0xEB );
+ write8( to );
+ return x86Ptr - 1;
+}
+
+/* jmp rel32 */
+u32* JMP32( uptr to )
+{
+ assert(SPTR32((sptr)to));
+ write8( 0xE9 );
+ write32( (sptr)to );
+ return (u32*)(x86Ptr - 4 );
+}
+
+/* jmp r32/r64 */
+void JMPR( x86IntRegType to )
+{
+ RexB(0, to);
+ write8( 0xFF );
+ ModRM( 3, 4, to );
+}
+
+// jmp m32
+void JMP32M( uptr to )
+{
+ /* FIXME */
+ MEMADDR_OP(0, VAROP1(0xFF), false, 4, to, 0);
+}
+
+/* jp rel8 */
+u8* JP8( u8 to ) {
+ return J8Rel( 0x7A, to );
+}
+
+/* jnp rel8 */
+u8* JNP8( u8 to ) {
+ return J8Rel( 0x7B, to );
+}
+
+/* je rel8 */
+u8* JE8( u8 to ) {
+ return J8Rel( 0x74, to );
+}
+
+/* jz rel8 */
+u8* JZ8( u8 to )
+{
+ return J8Rel( 0x74, to );
+}
+
+/* js rel8 */
+u8* JS8( u8 to )
+{
+ return J8Rel( 0x78, to );
+}
+
+/* jns rel8 */
+u8* JNS8( u8 to )
+{
+ return J8Rel( 0x79, to );
+}
+
+/* jg rel8 */
+u8* JG8( u8 to )
+{
+ return J8Rel( 0x7F, to );
+}
+
+/* jge rel8 */
+u8* JGE8( u8 to )
+{
+ return J8Rel( 0x7D, to );
+}
+
+/* jl rel8 */
+u8* JL8( u8 to )
+{
+ return J8Rel( 0x7C, to );
+}
+
+/* ja rel8 */
+u8* JA8( u8 to )
+{
+ return J8Rel( 0x77, to );
+}
+
+u8* JAE8( u8 to )
+{
+ return J8Rel( 0x73, to );
+}
+
+/* jb rel8 */
+u8* JB8( u8 to )
+{
+ return J8Rel( 0x72, to );
+}
+
+/* jbe rel8 */
+u8* JBE8( u8 to )
+{
+ return J8Rel( 0x76, to );
+}
+
+/* jle rel8 */
+u8* JLE8( u8 to )
+{
+ return J8Rel( 0x7E, to );
+}
+
+/* jne rel8 */
+u8* JNE8( u8 to )
+{
+ return J8Rel( 0x75, to );
+}
+
+/* jnz rel8 */
+u8* JNZ8( u8 to )
+{
+ return J8Rel( 0x75, to );
+}
+
+/* jng rel8 */
+u8* JNG8( u8 to )
+{
+ return J8Rel( 0x7E, to );
+}
+
+/* jnge rel8 */
+u8* JNGE8( u8 to )
+{
+ return J8Rel( 0x7C, to );
+}
+
+/* jnl rel8 */
+u8* JNL8( u8 to )
+{
+ return J8Rel( 0x7D, to );
+}
+
+/* jnle rel8 */
+u8* JNLE8( u8 to )
+{
+ return J8Rel( 0x7F, to );
+}
+
+/* jo rel8 */
+u8* JO8( u8 to )
+{
+ return J8Rel( 0x70, to );
+}
+
+/* jno rel8 */
+u8* JNO8( u8 to )
+{
+ return J8Rel( 0x71, to );
+}
+
+// jb rel16
+u16* JB16( u16 to )
+{
+ return J16Rel( 0x82, to );
+}
+
+// jb rel32
+u32* JB32( u32 to )
+{
+ return J32Rel( 0x82, to );
+}
+
+/* je rel32 */
+u32* JE32( u32 to )
+{
+ return J32Rel( 0x84, to );
+}
+
+/* jz rel32 */
+u32* JZ32( u32 to )
+{
+ return J32Rel( 0x84, to );
+}
+
+/* jg rel32 */
+u32* JG32( u32 to )
+{
+ return J32Rel( 0x8F, to );
+}
+
+/* jge rel32 */
+u32* JGE32( u32 to )
+{
+ return J32Rel( 0x8D, to );
+}
+
+/* jl rel32 */
+u32* JL32( u32 to )
+{
+ return J32Rel( 0x8C, to );
+}
+
+/* jle rel32 */
+u32* JLE32( u32 to )
+{
+ return J32Rel( 0x8E, to );
+}
+
+/* jae rel32 */
+u32* JAE32( u32 to )
+{
+ return J32Rel( 0x83, to );
+}
+
+/* jne rel32 */
+u32* JNE32( u32 to )
+{
+ return J32Rel( 0x85, to );
+}
+
+/* jnz rel32 */
+u32* JNZ32( u32 to )
+{
+ return J32Rel( 0x85, to );
+}
+
+/* jng rel32 */
+u32* JNG32( u32 to )
+{
+ return J32Rel( 0x8E, to );
+}
+
+/* jnge rel32 */
+u32* JNGE32( u32 to )
+{
+ return J32Rel( 0x8C, to );
+}
+
+/* jnl rel32 */
+u32* JNL32( u32 to )
+{
+ return J32Rel( 0x8D, to );
+}
+
+/* jnle rel32 */
+u32* JNLE32( u32 to )
+{
+ return J32Rel( 0x8F, to );
+}
+
+/* jo rel32 */
+u32* JO32( u32 to )
+{
+ return J32Rel( 0x80, to );
+}
+
+/* jno rel32 */
+u32* JNO32( u32 to )
+{
+ return J32Rel( 0x81, to );
+}
+
+// js rel32
+u32* JS32( u32 to )
+{
+ return J32Rel( 0x88, to );
+}
+
+
+/* call func */
+void CALLFunc( uptr func )
+{
+ sptr p = MEMADDR_(func, 5);
+ if (SPTR32(p))
+ {
+ CALL32(p);
+ }
+ else
+ {
+ MOV64ItoR(X86_TEMP, func);
+ CALL64R(X86_TEMP);
+ }
+}
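+/*
+ * CALLFunc chooses between the two forms below: a rel32 call when the callee
+ * is reachable from the code buffer, otherwise an indirect call through the
+ * scratch register. The same decision spelled out (sketch only):
+ *
+ *   sptr rel = (sptr)func - ((sptr)x86Ptr + 5); // rel32 counts from the end of the 5-byte call
+ *   if (SPTR32(rel)) CALL32((s32)rel);
+ *   else { MOV64ItoR(X86_TEMP, func); CALL64R(X86_TEMP); }
+ */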
+
+/* call rel32 */
+void CALL32( s32 to )
+{
+ write8( 0xE8 );
+ write32( to );
+}
+
+/* call r32 */
+void CALL32R( x86IntRegType to )
+{
+ RexB(0, to);
+ write8( 0xFF );
+ ModRM( 3, 2, to );
+}
+
+/* call r64 */
+void CALL64R( x86IntRegType to )
+{
+ RexB(0, to);
+ write8( 0xFF );
+ ModRM( 3, 2, to );
+}
+
+////////////////////////////////////
+// misc instructions /
+////////////////////////////////////
+
+/* cmp imm32 to r64 */
+void CMP64I32toR( x86IntRegType to, u32 from )
+{
+ RexB(1, to);
+ if ( to == EAX ) {
+ write8( 0x3D );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 7, to );
+ }
+ write32( from );
+}
+
+/* cmp m64 to r64 */
+void CMP64MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(1, VAROP1(0x3B), true, 2, from, 0);
+}
+
+// cmp r64 to r64
+void CMP64RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1,from,to);
+ write8( 0x39 );
+ ModRM( 3, from, to );
+}
+
+/* cmp imm32 to r32 */
+void CMP32ItoR( x86IntRegType to, u32 from )
+{
+ RexB(0,to);
+ if ( to == EAX ) {
+ write8( 0x3D );
+ }
+ else {
+ write8( 0x81 );
+ ModRM( 3, 7, to );
+ }
+ write32( from );
+}
+
+/* cmp imm32 to m32 */
+void CMP32ItoM( uptr to, u32 from )
+{
+ MEMADDR_OP(0, VAROP1(0x81), false, 7, to, 4);
+ write32(from);
+}
+
+/* cmp r32 to r32 */
+void CMP32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,from,to);
+ write8( 0x39 );
+ ModRM( 3, from, to );
+}
+
+/* cmp m32 to r32 */
+void CMP32MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x3B), true, to, from, 0);
+}
+
+// cmp imm8 to [r32]
+void CMP32I8toRm( x86IntRegType to, u8 from)
+{
+ RexB(0,to);
+ write8( 0x83 );
+ ModRM( 0, 7, to );
+ write8(from);
+}
+
+// cmp imm8 to [r32+off]
+void CMP32I8toRmOffset8( x86IntRegType to, u8 from, u8 off)
+{
+ RexB(0,to);
+ write8( 0x83 );
+ ModRM( 1, 7, to );
+ write8(off);
+ write8(from);
+}
+
+// cmp imm8 to m32
+void CMP32I8toM( uptr to, u8 from)
+{
+ MEMADDR_OP(0, VAROP1(0x83), false, 7, to, 1);
+ write8( from );
+}
+
+/* cmp imm16 to r16 */
+void CMP16ItoR( x86IntRegType to, u16 from )
+{
+ write8( 0x66 );
+ RexB(0,to);
+ if ( to == EAX )
+ {
+ write8( 0x3D );
+ }
+ else
+ {
+ write8( 0x81 );
+ ModRM( 3, 7, to );
+ }
+ write16( from );
+}
+
+/* cmp imm16 to m16 */
+void CMP16ItoM( uptr to, u16 from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x81), false, 7, to, 2);
+ write16( from );
+}
+
+/* cmp r16 to r16 */
+void CMP16RtoR( x86IntRegType to, x86IntRegType from )
+{
+ write8( 0x66 );
+ RexRB(0,from,to);
+ write8( 0x39 );
+ ModRM( 3, from, to );
+}
+
+/* cmp m16 to r16 */
+void CMP16MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x66, 0x3B), true, to, from, 0);
+}
+
+// cmp imm8 to r8
+void CMP8ItoR( x86IntRegType to, u8 from )
+{
+ RexB(0,to);
+ if ( to == EAX )
+ {
+ write8( 0x3C );
+ }
+ else
+ {
+ write8( 0x80 );
+ ModRM( 3, 7, to );
+ }
+ write8( from );
+}
+
+// cmp m8 to r8
+void CMP8MtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0x3A), true, to, from, 0);
+}
+
+/* test r64 to r64 */
+void TEST64RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(1, from, to);
+ write8( 0x85 );
+ ModRM( 3, from, to );
+}
+
+/* test imm32 to r32 */
+void TEST32ItoR( x86IntRegType to, u32 from )
+{
+ RexB(0,to);
+ if ( to == EAX )
+ {
+ write8( 0xA9 );
+ }
+ else
+ {
+ write8( 0xF7 );
+ ModRM( 3, 0, to );
+ }
+ write32( from );
+}
+
+void TEST32ItoM( uptr to, u32 from )
+{
+ MEMADDR_OP(0, VAROP1(0xF7), false, 0, to, 4);
+ write32( from );
+}
+
+/* test r32 to r32 */
+void TEST32RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0,from,to);
+ write8( 0x85 );
+ ModRM( 3, from, to );
+}
+
+// test imm32 to [r32]
+void TEST32ItoRm( x86IntRegType to, u32 from )
+{
+ RexB(0,to);
+ write8( 0xF7 );
+ ModRM( 0, 0, to );
+ write32(from);
+}
+
+// test imm16 to r16
+void TEST16ItoR( x86IntRegType to, u16 from )
+{
+ write8(0x66);
+ RexB(0,to);
+ if ( to == EAX )
+ {
+ write8( 0xA9 );
+ }
+ else
+ {
+ write8( 0xF7 );
+ ModRM( 3, 0, to );
+ }
+ write16( from );
+}
+
+// test r16 to r16
+void TEST16RtoR( x86IntRegType to, x86IntRegType from )
+{
+ write8(0x66);
+ RexRB(0,from,to);
+ write8( 0x85 );
+ ModRM( 3, from, to );
+}
+
+// test imm8 to r8
+void TEST8ItoR( x86IntRegType to, u8 from )
+{
+ RexB(0,to);
+ if ( to == EAX )
+ {
+ write8( 0xA8 );
+ }
+ else
+ {
+ write8( 0xF6 );
+ ModRM( 3, 0, to );
+ }
+ write8( from );
+}
+
+// test imm8 to m8
+void TEST8ItoM( uptr to, u8 from )
+{
+ MEMADDR_OP(0, VAROP1(0xF6), false, 0, to, 1);
+ write8( from );
+}
+
+/* sets r8 */
+void SETS8R( x86IntRegType to )
+{
+ SET8R( 0x98, to );
+}
+
+/* setl r8 */
+void SETL8R( x86IntRegType to )
+{
+ SET8R( 0x9C, to );
+}
+
+// setge r8
+void SETGE8R( x86IntRegType to ) { SET8R(0x9d, to); }
+// setg r8
+void SETG8R( x86IntRegType to ) { SET8R(0x9f, to); }
+// seta r8
+void SETA8R( x86IntRegType to ) { SET8R(0x97, to); }
+// setae r8
+void SETAE8R( x86IntRegType to ) { SET8R(0x99, to); }
+/* setb r8 */
+void SETB8R( x86IntRegType to ) { SET8R( 0x92, to ); }
+/* setnz r8 */
+void SETNZ8R( x86IntRegType to ) { SET8R( 0x95, to ); }
+// setz r8
+void SETZ8R( x86IntRegType to ) { SET8R(0x94, to); }
+// sete r8
+void SETE8R( x86IntRegType to ) { SET8R(0x94, to); }
+
+/* push imm32 */
+void PUSH32I( u32 from )
+{
+ //X86_64ASSERT(); // the imm32 is sign-extended to 64 bits on x86-64
+ write8( 0x68 );
+ write32( from );
+}
+
+#ifdef __x86_64__
+
+/* push r64 */
+void PUSH64R( x86IntRegType from )
+{
+ RexB(0,from);
+ write8( 0x50 | (from & 7) ); /* low 3 bits go in the opcode; REX.B supplies bit 3 */
+}
+
+/* push m64 */
+void PUSH64M( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xFF), false, 6, from, 0);
+}
+
+/* pop r64 */
+void POP64R( x86IntRegType from ) {
+ RexB(0,from);
+ write8( 0x58 | (from & 7) );
+}
+
+void PUSHR(x86IntRegType from) { PUSH64R(from); }
+void POPR(x86IntRegType from) { POP64R(from); }
+
+#else
+
+/* push r32 */
+void PUSH32R( x86IntRegType from ) { write8( 0x50 | from ); }
+
+/* push m32 */
+void PUSH32M( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xFF), false, 6, from, 0);
+}
+
+/* pop r32 */
+void POP32R( x86IntRegType from ) { write8( 0x58 | from ); }
+
+/* pushad */
+void PUSHA32( void ) { write8( 0x60 ); }
+
+/* popad */
+void POPA32( void ) { write8( 0x61 ); }
+
+void PUSHR(x86IntRegType from) { PUSH32R(from); }
+void POPR(x86IntRegType from) { POP32R(from); }
+
+#endif
+
+
+/* pushfd */
+void PUSHFD( void ) { write8( 0x9C ); }
+/* popfd */
+void POPFD( void ) { write8( 0x9D ); }
+
+void RET( void ) { write8( 0xC3 ); }
+void RET2( void ) { write16( 0xc3f3 ); }
+
+void CBW( void ) { write16( 0x9866 ); }
+void CWD( void ) { write16( 0x9966 ); } /* 0x66 0x99; a bare 0x98 is CWDE */
+void CDQ( void ) { write8( 0x99 ); }
+void CWDE() { write8(0x98); }
+
+#ifdef __x86_64__
+void CDQE( void ) { RexR(1,0); write8( 0x98 ); }
+#endif
+
+void LAHF() { write8(0x9f); }
+void SAHF() { write8(0x9e); }
+
+// bt r32, imm8 (the "from" argument is the bit-index immediate)
+void BT32ItoR( x86IntRegType to, x86IntRegType from )
+{
+ write16( 0xBA0F );
+ write8( 0xE0 | to );
+ write8( from );
+}
+
+void BSRRtoR(x86IntRegType to, x86IntRegType from)
+{
+ write16( 0xBD0F );
+ ModRM( 3, to, from ); /* 0F BD /r: the destination goes in the reg field */
+}
+
+void BSWAP32R( x86IntRegType to )
+{
+ RexB(0,to);
+ write8( 0x0F );
+ write8( 0xC8 | (to & 7) );
+}
+
+// to = from + offset
+void LEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset)
+{
+ write8(0x66);
+ LEA32RtoR(to, from, offset);
+}
+
+void LEA32RtoR(x86IntRegType to, x86IntRegType from, u32 offset)
+{
+ RexRB(0,to,from);
+ write8(0x8d);
+
+ if( (from&7) == ESP ) {
+ if( offset == 0 ) {
+ ModRM(1, to, from);
+ write8(0x24);
+ write8(0); /* mod=1 always carries a disp8, even when it is zero */
+ }
+ else if( offset < 128 ) {
+ ModRM(1, to, from);
+ write8(0x24);
+ write8(offset);
+ }
+ else {
+ ModRM(2, to, from);
+ write8(0x24);
+ write32(offset);
+ }
+ }
+ else {
+ if( offset == 0 && from != EBP && from!=ESP ) {
+ ModRM(0, to, from);
+ }
+ else if( offset < 128 ) {
+ ModRM(1, to, from);
+ write8(offset);
+ }
+ else {
+ ModRM(2, to, from);
+ write32(offset);
+ }
+ }
+}
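+/*
+ * Why the special cases above: in a ModRM byte, rm=100b (ESP) means "a SIB
+ * byte follows" (hence the extra 0x24), and mod=0 with rm=101b (EBP) means
+ * disp32-only, so [ebp] must be encoded as mod=1 with an explicit disp8.
+ * Example encoding: lea eax, [ebp+0] -> 8D 45 00.
+ */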
+
+// to = from0 + from1
+void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1)
+{
+ write8(0x66);
+ LEA32RRtoR(to, from0, from1);
+}
+
+void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1)
+{
+ RexRXB(0, to, from0, from1);
+ write8(0x8d);
+
+ if( (from1&7) == EBP ) {
+ ModRM(1, to, 4);
+ ModRM(0, from0, from1);
+ write8(0);
+ }
+ else {
+ ModRM(0, to, 4);
+ ModRM(0, from0, from1);
+ }
+}
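+/*
+ * Same EBP caveat: a SIB base of 101b under mod=0 would be read as
+ * "no base + disp32", so when from1 is EBP the encoder switches to mod=1
+ * and appends an explicit zero displacement byte.
+ */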
+
+// to = from << scale (max is 3)
+void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale)
+{
+ write8(0x66);
+ LEA32RStoR(to, from, scale);
+}
+
+void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale)
+{
+ if( to == from ) {
+ SHL32ItoR(to, scale);
+ return;
+ }
+
+ if( from != ESP ) {
+ RexRXB(0,to,from,0);
+ write8(0x8d);
+ ModRM(0, to, 4);
+ ModRM(scale, from, 5);
+ write32(0);
+ }
+ else {
+ assert( to != ESP );
+ MOV32RtoR(to, from);
+ LEA32RStoR(to, to, scale);
+ }
+}
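+/*
+ * Example: LEA32RStoR(EAX, EBX, 3) emits lea eax, [ebx*8+0]; a SIB with no
+ * base register (base=101b under mod=0) always carries a disp32, hence the
+ * trailing write32(0).
+ */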
+
+#endif
diff --git a/libpcsxcore/ix86_64/ix86-64.h b/libpcsxcore/ix86_64/ix86-64.h
new file mode 100644
index 0000000..009fa5a
--- /dev/null
+++ b/libpcsxcore/ix86_64/ix86-64.h
@@ -0,0 +1,1776 @@
+/*
+ * ix86 definitions v0.6.2
+ * Authors: linuzappz <linuzappz@pcsx.net>
+ * alexey silinov
+ * goldfinger
+ * shadow < shadow@pcsx2.net >
+ */
+
+#ifndef __IX86_H__
+#define __IX86_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "../psxcommon.h" // Basic types header
+#include <assert.h>
+#include <stdbool.h>
+
+#if defined(__MINGW32__)
+#define PCSX2_ALIGNED16(x) __declspec(align(16)) x
+#else
+#define PCSX2_ALIGNED16(x) x __attribute((aligned(16)))
+#endif
+
+
+#ifdef __x86_64__
+#define XMMREGS 16
+#define X86REGS 16
+#else
+#define XMMREGS 8
+#define X86REGS 8
+#endif
+
+#define MMXREGS 8
+
+#define SIB 4
+#define DISP32 5
+
+// general types
+typedef int x86IntRegType;
+#define EAX 0
+#define EBX 3
+#define ECX 1
+#define EDX 2
+#define ESI 6
+#define EDI 7
+#define EBP 5
+#define ESP 4
+
+#ifdef __x86_64__
+#define RAX 0
+#define RBX 3
+#define RCX 1
+#define RDX 2
+#define RSI 6
+#define RDI 7
+#define RBP 5
+#define RSP 4
+#define R8 8
+#define R9 9
+#define R10 10
+#define R11 11
+#define R12 12
+#define R13 13
+#define R14 14
+#define R15 15
+
+#define X86_TEMP RAX // scratch register; must never be handed out by the register allocator
+
+#ifdef _MSC_VER
+extern x86IntRegType g_x86savedregs[8];
+extern x86IntRegType g_x86tempregs[6];
+#else
+extern x86IntRegType g_x86savedregs[6];
+extern x86IntRegType g_x86tempregs[8];
+#endif
+
+extern x86IntRegType g_x86allregs[14]; // all registers that can be used by the recs
+extern x86IntRegType g_x868bitregs[11];
+extern x86IntRegType g_x86non8bitregs[3];
+
+#ifdef _MSC_VER
+#define X86ARG1 RCX
+#define X86ARG2 RDX
+#define X86ARG3 R8
+#define X86ARG4 R9
+#else
+#define X86ARG1 RDI
+#define X86ARG2 RSI
+#define X86ARG3 RDX
+#define X86ARG4 RCX
+#endif
+
+#else
+
+#define X86ARG1 EAX
+#define X86ARG2 ECX
+#define X86ARG3 EDX
+#define X86ARG4 EBX
+
+#endif // __x86_64__
+
+#define MM0 0
+#define MM1 1
+#define MM2 2
+#define MM3 3
+#define MM4 4
+#define MM5 5
+#define MM6 6
+#define MM7 7
+
+typedef int x86MMXRegType;
+
+#define XMM0 0
+#define XMM1 1
+#define XMM2 2
+#define XMM3 3
+#define XMM4 4
+#define XMM5 5
+#define XMM6 6
+#define XMM7 7
+#define XMM8 8
+#define XMM9 9
+#define XMM10 10
+#define XMM11 11
+#define XMM12 12
+#define XMM13 13
+#define XMM14 14
+#define XMM15 15
+
+typedef int x86SSERegType;
+
+typedef enum
+{
+ XMMT_INT = 0, // integer (sse2 only)
+ XMMT_FPS = 1, // floating point
+ //XMMT_FPD = 3, // double
+} XMMSSEType;
+
+extern XMMSSEType g_xmmtypes[XMMREGS];
+
+void cpudetectInit( void ); // call once at startup; fills the capability/info structs below
+
+//cpu capabilities structure
+typedef struct {
+ u32 hasFloatingPointUnit;
+ u32 hasVirtual8086ModeEnhancements;
+ u32 hasDebuggingExtensions;
+ u32 hasPageSizeExtensions;
+ u32 hasTimeStampCounter;
+ u32 hasModelSpecificRegisters;
+ u32 hasPhysicalAddressExtension;
+ u32 hasCOMPXCHG8BInstruction;
+ u32 hasAdvancedProgrammableInterruptController;
+ u32 hasSEPFastSystemCall;
+ u32 hasMemoryTypeRangeRegisters;
+ u32 hasPTEGlobalFlag;
+ u32 hasMachineCheckArchitecture;
+ u32 hasConditionalMoveAndCompareInstructions;
+ u32 hasFGPageAttributeTable;
+ u32 has36bitPageSizeExtension;
+ u32 hasProcessorSerialNumber;
+ u32 hasCFLUSHInstruction;
+ u32 hasDebugStore;
+ u32 hasACPIThermalMonitorAndClockControl;
+ u32 hasMultimediaExtensions;
+ u32 hasFastStreamingSIMDExtensionsSaveRestore;
+ u32 hasStreamingSIMDExtensions;
+ u32 hasStreamingSIMD2Extensions;
+ u32 hasSelfSnoop;
+ u32 hasHyperThreading;
+ u32 hasThermalMonitor;
+ u32 hasIntel64BitArchitecture;
+ u32 hasStreamingSIMD3Extensions;
+ //that is only for AMDs
+ u32 hasMultimediaExtensionsExt;
+ u32 hasAMD64BitArchitecture;
+ u32 has3DNOWInstructionExtensionsExt;
+ u32 has3DNOWInstructionExtensions;
+} CAPABILITIES;
+
+extern CAPABILITIES cpucaps;
+
+typedef struct {
+
+ u32 x86Family; // Processor Family
+ u32 x86Model; // Processor Model
+ u32 x86PType; // Processor Type
+ u32 x86StepID; // Stepping ID
+ u32 x86Flags; // Feature Flags
+ u32 x86EFlags; // Extended Feature Flags
+ //all of the above hold hex values
+ s8 x86ID[16]; // vendor ID string
+ s8 x86Type[20]; // CPU type string
+ s8 x86Fam[50]; // original CPU name string
+ u32 cpuspeed; // CPU speed
+} CPUINFO;
+
+extern CPUINFO cpuinfo;
+
+extern s8 *x86Ptr;
+extern u8 *j8Ptr[32];
+extern u32 *j32Ptr[32];
+
+
+#ifdef __x86_64__
+#define X86_64ASSERT() assert(0)
+#define MEMADDR_(addr, oplen) (sptr)((uptr)(addr) - ((uptr)x86Ptr + ((u64)(oplen))))
+#define SPTR32(addr) ((addr) < 0x80000000L && (addr) >= -0x80000000L)
+#define UPTR32(addr) ((addr) < 0x100000000L)
+#define MEMADDR(addr, oplen) ({ sptr _a = MEMADDR_(addr, oplen); assert(SPTR32(_a)); _a; })
+#else
+#define X86_64ASSERT()
+#define SPTR32(a) 1
+#define UPTR32(a) 1
+#define MEMADDR(addr, oplen) (addr)
+#endif
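+/*
+ * On x86-64 these macros turn an absolute address into the RIP-relative
+ * displacement stored in the instruction: for an encoding that is oplen
+ * bytes long, MEMADDR(addr, oplen) == addr - (x86Ptr + oplen), measured
+ * from the end of the instruction, and SPTR32 asserts that the result
+ * fits the signed 32-bit displacement field.
+ */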
+
+#ifdef __x86_64__
+#define Rex( w, r, x, b ) write8( 0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b) )
+#else
+#define Rex(w,r,x,b) assert(0)
+#endif
+#define RexRXB(w, reg, index, base) do { if( (w) || (reg) >= 8 || (index) >= 8 || (base) >= 8 ) \
+ Rex(w, (reg)>=8, (index)>=8, (base)>=8); } while (0)
+#define RexR(w, reg) RexRXB(w, reg, 0, 0)
+#define RexB(w, base) RexRXB(w, 0, 0, base)
+#define RexRB(w, reg, base) RexRXB(w, reg, 0, base)
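+// Example: RexRB(1, R9, RAX) emits 0x4C (REX.W|REX.R), extending the reg
+// field so R9 can be encoded; nothing is emitted when neither the W bit nor
+// a register >= 8 is involved.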
+
+void x86SetPtr( char *ptr );
+void x86Shutdown( void );
+
+void x86SetJ8( u8 *j8 );
+void x86SetJ8A( u8 *j8 );
+void x86SetJ16( u16 *j16 );
+void x86SetJ16A( u16 *j16 );
+void x86SetJ32( u32 *j32 );
+void x86SetJ32A( u32 *j32 );
+
+void x86Align( int bytes );
+u64 GetCPUTick( void );
+
+// General Helper functions
+#define ModRM(mod, rm, reg) write8( ( mod << 6 ) | ( (rm & 7) << 3 ) | ( reg & 7 ) )
+#define SibSB(ss, rm, index) write8( ( ss << 6 ) | ( rm << 3 ) | ( index ) )
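+// Note the field order: despite the parameter names, ModRM()'s second
+// argument lands in bits 5:3 (Intel's "reg" field) and the third in bits
+// 2:0 ("rm"). E.g. after opcode 0x39 (cmp r/m32, r32), ModRM(3, from, to)
+// encodes "cmp to, from" with "from" in the reg field.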
+void SET8R( int cc, int to );
+u8* J8Rel( int cc, int to );
+u32* J32Rel( int cc, u32 to );
+void CMOV32RtoR( int cc, int to, int from );
+void CMOV32MtoR( int cc, int to, uptr from );
+
+void MEMADDR_OP(bool w, unsigned opl, u64 op, bool isreg, int reg, uptr p, sptr off);
+
+#define VAROP1(op) 1, op
+#define VAROP2(op1, op2) 2, (op1) | ((op2) << 8)
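+// These pack one or two opcode bytes (low byte first) plus their count for
+// MEMADDR_OP; e.g. VAROP2(0x66, 0x81) expands to "2, 0x8166", so the 0x66
+// operand-size prefix is emitted before the 0x81 opcode.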
+
+//******************
+// IX86 instructions
+//******************
+
+//
+// * scale values:
+// * 0 - *1
+// * 1 - *2
+// * 2 - *4
+// * 3 - *8
+//
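+// Example: a scale of 2 selects *4, so an RmS form such as
+// MOV32RmStoR(EAX, EBX, ECX, 2) loads EAX through a [base + index*4]
+// effective address built from EBX and ECX.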
+
+void STC( void );
+void CLC( void );
+
+////////////////////////////////////
+// mov instructions //
+////////////////////////////////////
+
+// mov r64 to r64
+void MOV64RtoR( x86IntRegType to, x86IntRegType from );
+// mov r64 to m64
+void MOV64RtoM( uptr to, x86IntRegType from );
+// mov m64 to r64
+void MOV64MtoR( x86IntRegType to, uptr from );
+// mov sign ext imm32 to m64
+void MOV64I32toM( uptr to, u32 from );
+// mov sign ext imm32 to r64
+void MOV64I32toR( x86IntRegType to, s32 from);
+// mov imm64 to r64
+void MOV64ItoR( x86IntRegType to, u64 from);
+// mov sign ext imm32 to [r64+off]
+void MOV64ItoRmOffset( x86IntRegType to, u32 from, int offset);
+// mov [r64+offset] to r64
+void MOV64RmOffsettoR( x86IntRegType to, x86IntRegType from, int offset );
+// mov [r64][r64*scale] to r64
+void MOV64RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale);
+// mov r64 to [r64+offset]
+void MOV64RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset );
+// mov r64 to [r64][r64*scale]
+void MOV64RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale);
+
+// mov r32 to r32
+void MOV32RtoR( x86IntRegType to, x86IntRegType from );
+// mov r32 to m32
+void MOV32RtoM( uptr to, x86IntRegType from );
+// mov m32 to r32
+void MOV32MtoR( x86IntRegType to, uptr from );
+// mov [r32] to r32
+void MOV32RmtoR( x86IntRegType to, x86IntRegType from );
+void MOV32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset );
+// mov [r32][r32<<scale] to r32
+void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale );
+// mov [imm32(from2) + r32(from1)<<scale] to r32
+void MOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, int from2, int scale );
+// mov r32 to [r32]
+void MOV32RtoRm( x86IntRegType to, x86IntRegType from );
+// mov r32 to [r32][r32*scale]
+void MOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale );
+// mov imm32 to r32
+void MOV32ItoR( x86IntRegType to, u32 from );
+// mov imm32 to m32
+void MOV32ItoM( uptr to, u32 from );
+// mov imm32 to [r32+off]
+void MOV32ItoRmOffset( x86IntRegType to, u32 from, int offset);
+// mov r32 to [r32+off]
+void MOV32RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset);
+
+// mov r16 to m16
+void MOV16RtoM( uptr to, x86IntRegType from );
+// mov m16 to r16
+void MOV16MtoR( x86IntRegType to, uptr from );
+// mov [r32] to r16
+void MOV16RmtoR( x86IntRegType to, x86IntRegType from ) ;
+void MOV16RmtoROffset( x86IntRegType to, x86IntRegType from, int offset );
+// mov [imm32(from2) + r32(from1)<<scale] to r16
+void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale );
+// mov r16 to [r32]
+void MOV16RtoRm(x86IntRegType to, x86IntRegType from);
+// mov imm16 to m16
+void MOV16ItoM( uptr to, u16 from );
+/* mov r16 to [r32][r32*scale] */
+void MOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale);
+// mov imm16 to r16
+void MOV16ItoR( x86IntRegType to, u16 from );
+// mov imm16 to [r32+off]
+void MOV16ItoRmOffset( x86IntRegType to, u16 from, u32 offset);
+// mov r16 to [r32+off]
+void MOV16RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset);
+
+// mov r8 to m8
+void MOV8RtoM( uptr to, x86IntRegType from );
+// mov m8 to r8
+void MOV8MtoR( x86IntRegType to, uptr from );
+// mov [r32] to r8
+void MOV8RmtoR(x86IntRegType to, x86IntRegType from);
+void MOV8RmtoROffset(x86IntRegType to, x86IntRegType from, int offset);
+// mov r8 to [r32]
+void MOV8RtoRm(x86IntRegType to, x86IntRegType from);
+// mov imm8 to m8
+void MOV8ItoM( uptr to, u8 from );
+// mov imm8 to r8
+void MOV8ItoR( x86IntRegType to, u8 from );
+// mov imm8 to [r32+off]
+void MOV8ItoRmOffset( x86IntRegType to, u8 from, int offset);
+// mov r8 to [r32+off]
+void MOV8RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset);
+
+// movsx r8 to r32
+void MOVSX32R8toR( x86IntRegType to, x86IntRegType from );
+void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from );
+void MOVSX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset );
+// movsx m8 to r32
+void MOVSX32M8toR( x86IntRegType to, uptr from );
+// movsx r16 to r32
+void MOVSX32R16toR( x86IntRegType to, x86IntRegType from );
+void MOVSX32Rm16toR( x86IntRegType to, x86IntRegType from );
+void MOVSX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset );
+// movsx m16 to r32
+void MOVSX32M16toR( x86IntRegType to, uptr from );
+
+// movzx r8 to r32
+void MOVZX32R8toR( x86IntRegType to, x86IntRegType from );
+void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from );
+void MOVZX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset );
+// movzx m8 to r32
+void MOVZX32M8toR( x86IntRegType to, uptr from );
+// movzx r16 to r32
+void MOVZX32R16toR( x86IntRegType to, x86IntRegType from );
+void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from );
+void MOVZX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset );
+// movzx m16 to r32
+void MOVZX32M16toR( x86IntRegType to, uptr from );
+
+#ifdef __x86_64__
+void MOVZX64R8toR( x86IntRegType to, x86IntRegType from );
+void MOVZX64Rm8toR( x86IntRegType to, x86IntRegType from );
+void MOVZX64Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset );
+// movzx m8 to r64
+void MOVZX64M8toR( x86IntRegType to, uptr from );
+// movzx r16 to r64
+void MOVZX64R16toR( x86IntRegType to, x86IntRegType from );
+void MOVZX64Rm16toR( x86IntRegType to, x86IntRegType from );
+void MOVZX64Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset );
+// movzx m16 to r64
+void MOVZX64M16toR( x86IntRegType to, uptr from );
+#endif
+
+// cmovbe r32 to r32
+void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovbe m32 to r32
+void CMOVBE32MtoR( x86IntRegType to, uptr from );
+// cmovb r32 to r32
+void CMOVB32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovb m32 to r32
+void CMOVB32MtoR( x86IntRegType to, uptr from );
+// cmovae r32 to r32
+void CMOVAE32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovae m32 to r32
+void CMOVAE32MtoR( x86IntRegType to, uptr from );
+// cmova r32 to r32
+void CMOVA32RtoR( x86IntRegType to, x86IntRegType from );
+// cmova m32 to r32
+void CMOVA32MtoR( x86IntRegType to, uptr from );
+
+// cmovo r32 to r32
+void CMOVO32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovo m32 to r32
+void CMOVO32MtoR( x86IntRegType to, uptr from );
+// cmovp r32 to r32
+void CMOVP32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovp m32 to r32
+void CMOVP32MtoR( x86IntRegType to, uptr from );
+// cmovs r32 to r32
+void CMOVS32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovs m32 to r32
+void CMOVS32MtoR( x86IntRegType to, uptr from );
+// cmovno r32 to r32
+void CMOVNO32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovno m32 to r32
+void CMOVNO32MtoR( x86IntRegType to, uptr from );
+// cmovnp r32 to r32
+void CMOVNP32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovnp m32 to r32
+void CMOVNP32MtoR( x86IntRegType to, uptr from );
+// cmovns r32 to r32
+void CMOVNS32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovns m32 to r32
+void CMOVNS32MtoR( x86IntRegType to, uptr from );
+
+// cmovne r32 to r32
+void CMOVNE32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovne m32 to r32
+void CMOVNE32MtoR( x86IntRegType to, uptr from );
+// cmove r32 to r32
+void CMOVE32RtoR( x86IntRegType to, x86IntRegType from );
+// cmove m32 to r32
+void CMOVE32MtoR( x86IntRegType to, uptr from );
+// cmovg r32 to r32
+void CMOVG32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovg m32 to r32
+void CMOVG32MtoR( x86IntRegType to, uptr from );
+// cmovge r32 to r32
+void CMOVGE32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovge m32 to r32
+void CMOVGE32MtoR( x86IntRegType to, uptr from );
+// cmovl r32 to r32
+void CMOVL32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovl m32 to r32
+void CMOVL32MtoR( x86IntRegType to, uptr from );
+// cmovle r32 to r32
+void CMOVLE32RtoR( x86IntRegType to, x86IntRegType from );
+// cmovle m32 to r32
+void CMOVLE32MtoR( x86IntRegType to, uptr from );
+
+////////////////////////////////////
+// arithmetic instructions //
+////////////////////////////////////
+
+// add imm32 to r64
+void ADD64ItoR( x86IntRegType to, u32 from );
+// add m64 to r64
+void ADD64MtoR( x86IntRegType to, uptr from );
+
+// add imm32 to r32
+void ADD32ItoR( x86IntRegType to, u32 from );
+// add imm32 to m32
+void ADD32ItoM( uptr to, u32 from );
+// add imm32 to [r32+off]
+void ADD32ItoRmOffset( x86IntRegType to, u32 from, int offset);
+// add r32 to r32
+void ADD32RtoR( x86IntRegType to, x86IntRegType from );
+// add r32 to m32
+void ADD32RtoM( uptr to, x86IntRegType from );
+// add m32 to r32
+void ADD32MtoR( x86IntRegType to, uptr from );
+
+// add r16 to r16
+void ADD16RtoR( x86IntRegType to , x86IntRegType from );
+// add imm16 to r16
+void ADD16ItoR( x86IntRegType to, u16 from );
+// add imm16 to m16
+void ADD16ItoM( uptr to, u16 from );
+// add r16 to m16
+void ADD16RtoM( uptr to, x86IntRegType from );
+// add m16 to r16
+void ADD16MtoR( x86IntRegType to, uptr from );
+
+// add m8 to r8
+void ADD8MtoR( x86IntRegType to, uptr from );
+
+// adc imm32 to r32
+void ADC32ItoR( x86IntRegType to, u32 from );
+// adc imm32 to m32
+void ADC32ItoM( uptr to, u32 from );
+// adc r32 to r32
+void ADC32RtoR( x86IntRegType to, x86IntRegType from );
+// adc m32 to r32
+void ADC32MtoR( x86IntRegType to, uptr from );
+// adc r32 to m32
+void ADC32RtoM( uptr to, x86IntRegType from );
+
+// inc r32
+void INC32R( x86IntRegType to );
+// inc m32
+void INC32M( uptr to );
+// inc r16
+void INC16R( x86IntRegType to );
+// inc m16
+void INC16M( uptr to );
+
+// sub m64 to r64
+void SUB64MtoR( x86IntRegType to, uptr from );
+void SUB64ItoR( x86IntRegType to, u32 from );
+
+// sub imm32 to r32
+void SUB32ItoR( x86IntRegType to, u32 from );
+// sub imm32 to m32
+void SUB32ItoM( uptr to, u32 from ) ;
+// sub r32 to r32
+void SUB32RtoR( x86IntRegType to, x86IntRegType from );
+// sub m32 to r32
+void SUB32MtoR( x86IntRegType to, uptr from ) ;
+// sub r32 to m32
+void SUB32RtoM( uptr to, x86IntRegType from );
+// sub r16 to r16
+void SUB16RtoR( x86IntRegType to, u16 from );
+// sub imm16 to r16
+void SUB16ItoR( x86IntRegType to, u16 from );
+// sub imm16 to m16
+void SUB16ItoM( uptr to, u16 from ) ;
+// sub m16 to r16
+void SUB16MtoR( x86IntRegType to, uptr from );
+
+// sbb r64 to r64
+void SBB64RtoR( x86IntRegType to, x86IntRegType from );
+
+// sbb imm32 to r32
+void SBB32ItoR( x86IntRegType to, u32 from );
+// sbb imm32 to m32
+void SBB32ItoM( uptr to, u32 from );
+// sbb r32 to r32
+void SBB32RtoR( x86IntRegType to, x86IntRegType from );
+// sbb m32 to r32
+void SBB32MtoR( x86IntRegType to, uptr from );
+// sbb r32 to m32
+void SBB32RtoM( uptr to, x86IntRegType from );
+
+// dec r32
+void DEC32R( x86IntRegType to );
+// dec m32
+void DEC32M( uptr to );
+// dec r16
+void DEC16R( x86IntRegType to );
+// dec m16
+void DEC16M( uptr to );
+
+// mul eax by r32 to edx:eax
+void MUL32R( x86IntRegType from );
+// mul eax by m32 to edx:eax
+void MUL32M( uptr from );
+
+// imul eax by r32 to edx:eax
+void IMUL32R( x86IntRegType from );
+// imul eax by m32 to edx:eax
+void IMUL32M( uptr from );
+// imul r32 by r32 to r32
+void IMUL32RtoR( x86IntRegType to, x86IntRegType from );
+
+// div eax by r32 to edx:eax
+void DIV32R( x86IntRegType from );
+// div eax by m32 to edx:eax
+void DIV32M( uptr from );
+
+// idiv eax by r32 to edx:eax
+void IDIV32R( x86IntRegType from );
+// idiv eax by m32 to edx:eax
+void IDIV32M( uptr from );
+
+////////////////////////////////////
+// shifting instructions //
+////////////////////////////////////
+
+// shl imm8 to r64
+void SHL64ItoR( x86IntRegType to, u8 from );
+// shl cl to r64
+void SHL64CLtoR( x86IntRegType to );
+// shr imm8 to r64
+void SHR64ItoR( x86IntRegType to, u8 from );
+// shr cl to r64
+void SHR64CLtoR( x86IntRegType to );
+// sar imm8 to r64
+void SAR64ItoR( x86IntRegType to, u8 from );
+// sar cl to r64
+void SAR64CLtoR( x86IntRegType to );
+
+// shl imm8 to r32
+void SHL32ItoR( x86IntRegType to, u8 from );
+/* shl imm8 to m32 */
+void SHL32ItoM( uptr to, u8 from );
+// shl cl to r32
+void SHL32CLtoR( x86IntRegType to );
+
+// shl imm8 to r16
+void SHL16ItoR( x86IntRegType to, u8 from );
+// shl imm8 to r8
+void SHL8ItoR( x86IntRegType to, u8 from );
+
+// shr imm8 to r32
+void SHR32ItoR( x86IntRegType to, u8 from );
+/* shr imm8 to m32 */
+void SHR32ItoM( uptr to, u8 from );
+// shr cl to r32
+void SHR32CLtoR( x86IntRegType to );
+
+// shr imm8 to r8
+void SHR8ItoR( x86IntRegType to, u8 from );
+
+// sar imm8 to r32
+void SAR32ItoR( x86IntRegType to, u8 from );
+// sar imm8 to m32
+void SAR32ItoM( uptr to, u8 from );
+// sar cl to r32
+void SAR32CLtoR( x86IntRegType to );
+
+// sar imm8 to r16
+void SAR16ItoR( x86IntRegType to, u8 from );
+
+// ror imm8 to r32 (rotate right)
+void ROR32ItoR( x86IntRegType to,u8 from );
+
+void RCR32ItoR( x86IntRegType to,u8 from );
+// shld imm8 to r32
+void SHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift );
+// shrd imm8 to r32
+void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift );
+
+// sal imm8 to r32
+#define SAL32ItoR SHL32ItoR
+// sal cl to r32
+#define SAL32CLtoR SHL32CLtoR
+
+// logical instructions
+
+// or imm32 to r64
+void OR64ItoR( x86IntRegType to, u32 from );
+// or m64 to r64
+void OR64MtoR( x86IntRegType to, uptr from );
+// or r64 to r64
+void OR64RtoR( x86IntRegType to, x86IntRegType from );
+// or r64 to m64
+void OR64RtoM( uptr to, x86IntRegType from );
+
+// or imm32 to r32
+void OR32ItoR( x86IntRegType to, u32 from );
+// or imm32 to m32
+void OR32ItoM( uptr to, u32 from );
+// or r32 to r32
+void OR32RtoR( x86IntRegType to, x86IntRegType from );
+// or r32 to m32
+void OR32RtoM( uptr to, x86IntRegType from );
+// or m32 to r32
+void OR32MtoR( x86IntRegType to, uptr from );
+// or r16 to r16
+void OR16RtoR( x86IntRegType to, x86IntRegType from );
+// or imm16 to r16
+void OR16ItoR( x86IntRegType to, u16 from );
+// or imm16 to m16
+void OR16ItoM( uptr to, u16 from );
+// or m16 to r16
+void OR16MtoR( x86IntRegType to, uptr from );
+// or r16 to m16
+void OR16RtoM( uptr to, x86IntRegType from );
+
+// or r8 to r8
+void OR8RtoR( x86IntRegType to, x86IntRegType from );
+// or r8 to m8
+void OR8RtoM( uptr to, x86IntRegType from );
+// or imm8 to m8
+void OR8ItoM( uptr to, u8 from );
+// or m8 to r8
+void OR8MtoR( x86IntRegType to, uptr from );
+
+// xor imm32 to r64
+void XOR64ItoR( x86IntRegType to, u32 from );
+// xor r64 to r64
+void XOR64RtoR( x86IntRegType to, x86IntRegType from );
+// xor m64 to r64
+void XOR64MtoR( x86IntRegType to, uptr from );
+// xor r64 to m64
+void XOR64RtoM( uptr to, x86IntRegType from );
+// xor imm32 to r32
+void XOR32ItoR( x86IntRegType to, u32 from );
+// xor imm32 to m32
+void XOR32ItoM( uptr to, u32 from );
+// xor r32 to r32
+void XOR32RtoR( x86IntRegType to, x86IntRegType from );
+// xor r16 to r16
+void XOR16RtoR( x86IntRegType to, x86IntRegType from );
+// xor r32 to m32
+void XOR32RtoM( uptr to, x86IntRegType from );
+// xor m32 to r32
+void XOR32MtoR( x86IntRegType to, uptr from );
+// xor r16 to m16
+void XOR16RtoM( uptr to, x86IntRegType from );
+// xor imm16 to r16
+void XOR16ItoR( x86IntRegType to, u16 from );
+
+// and imm32 to r64
+void AND64I32toR( x86IntRegType to, u32 from );
+// and m64 to r64
+void AND64MtoR( x86IntRegType to, uptr from );
+// and r64 to m64
+void AND64RtoM( uptr to, x86IntRegType from );
+// and r64 to r64
+void AND64RtoR( x86IntRegType to, x86IntRegType from );
+// and imm32 to m64
+void AND64I32toM( uptr to, u32 from );
+
+// and imm32 to r32
+void AND32ItoR( x86IntRegType to, u32 from );
+// and sign ext imm8 to r32
+void AND32I8toR( x86IntRegType to, u8 from );
+// and imm32 to m32
+void AND32ItoM( uptr to, u32 from );
+// and sign ext imm8 to m32
+void AND32I8toM( uptr to, u8 from );
+// and r32 to r32
+void AND32RtoR( x86IntRegType to, x86IntRegType from );
+// and r32 to m32
+void AND32RtoM( uptr to, x86IntRegType from );
+// and m32 to r32
+void AND32MtoR( x86IntRegType to, uptr from );
+// and r16 to r16
+void AND16RtoR( x86IntRegType to, x86IntRegType from );
+// and imm16 to r16
+void AND16ItoR( x86IntRegType to, u16 from );
+// and imm16 to m16
+void AND16ItoM( uptr to, u16 from );
+// and r16 to m16
+void AND16RtoM( uptr to, x86IntRegType from );
+// and m16 to r16
+void AND16MtoR( x86IntRegType to, uptr from );
+// and imm8 to r8
+void AND8ItoR( x86IntRegType to, u8 from );
+// and imm8 to m8
+void AND8ItoM( uptr to, u8 from );
+// and r8 to m8
+void AND8RtoM( uptr to, x86IntRegType from );
+// and m8 to r8
+void AND8MtoR( x86IntRegType to, uptr from );
+// and r8 to r8
+void AND8RtoR( x86IntRegType to, x86IntRegType from );
+
+// not r64
+void NOT64R( x86IntRegType from );
+// not r32
+void NOT32R( x86IntRegType from );
+// not m32
+void NOT32M( uptr from );
+// neg r64
+void NEG64R( x86IntRegType from );
+// neg r32
+void NEG32R( x86IntRegType from );
+// neg m32
+void NEG32M( uptr from );
+// neg r16
+void NEG16R( x86IntRegType from );
+
+////////////////////////////////////
+// jump instructions //
+////////////////////////////////////
+
+// jmp rel8
+u8* JMP8( u8 to );
+
+// jmp rel32
+u32* JMP32( uptr to );
+// jmp r32 (r64 if __x86_64__)
+void JMPR( x86IntRegType to );
+// jmp m32
+void JMP32M( uptr to );
+
+// jp rel8
+u8* JP8( u8 to );
+// jnp rel8
+u8* JNP8( u8 to );
+// je rel8
+u8* JE8( u8 to );
+// jz rel8
+u8* JZ8( u8 to );
+// jg rel8
+u8* JG8( u8 to );
+// jge rel8
+u8* JGE8( u8 to );
+// js rel8
+u8* JS8( u8 to );
+// jns rel8
+u8* JNS8( u8 to );
+// jl rel8
+u8* JL8( u8 to );
+// ja rel8
+u8* JA8( u8 to );
+// jae rel8
+u8* JAE8( u8 to );
+// jb rel8
+u8* JB8( u8 to );
+// jbe rel8
+u8* JBE8( u8 to );
+// jle rel8
+u8* JLE8( u8 to );
+// jne rel8
+u8* JNE8( u8 to );
+// jnz rel8
+u8* JNZ8( u8 to );
+// jng rel8
+u8* JNG8( u8 to );
+// jnge rel8
+u8* JNGE8( u8 to );
+// jnl rel8
+u8* JNL8( u8 to );
+// jnle rel8
+u8* JNLE8( u8 to );
+// jo rel8
+u8* JO8( u8 to );
+// jno rel8
+u8* JNO8( u8 to );
+
+// jb rel16
+u16* JB16( u16 to );
+
+// jb rel32
+u32* JB32( u32 to );
+// je rel32
+u32* JE32( u32 to );
+// jz rel32
+u32* JZ32( u32 to );
+// jg rel32
+u32* JG32( u32 to );
+// jge rel32
+u32* JGE32( u32 to );
+// jl rel32
+u32* JL32( u32 to );
+// jle rel32
+u32* JLE32( u32 to );
+// jae rel32
+u32* JAE32( u32 to );
+// jne rel32
+u32* JNE32( u32 to );
+// jnz rel32
+u32* JNZ32( u32 to );
+// jng rel32
+u32* JNG32( u32 to );
+// jnge rel32
+u32* JNGE32( u32 to );
+// jnl rel32
+u32* JNL32( u32 to );
+// jnle rel32
+u32* JNLE32( u32 to );
+// jo rel32
+u32* JO32( u32 to );
+// jno rel32
+u32* JNO32( u32 to );
+// js rel32
+u32* JS32( u32 to );
+
+// call func
+void CALLFunc( uptr func);
+// call rel32
+void CALL32( s32 to );
+// call r32
+void CALL32R( x86IntRegType to );
+// call r64
+void CALL64R( x86IntRegType to );
+
+
+////////////////////////////////////
+// misc instructions //
+////////////////////////////////////
+
+// cmp imm32 to r64
+void CMP64I32toR( x86IntRegType to, u32 from );
+// cmp m64 to r64
+void CMP64MtoR( x86IntRegType to, uptr from );
+// cmp r64 to r64
+void CMP64RtoR( x86IntRegType to, x86IntRegType from );
+
+// cmp imm32 to r32
+void CMP32ItoR( x86IntRegType to, u32 from );
+// cmp imm32 to m32
+void CMP32ItoM( uptr to, u32 from );
+// cmp r32 to r32
+void CMP32RtoR( x86IntRegType to, x86IntRegType from );
+// cmp m32 to r32
+void CMP32MtoR( x86IntRegType to, uptr from );
+// cmp imm8 to [r32]
+void CMP32I8toRm( x86IntRegType to, u8 from);
+// cmp imm8 to [r32+off]
+void CMP32I8toRmOffset8( x86IntRegType to, u8 from, u8 off);
+// cmp imm8 to m32
+void CMP32I8toM( uptr to, u8 from);
+
+// cmp imm16 to r16
+void CMP16ItoR( x86IntRegType to, u16 from );
+// cmp imm16 to m16
+void CMP16ItoM( uptr to, u16 from );
+// cmp r16 to r16
+void CMP16RtoR( x86IntRegType to, x86IntRegType from );
+// cmp m16 to r16
+void CMP16MtoR( x86IntRegType to, uptr from );
+
+// cmp imm8 to r8
+void CMP8ItoR( x86IntRegType to, u8 from );
+// cmp m8 to r8
+void CMP8MtoR( x86IntRegType to, uptr from );
+
+// test r64 to r64
+void TEST64RtoR( x86IntRegType to, x86IntRegType from );
+// test imm32 to r32
+void TEST32ItoR( x86IntRegType to, u32 from );
+// test imm32 to m32
+void TEST32ItoM( uptr to, u32 from );
+// test r32 to r32
+void TEST32RtoR( x86IntRegType to, x86IntRegType from );
+// test imm32 to [r32]
+void TEST32ItoRm( x86IntRegType to, u32 from );
+// test imm16 to r16
+void TEST16ItoR( x86IntRegType to, u16 from );
+// test r16 to r16
+void TEST16RtoR( x86IntRegType to, x86IntRegType from );
+// test imm8 to r8
+void TEST8ItoR( x86IntRegType to, u8 from );
+// test imm8 to m8
+void TEST8ItoM( uptr to, u8 from );
+
+// sets r8
+void SETS8R( x86IntRegType to );
+// setl r8
+void SETL8R( x86IntRegType to );
+// setge r8
+void SETGE8R( x86IntRegType to );
+// setg r8
+void SETG8R( x86IntRegType to );
+// seta r8
+void SETA8R( x86IntRegType to );
+// setae r8
+void SETAE8R( x86IntRegType to );
+// setb r8
+void SETB8R( x86IntRegType to );
+// setnz r8
+void SETNZ8R( x86IntRegType to );
+// setz r8
+void SETZ8R( x86IntRegType to );
+// sete r8
+void SETE8R( x86IntRegType to );
+
+// push imm32
+void PUSH32I( u32 from );
+
+#ifdef __x86_64__
+void PUSHI( u32 from );
+// push r64
+void PUSH64R( x86IntRegType from );
+// push m64
+void PUSH64M( uptr from );
+// pop r64
+void POP64R( x86IntRegType from );
+#else
+// push r32
+void PUSH32R( x86IntRegType from );
+// push m32
+void PUSH32M( uptr from );
+// pop r32
+void POP32R( x86IntRegType from );
+// pushad
+void PUSHA32( void );
+// popad
+void POPA32( void );
+#endif
+
+void PUSHR(x86IntRegType from);
+void POPR(x86IntRegType from);
+
+// pushfd
+void PUSHFD( void );
+// popfd
+void POPFD( void );
+// ret
+void RET( void );
+// 2-byte ret ("rep ret"); sidesteps the AMD branch-misprediction penalty for 1-byte ret targets
+void RET2( void );
+
+void CBW();
+void CWDE();
+// cwd
+void CWD( void );
+// cdq
+void CDQ( void );
+// cdqe
+void CDQE( void );
+
+void LAHF();
+void SAHF();
+
+void BT32ItoR( x86IntRegType to, x86IntRegType from );
+void BSRRtoR(x86IntRegType to, x86IntRegType from);
+void BSWAP32R( x86IntRegType to );
+
+// to = from + offset
+void LEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset);
+void LEA32RtoR(x86IntRegType to, x86IntRegType from, u32 offset);
+
+// to = from0 + from1
+void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1);
+void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1);
+
+// to = from << scale (max is 3)
+void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale);
+void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale);
+
+//******************
+// FPU instructions
+//******************
+
+// fild m32 to fpu reg stack
+void FILD32( uptr from );
+// fistp m32 from fpu reg stack
+void FISTP32( uptr from );
+// fld m32 to fpu reg stack
+void FLD32( uptr from );
+// fld st(i)
+void FLD(int st);
+// fld1 (push +1.0f on the stack)
+void FLD1();
+// fldl2e (push log2(e) on the stack)
+void FLDL2E();
+// fst m32 from fpu reg stack
+void FST32( uptr to );
+// fstp m32 from fpu reg stack
+void FSTP32( uptr to );
+// fstp st(i)
+void FSTP(int st);
+
+// fldcw fpu control word from m16
+void FLDCW( uptr from );
+// fstcw fpu control word to m16
+void FNSTCW( uptr to );
+void FXAM();
+void FDECSTP();
+// frndint
+void FRNDINT();
+void FXCH(int st);
+void F2XM1();
+void FSCALE();
+
+// fadd ST(src) to fpu reg stack ST(0)
+void FADD32Rto0( x86IntRegType src );
+// fadd ST(0) to fpu reg stack ST(src)
+void FADD320toR( x86IntRegType src );
+// fsub ST(src) to fpu reg stack ST(0)
+void FSUB32Rto0( x86IntRegType src );
+// fsub ST(0) to fpu reg stack ST(src)
+void FSUB320toR( x86IntRegType src );
+// fsubp -> subtract ST(0) from ST(1), store in ST(1) and POP stack
+void FSUBP( void );
+// fmul ST(src) to fpu reg stack ST(0)
+void FMUL32Rto0( x86IntRegType src );
+// fmul ST(0) to fpu reg stack ST(src)
+void FMUL320toR( x86IntRegType src );
+// fdiv ST(src) to fpu reg stack ST(0)
+void FDIV32Rto0( x86IntRegType src );
+// fdiv ST(0) to fpu reg stack ST(src)
+void FDIV320toR( x86IntRegType src );
+// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src)
+void FDIV320toRP( x86IntRegType src );
+
+// fadd m32 to fpu reg stack
+void FADD32( uptr from );
+// fsub m32 to fpu reg stack
+void FSUB32( uptr from );
+// fmul m32 to fpu reg stack
+void FMUL32( uptr from );
+// fdiv m32 to fpu reg stack
+void FDIV32( uptr from );
+// fcomi st, st( i)
+void FCOMI( x86IntRegType src );
+// fcomip st, st( i)
+void FCOMIP( x86IntRegType src );
+// fucomi st, st( i)
+void FUCOMI( x86IntRegType src );
+// fucomip st, st( i)
+void FUCOMIP( x86IntRegType src );
+// fcom m32 to fpu reg stack
+void FCOM32( uptr from );
+// fabs fpu reg stack
+void FABS( void );
+// fsqrt fpu reg stack
+void FSQRT( void );
+// fpatan fpu reg stack
+void FPATAN( void );
+// fsin fpu reg stack
+void FSIN( void );
+// fchs fpu reg stack
+void FCHS( void );
+
+// fcmovb fpu reg to fpu reg stack
+void FCMOVB32( x86IntRegType from );
+// fcmove fpu reg to fpu reg stack
+void FCMOVE32( x86IntRegType from );
+// fcmovbe fpu reg to fpu reg stack
+void FCMOVBE32( x86IntRegType from );
+// fcmovu fpu reg to fpu reg stack
+void FCMOVU32( x86IntRegType from );
+// fcmovnb fpu reg to fpu reg stack
+void FCMOVNB32( x86IntRegType from );
+// fcmovne fpu reg to fpu reg stack
+void FCMOVNE32( x86IntRegType from );
+// fcmovnbe fpu reg to fpu reg stack
+void FCMOVNBE32( x86IntRegType from );
+// fcmovnu fpu reg to fpu reg stack
+void FCMOVNU32( x86IntRegType from );
+void FCOMP32( uptr from );
+void FNSTSWtoAX( void );
+
+// probably a little extreme here, but x86-64 should NOT use MMX
+#ifdef __x86_64__
+
+#define MMXONLY(code)
+
+#else
+
+#define MMXONLY(code) code
+
+//******************
+// MMX instructions
+//******************
+
+// r64 = mm
+
+// movq m64 to r64
+void MOVQMtoR( x86MMXRegType to, uptr from );
+// movq r64 to m64
+void MOVQRtoM( uptr to, x86MMXRegType from );
+
+// pand r64 to r64
+void PANDRtoR( x86MMXRegType to, x86MMXRegType from );
+// pand m64 to r64
+void PANDMtoR( x86MMXRegType to, uptr from );
+// pandn r64 to r64
+void PANDNRtoR( x86MMXRegType to, x86MMXRegType from );
+// pandn m64 to r64
+void PANDNMtoR( x86MMXRegType to, uptr from );
+// por r64 to r64
+void PORRtoR( x86MMXRegType to, x86MMXRegType from );
+// por m64 to r64
+void PORMtoR( x86MMXRegType to, uptr from );
+// pxor r64 to r64
+void PXORRtoR( x86MMXRegType to, x86MMXRegType from );
+// pxor m64 to r64
+void PXORMtoR( x86MMXRegType to, uptr from );
+
+// psllq r64 to r64
+void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from );
+// psllq m64 to r64
+void PSLLQMtoR( x86MMXRegType to, uptr from );
+// psllq imm8 to r64
+void PSLLQItoR( x86MMXRegType to, u8 from );
+// psrlq r64 to r64
+void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from );
+// psrlq m64 to r64
+void PSRLQMtoR( x86MMXRegType to, uptr from );
+// psrlq imm8 to r64
+void PSRLQItoR( x86MMXRegType to, u8 from );
+
+// paddusb r64 to r64
+void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from );
+// paddusb m64 to r64
+void PADDUSBMtoR( x86MMXRegType to, uptr from );
+// paddusw r64 to r64
+void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from );
+// paddusw m64 to r64
+void PADDUSWMtoR( x86MMXRegType to, uptr from );
+
+// paddb r64 to r64
+void PADDBRtoR( x86MMXRegType to, x86MMXRegType from );
+// paddb m64 to r64
+void PADDBMtoR( x86MMXRegType to, uptr from );
+// paddw r64 to r64
+void PADDWRtoR( x86MMXRegType to, x86MMXRegType from );
+// paddw m64 to r64
+void PADDWMtoR( x86MMXRegType to, uptr from );
+// paddd r64 to r64
+void PADDDRtoR( x86MMXRegType to, x86MMXRegType from );
+// paddd m64 to r64
+void PADDDMtoR( x86MMXRegType to, uptr from );
+void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from );
+void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from );
+
+// paddq m64 to r64 (sse2 only?)
+void PADDQMtoR( x86MMXRegType to, uptr from );
+// paddq r64 to r64 (sse2 only?)
+void PADDQRtoR( x86MMXRegType to, x86MMXRegType from );
+
+void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from );
+void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from );
+
+void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from );
+void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from );
+void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from );
+void PSUBDMtoR( x86MMXRegType to, uptr from );
+
+// psubq m64 to r64 (sse2 only?)
+void PSUBQMtoR( x86MMXRegType to, uptr from );
+// psubq r64 to r64 (sse2 only?)
+void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from );
+
+// pmuludq m64 to r64 (sse2 only?)
+void PMULUDQMtoR( x86MMXRegType to, uptr from );
+// pmuludq r64 to r64 (sse2 only?)
+void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from );
+
+void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from );
+void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from );
+void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from );
+void PCMPEQDMtoR( x86MMXRegType to, uptr from );
+void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from );
+void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from );
+void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from );
+void PCMPGTDMtoR( x86MMXRegType to, uptr from );
+void PSRLWItoR( x86MMXRegType to, u8 from );
+void PSRLDItoR( x86MMXRegType to, u8 from );
+void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from );
+void PSLLWItoR( x86MMXRegType to, u8 from );
+void PSLLDItoR( x86MMXRegType to, u8 from );
+void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from );
+void PSRAWItoR( x86MMXRegType to, u8 from );
+void PSRADItoR( x86MMXRegType to, u8 from );
+void PSRADRtoR( x86MMXRegType to, x86MMXRegType from );
+void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from );
+void PUNPCKLDQMtoR( x86MMXRegType to, uptr from );
+void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from );
+void PUNPCKHDQMtoR( x86MMXRegType to, uptr from );
+void MOVQ64ItoR( x86MMXRegType reg, u64 i ); // prototype; TODO: emit all constants at the end of the block, not after "jr $+8"
+void MOVQRtoR( x86MMXRegType to, x86MMXRegType from );
+void MOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset );
+void MOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset );
+void MOVDMtoMMX( x86MMXRegType to, uptr from );
+void MOVDMMXtoM( uptr to, x86MMXRegType from );
+void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from );
+void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from );
+void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset );
+void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from );
+void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from );
+void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset );
+void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 );
+void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8);
+void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8);
+void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from);
+
+// emms
+void EMMS( void );
+
+//**********************************************************************************
+// PACKSSWB, PACKSSDW: pack with signed saturation (64-bit MMX forms)
+//**********************************************************************************
+void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from);
+void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from);
+
+void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from);
+
+void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from);
+void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from);
+
+#endif // !__x86_64__
+
+//*********************
+// SSE instructions *
+//*********************
+void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from );
+void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
+void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from );
+
+void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from );
+void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from );
+void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset );
+void SSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset );
+
+void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
+void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from );
+void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from );
+
+void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
+void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from );
+void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from );
+void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset );
+void SSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset );
+
+void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from );
+void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from );
+void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset );
+void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset );
+
+void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from );
+void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset );
+void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from );
+void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset );
+
+void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale );
+void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale );
+void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset );
+void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset );
+void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale );
+void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale );
+void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from );
+void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from );
+
+void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset );
+void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset );
+
+void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset );
+void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset );
+
+void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from );
+
+void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
+void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
+#ifndef __x86_64__
+void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from );
+void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from );
+void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from );
+void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from );
+void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from );
+#endif
+void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from );
+void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from);
+void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from);
+void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from);
+void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from);
+
+void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
+void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 );
+void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
+void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 );
+void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+// VectorPath
+void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 );
+void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
+
+void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 );
+void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
+void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 );
+void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 );
+
+void SSE_STMXCSR( uptr from );
+void SSE_LDMXCSR( uptr from );
+
+
+//*********************
+// SSE2 Instructions *
+//*********************
+void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from);
+void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from);
+
+void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from);
+void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from);
+
+void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8);
+void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from );
+
+//**********************************************************************************/
+//PACKSSWB,PACKSSDW: Pack with Signed Saturation
+//**********************************************************************************
+void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from);
+
+void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from);
+
+//**********************************************************************************/
+//PUNPCKxx: Unpack/interleave packed integers (low and high forms)
+//**********************************************************************************
+void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from);
+
+void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from);
+
+void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from);
+
+void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from);
+
+void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from);
+
+// multiply packed 16-bit halfwords
+void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from);
+
+void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from);
+
+
+//**********************************************************************************/
+//PMOVMSKB: Create 16-bit mask from the sign bits of the 8-bit integers
+//**********************************************************************************
+void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from);
+
+void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from);
+void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from);
+
+//**********************************************************************************/
+//PEXTRW,PINSRW: Packed Extract/Insert Word *
+//**********************************************************************************
+void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 );
+void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 );
+
+
+//**********************************************************************************/
+//PSUBx: Subtract Packed Integers *
+//**********************************************************************************
+void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from );
+///////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//PCMPxx: Compare Packed Integers *
+//**********************************************************************************
+void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from );
+void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from );
+void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from );
+//**********************************************************************************/
+//MOVD: Move Dword(32bit) to/from XMM reg                                         *
+//**********************************************************************************
+void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from );
+void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from );
+void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from );
+void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset );
+void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from );
+void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from );
+void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from );
+void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset );
+
+#ifdef __x86_64__
+void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from );
+void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from );
+#endif
+
+//**********************************************************************************/
+//POR : SSE Bitwise OR *
+//**********************************************************************************
+void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from );
+
+void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from);
+
+void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from);
+void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from);
+//*********************
+// SSE-X - uses both SSE and SSE2 code and tries to keep the data consistent
+// between them; uses g_xmmtypes to infer the correct instruction form.
+//*********************
+void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from );
+void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from );
+void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
+void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset );
+void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset );
+
+void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from );
+void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from );
+void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
+void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from );
+void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from );
+void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from );
+void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset );
+void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset );
+
+void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from );
+void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from );
+void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from );
+void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from );
+void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
+void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from);
+void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
+void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from);
+
+void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+
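A sketch of the dispatch these SSEX wrappers perform (hypothetical body; the
real definitions live in ix86_sse.c, and XMMT_INT stands in for whichever enum
value g_xmmtypes uses to mark integer data):

	void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
	{
		if( g_xmmtypes[from] == XMMT_INT )
			SSE2_MOVDQA_XMM_to_XMM(to, from);	/* integer data: movdqa */
		else
			SSE_MOVAPS_XMM_to_XMM(to, from);	/* float data: movaps */
	}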
+//*********************
+// 3DNOW instructions *
+//*********************
+void FEMMS( void );
+void PFCMPEQMtoR( x86IntRegType to, uptr from );
+void PFCMPGTMtoR( x86IntRegType to, uptr from );
+void PFCMPGEMtoR( x86IntRegType to, uptr from );
+void PFADDMtoR( x86IntRegType to, uptr from );
+void PFADDRtoR( x86IntRegType to, x86IntRegType from );
+void PFSUBMtoR( x86IntRegType to, uptr from );
+void PFSUBRtoR( x86IntRegType to, x86IntRegType from );
+void PFMULMtoR( x86IntRegType to, uptr from );
+void PFMULRtoR( x86IntRegType to, x86IntRegType from );
+void PFRCPMtoR( x86IntRegType to, uptr from );
+void PFRCPRtoR( x86IntRegType to, x86IntRegType from );
+void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from );
+void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from );
+void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from );
+void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from );
+void PF2IDMtoR( x86IntRegType to, uptr from );
+void PF2IDRtoR( x86IntRegType to, x86IntRegType from );
+void PI2FDMtoR( x86IntRegType to, uptr from );
+void PI2FDRtoR( x86IntRegType to, x86IntRegType from );
+void PFMAXMtoR( x86IntRegType to, uptr from );
+void PFMAXRtoR( x86IntRegType to, x86IntRegType from );
+void PFMINMtoR( x86IntRegType to, uptr from );
+void PFMINRtoR( x86IntRegType to, x86IntRegType from );
+
+void SSE2EMU_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from);
+void SSE2EMU_MOVQ_M64_to_XMM( x86SSERegType to, uptr from);
+void SSE2EMU_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from);
+void SSE2EMU_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset );
+void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset );
+
+#ifndef __x86_64__
+void SSE2EMU_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from);
+void SSE2EMU_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from);
+#endif
+
+/* SSE2 functions emulated for SSE-only CPUs, by kekko */
+
+void SSE2EMU_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 );
+void SSE2EMU_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from );
+void SSE2EMU_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from );
+void SSE2EMU_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from );
+void SSE2EMU_MOVD_XMM_to_M32( uptr to, x86SSERegType from );
+void SSE2EMU_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from );
+
+////////////////////////////////////////////////////
+#ifdef _DEBUG
+#define WRITECHECK() CheckX86Ptr()
+#else
+#define WRITECHECK()
+#endif
+
+#define writeVAL(val) ({ \
+ WRITECHECK(); \
+ *(typeof(val)*)x86Ptr = (val); \
+ x86Ptr += sizeof(val); \
+ (void)0; \
+ })
+
+#define write8(val ) writeVAL((u8)(val))
+#define write16(val ) writeVAL((u16)(val))
+#define write32( val ) writeVAL((u32)(val))
+#define write64( val ) writeVAL((u64)(val))
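These primitives compose directly into opcode emitters. A minimal sketch
(hypothetical helper; assumes x86Ptr points into a writable code buffer, as
everywhere else in this recompiler):

	static void emit_emms(void)
	{
		write16(0x770F);	/* little-endian store => bytes 0F 77 = emms */
	}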
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __IX86_H__
diff --git a/libpcsxcore/ix86_64/ix86_3dnow.c b/libpcsxcore/ix86_64/ix86_3dnow.c
new file mode 100644
index 0000000..8fd4233
--- /dev/null
+++ b/libpcsxcore/ix86_64/ix86_3dnow.c
@@ -0,0 +1,178 @@
+// stop compiling if NORECBUILD build (only for Visual Studio)
+#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD))
+
+#include "ix86-64.h"
+
+/**********************/
+/* 3DNOW instructions */
+/**********************/
+
+/* femms */
+void FEMMS( void )
+{
+ write16( 0x0E0F );
+}
+
+void PFCMPEQMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0xB0 );
+}
+
+void PFCMPGTMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0xA0 );
+}
+
+void PFCMPGEMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0x90 );
+}
+
+void PFADDMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0x9E );
+}
+
+void PFADDRtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0x9E );
+}
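Every 3DNow! instruction shares the 0F 0F <modrm> <suffix> layout; only the
trailing suffix byte selects the operation. A worked example, assuming the
usual register enums (MM0 = 0, MM1 = 1):

	PFADDRtoR(MM0, MM1);	/* emits 0F 0F C1 9E = pfadd mm0, mm1 */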
+
+void PFSUBMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0x9A );
+}
+
+void PFSUBRtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0x9A );
+}
+
+void PFMULMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0xB4 );
+}
+
+void PFMULRtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0xB4 );
+}
+
+void PFRCPMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0x96 );
+}
+
+void PFRCPRtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0x96 );
+}
+
+void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0xA6 );
+}
+
+void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0xB6 );
+}
+
+void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0x97 );
+}
+
+void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0xA7 );
+}
+
+void PF2IDMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0x1D );
+}
+
+void PF2IDRtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0x1D );
+}
+
+void PI2FDMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0x0D );
+}
+
+void PI2FDRtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0x0D );
+}
+
+void PFMAXMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0xA4 );
+}
+
+void PFMAXRtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0xA4 );
+}
+
+void PFMINMtoR( x86IntRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x0F), true, to, from, 1);
+ write8( 0x94 );
+}
+
+void PFMINRtoR( x86IntRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x0F0F );
+ ModRM( 3, to, from );
+ write8( 0x94 );
+}
+
+#endif
diff --git a/libpcsxcore/ix86_64/ix86_cpudetect.c b/libpcsxcore/ix86_64/ix86_cpudetect.c
new file mode 100644
index 0000000..3c014d8
--- /dev/null
+++ b/libpcsxcore/ix86_64/ix86_cpudetect.c
@@ -0,0 +1,487 @@
+/* Cpudetection lib
+ * Copyright (C) 2002-2003 Pcsx2 Team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA
+ */
+#if defined (_WIN32)
+#include <windows.h>
+#endif
+
+#include <string.h>
+#include <stdio.h>
+
+#include "ix86-64.h"
+
+#if defined (_MSC_VER) && _MSC_VER >= 1400
+
+ void __cpuid(int* CPUInfo, int InfoType);
+ unsigned __int64 __rdtsc();
+
+ #pragma intrinsic(__cpuid)
+ #pragma intrinsic(__rdtsc)
+
+#endif
+
+CAPABILITIES cpucaps;
+CPUINFO cpuinfo;
+
+#define cpuid(cmd,a,b,c,d) \
+ __asm__ __volatile__("cpuid" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (cmd))
+
+static s32 iCpuId( u32 cmd, u32 *regs )
+{
+ int flag=1;
+
+#if defined (_MSC_VER) && _MSC_VER >= 1400
+
+ __cpuid( regs, cmd );
+
+ return 0;
+
+#elif defined (_MSC_VER)
+
+#ifdef __x86_64__
+ assert(0);
+#else // __x86_64__
+ __asm
+ {
+ push ebx;
+ push edi;
+
+ pushfd;
+ pop eax;
+ mov edx, eax;
+ xor eax, 1 << 21;
+ push eax;
+ popfd;
+ pushfd;
+ pop eax;
+ xor eax, edx;
+ mov flag, eax;
+ }
+ if ( ! flag )
+ {
+ return -1;
+ }
+
+ __asm
+ {
+ mov eax, cmd;
+ cpuid;
+ mov edi, [regs]
+ mov [edi], eax;
+ mov [edi+4], ebx;
+ mov [edi+8], ecx;
+ mov [edi+12], edx;
+
+ pop edi;
+ pop ebx;
+ }
+#endif // __x86_64__
+ return 0;
+
+
+#else
+
+#ifndef __x86_64__
+ // see if we can use cpuid
+ __asm__ __volatile__ (
+ "sub $0x18, %%esp\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "mov %%eax, %%edx\n"
+ "xor $0x200000, %%eax\n"
+ "push %%eax\n"
+ "popf\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "xor %%edx, %%eax\n"
+ "mov %%eax, %0\n"
+ "add $0x18, %%esp\n"
+ : "=r"(flag) :
+ );
+#endif
+
+ if ( !flag )
+ return -1;
+
+ cpuid(cmd, regs[0], regs[1], regs[2], regs[3]);
+ return 0;
+#endif // _MSC_VER
+}
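A sketch of reading the vendor string with iCpuId (the EBX/EDX/ECX ordering
below matches how cpudetectInit() fills cpuinfo.x86ID):

	u32 regs[4];
	char vendor[13];
	if (iCpuId(0, regs) == 0) {
		memcpy(vendor + 0, &regs[1], 4);	/* EBX */
		memcpy(vendor + 4, &regs[3], 4);	/* EDX */
		memcpy(vendor + 8, &regs[2], 4);	/* ECX */
		vendor[12] = '\0';	/* e.g. "GenuineIntel" or "AuthenticAMD" */
	}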
+
+u64 GetCPUTick( void )
+{
+#if defined (_MSC_VER) && _MSC_VER >= 1400
+
+ return __rdtsc();
+
+#elif defined(__MSCW32__) && !defined(__x86_64__)
+
+ __asm rdtsc;
+
+#else
+
+ u32 _a, _d;
+ __asm__ __volatile__ ("rdtsc" : "=a"(_a), "=d"(_d));
+ return (u64)_a | ((u64)_d << 32);
+
+#endif
+}
+
+#if defined __LINUX__
+
+#include <sys/time.h>
+#include <errno.h>
+//*
+unsigned long timeGetTime2()
+{
+	struct timeval tv;
+	gettimeofday(&tv, 0);	// maybe there are better ways to do this, but at least it works
+	return (unsigned long)tv.tv_sec * 1000 + tv.tv_usec/1000;
+}
+//*/
+#endif
+
+s64 CPUSpeedHz( unsigned int time )
+{
+ s64 timeStart,
+ timeStop;
+ s64 startTick,
+ endTick;
+ s64 overhead;
+
+ if( ! cpucaps.hasTimeStampCounter )
+ {
+ return 0; //check if function is supported
+ }
+
+ overhead = GetCPUTick() - GetCPUTick();
+
+ timeStart = timeGetTime2( );
+ while( timeGetTime2( ) == timeStart )
+ {
+ timeStart = timeGetTime2( );
+ }
+ for(;;)
+ {
+ timeStop = timeGetTime2( );
+ if ( ( timeStop - timeStart ) > 1 )
+ {
+ startTick = GetCPUTick( );
+ break;
+ }
+ }
+
+ timeStart = timeStop;
+ for(;;)
+ {
+ timeStop = timeGetTime2( );
+ if ( ( timeStop - timeStart ) > time )
+ {
+ endTick = GetCPUTick( );
+ break;
+ }
+ }
+
+ return (s64)( ( endTick - startTick ) + ( overhead ) );
+}
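Since the ticks are gathered over roughly `time` milliseconds, CPUSpeedHz(1000)
approximates ticks per second, i.e. Hz; cpudetectInit() below derives MHz the
same way:

	u32 mhz = (u32)(CPUSpeedHz(1000) / 1000000);	/* ~1 s sample => Hz / 10^6 */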
+
+////////////////////////////////////////////////////
+void cpudetectInit( void )
+{
+ u32 regs[ 4 ];
+ u32 cmds;
+ u32 AMDspeed;
+ s8 AMDspeedString[10];
+	int cputype=0;	// CPU vendor: 0 = Intel, 1 = AMD
+ //AMD 64 STUFF
+	u32 x86_64_8BITBRANDID = 0;	/* zero-init: only set when the CPUID leaves report them */
+	u32 x86_64_12BITBRANDID = 0;
+ memset( cpuinfo.x86ID, 0, sizeof( cpuinfo.x86ID ) );
+ cpuinfo.x86Family = 0;
+ cpuinfo.x86Model = 0;
+ cpuinfo.x86PType = 0;
+ cpuinfo.x86StepID = 0;
+ cpuinfo.x86Flags = 0;
+ cpuinfo.x86EFlags = 0;
+
+ if ( iCpuId( 0, regs ) == -1 ) return;
+
+ cmds = regs[ 0 ];
+ ((u32*)cpuinfo.x86ID)[ 0 ] = regs[ 1 ];
+ ((u32*)cpuinfo.x86ID)[ 1 ] = regs[ 3 ];
+ ((u32*)cpuinfo.x86ID)[ 2 ] = regs[ 2 ];
+ if ( cmds >= 0x00000001 )
+ {
+ if ( iCpuId( 0x00000001, regs ) != -1 )
+ {
+ cpuinfo.x86StepID = regs[ 0 ] & 0xf;
+ cpuinfo.x86Model = (regs[ 0 ] >> 4) & 0xf;
+ cpuinfo.x86Family = (regs[ 0 ] >> 8) & 0xf;
+ cpuinfo.x86PType = (regs[ 0 ] >> 12) & 0x3;
+ x86_64_8BITBRANDID = regs[1] & 0xff;
+ cpuinfo.x86Flags = regs[ 3 ];
+ }
+ }
+ if ( iCpuId( 0x80000000, regs ) != -1 )
+ {
+ cmds = regs[ 0 ];
+ if ( cmds >= 0x80000001 )
+ {
+ if ( iCpuId( 0x80000001, regs ) != -1 )
+ {
+ x86_64_12BITBRANDID = regs[1] & 0xfff;
+ cpuinfo.x86EFlags = regs[ 3 ];
+
+ }
+ }
+ }
+ switch(cpuinfo.x86PType)
+ {
+ case 0:
+ strcpy( cpuinfo.x86Type, "Standard OEM");
+ break;
+ case 1:
+ strcpy( cpuinfo.x86Type, "Overdrive");
+ break;
+ case 2:
+ strcpy( cpuinfo.x86Type, "Dual");
+ break;
+ case 3:
+ strcpy( cpuinfo.x86Type, "Reserved");
+ break;
+ default:
+ strcpy( cpuinfo.x86Type, "Unknown");
+ break;
+ }
+	if ( cpuinfo.x86ID[ 0 ] == 'G' ){ cputype=0;}	// crude vendor check: "GenuineIntel" vs "AuthenticAMD"
+	if ( cpuinfo.x86ID[ 0 ] == 'A' ){ cputype=1;}	// (improve if you know a better way)
+
+ if ( cputype == 0 ) //intel cpu
+ {
+ if( ( cpuinfo.x86Family >= 7 ) && ( cpuinfo.x86Family < 15 ) )
+ {
+			strcpy( cpuinfo.x86Fam, "Intel P6 family (above PPro, below P4)" );
+ }
+ else
+ {
+ switch( cpuinfo.x86Family )
+ {
+ // Start at 486 because if it's below 486 there is no cpuid instruction
+ case 4:
+ strcpy( cpuinfo.x86Fam, "Intel 486" );
+ break;
+ case 5:
+ switch( cpuinfo.x86Model )
+ {
+ case 4:
+ case 8: // 0.25 µm
+ strcpy( cpuinfo.x86Fam, "Intel Pentium (MMX)");
+ break;
+ default:
+ strcpy( cpuinfo.x86Fam, "Intel Pentium" );
+ }
+ break;
+ case 6:
+ switch( cpuinfo.x86Model )
+ {
+ case 0: // Pentium pro (P6 A-Step)
+ case 1: // Pentium pro
+ strcpy( cpuinfo.x86Fam, "Intel Pentium Pro" );
+ break;
+
+ case 2: // 66 MHz FSB
+ case 5: // Xeon/Celeron (0.25 µm)
+ case 6: // Internal L2 cache
+ strcpy( cpuinfo.x86Fam, "Intel Pentium II" );
+ break;
+
+ case 7: // Xeon external L2 cache
+ case 8: // Xeon/Celeron with 256 KB on-die L2 cache
+ case 10: // Xeon/Celeron with 1 or 2 MB on-die L2 cache
+ case 11: // Xeon/Celeron with Tualatin core, on-die cache
+ strcpy( cpuinfo.x86Fam, "Intel Pentium III" );
+ break;
+ case 15: // Core 2 Duo Allendale/Conroe
+ strcpy( cpuinfo.x86Fam, "Intel Core 2 Duo" );
+ break;
+
+ default:
+ strcpy( cpuinfo.x86Fam, "Intel Pentium Pro (Unknown)" );
+ }
+ break;
+ case 15:
+ switch( cpuinfo.x86Model )
+ {
+ case 0: // Willamette (A-Step)
+ case 1: // Willamette
+ strcpy( cpuinfo.x86Fam, "Willamette Intel Pentium IV" );
+ break;
+ case 2: // Northwood
+ strcpy( cpuinfo.x86Fam, "Northwood Intel Pentium IV" );
+ break;
+
+ default:
+ strcpy( cpuinfo.x86Fam, "Intel Pentium IV (Unknown)" );
+ break;
+ }
+ break;
+ default:
+ strcpy( cpuinfo.x86Fam, "Unknown Intel CPU" );
+ }
+ }
+ }
+ else if ( cputype == 1 ) //AMD cpu
+ {
+ if( cpuinfo.x86Family >= 7 )
+ {
+ if((x86_64_12BITBRANDID !=0) || (x86_64_8BITBRANDID !=0))
+ {
+ if(x86_64_8BITBRANDID == 0 )
+ {
+ switch((x86_64_12BITBRANDID >>6)& 0x3f)
+ {
+ case 4:
+ strcpy(cpuinfo.x86Fam,"AMD Athlon(tm) 64 Processor");
+ AMDspeed = 22 + (x86_64_12BITBRANDID & 0x1f);
+ //AMDspeedString = strtol(AMDspeed, (char**)NULL,10);
+ sprintf(AMDspeedString," %d",AMDspeed);
+ strcat(AMDspeedString,"00+");
+ strcat(cpuinfo.x86Fam,AMDspeedString);
+ break;
+ case 12:
+ strcpy(cpuinfo.x86Fam,"AMD Opteron(tm) Processor");
+ break;
+ case 5:
+ strcpy( cpuinfo.x86Fam, "AMD Athlon X2 Processor" );
+ AMDspeed = 22 + (x86_64_12BITBRANDID & 0x1f);
+ //AMDspeedString = strtol(AMDspeed, (char**)NULL,10);
+ sprintf(AMDspeedString," %d",AMDspeed);
+ strcat(AMDspeedString,"00+");
+ strcat(cpuinfo.x86Fam,AMDspeedString);
+ break;
+ case 44:
+ strcpy( cpuinfo.x86Fam, "AMD Opteron(tm) Dual Core Processor" );
+ break;
+ default:
+						strcpy(cpuinfo.x86Fam,"Unknown AMD64 processor");
+
+ }
+ }
+			else // 8-bit brand ID is nonzero
+			{
+				strcpy(cpuinfo.x86Fam,"AMD64 CPU (not yet supported)");
+ }
+ }
+ else
+ {
+ strcpy( cpuinfo.x86Fam, "AMD K7+ Processor" );
+ }
+ }
+ else
+ {
+ switch ( cpuinfo.x86Family )
+ {
+ case 4:
+ switch( cpuinfo.x86Model )
+ {
+ case 14:
+ case 15: // Write-back enhanced
+ strcpy( cpuinfo.x86Fam, "AMD 5x86 Processor" );
+ break;
+
+ case 3: // DX2
+ case 7: // Write-back enhanced DX2
+ case 8: // DX4
+ case 9: // Write-back enhanced DX4
+ strcpy( cpuinfo.x86Fam, "AMD 486 Processor" );
+ break;
+
+
+ default:
+ strcpy( cpuinfo.x86Fam, "AMD Unknown Processor" );
+
+ }
+ break;
+
+ case 5:
+ switch( cpuinfo.x86Model)
+ {
+ case 0: // SSA 5 (75, 90 and 100 Mhz)
+ case 1: // 5k86 (PR 120 and 133 MHz)
+ case 2: // 5k86 (PR 166 MHz)
+ case 3: // K5 5k86 (PR 200 MHz)
+ strcpy( cpuinfo.x86Fam, "AMD K5 Processor" );
+ break;
+
+ case 6:
+ case 7: // (0.25 µm)
+ case 8: // K6-2
+ case 9: // K6-III
+ case 14: // K6-2+ / K6-III+
+ strcpy( cpuinfo.x86Fam, "AMD K6 Series Processor" );
+ break;
+
+ default:
+ strcpy( cpuinfo.x86Fam, "AMD Unknown Processor" );
+ }
+ break;
+ case 6:
+ strcpy( cpuinfo.x86Fam, "AMD Athlon XP Processor" );
+ break;
+ default:
+ strcpy( cpuinfo.x86Fam, "Unknown AMD CPU" );
+ }
+ }
+ }
+ //capabilities
+ cpucaps.hasFloatingPointUnit = ( cpuinfo.x86Flags >> 0 ) & 1;
+ cpucaps.hasVirtual8086ModeEnhancements = ( cpuinfo.x86Flags >> 1 ) & 1;
+ cpucaps.hasDebuggingExtensions = ( cpuinfo.x86Flags >> 2 ) & 1;
+ cpucaps.hasPageSizeExtensions = ( cpuinfo.x86Flags >> 3 ) & 1;
+ cpucaps.hasTimeStampCounter = ( cpuinfo.x86Flags >> 4 ) & 1;
+ cpucaps.hasModelSpecificRegisters = ( cpuinfo.x86Flags >> 5 ) & 1;
+ cpucaps.hasPhysicalAddressExtension = ( cpuinfo.x86Flags >> 6 ) & 1;
+ cpucaps.hasMachineCheckArchitecture = ( cpuinfo.x86Flags >> 7 ) & 1;
+ cpucaps.hasCOMPXCHG8BInstruction = ( cpuinfo.x86Flags >> 8 ) & 1;
+ cpucaps.hasAdvancedProgrammableInterruptController = ( cpuinfo.x86Flags >> 9 ) & 1;
+ cpucaps.hasSEPFastSystemCall = ( cpuinfo.x86Flags >> 11 ) & 1;
+ cpucaps.hasMemoryTypeRangeRegisters = ( cpuinfo.x86Flags >> 12 ) & 1;
+ cpucaps.hasPTEGlobalFlag = ( cpuinfo.x86Flags >> 13 ) & 1;
+ cpucaps.hasMachineCheckArchitecture = ( cpuinfo.x86Flags >> 14 ) & 1;
+ cpucaps.hasConditionalMoveAndCompareInstructions = ( cpuinfo.x86Flags >> 15 ) & 1;
+ cpucaps.hasFGPageAttributeTable = ( cpuinfo.x86Flags >> 16 ) & 1;
+ cpucaps.has36bitPageSizeExtension = ( cpuinfo.x86Flags >> 17 ) & 1;
+ cpucaps.hasProcessorSerialNumber = ( cpuinfo.x86Flags >> 18 ) & 1;
+ cpucaps.hasCFLUSHInstruction = ( cpuinfo.x86Flags >> 19 ) & 1;
+ cpucaps.hasDebugStore = ( cpuinfo.x86Flags >> 21 ) & 1;
+ cpucaps.hasACPIThermalMonitorAndClockControl = ( cpuinfo.x86Flags >> 22 ) & 1;
+ cpucaps.hasMultimediaExtensions = ( cpuinfo.x86Flags >> 23 ) & 1; //mmx
+ cpucaps.hasFastStreamingSIMDExtensionsSaveRestore = ( cpuinfo.x86Flags >> 24 ) & 1;
+ cpucaps.hasStreamingSIMDExtensions = ( cpuinfo.x86Flags >> 25 ) & 1; //sse
+ cpucaps.hasStreamingSIMD2Extensions = ( cpuinfo.x86Flags >> 26 ) & 1; //sse2
+ cpucaps.hasSelfSnoop = ( cpuinfo.x86Flags >> 27 ) & 1;
+ cpucaps.hasHyperThreading = ( cpuinfo.x86Flags >> 28 ) & 1;
+ cpucaps.hasThermalMonitor = ( cpuinfo.x86Flags >> 29 ) & 1;
+ cpucaps.hasIntel64BitArchitecture = ( cpuinfo.x86Flags >> 30 ) & 1;
+ //that is only for AMDs
+ cpucaps.hasMultimediaExtensionsExt = ( cpuinfo.x86EFlags >> 22 ) & 1; //mmx2
+ cpucaps.hasAMD64BitArchitecture = ( cpuinfo.x86EFlags >> 29 ) & 1; //64bit cpu
+ cpucaps.has3DNOWInstructionExtensionsExt = ( cpuinfo.x86EFlags >> 30 ) & 1; //3dnow+
+ cpucaps.has3DNOWInstructionExtensions = ( cpuinfo.x86EFlags >> 31 ) & 1; //3dnow
+ cpuinfo.cpuspeed = (u32 )(CPUSpeedHz( 1000 ) / 1000000);
+}
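Typical use: run detection once at startup, then gate code paths on the
filled-in capability flags (a sketch; the SSE emitters also assert these
flags):

	cpudetectInit();
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		/* SSE-only CPU: the SSE2_* movq/movsd emitters in ix86_sse.c
		   fall back to the SSE2EMU_* helpers automatically. */
	}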
diff --git a/libpcsxcore/ix86_64/ix86_fpu.c b/libpcsxcore/ix86_64/ix86_fpu.c
new file mode 100644
index 0000000..ca49eb7
--- /dev/null
+++ b/libpcsxcore/ix86_64/ix86_fpu.c
@@ -0,0 +1,248 @@
+// stop compiling if NORECBUILD build (only for Visual Studio)
+#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD))
+
+#include <stdio.h>
+#include <string.h>
+#include "ix86-64.h"
+
+/********************/
+/* FPU instructions */
+/********************/
+
+/* fild m32 to fpu reg stack */
+void FILD32( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xDB), false, 0, from, 0);
+}
+
+/* fistp m32 from fpu reg stack */
+void FISTP32( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xDB), false, 3, from, 0);
+}
+
+/* fld m32 to fpu reg stack */
+void FLD32( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xD9), false, 0, from, 0);
+}
+
+// fld st(i)
+void FLD(int st) { write16(0xc0d9+(st<<8)); }
+
+void FLD1() { write16(0xe8d9); }
+void FLDL2E() { write16(0xead9); }
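These one-liners pack both opcode bytes into a single little-endian write16,
so adding st<<8 bumps the second (register-select) byte. A worked example:

	FLD(2);	/* write16(0xc0d9 + (2<<8)) => bytes D9 C2 = fld st(2) */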
+
+/* fst m32 from fpu reg stack */
+void FST32( uptr to )
+{
+ MEMADDR_OP(0, VAROP1(0xD9), false, 2, to, 0);
+}
+
+/* fstp m32 from fpu reg stack */
+void FSTP32( uptr to )
+{
+ MEMADDR_OP(0, VAROP1(0xD9), false, 3, to, 0);
+}
+
+// fstp st(i)
+void FSTP(int st) { write16(0xd8dd+(st<<8)); }
+
+/* fldcw fpu control word from m16 */
+void FLDCW( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xD9), false, 5, from, 0);
+}
+
+/* fnstcw fpu control word to m16 */
+void FNSTCW( uptr to )
+{
+ MEMADDR_OP(0, VAROP1(0xD9), false, 7, to, 0);
+}
+
+void FNSTSWtoAX( void )
+{
+ write16( 0xE0DF );
+}
+
+void FXAM()
+{
+ write16(0xe5d9);
+}
+
+void FDECSTP() { write16(0xf6d9); }
+void FRNDINT() { write16(0xfcd9); }
+void FXCH(int st) { write16(0xc8d9+(st<<8)); }
+void F2XM1() { write16(0xf0d9); }
+void FSCALE() { write16(0xfdd9); }
+
+/* fadd ST(src) to fpu reg stack ST(0) */
+void FADD32Rto0( x86IntRegType src )
+{
+ write8( 0xD8 );
+ write8( 0xC0 + src );
+}
+
+/* fadd ST(0) to fpu reg stack ST(src) */
+void FADD320toR( x86IntRegType src )
+{
+ write8( 0xDC );
+ write8( 0xC0 + src );
+}
+
+/* fsub ST(src) to fpu reg stack ST(0) */
+void FSUB32Rto0( x86IntRegType src )
+{
+ write8( 0xD8 );
+ write8( 0xE0 + src );
+}
+
+/* fsub ST(0) to fpu reg stack ST(src) */
+void FSUB320toR( x86IntRegType src )
+{
+ write8( 0xDC );
+ write8( 0xE8 + src );
+}
+
+/* fsubp -> subtract ST(0) from ST(1), store in ST(1) and pop the stack */
+void FSUBP( void )
+{
+ write8( 0xDE );
+ write8( 0xE9 );
+}
+
+/* fmul ST(src) to fpu reg stack ST(0) */
+void FMUL32Rto0( x86IntRegType src )
+{
+ write8( 0xD8 );
+ write8( 0xC8 + src );
+}
+
+/* fmul ST(0) to fpu reg stack ST(src) */
+void FMUL320toR( x86IntRegType src )
+{
+ write8( 0xDC );
+ write8( 0xC8 + src );
+}
+
+/* fdiv ST(src) to fpu reg stack ST(0) */
+void FDIV32Rto0( x86IntRegType src )
+{
+ write8( 0xD8 );
+ write8( 0xF0 + src );
+}
+
+/* fdiv ST(0) to fpu reg stack ST(src) */
+void FDIV320toR( x86IntRegType src )
+{
+ write8( 0xDC );
+ write8( 0xF8 + src );
+}
+
+/* fdivp -> ST(src) = ST(src) / ST(0), then pop the stack */
+void FDIV320toRP( x86IntRegType src )
+{
+ write8( 0xDE );
+ write8( 0xF8 + src );
+}
+
+/* fadd m32 to fpu reg stack */
+void FADD32( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xD8), false, 0, from, 0);
+}
+
+/* fsub m32 to fpu reg stack */
+void FSUB32( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xD8), false, 4, from, 0);
+}
+
+/* fmul m32 to fpu reg stack */
+void FMUL32( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xD8), false, 1, from, 0);
+}
+
+/* fdiv m32 to fpu reg stack */
+void FDIV32( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xD8), false, 6, from, 0);
+}
+
+/* fabs fpu reg stack */
+void FABS( void )
+{
+ write16( 0xE1D9 );
+}
+
+/* fsqrt fpu reg stack */
+void FSQRT( void )
+{
+ write16( 0xFAD9 );
+}
+
+void FPATAN(void) { write16(0xf3d9); }
+void FSIN(void) { write16(0xfed9); }
+
+/* fchs fpu reg stack */
+void FCHS( void )
+{
+ write16( 0xE0D9 );
+}
+
+/* fcomi st, st(i) */
+void FCOMI( x86IntRegType src )
+{
+ write8( 0xDB );
+ write8( 0xF0 + src );
+}
+
+/* fcomip st, st(i) */
+void FCOMIP( x86IntRegType src )
+{
+ write8( 0xDF );
+ write8( 0xF0 + src );
+}
+
+/* fucomi st, st(i) */
+void FUCOMI( x86IntRegType src )
+{
+ write8( 0xDB );
+ write8( 0xE8 + src );
+}
+
+/* fucomip st, st(i) */
+void FUCOMIP( x86IntRegType src )
+{
+ write8( 0xDF );
+ write8( 0xE8 + src );
+}
+
+/* fcom m32 to fpu reg stack */
+void FCOM32( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xD8), false, 2, from, 0);
+}
+
+/* fcomp m32 to fpu reg stack */
+void FCOMP32( uptr from )
+{
+ MEMADDR_OP(0, VAROP1(0xD8), false, 3, from, 0);
+}
+
+#define FCMOV32( low, high ) \
+ { \
+ write8( low ); \
+ write8( high + from ); \
+ }
+
+void FCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); }
+void FCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); }
+void FCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); }
+void FCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); }
+void FCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); }
+void FCMOVNE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC8 ); }
+void FCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); }
+void FCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); }
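Note that the FCMOV32 macro body refers to the wrapper's `from` argument
directly. A worked example:

	FCMOVB32(1);	/* emits DA C1 = fcmovb st(0), st(1) */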
+
+#endif
diff --git a/libpcsxcore/ix86_64/ix86_mmx.c b/libpcsxcore/ix86_64/ix86_mmx.c
new file mode 100644
index 0000000..eddbbfc
--- /dev/null
+++ b/libpcsxcore/ix86_64/ix86_mmx.c
@@ -0,0 +1,646 @@
+// stop compiling if NORECBUILD build (only for Visual Studio)
+#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD))
+
+#include "ix86-64.h"
+
+#include <assert.h>
+
+/********************/
+/* MMX instructions */
+/********************/
+
+// r64 = mm
+
+/* movq m64 to r64 */
+void MOVQMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x6F), true, to, from, 0);
+}
+
+/* movq r64 to m64 */
+void MOVQRtoM( uptr to, x86MMXRegType from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x7F), true, from, to, 0);
+}
+
+/* pand r64 to r64 */
+void PANDRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xDB0F );
+ ModRM( 3, to, from );
+}
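The register-to-register MMX emitters all follow the same REX prefix + two
opcode bytes + mod=3 ModRM pattern. A worked example, assuming MM2 = 2 and
MM3 = 3:

	PANDRtoR(MM2, MM3);	/* 0F DB D3 = pand mm2, mm3 (ModRM = C0 | 2<<3 | 3) */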
+
+void PANDNRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xDF0F );
+ ModRM( 3, to, from );
+}
+
+/* por r64 to r64 */
+void PORRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xEB0F );
+ ModRM( 3, to, from );
+}
+
+/* pxor r64 to r64 */
+void PXORRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xEF0F );
+ ModRM( 3, to, from );
+}
+
+/* psllq r64 to r64 */
+void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xF30F );
+ ModRM( 3, to, from );
+}
+
+/* psllq m64 to r64 */
+void PSLLQMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xF3), true, to, from, 0);
+}
+
+/* psllq imm8 to r64 */
+void PSLLQItoR( x86MMXRegType to, u8 from )
+{
+ RexB(0, to);
+ write16( 0x730F );
+ ModRM( 3, 6, to);
+ write8( from );
+}
+
+/* psrlq r64 to r64 */
+void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xD30F );
+ ModRM( 3, to, from );
+}
+
+/* psrlq m64 to r64 */
+void PSRLQMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xD3), true, to, from, 0);
+}
+
+/* psrlq imm8 to r64 */
+void PSRLQItoR( x86MMXRegType to, u8 from )
+{
+ RexB(0, to);
+ write16( 0x730F );
+ ModRM( 3, 2, to);
+ write8( from );
+}
+
+/* paddusb r64 to r64 */
+void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xDC0F );
+ ModRM( 3, to, from );
+}
+
+/* paddusb m64 to r64 */
+void PADDUSBMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xDC), true, to, from, 0);
+}
+
+/* paddusw r64 to r64 */
+void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xDD0F );
+ ModRM( 3, to, from );
+}
+
+/* paddusw m64 to r64 */
+void PADDUSWMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xDD), true, to, from, 0);
+}
+
+/* paddb r64 to r64 */
+void PADDBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xFC0F );
+ ModRM( 3, to, from );
+}
+
+/* paddb m64 to r64 */
+void PADDBMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xFC), true, to, from, 0);
+}
+
+/* paddw r64 to r64 */
+void PADDWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xFD0F );
+ ModRM( 3, to, from );
+}
+
+/* paddw m64 to r64 */
+void PADDWMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xFD), true, to, from, 0);
+}
+
+/* paddd r64 to r64 */
+void PADDDRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xFE0F );
+ ModRM( 3, to, from );
+}
+
+/* paddd m64 to r64 */
+void PADDDMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xFE), true, to, from, 0);
+}
+
+/* emms */
+void EMMS( void )
+{
+ write16( 0x770F );
+}
+
+void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xEC0F );
+ ModRM( 3, to, from );
+}
+
+void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xED0F );
+ ModRM( 3, to, from );
+}
+
+// paddq m64 to r64 (sse2 only?)
+void PADDQMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xD4), true, to, from, 0);
+}
+
+// paddq r64 to r64 (sse2 only?)
+void PADDQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xD40F );
+ ModRM( 3, to, from );
+}
+
+void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xE80F );
+ ModRM( 3, to, from );
+}
+
+void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xE90F );
+ ModRM( 3, to, from );
+}
+
+
+void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xF80F );
+ ModRM( 3, to, from );
+}
+
+void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xF90F );
+ ModRM( 3, to, from );
+}
+
+void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xFA0F );
+ ModRM( 3, to, from );
+}
+
+void PSUBDMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xFA), true, to, from, 0);
+}
+
+void PSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xD80F );
+ ModRM( 3, to, from );
+}
+
+void PSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xD90F );
+ ModRM( 3, to, from );
+}
+
+// psubq m64 to r64 (sse2 only?)
+void PSUBQMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xFB), true, to, from, 0);
+}
+
+// psubq r64 to r64 (sse2 only?)
+void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xFB0F );
+ ModRM( 3, to, from );
+}
+
+// pmuludq m64 to r64 (sse2 only?)
+void PMULUDQMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xF4), true, to, from, 0);
+}
+
+// pmuludq r64 to r64 (sse2 only?)
+void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xF40F );
+ ModRM( 3, to, from );
+}
+
+void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x740F );
+ ModRM( 3, to, from );
+}
+
+void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x750F );
+ ModRM( 3, to, from );
+}
+
+void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x760F );
+ ModRM( 3, to, from );
+}
+
+void PCMPEQDMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x76), true, to, from, 0);
+}
+
+void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x640F );
+ ModRM( 3, to, from );
+}
+
+void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x650F );
+ ModRM( 3, to, from );
+}
+
+void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x660F );
+ ModRM( 3, to, from );
+}
+
+void PCMPGTDMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x66), true, to, from, 0);
+}
+
+void PSRLWItoR( x86MMXRegType to, u8 from )
+{
+ RexB(0, to);
+ write16( 0x710F );
+ ModRM( 3, 2 , to );
+ write8( from );
+}
+
+void PSRLDItoR( x86MMXRegType to, u8 from )
+{
+ RexB(0, to);
+ write16( 0x720F );
+ ModRM( 3, 2 , to );
+ write8( from );
+}
+
+void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xD20F );
+ ModRM( 3, to, from );
+}
+
+void PSLLWItoR( x86MMXRegType to, u8 from )
+{
+ RexB(0, to);
+ write16( 0x710F );
+ ModRM( 3, 6 , to );
+ write8( from );
+}
+
+void PSLLDItoR( x86MMXRegType to, u8 from )
+{
+ RexB(0, to);
+ write16( 0x720F );
+ ModRM( 3, 6 , to );
+ write8( from );
+}
+
+void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xF20F );
+ ModRM( 3, to, from );
+}
+
+void PSRAWItoR( x86MMXRegType to, u8 from )
+{
+ RexB(0, to);
+ write16( 0x710F );
+ ModRM( 3, 4 , to );
+ write8( from );
+}
+
+void PSRADItoR( x86MMXRegType to, u8 from )
+{
+ RexB(0, to);
+ write16( 0x720F );
+ ModRM( 3, 4 , to );
+ write8( from );
+}
+
+void PSRADRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0xE20F );
+ ModRM( 3, to, from );
+}
+
+/* por m64 to r64 */
+void PORMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xEB), true, to, from, 0);
+}
+
+/* pxor m64 to r64 */
+void PXORMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xEF), true, to, from, 0);
+}
+
+/* pand m64 to r64 */
+void PANDMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xDB), true, to, from, 0);
+}
+
+void PANDNMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0xDF), true, to, from, 0);
+}
+
+void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x6A0F );
+ ModRM( 3, to, from );
+}
+
+void PUNPCKHDQMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x6A), true, to, from, 0);
+}
+
+void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x620F );
+ ModRM( 3, to, from );
+}
+
+void PUNPCKLDQMtoR( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x62), true, to, from, 0);
+}
+
+/* movq imm64 to r64: embeds the literal in the code stream and loads it
+   RIP-relative; the jmp hops over the inline data. */
+void MOVQ64ItoR( x86MMXRegType reg, u64 i )
+{
+	RexR(0, reg);
+	write16(0x6F0F);
+	ModRM(0, reg, DISP32);	/* mod=00, rm=101: RIP-relative disp32 (x86-64) */
+	write32(2);		/* data sits 2 bytes (the jmp) past the end of this insn */
+	JMP8( 8 );		/* skip the 8-byte literal */
+	write64( i );
+}
+
+void MOVQRtoR( x86MMXRegType to, x86MMXRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x6F0F );
+ ModRM( 3, to, from );
+}
+
+void MOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset )
+{
+ RexRB(0, to, from);
+ write16( 0x6F0F );
+
+ if( offset < 128 ) {
+ ModRM( 1, to, from );
+ write8(offset);
+ }
+ else {
+ ModRM( 2, to, from );
+ write32(offset);
+ }
+}
+
+void MOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset )
+{
+ RexRB(0, from, to);
+ write16( 0x7F0F );
+
+ if( offset < 128 ) {
+ ModRM( 1, from , to );
+ write8(offset);
+ }
+ else {
+ ModRM( 2, from, to );
+ write32(offset);
+ }
+}
+
+/* movd m32 to r64 */
+void MOVDMtoMMX( x86MMXRegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x6E), true, to, from, 0);
+}
+
+/* movd r64 to m32 */
+void MOVDMMXtoM( uptr to, x86MMXRegType from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x7E), true, from, to, 0);
+}
+
+void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x6E0F );
+ ModRM( 3, to, from );
+}
+
+void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x6E0F );
+ ModRM( 0, to, from );
+}
+
+void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset )
+{
+ RexRB(0, to, from);
+ write16( 0x6E0F );
+
+ if( offset < 128 ) {
+ ModRM( 1, to, from );
+ write8(offset);
+ }
+ else {
+ ModRM( 2, to, from );
+ write32(offset);
+ }
+}
+
+void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from )
+{
+ RexRB(0, from, to);
+ write16( 0x7E0F );
+ ModRM( 3, from, to );
+}
+
+void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from )
+{
+	RexRB(0, from, to);
+	write16( 0x7E0F );
+	ModRM( 0, from, to );
+	if( to >= 4 ) {
+		/* rm=100 (ESP) in mod 00 calls for a SIB byte: 0x24 = base ESP, no
+		   index. rm=101 (EBP) would mean disp32 instead, hence the assert. */
+		assert( to == ESP );
+		write8(0x24);
+	}
+}
+
+void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset )
+{
+ RexRB(0, from, to);
+ write16( 0x7E0F );
+
+ if( offset < 128 ) {
+ ModRM( 1, from, to );
+ write8(offset);
+ }
+ else {
+ ModRM( 2, from, to );
+ write32(offset);
+ }
+}
+
+///* movd r32 to r64 */
+//void MOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from )
+//{
+// write16( 0x6E0F );
+// ModRM( 3, to, from );
+//}
+//
+///* movq r64 to r32 */
+//void MOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from )
+//{
+// write16( 0x7E0F );
+// ModRM( 3, from, to );
+//}
+
+// untested
+void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
+{
+ RexRB(0, to, from);
+ write16( 0x630F );
+ ModRM( 3, to, from );
+}
+
+void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from)
+{
+ RexRB(0, to, from);
+ write16( 0x6B0F );
+ ModRM( 3, to, from );
+}
+
+void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from)
+{
+ RexRB(0, to, from);
+ write16( 0xD70F );
+ ModRM( 3, to, from );
+}
+
+void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 )
+{
+ RexRB(0, to, from);
+ write16( 0xc40f );
+ ModRM( 3, to, from );
+ write8( imm8 );
+}
+
+void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8)
+{
+ RexRB(0, to, from);
+ write16(0x700f);
+ ModRM( 3, to, from );
+ write8(imm8);
+}
+
+void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8)
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x70), true, to, from, 1 /* XXX was 0? */);
+ write8(imm8);
+}
+
+void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from)
+{
+ RexRB(0, to, from);
+ write16(0xf70f);
+ ModRM( 3, to, from );
+}
+
+#endif
diff --git a/libpcsxcore/ix86_64/ix86_sse.c b/libpcsxcore/ix86_64/ix86_sse.c
new file mode 100644
index 0000000..cb391dc
--- /dev/null
+++ b/libpcsxcore/ix86_64/ix86_sse.c
@@ -0,0 +1,1455 @@
+// stop compiling if NORECBUILD build (only for Visual Studio)
+#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD))
+
+#include <assert.h>
+#include "ix86-64.h"
+
+PCSX2_ALIGNED16(static unsigned int p[4]);
+PCSX2_ALIGNED16(static unsigned int p2[4]);
+PCSX2_ALIGNED16(static float f[4]);
+
+
+XMMSSEType g_xmmtypes[XMMREGS] = {0};
+
+/********************/
+/* SSE instructions */
+/********************/
+
+#define SSEMtoRv( nc, code, overb ) \
+ assert( cpucaps.hasStreamingSIMDExtensions ); \
+ assert( to < XMMREGS ) ; \
+ MEMADDR_OP(0, nc, code, true, to, from, overb)
+
+#define SSEMtoR( code, overb ) SSEMtoRv(2, code, overb)
+
+#define SSERtoMv( nc, code, overb ) \
+ assert( cpucaps.hasStreamingSIMDExtensions ); \
+ assert( from < XMMREGS) ; \
+ MEMADDR_OP(0, nc, code, true, from, to, overb)
+
+#define SSERtoM( code, overb ) SSERtoMv( 2, code, overb )
+
+#define SSE_SS_MtoR( code, overb ) \
+ SSEMtoRv(3, (code << 8) | 0xF3, overb)
+
+#define SSE_SS_RtoM( code, overb ) \
+ SSERtoMv(3, (code << 8) | 0xF3, overb)
+
+#define SSERtoR( code ) \
+ assert( cpucaps.hasStreamingSIMDExtensions ); \
+ assert( to < XMMREGS && from < XMMREGS) ; \
+ RexRB(0, to, from); \
+ write16( code ); \
+ ModRM( 3, to, from );
+
+#define SSEMtoR66( code ) \
+ SSEMtoRv( 3, (code << 8) | 0x66, 0 )
+
+#define SSERtoM66( code ) \
+ SSERtoMv( 3, (code << 8) | 0x66, 0 )
+
+#define SSERtoR66( code ) \
+ write8( 0x66 ); \
+ SSERtoR( code );
+
+#define _SSERtoR66( code ) \
+ assert( cpucaps.hasStreamingSIMDExtensions ); \
+ assert( to < XMMREGS && from < XMMREGS) ; \
+ write8( 0x66 ); \
+ RexRB(0, from, to); \
+ write16( code ); \
+ ModRM( 3, from, to );
+
+#define SSE_SS_RtoR( code ) \
+ assert( cpucaps.hasStreamingSIMDExtensions ); \
+ assert( to < XMMREGS && from < XMMREGS) ; \
+ write8( 0xf3 ); \
+ RexRB(0, to, from); \
+ write16( code ); \
+ ModRM( 3, to, from );
+
+#define CMPPSMtoR( op ) \
+ SSEMtoR( 0xc20f, 1 ); \
+ write8( op );
+
+#define CMPPSRtoR( op ) \
+ SSERtoR( 0xc20f ); \
+ write8( op );
+
+#define CMPSSMtoR( op ) \
+ SSE_SS_MtoR( 0xc20f, 1 ); \
+ write8( op );
+
+#define CMPSSRtoR( op ) \
+ SSE_SS_RtoR( 0xc20f ); \
+ write8( op );
+
+
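These macros layer the mandatory prefixes: the _SS_ variants lead with F3
(scalar single), the 66 variants with 66 (packed integer/double), and
MEMADDR_OP handles the memory-operand forms. A worked example via a wrapper
defined further down, assuming XMM1 = 1 and XMM2 = 2:

	SSE_MOVSS_XMM_to_XMM(XMM1, XMM2);	/* F3 0F 10 CA = movss xmm1, xmm2 */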
+
+void WriteRmOffset(x86IntRegType to, int offset);
+void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset);
+
+/* movups [r32][r32*scale] to xmm1 */
+void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRXB(0, to, from2, from);
+ write16( 0x100f );
+ ModRM( 0, to, 0x4 );
+ SibSB( scale, from2, from );
+}
+
+/* movups xmm1 to [r32][r32*scale] */
+void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRXB(1, to, from2, from);
+ write16( 0x110f );
+ ModRM( 0, to, 0x4 );
+ SibSB( scale, from2, from );
+}
+
+/* movups [r32] to xmm */
+void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, to, from);
+ write16( 0x100f );
+ ModRM( 0, to, from );
+}
+
+/* movups xmm to [r32] */
+void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, from, to);
+ write16( 0x110f );
+ ModRM( 0, from, to );
+}
+
+/* movlps [r32] to xmm */
+void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(1, to, from);
+ write16( 0x120f );
+ ModRM( 0, to, from );
+}
+
+void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, to, from);
+ write16( 0x120f );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+/* movlps xmm to [r32] */
+void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, from, to);
+ write16( 0x130f );
+ ModRM( 0, from, to );
+}
+
+void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, from, to);
+ write16( 0x130f );
+ WriteRmOffsetFrom(from, to, offset);
+}
+
+/* movaps [r32][r32*scale] to xmm1 */
+void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
+ RexRXB(0, to, from2, from);
+ write16( 0x280f );
+ ModRM( 0, to, 0x4 );
+ SibSB( scale, from2, from );
+}
+
+/* movaps xmm1 to [r32][r32*scale] */
+void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
+ RexRXB(0, to, from2, from);
+ write16( 0x290f );
+ ModRM( 0, to, 0x4 );
+ SibSB( scale, from2, from );
+}
+
+// movaps [r32+offset] to xmm
+void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, to, from);
+ write16( 0x280f );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+// movaps xmm to [r32+offset]
+void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, from, to);
+ write16( 0x290f );
+ WriteRmOffsetFrom(from, to, offset);
+}
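+
+/* Illustrative sketch (not part of the original source), assuming
+   WriteRmOffsetFrom() chooses the short disp8 form for small offsets:
+
+       SSE_MOVAPSRmtoROffset(XMM1, EAX, 0x10);   // emits 0F 28 48 10
+
+   i.e. "movaps xmm1, [rax+0x10]". The scaled-index variants above assert
+   from != EBP because mod=00 with base=101b means disp32 with no base. */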
+
+// movdqa [r32+offset] to xmm
+void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
+{
+	assert( cpucaps.hasStreamingSIMD2Extensions );
+ write8(0x66);
+ RexRB(0, to, from);
+ write16( 0x6f0f );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+// movdqa xmm to [r32+offset]
+void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
+{
+	assert( cpucaps.hasStreamingSIMD2Extensions );
+ write8(0x66);
+ RexRB(0, from, to);
+ write16( 0x7f0f );
+ WriteRmOffsetFrom(from, to, offset);
+}
+
+// movups [r32+offset] to xmm
+void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
+{
+	assert( cpucaps.hasStreamingSIMDExtensions );
+	RexRB(0, to, from);
+ write16( 0x100f );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+// movups xmm to [r32+offset]
+void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, from, to);
+ write16( 0x110f );
+ WriteRmOffsetFrom(from, to, offset);
+}
+
+//**********************************************************************************/
+//MOVAPS: Move Aligned Packed Single-Precision FP Values                          *
+//**********************************************************************************
+void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); }
+void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); }
+void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x280f ); }
+
+void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); }
+void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); }
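+
+/* Added note: movaps faults (#GP) on operands that are not 16-byte
+   aligned, while movups accepts any alignment, so a load from an
+   arbitrary pointer should go through the unaligned wrapper, e.g.
+       SSE_MOVUPS_M128_to_XMM(XMM0, (uptr)buf);   // buf is a placeholder
+   for a buffer that may be unaligned. */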
+
+void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVSD_XMM_to_XMM(to, from);
+ else {
+ write8(0xf2);
+ SSERtoR( 0x100f);
+ }
+}
+
+void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from )
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_M64_to_XMM(to, from);
+ else {
+ SSE_SS_MtoR( 0x7e0f, 0);
+ }
+}
+
+void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ_XMM_to_XMM(to, from);
+ else {
+ SSE_SS_RtoR( 0x7e0f);
+ }
+}
+
+void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from )
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) SSE_MOVLPS_XMM_to_M64(to, from);
+ else {
+ SSERtoM66(0xd60f);
+ }
+}
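+
+/* Added note: MOVQ loads into an XMM register are encoded F3 0F 7E and
+   stores are 66 0F D6, which is why the loads above reuse the SSE_SS_*
+   (F3-prefix) macros while the store goes through SSERtoM66; e.g.
+   SSE2_MOVQ_XMM_to_XMM(XMM0, XMM1) emits F3 0F 7E C1. */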
+
+#ifndef __x86_64__
+void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVDQ2Q_XMM_to_MM(to, from);
+ else {
+ write8(0xf2);
+ SSERtoR( 0xd60f);
+ }
+}
+void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) SSE2EMU_MOVQ2DQ_MM_to_XMM(to, from);
+ else {
+ SSE_SS_RtoR( 0xd60f);
+ }
+}
+#endif
+
+//**********************************************************************************/
+//MOVSS: Move Scalar Single-Precision FP value *
+//**********************************************************************************
+void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); }
+void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); }
+void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
+{
+ write8(0xf3);
+ RexRB(0, from, to);
+ write16(0x110f);
+ ModRM(0, from, to);
+}
+
+void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x100f ); }
+
+void SSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
+{
+ write8(0xf3);
+ RexRB(0, to, from);
+ write16( 0x100f );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+void SSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
+{
+ write8(0xf3);
+ RexRB(0, from, to);
+ write16(0x110f);
+ WriteRmOffsetFrom(from, to, offset);
+}
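+
+/* Added note: movss reg,reg copies only the low dword and preserves the
+   destination's upper 96 bits, whereas movss from memory zero-fills bits
+   32..127 -- worth keeping in mind when mixing these wrappers. */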
+
+void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); }
+//**********************************************************************************/
+//MOVLPS: Move low Packed Single-Precision FP *
+//**********************************************************************************
+void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); }
+void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); }
+
+void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, to, from);
+ write16( 0x120f );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+void SSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
+{
+ RexRB(0, from, to);
+ write16(0x130f);
+ WriteRmOffsetFrom(from, to, offset);
+}
+
+/////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MOVHPS: Move High Packed Single-Precision FP *
+//**********************************************************************************
+void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); }
+void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); }
+
+void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, to, from);
+ write16( 0x160f );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
+{
+ assert( cpucaps.hasStreamingSIMDExtensions );
+ RexRB(0, from, to);
+ write16(0x170f);
+ WriteRmOffsetFrom(from, to, offset);
+}
+
+/////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MOVLHPS: Move Packed Single-Precision FP Low to High                            *
+//**********************************************************************************
+void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); }
+
+//////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MOVHLPS: Move Packed Single-Precision FP High to Low                            *
+//**********************************************************************************
+void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); }
+
+///////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//ANDPS: Logical Bit-wise AND for Single FP *
+//**********************************************************************************
+void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); }
+void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); }
+
+///////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values *
+//**********************************************************************************
+void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); }
+void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); }
+
+/////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//RCPPS : Packed Single-Precision FP Reciprocal *
+//**********************************************************************************
+void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); }
+void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); }
+
+void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); }
+void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); }
+
+//////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//ORPS : Bit-wise Logical OR of Single-Precision FP Data *
+//**********************************************************************************
+void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); }
+void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); }
+
+/////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//XORPS : Bitwise Logical XOR of Single-Precision FP Values *
+//**********************************************************************************
+void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); }
+void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); }
+
+///////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//ADDPS : ADD Packed Single-Precision FP Values *
+//**********************************************************************************
+void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); }
+void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); }
+
+////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//ADDSS : ADD Scalar Single-Precision FP Values *
+//**********************************************************************************
+void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); }
+void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); }
+
+/////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//SUBPS: Packed Single-Precision FP Subtract *
+//**********************************************************************************
+void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); }
+void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); }
+
+///////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//SUBSS : Scalar Single-Precision FP Subtract *
+//**********************************************************************************
+void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); }
+void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); }
+
+/////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MULPS : Packed Single-Precision FP Multiply *
+//**********************************************************************************
+void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); }
+void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); }
+
+////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MULSS : Scalar Single-Precision FP Multiply *
+//**********************************************************************************
+void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); }
+void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); }
+
+////////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//Packed Single-Precision FP Compare (CMPccPS)                                    *
+//**********************************************************************************
+//missing SSE_CMPPS_I8_to_XMM
+// SSE_CMPPS_M32_to_XMM
+// SSE_CMPPS_XMM_to_XMM
+void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 0 ); }
+void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); }
+void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 1 ); }
+void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); }
+void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 2 ); }
+void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); }
+void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 3 ); }
+void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); }
+void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 4 ); }
+void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); }
+void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 5 ); }
+void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 5 ); }
+void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 6 ); }
+void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); }
+void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 7 ); }
+void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); }
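+
+/* Illustrative sketch (not part of the original source): the per-lane
+   masks above compose into a branchless select. With a in XMM0, b in
+   XMM1, x in XMM2 and y in XMM3 (placeholder assignments), this computes
+   "result = (a < b) ? x : y" into XMM2:
+
+       SSE_CMPLTPS_XMM_to_XMM(XMM0, XMM1);   // xmm0 = mask(a < b)
+       SSE_ANDPS_XMM_to_XMM(XMM2, XMM0);     // xmm2 = x & mask
+       SSE_ANDNPS_XMM_to_XMM(XMM0, XMM3);    // xmm0 = ~mask & y
+       SSE_ORPS_XMM_to_XMM(XMM2, XMM0);      // xmm2 = blended result
+*/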
+
+///////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//Scalar Single-Precision FP Compare (CMPccSS)                                    *
+//**********************************************************************************
+//missing SSE_CMPSS_I8_to_XMM
+// SSE_CMPSS_M32_to_XMM
+// SSE_CMPSS_XMM_to_XMM
+void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 0 ); }
+void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); }
+void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 1 ); }
+void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); }
+void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 2 ); }
+void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); }
+void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 3 ); }
+void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); }
+void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 4 ); }
+void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 4 ); }
+void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 5 ); }
+void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 5 ); }
+void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 6 ); }
+void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); }
+void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 7 ); }
+void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); }
+
+void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from )
+{
+ MEMADDR_OP(0, VAROP2(0x0F, 0x2E), true, to, from, 0);
+}
+
+void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ RexRB(0, to, from);
+ write16( 0x2e0f );
+ ModRM( 3, to, from );
+}
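+
+/* Added note: ucomiss sets only ZF/PF/CF (all three on an unordered
+   compare) and clears OF/SF, so it pairs with the unsigned-style
+   conditional jumps (JB/JBE/JA/JAE), not JL/JG. */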
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//RSQRTPS : Packed Single-Precision FP Square Root Reciprocal *
+//**********************************************************************************
+void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); }
+void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x520f ); }
+
+/////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal *
+//**********************************************************************************
+void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); }
+void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x520f ); }
+
+////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//SQRTPS : Packed Single-Precision FP Square Root *
+//**********************************************************************************
+void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); }
+void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x510f ); }
+
+//////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//SQRTSS : Scalar Single-Precision FP Square Root *
+//**********************************************************************************
+void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); }
+void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x510f ); }
+
+////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MAXPS: Return Packed Single-Precision FP Maximum *
+//**********************************************************************************
+void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); }
+void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); }
+
+/////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MAXSS: Return Scalar Single-Precision FP Maximum *
+//**********************************************************************************
+void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); }
+void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); }
+
+#ifndef __x86_64__
+/////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion *
+//**********************************************************************************
+void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); }
+void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); }
+
+///////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion *
+//**********************************************************************************
+void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); }
+void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); }
+#endif
+
+void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { SSE_SS_MtoR(0x2c0f, 0); }
+void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from)
+{
+ write8(0xf3);
+ RexRB(0, to, from);
+ write16(0x2c0f);
+ ModRM(3, to, from);
+}
+
+void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x2a0f, 0); }
+void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from)
+{
+ write8(0xf3);
+ RexRB(0, to, from);
+ write16(0x2a0f);
+ ModRM(3, to, from);
+}
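+
+/* Added note: the extra 't' in cvttss2si (F3 0F 2C) means the conversion
+   always truncates toward zero, independent of MXCSR's rounding mode --
+   matching C's float-to-int cast; cvtsi2ss (F3 0F 2A) is the reverse
+   int-to-float conversion. */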
+
+///////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion *
+//**********************************************************************************
+void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); }
+void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); }
+
+//**********************************************************************************/
+//CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion *
+//**********************************************************************************
+void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); }
+void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); }
+
+void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x5b0f); }
+/////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MINPS: Return Packed Single-Precision FP Minimum *
+//**********************************************************************************
+void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); }
+void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); }
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MINSS: Return Scalar Single-Precision FP Minimum *
+//**********************************************************************************
+void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); }
+void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); }
+
+#ifndef __x86_64__
+///////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//PMAXSW: Packed Signed Integer Word Maximum *
+//**********************************************************************************
+//missing:
+// SSE_PMAXSW_M64_to_MM
+// (SSE2_PMAXSW_M128_to_XMM and SSE2_PMAXSW_XMM_to_XMM are defined below)
+void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); }
+
+///////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//PMINSW: Packed Signed Integer Word Minimum *
+//**********************************************************************************
+//missing:
+// SSE_PMINSW_M64_to_MM
+// (SSE2_PMINSW_M128_to_XMM and SSE2_PMINSW_XMM_to_XMM are defined below)
+void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); }
+#endif
+
+//////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//SHUFPS: Shuffle Packed Single-Precision FP Values *
+//**********************************************************************************
+void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); }
+void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); }
+
+void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 )
+{
+ RexRB(0, to, from);
+ write16(0xc60f);
+ WriteRmOffsetFrom(to, from, offset);
+ write8(imm8);
+}
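+
+/* Added note: the shufps imm8 is four 2-bit lane selectors; the two low
+   result lanes are picked from the destination and the two high lanes
+   from the source. Hence 0x4e (01 00 11 10b) with to == from swaps the
+   64-bit halves -- the SSE2EMU_MOVSD emulation at the bottom of this
+   file relies on exactly that. */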
+
+////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//PSHUFD: Shuffle Packed DoubleWords *
+//**********************************************************************************
+void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) {
+ SSE2EMU_PSHUFD_XMM_to_XMM(to, from, imm8);
+ }
+ else {
+ SSERtoR66( 0x700F );
+ write8( imm8 );
+ }
+}
+void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoRv( 3, 0x700F66, 1 ); write8( imm8 ); }
+
+void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); }
+void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoRv(3, 0x700FF2, 1); write8(imm8); }
+void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSE_SS_RtoR(0x700F); write8(imm8); }
+void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSE_SS_MtoR(0x700F, 1); write8(imm8); }
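+
+/* Added note: unlike shufps, pshufd takes all four dwords from the
+   source, so PSHUFD(to, to, 0x4e) is a self-contained qword swap; that
+   is how SSEX_MOVHLPS_XMM_to_XMM below fixes up lane order after
+   punpckhqdq. */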
+
+///////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data *
+//**********************************************************************************
+void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); }
+void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); }
+
+////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data *
+//**********************************************************************************
+void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); }
+void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); }
+
+////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//DIVPS : Packed Single-Precision FP Divide *
+//**********************************************************************************
+void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); }
+void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); }
+
+//////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//DIVSS : Scalar Single-Precision FP Divide *
+//**********************************************************************************
+void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); }
+void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); }
+
+/////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//STMXCSR : Store Streaming SIMD Extension Control/Status *
+//**********************************************************************************
+void SSE_STMXCSR( uptr from ) {
+ MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 3, from, 0);
+}
+
+/////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//LDMXCSR : Load Streaming SIMD Extension Control/Status *
+//**********************************************************************************
+void SSE_LDMXCSR( uptr from ) {
+ MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 2, from, 0);
+}
+
+/////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//PADDB,PADDW,PADDD : Add Packed Integers *
+//**********************************************************************************
+void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); }
+void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); }
+void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); }
+void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); }
+void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); }
+void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); }
+
+void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); }
+void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); }
+
+///////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//PCMPxx: Compare Packed Integers *
+//**********************************************************************************
+void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); }
+void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); }
+void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); }
+void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); }
+void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); }
+void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); }
+void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); }
+void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); }
+void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); }
+void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); }
+void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from )
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) {
+ SSE_CMPEQPS_XMM_to_XMM(to, from);
+ }
+ else {
+ SSERtoR66( 0x760F );
+ }
+}
+
+void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from )
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) {
+ SSE_CMPEQPS_M128_to_XMM(to, from);
+ }
+ else {
+ SSEMtoR66( 0x760F );
+ }
+}
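+
+/* Added note: the cmpeqps fallback only approximates pcmpeqd -- it
+   compares lanes as floats, so NaN bit patterns never equal themselves
+   and +0.0 equals -0.0 despite differing bit patterns. */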
+
+////////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//PEXTRW,PINSRW: Packed Extract/Insert Word *
+//**********************************************************************************
+void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); }
+void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); }
+
+////////////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//PSUBx: Subtract Packed Integers *
+//**********************************************************************************
+void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); }
+void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); }
+void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); }
+void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); }
+void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); }
+void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); }
+void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); }
+void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); }
+
+///////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//MOVD: Move Dword (32-bit) to/from XMM reg                                       *
+//**********************************************************************************
+void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); }
+void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from )
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) {
+ SSE2EMU_MOVD_R_to_XMM(to, from);
+ }
+ else {
+ SSERtoR66(0x6E0F);
+ }
+}
+
+void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from )
+{
+ write8(0x66);
+ RexRB(0, to, from);
+ write16( 0x6e0f );
+ ModRM( 0, to, from);
+}
+
+void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
+{
+ write8(0x66);
+ RexRB(0, to, from);
+ write16( 0x6e0f );
+ WriteRmOffsetFrom(to, from, offset);
+}
+
+void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); }
+void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) {
+ if( !cpucaps.hasStreamingSIMD2Extensions ) {
+ SSE2EMU_MOVD_XMM_to_R(to, from);
+ }
+ else {
+ _SSERtoR66(0x7E0F);
+ }
+}
+
+void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
+{
+ write8(0x66);
+ RexRB(0, from, to);
+ write16( 0x7e0f );
+ ModRM( 0, from, to );
+}
+
+void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
+{
+ if( !cpucaps.hasStreamingSIMD2Extensions ) {
+ SSE2EMU_MOVD_XMM_to_RmOffset(to, from, offset);
+ }
+ else {
+ write8(0x66);
+ RexRB(0, from, to);
+ write16( 0x7e0f );
+ WriteRmOffsetFrom(from, to, offset);
+ }
+}
+
+#ifdef __x86_64__
+void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from )
+{
+ assert( from < XMMREGS);
+ write8( 0x66 );
+ RexRB(1, from, to);
+ write16( 0x7e0f );
+ ModRM( 3, from, to );
+}
+
+void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from )
+{
+ assert( to < XMMREGS);
+ write8(0x66);
+ RexRB(1, to, from);
+ write16( 0x6e0f );
+ ModRM( 3, to, from );
+}
+
+#endif
+
+////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//POR : SSE Bitwise OR *
+//**********************************************************************************
+void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEB0F ); }
+void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); }
+
+// logical and: to &= from
+void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); }
+void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); }
+
+// to = (~to) & from
+void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); }
+void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F ); }
+
+/////////////////////////////////////////////////////////////////////////////////////
+//**********************************************************************************/
+//PXOR : SSE Bitwise XOR *
+//**********************************************************************************
+void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEF0F ); }
+void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEF0F ); }
+///////////////////////////////////////////////////////////////////////////////////////
+
+void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0x6F0F); }
+void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM66(0x7F0F); }
+void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSERtoR66(0x6F0F); }
+
+void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x6F0F, 0); }
+void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) { SSE_SS_RtoM(0x7F0F, 0); }
+void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x6F0F); }
+
+// shift right logical
+
+void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); }
+void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); }
+void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x710F );
+ ModRM( 3, 2 , to );
+ write8( imm8 );
+}
+
+void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); }
+void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD20F); }
+void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x720F );
+ ModRM( 3, 2 , to );
+ write8( imm8 );
+}
+
+void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); }
+void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); }
+void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x730F );
+ ModRM( 3, 2 , to );
+ write8( imm8 );
+}
+
+void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x730F );
+ ModRM( 3, 3 , to );
+ write8( imm8 );
+}
+
+// shift right arithmetic
+
+void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); }
+void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); }
+void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x710F );
+ ModRM( 3, 4 , to );
+ write8( imm8 );
+}
+
+void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); }
+void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); }
+void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x720F );
+ ModRM( 3, 4 , to );
+ write8( imm8 );
+}
+
+// shift left logical
+
+void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); }
+void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); }
+void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x710F );
+ ModRM( 3, 6 , to );
+ write8( imm8 );
+}
+
+void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); }
+void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); }
+void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x720F );
+ ModRM( 3, 6 , to );
+ write8( imm8 );
+}
+
+void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); }
+void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); }
+void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x730F );
+ ModRM( 3, 6 , to );
+ write8( imm8 );
+}
+
+void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
+{
+ write8( 0x66 );
+ RexB(0, to);
+ write16( 0x730F );
+ ModRM( 3, 7 , to );
+ write8( imm8 );
+}
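+
+/* Added note: the immediate-count shifts share opcodes 66 0F 71/72/73 and
+   encode the operation in ModRM.reg: /2 logical right, /4 arithmetic
+   right, /6 left, with /3 and /7 the whole-register psrldq/pslldq byte
+   shifts. E.g. SSE2_PSRLD_I8_to_XMM(XMM0, 5) emits 66 0F 72 D0 05,
+   i.e. "psrld xmm0, 5". */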
+
+
+void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEE0F ); }
+void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEE0F ); }
+
+void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDE0F ); }
+void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xDE0F ); }
+
+void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEA0F ); }
+void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEA0F ); }
+
+void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDA0F ); }
+void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xDA0F ); }
+
+// saturating add/subtract
+
+void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEC0F ); }
+void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEC0F ); }
+
+void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xED0F ); }
+void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xED0F ); }
+
+void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE80F ); }
+void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xE80F ); }
+
+void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xE90F ); }
+void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xE90F ); }
+
+void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); }
+void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); }
+void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); }
+void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); }
+
+void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); }
+void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); }
+void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); }
+void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); }
+
+//**********************************************************************************/
+//PACKSSWB,PACKSSDW: Pack with Signed Saturation
+//**********************************************************************************
+void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); }
+void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); }
+void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); }
+void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); }
+
+void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); }
+void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); }
+
+//**********************************************************************************/
+//PUNPCKxx: Unpack and Interleave (low/high; byte, word, dword, qword)
+//**********************************************************************************
+void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); }
+void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); }
+
+void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); }
+void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); }
+
+void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); }
+void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); }
+void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); }
+void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); }
+
+void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); }
+void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); }
+void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); }
+void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); }
+
+void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); }
+void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); }
+
+void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); }
+void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); }
+
+void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); }
+void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); }
+void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); }
+void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); }
+
+void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); }
+void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); }
+
+void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); }
+
+void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); }
+void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); }
+
+void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); }
+void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from){ SSEMtoRv( 3, 0x7c0fF2, 0 ); }
+
+void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) {
+ write8(0xf3);
+ RexRB(0, to, from);
+ write16( 0x120f);
+ ModRM( 3, to, from );
+}
+
+void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x120f, 0); }
+void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x160f); }
+void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { SSE_SS_MtoR(0x160f, 0); }
+
+// SSE-X
+void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from);
+ else SSE_MOVAPS_M128_to_XMM(to, from);
+}
+
+void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from);
+ else SSE_MOVAPS_XMM_to_M128(to, from);
+}
+
+void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from);
+ else SSE_MOVAPS_XMM_to_XMM(to, from);
+}
+
+void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset);
+ else SSE_MOVAPSRmtoROffset(to, from, offset);
+}
+
+void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset);
+ else SSE_MOVAPSRtoRmOffset(to, from, offset);
+}
+
+void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from);
+ else SSE_MOVAPS_M128_to_XMM(to, from);
+}
+
+void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from);
+ else SSE_MOVAPS_XMM_to_M128(to, from);
+}
+
+void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_XMM(to, from);
+ else SSE_MOVAPS_XMM_to_XMM(to, from);
+}
+
+void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from);
+ else SSE_MOVSS_M32_to_XMM(to, from);
+}
+
+void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from);
+ else SSE_MOVSS_XMM_to_M32(to, from);
+}
+
+void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from);
+ else SSE_MOVSS_XMM_to_Rm(to, from);
+}
+
+void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
+ else SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
+}
+
+void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
+ else SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
+}
+
+void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from);
+ else SSE_ORPS_M128_to_XMM(to, from);
+}
+
+void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from);
+ else SSE_ORPS_XMM_to_XMM(to, from);
+}
+
+void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from);
+ else SSE_XORPS_M128_to_XMM(to, from);
+}
+
+void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from);
+ else SSE_XORPS_XMM_to_XMM(to, from);
+}
+
+void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from);
+ else SSE_ANDPS_M128_to_XMM(to, from);
+}
+
+void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from);
+ else SSE_ANDPS_XMM_to_XMM(to, from);
+}
+
+void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from);
+ else SSE_ANDNPS_M128_to_XMM(to, from);
+}
+
+void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from);
+ else SSE_ANDNPS_XMM_to_XMM(to, from);
+}
+
+void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
+ else SSE_UNPCKLPS_M128_to_XMM(to, from);
+}
+
+void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
+ else SSE_UNPCKLPS_XMM_to_XMM(to, from);
+}
+
+void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
+ else SSE_UNPCKHPS_M128_to_XMM(to, from);
+}
+
+void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
+ else SSE_UNPCKHPS_XMM_to_XMM(to, from);
+}
+
+void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
+{
+ if( cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT ) {
+ SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from);
+ if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e);
+ }
+ else {
+ SSE_MOVHLPS_XMM_to_XMM(to, from);
+ }
+}
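+
+/* Added note: the SSEX_* layer consults g_xmmtypes[] to pick the integer
+   (SSE2) or float (SSE1) flavour of a move/bitwise op, presumably to
+   keep operations in the matching execution domain on SSE2 parts while
+   still falling back cleanly on SSE1-only CPUs. */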
+
+// SSE2 emulation
+void SSE2EMU_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from)
+{
+ SSE_SHUFPS_XMM_to_XMM(to, from, 0x4e);
+ SSE_SHUFPS_XMM_to_XMM(to, to, 0x4e);
+}
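+
+/* Added note: movsd xmm,xmm copies the low qword of 'from' and keeps the
+   high qword of 'to'. The first shufps leaves {to2,to3,from0,from1}; the
+   second rotates the halves back, yielding {from0,from1,to2,to3} --
+   exactly movsd's merge. */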
+
+void SSE2EMU_MOVQ_M64_to_XMM( x86SSERegType to, uptr from)
+{
+ SSE_XORPS_XMM_to_XMM(to, to);
+ SSE_MOVLPS_M64_to_XMM(to, from);
+}
+
+void SSE2EMU_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from)
+{
+	// note: assumes to != from, since the xorps would clear the source first
+	SSE_XORPS_XMM_to_XMM(to, to);
+	SSE2EMU_MOVSD_XMM_to_XMM(to, from);
+}
+
+void SSE2EMU_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
+{
+ MOV32RmtoROffset(EAX, from, offset);
+ MOV32ItoM((uptr)p+4, 0);
+ MOV32ItoM((uptr)p+8, 0);
+ MOV32RtoM((uptr)p, EAX);
+ MOV32ItoM((uptr)p+12, 0);
+ SSE_MOVAPS_M128_to_XMM(to, (uptr)p);
+}
+
+void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset )
+{
+ SSE_MOVSS_XMM_to_M32((uptr)p, from);
+ MOV32MtoR(EAX, (uptr)p);
+ MOV32RtoRmOffset(to, EAX, offset);
+}
+
+#ifndef __x86_64__
+extern void SetMMXstate();
+
+void SSE2EMU_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from)
+{
+ SSE_MOVLPS_XMM_to_M64(p, from);
+ MOVQMtoR(to, p);
+ SetMMXstate();
+}
+
+void SSE2EMU_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from)
+{
+ MOVQRtoM(p, from);
+ SSE_MOVLPS_M64_to_XMM(to, p);
+ SetMMXstate();
+}
+#endif
+
+/****************************************************************************/
+/* SSE2 emulated functions for SSE-only CPUs, by kekko                     */
+/****************************************************************************/
+void SSE2EMU_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) {
+ MOV64ItoR(EAX, (uptr)&p);
+ MOV64ItoR(EBX, (uptr)&p2);
+ SSE_MOVUPSRtoRm(EAX, from);
+
+ MOV32ItoR(ECX, (u32)imm8);
+ AND32ItoR(ECX, 3);
+ SHL32ItoR(ECX, 2);
+ ADD32RtoR(ECX, EAX);
+ MOV32RmtoR(ECX, ECX);
+ MOV32RtoRm(EBX, ECX);
+
+ ADD32ItoR(EBX, 4);
+ MOV32ItoR(ECX, (u32)imm8);
+ SHR32ItoR(ECX, 2);
+ AND32ItoR(ECX, 3);
+ SHL32ItoR(ECX, 2);
+ ADD32RtoR(ECX, EAX);
+ MOV32RmtoR(ECX, ECX);
+ MOV32RtoRm(EBX, ECX);
+
+ ADD32ItoR(EBX, 4);
+ MOV32ItoR(ECX, (u32)imm8);
+ SHR32ItoR(ECX, 4);
+ AND32ItoR(ECX, 3);
+ SHL32ItoR(ECX, 2);
+ ADD32RtoR(ECX, EAX);
+ MOV32RmtoR(ECX, ECX);
+ MOV32RtoRm(EBX, ECX);
+
+ ADD32ItoR(EBX, 4);
+ MOV32ItoR(ECX, (u32)imm8);
+ SHR32ItoR(ECX, 6);
+ AND32ItoR(ECX, 3);
+ SHL32ItoR(ECX, 2);
+ ADD32RtoR(ECX, EAX);
+ MOV32RmtoR(ECX, ECX);
+ MOV32RtoRm(EBX, ECX);
+
+ SUB32ItoR(EBX, 12);
+
+ SSE_MOVUPSRmtoR(to, EBX);
+}
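+
+/* Added note: the block above spills 'from' to the static buffer p, then
+   for each 2-bit field of imm8 copies the selected source dword into p2
+   (EBX steps through p2 in 4-byte increments, then rewinds by 12), and
+   finally reloads p2 into 'to' -- a scalar pshufd for SSE1-only CPUs. */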
+
+void SSE2EMU_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) {
+ /* XXX? */
+ MOV64ItoR(to, (uptr)&p);
+ SSE_MOVUPSRtoRm(to, from);
+ MOV32RmtoR(to, to);
+}
+
+#ifndef __x86_64__
+extern void SetFPUstate();
+extern void _freeMMXreg(int mmxreg);
+#endif
+
+void SSE2EMU_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) {
+#ifndef __x86_64__
+ SetFPUstate();
+ _freeMMXreg(7);
+#endif
+ SSE_MOVAPS_XMM_to_M128((uptr)f, from);
+
+ FLD32((uptr)&f[0]);
+ FISTP32((uptr)&p2[0]);
+ FLD32((uptr)&f[1]);
+ FISTP32((uptr)&p2[1]);
+ FLD32((uptr)&f[2]);
+ FISTP32((uptr)&p2[2]);
+ FLD32((uptr)&f[3]);
+ FISTP32((uptr)&p2[3]);
+
+ SSE_MOVAPS_M128_to_XMM(to, (uptr)p2);
+}
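+
+/* Added note: fistp rounds according to the x87 control word while
+   cvtps2dq follows MXCSR, so this emulation matches the real instruction
+   only when both use the same rounding mode (each defaults to
+   round-to-nearest). */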
+
+void SSE2EMU_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) {
+#ifndef __x86_64__
+ SetFPUstate();
+ _freeMMXreg(7);
+#endif
+ FILD32(from);
+ FSTP32((uptr)&f[0]);
+ FILD32(from+4);
+ FSTP32((uptr)&f[1]);
+ FILD32(from+8);
+ FSTP32((uptr)&f[2]);
+ FILD32(from+12);
+ FSTP32((uptr)&f[3]);
+
+ SSE_MOVAPS_M128_to_XMM(to, (uptr)f);
+}
+
+void SSE2EMU_MOVD_XMM_to_M32( uptr to, x86SSERegType from ) {
+ /* XXX? */
+ MOV64ItoR(EAX, (uptr)&p);
+ SSE_MOVUPSRtoRm(EAX, from);
+ MOV32RmtoR(EAX, EAX);
+ MOV32RtoM(to, EAX);
+}
+
+void SSE2EMU_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) {
+ MOV32ItoM((uptr)p+4, 0);
+ MOV32ItoM((uptr)p+8, 0);
+ MOV32RtoM((uptr)p, from);
+ MOV32ItoM((uptr)p+12, 0);
+ SSE_MOVAPS_M128_to_XMM(to, (uptr)p);
+}
+
+#endif