#include #include #include #include #include "snes9x.h" #include "cpuexec.h" #include "sa1.h" #include "apu.h" #include "arm_dynarec/armfn.h" #include "arm_dynarec/armgen.h" #include "arm_dynarec/dynaexec.h" #define BUFFER_SIZE (6 << 20) #define BUFFER_EXTRA (1 << 20) #define BLOCK_SIZE 0x1000 #ifndef NDEBUG int trace[128]; int traceidx; int BreakPC; int BreakOpcode; #endif enum SNESRegArg { ArgNULL, ArgA, ArgX, ArgY, ArgS, ArgD, ArgDB, ArgPB, ArgP, ArgZ, }; enum OpcodeFlag { OFNone = 0, OFBreak = 1 << 0, OFSectionUpdate = 1 << 1, }; #define V(val) val##8, val##16 enum OpAddrMode { AddrNone, V(AddrRegister), V(AddrImmediate), AddrImmediate24, V(AddrZeroPage), V(AddrIndirect), V(AddrIndirectX), V(AddrIndirectY), V(AddrIndirectS), V(AddrIndirectFar), V(AddrAbsolute), V(AddrLong), }; enum OpFunction { V(FnMOV), V(FnMOVSP), V(FnLD), V(FnST), V(FnPUSH), FnPEA, FnPER, V(FnPOP), FnMVN, FnMVP, V(FnOR), V(FnAND), V(FnEOR), V(FnADC), V(FnSBC), V(FnCMP), V(FnBIT), V(FnINC), V(FnDEC), V(FnTSB), V(FnTRB), V(FnASL), V(FnLSR), V(FnROL), V(FnROR), FnBRA, FnBRL, FnJMP, FnJML, FnJSR, FnJSL, FnRTI, FnRTL, FnRTS, FnBPL, FnBMI, FnBVC, FnBVS, FnBCC, FnBCS, FnBZC, FnBZS, FnBRK, FnCOP, FnCLC, FnCLI, FnCLD, FnCLV, FnSEC, FnSEI, FnSED, FnREP, FnSEP, FnXCE, FnSTP, FnXBA, FnWAI, FnWDM, FnNOP, }; #undef V #define CHECK_8_16(field, value) (in->field == value##8 || in->field == value##16 ) #define CHECK_FIELD(field, value) (in->field == value) #define IS_VOP(value) (CHECK_8_16(Op, Fn##value)) #define IS_OP(value) (CHECK_FIELD(Op, Fn##value)) #define IS_VADDRMODE(value) (CHECK_8_16(AddrMode, Addr##value)) #define IS_ADDRMODE(value) (CHECK_FIELD(AddrMode, Addr##value)) #define CASE_VOP(value) case Fn##value##8: case Fn##value##16 #define CASE_OP(value) case Fn##value enum ChecksFlags { CheckWFI = 1, }; struct Instruction; typedef struct Instruction { void (*S9xOpcode)(void); void (*Emitter)(struct Instruction *); uint8_t *PC; uint8_t Opcode; enum OpAddrMode AddrMode; enum OpFunction Op; enum 
SNESRegArg Arg1; enum SNESRegArg Arg2; uint16_t OutFlags; uint16_t InFlags; bool SectionUpdate; bool ShouldBreak; } Instruction; uint32_t CacheSection; static uint8_t *CachePtr; static uint8_t *Cache; static uint8_t **Sections[13 * 0x1000]; /* Max: Decimal | Emulation = 13 */ #ifndef NDEBUG struct { uint32_t Emits; uint32_t Finds; uint32_t InterpretedFinds; bool InterpretedBlock; uint32_t Ops; uint32_t InterpretedOps; uint32_t ContextSwitches; uint32_t GetSets; uint32_t SlowGetSets; uint32_t OpCounts[0x100]; uint32_t InterpretedOpCounts[0x100]; uint32_t InterpretedBlockStarts[0x100]; } Metrics; void MetricsReset(void) { memset(&Metrics, 0, sizeof(Metrics)); } void MetricsPrint(void) { int i; printf("Cache hit rate: %f\n", (float)(Metrics.Finds - Metrics.Emits) / Metrics.Finds); printf("Avg. instructions per find: %f\n", (float)Metrics.Ops / Metrics.Finds); printf("Counts by instruction: \n"); for (i = 0; i < sizeof(Metrics.OpCounts) / sizeof(Metrics.OpCounts[0]); i++) { printf("0x%X: %d\n", i, Metrics.OpCounts[i]); } printf("Interpreted counts by instruction: \n"); for (i = 0; i < sizeof(Metrics.InterpretedOpCounts) / sizeof(Metrics.InterpretedOpCounts[0]); i++) { if (Metrics.InterpretedOpCounts[i] > 0) printf("0x%X: %d\n", i, Metrics.InterpretedOpCounts[i]); } printf("Interpreted block starts by instruction: \n"); for (i = 0; i < sizeof(Metrics.InterpretedBlockStarts) / sizeof(Metrics.InterpretedBlockStarts[0]); i++) { if (Metrics.InterpretedBlockStarts[i] > 0) printf("0x%X: %d\n", i, Metrics.InterpretedBlockStarts[i]); } } #endif uint8_t **FetchBlock(uint16_t pc); static void *CacheAlloc(size_t size) { uint8_t *prev = CachePtr; CachePtr += size; return (void *)prev; } static void CacheEmpty(void) { memset(Cache, 0, BUFFER_SIZE + BUFFER_EXTRA); memset(Sections, 0, sizeof(Sections)); CachePtr = Cache; } static uint8_t **FindBlock(uint16_t pc) { /* Section is D/E/X/M flags, PB, and top 4 bits of PC */ uint32_t section = (CacheSection | ((pc & 0xF000) >> 12)); 
uint8_t **block = Sections[section]; pc &= ~0xF000; if (!block || !block[pc]) { if (CachePtr >= Cache + BUFFER_SIZE) { CacheEmpty(); printf("Reset translation cache\n"); block = NULL; } if (!block) block = Sections[section] = CacheAlloc(BLOCK_SIZE * sizeof(uint8_t *)); } return &block[pc]; } #ifndef NDEBUG void CheckInstruction(uint8_t opcode, uint8_t *pc, bool interpreted) { Metrics.Ops++; Metrics.OpCounts[opcode]++; if (interpreted) { Metrics.InterpretedOps++; Metrics.InterpretedOpCounts[opcode]++; if (!Metrics.InterpretedBlock) { Metrics.InterpretedBlock = true; Metrics.InterpretedFinds++; Metrics.InterpretedBlockStarts[opcode]++; } } if (CPU.PC - CPU.PCBase == (BreakPC & 0xffff) && ICPU.Registers.PB == ((BreakPC >> 16) & 0xFF)) DynaBreak(); if (*CPU.PC == BreakOpcode) DynaBreak(); if (pc != CPU.PC) { fprintf(stderr, "Incorrect PC: Expected 0x%X, was 0x%X\n", (uintptr_t)pc, (uintptr_t)CPU.PC); DynaBreak(); } if (opcode != *CPU.PC) { fprintf(stderr, "Incorrect opcode: Expected 0x%X, was 0x%X\n", opcode, *CPU.PC); DynaBreak(); } } #endif static void EmitChecks(enum ChecksFlags flags) { if (Settings.SA1) { GenARM_MOV32_Imm(CondAL, RegR0, (uintptr_t)&SA1); GenARM_LDRB_Imm(CondAL, RegR0, RegR0, offsetof(__typeof__(SA1), Executing)); GenARM_CMP_Imm(CondAL, RegR0, ENCODE_IMM(0, 0)); GenARM_BL(CondNE, PCOFFSET(S9xCallSA1MainLoop)); } GenARM_ANDS_Imm(CondAL, RegR0, RegChecks, ENCODE_IMM(0x88, 4)); /* 0x880 = NMI_FLAG | IRQ_PENDING_FLAG */ GenARM_CMP_Reg(CondEQ, RegCycles, ENCODE_SHIFT(ShiftLSR, 16), RegChecks); GenARM_ORRS_Imm(CondHS, RegR0, RegR0, ENCODE_IMM(1, 0)); if (flags & CheckWFI) GenARM_BL(CondNE, PCOFFSET(S9xCallHandleChecksWFI)); else GenARM_BL(CondNE, PCOFFSET(S9xCallHandleChecksNoWFI)); } static void PatchJump(uint32_t *source, uint8_t *target) { *source = ((*source & 0xF0000000) | (0xA << 24) | ((PCREL(source, target) >> 2) & 0xFFFFFF)); } static uint8_t *HandleFlags(bool checkWFI) { uint8_t *NewPC = NULL; if (CPU.Flags & NMI_FLAG) { if (--CPU.NMICycleCount 
== 0)
        {
            /* NMI delay elapsed: clear the flag and take the NMI. */
            CPU.Flags &= ~NMI_FLAG;
            if (checkWFI && CPU.WaitingForInterrupt)
            {
                CPU.WaitingForInterrupt = false;
                CPU.PC++;
            }
            S9xOpcode_NMI();
            UpdateSection();
            NewPC = CPU.PC;
        }
    }

    if (CPU.Flags & IRQ_PENDING_FLAG)
    {
        if (CPU.IRQCycleCount == 0)
        {
            if (checkWFI && CPU.WaitingForInterrupt)
            {
                CPU.WaitingForInterrupt = false;
                /* NOTE(review): post-increment - NewPC receives the PC value
                 * from before the increment, whereas the NMI path above
                 * increments first and reads CPU.PC afterwards. Confirm this
                 * difference is intended. */
                NewPC = CPU.PC++;
            }
            if (CPU.IRQActive && !Settings.DisableIRQ)
            {
                /* Take the IRQ only when the IRQ flag in P is clear. */
                if (!CheckFlag(IRQ))
                {
                    S9xOpcode_IRQ();
                    UpdateSection();
                    NewPC = CPU.PC;
                }
            }
            else
                CPU.Flags &= ~IRQ_PENDING_FLAG;
        }
        else if (--CPU.IRQCycleCount == 0 && CheckFlag(IRQ))
            CPU.IRQCycleCount = 1; /* keep the count at 1 while the IRQ flag in P is set */
    }

    return NewPC;
}

/* HandleFlags without releasing a CPU that is waiting for an interrupt. */
uint8_t *HandleFlagsNoWFI(void)
{
    return HandleFlags(false);
}

/* HandleFlags that also releases a CPU that is waiting for an interrupt. */
uint8_t *HandleFlagsWFI(void)
{
    return HandleFlags(true);
}

/*
 * Runs the H-blank event processing (SFX or non-SFX variant).
 * Returns 3 when built with LAGFIX and finishedFrame is set, 1 when
 * SCAN_KEYS_FLAG is set in CPU.Flags, and 0 otherwise.
 */
static int HandleHBlank(bool SFX)
{
#ifndef NDEBUG
    Metrics.ContextSwitches++;
#endif

#ifndef USE_BLARGG_APU
    /* At the end of H-blank, run the APU until it has passed the CPU. */
    if (CPU.WhichEvent == HBLANK_END_EVENT)
        while (IAPU.APUExecuting && APU.Cycles <= CPU.Cycles)
            APU_EXECUTE1();
#endif

    if (SFX)
        S9xDoHBlankProcessing_SFX();
    else
        S9xDoHBlankProcessing_NoSFX();

#ifdef LAGFIX
    if(finishedFrame)
        return 3;
#endif

    if (CPU.Flags & SCAN_KEYS_FLAG)
        return 1;

    return 0;
}

int HandleHBlankSFX(void)
{
    return HandleHBlank(true);
}

int HandleHBlankNoSFX(void)
{
    return HandleHBlank(false);
}

/*
 * Patches the instruction at `source` to branch to a freshly emitted stub
 * that runs the event checks and then branches to the block for `PC`.
 * The block is looked up with FetchBlock(PC - CPU.PCBase). The stub is emitted
 * at the current CachePtr, and __clear_cache is called on both the stub and
 * the patched word. Returns the stub's address.
 */
uint8_t *PatchJumpDirectChecks(uint8_t *PC, uint32_t *source)
{
    uint8_t *dest = *FetchBlock(PC - CPU.PCBase);
    uint8_t *checks = CachePtr;

    EmitChecks(0);
    GenARM_B(CondAL, PCOFFSET(dest));
    __clear_cache(checks, CachePtr);

    PatchJump(source, checks);
    __clear_cache(source, source+1);

    return checks;
}

/*
 * Patches the instruction at `source` to branch straight to the block for
 * `PC` (looked up with FetchBlock(PC - CPU.PCBase)) and returns that block's
 * address.
 */
uint8_t *PatchJumpDirect(uint8_t *PC, uint32_t *source)
{
    uint8_t *dest = *FetchBlock(PC - CPU.PCBase);
    PatchJump(source, dest);
    __clear_cache(source, source+1);
    return dest;
}

/*
 * Shutdown handling reached from emitted branch code (see EmitBranch, which
 * calls S9xCallCPUShutdown when the branch target equals CPU.WaitAddress).
 */
void DynaCPUShutdown(void)
{
    /* Don't skip cycles with a pending NMI or IRQ - could cause delayed
     * interrupt. Interrupts are delayed for a few cycles already, but
     * the delay could allow the shutdown code to cycle skip again.
     * Was causing screen flashing on Top Gear 3000.
*/
    if (CPU.WaitCounter == 0 && !(CPU.Flags & (IRQ_PENDING_FLAG | NMI_FLAG)))
    {
        CPU.WaitAddress = NULL;
        /* NOTE(review): here the cycle skip sits inside the #ifndef, while
         * DynaWAIShutdown below performs it unconditionally - confirm. */
#ifndef USE_BLARGG_APU
        CPU.Cycles = CPU.NextEvent;
        if (IAPU.APUExecuting)
        {
            /* Let the APU catch up to the event the CPU just skipped to. */
            ICPU.CPUExecuting = false;
            do
            {
                APU_EXECUTE1();
            } while (APU.Cycles < CPU.NextEvent);
            ICPU.CPUExecuting = true;
        }
#endif
    }
    else if (CPU.WaitCounter >= 2)
        CPU.WaitCounter = 1;
    else
        CPU.WaitCounter--;
}

/*
 * Skips the CPU ahead to the next event and, unless built with
 * USE_BLARGG_APU, runs the APU until it reaches that event.
 */
void DynaWAIShutdown(void)
{
    CPU.Cycles = CPU.NextEvent;
#ifndef USE_BLARGG_APU
    if (IAPU.APUExecuting)
    {
        ICPU.CPUExecuting = false;
        do
        {
            APU_EXECUTE1();
        } while (APU.Cycles < CPU.NextEvent);
        ICPU.CPUExecuting = true;
    }
#endif
}

/*
 * Emits additions of the accumulated PC delta (*dpc) to RegCPUPC and of the
 * accumulated cycle delta (*dcycles) to RegCycles, skipping either one when
 * it is zero.
 */
static void EmitFlushCyclesPC(int *dpc, int *dcycles)
{
    if (*dpc != 0)
        GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(*dpc, 0));

    if (*dcycles != 0)
        GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(*dcycles, 0));

    /* NOTE(review): these assign to the local pointer parameters, not to the
     * pointed-to counters, so the caller's dpc/dcycles keep their values after
     * a flush. The MVN/MVP emitter reads `dpc` after flushing to rewind the
     * PC, so it depends on that. Do not change to `*dpc = 0` without auditing
     * every caller. */
    dpc = 0;
    dcycles = 0;
}

/* a1 = value, a4 = sp, preserves a4 */
/* Emits a stack push of one byte (n8) or one word. The caller loads S into A4
 * beforehand; A4 holds the decremented S afterwards. */
static void EmitPush(Instruction *in, bool n8)
{
    enum ARMReg RegS = RegA4; /* Preserved during memory ops */

    if (n8)
        GenARM_SUB_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 0));
    else
        GenARM_SUB_Imm(CondAL, RegS, RegS, ENCODE_IMM(2, 0));

    if (CheckEmulation())
    {
        /* Emulation mode: force the high byte of S to 0x01. */
        GenARM_BIC_Imm(CondAL, RegS, RegS, ENCODE_IMM(0xFF, 8));
        GenARM_ORR_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 8));
    }

    /* Write address = S + 1 with bits 16..23 cleared, passed in A2. */
    GenARM_ADD_Imm(CondAL, RegA2, RegS, ENCODE_IMM(1, 0));
    GenARM_BIC_Imm(CondAL, RegA2, RegA2, ENCODE_IMM(0xFF, 16));

    if (n8)
    {
        GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
    }
    else
    {
        GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast));
    }
}

/* a1 = value, a4 = sp, preserves a4 */
/* Emits a stack pop of one byte (n8) or one word. The caller loads S into A4
 * beforehand; the read address is passed in A1 and RegCPUPC in A2. */
static void EmitPop(Instruction *in, bool n8)
{
    enum ARMReg RegS = RegA4; /* Preserved during memory ops */

    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);

    if (n8)
        GenARM_ADD_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 0));
    else
        GenARM_ADD_Imm(CondAL, RegS, RegS, ENCODE_IMM(2, 0));

    if (CheckEmulation())
    {
        /* Emulation mode: force the high byte of S to 0x01. */
        GenARM_BIC_Imm(CondAL, RegS, RegS, ENCODE_IMM(0xFF, 8));
        GenARM_ORR_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 8));
    }

    if (n8)
    {
        /* Byte pop reads from the already-incremented S. */
        GenARM_MOV_Reg(CondAL, RegA1, 0, RegS);
GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
    }
    else
    {
        /* Word pop: S was advanced by 2, so the word starts at S - 1. */
        GenARM_SUB_Imm(CondAL, RegA1, RegS, ENCODE_IMM(1, 0));
        GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
    }
}

/*
 * Returns the ARM register that holds SNES register `arg`.
 * A, X, Y and P map to fixed registers (RegA, RegX, RegY, RegP) and emit no
 * code. D, S, DB and PB live in memory: RegMEM is returned and, when `read`
 * is true, a load of the current value into RegMEM is emitted first.
 * For any other `arg` a message is printed to stderr and -1 is returned.
 */
static enum ARMReg EmitLoadRegister(Instruction *in, enum SNESRegArg arg, enum ARMReg RegMEM, bool read)
{
    enum ARMReg reg;

    switch(arg)
    {
    case ArgA: reg = RegA; break;
    case ArgX: reg = RegX; break;
    case ArgY: reg = RegY; break;
    case ArgP: reg = RegP; break;
    case ArgD:
        if (read)
            GenARM_LDRH_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.D));
        reg = RegMEM;
        break;
    case ArgS:
        if (read)
            GenARM_LDRH_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.S));
        reg = RegMEM;
        break;
    case ArgDB:
        if (read)
            GenARM_LDRB_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.DB));
        reg = RegMEM;
        break;
    case ArgPB:
        if (read)
            GenARM_LDRB_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.PB));
        reg = RegMEM;
        break;
    default:
        fprintf(stderr, "Unhandled SNES Register: %d\n", arg);
        reg = -1;
        break;
    }

    return reg;
}

/* Preserves reg, modifies R3 */
/*
 * Emits the code that commits the value in ARM register `reg` to SNES
 * register `arg` (8-bit when n8, else 16-bit).
 * NOTE(review): for ArgP the emitted code overwrites `reg` (see the swap
 * below), so "Preserves reg" does not hold for that case.
 */
static void EmitStoreRegister(Instruction *in, enum SNESRegArg arg, enum ARMReg reg, bool n8)
{
    switch(arg)
    {
    case ArgA:
        /* Nothing to emit when the value is already in RegA. */
        if (reg != RegA)
        {
            if (n8)
                GenARM_MOVB_Reg(CondAL, RegA, reg);
            else
                GenARM_UXTH_Reg(CondAL, RegA, reg);
        }
        break;
    case ArgX:
        if (reg != RegX)
        {
            if (n8)
                GenARM_MOVB_Reg(CondAL, RegX, reg);
            else
                GenARM_UXTH_Reg(CondAL, RegX, reg);
        }
        break;
    case ArgY:
        if (reg != RegY)
        {
            if (n8)
                GenARM_MOVB_Reg(CondAL, RegY, reg);
            else
                GenARM_UXTH_Reg(CondAL, RegY, reg);
        }
        break;
    case ArgP:
    {
        uint8_t *branch;

        /* Three-EOR swap: RegP takes the new value, `reg` keeps the old P. */
        GenARM_EOR_Reg(CondAL, RegP, RegP, 0, reg);
        GenARM_EOR_Reg(CondAL, reg, RegP, 0, reg);
        GenARM_EOR_Reg(CondAL, RegP, RegP, 0, reg);

        /* With Emulation set, MemoryFlag and IndexFlag are forced on. */
        GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Emulation >> 8, 8));
        GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(MemoryFlag | IndexFlag, 0));
        GenARM_BL(CondAL, PCOFFSET(S9xCallUnpackStatusFast));

        /* With IndexFlag set, clear the high bytes of X and Y. */
        GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(IndexFlag, 0));
        GenARM_BIC_Imm(CondNE, RegX, RegX, ENCODE_IMM(0xFF, 8));
        GenARM_BIC_Imm(CondNE, RegY, RegY, ENCODE_IMM(0xFF, 8));

        /* Update
interpreter lookup table and CacheSection if processor flags changed */
        /* reg = old P ^ new P, i.e. the set of changed flag bits. */
        GenARM_EOR_Reg(CondAL, reg, reg, 0, RegP);
        GenARM_TST_Imm(CondAL, reg, ENCODE_IMM((Emulation | MemoryFlag | IndexFlag | Decimal) >> 2, 2));

        /* Placeholder branch, patched below to skip the two calls when none
         * of the tested flags changed. */
        branch = CachePtr;
        GenARM_B(CondEQ, PCOFFSET(branch));
        GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
        GenARM_BL(CondAL, PCOFFSET(S9xCallFixCycles));
        PatchJump((uint32_t *)branch, CachePtr);
        break;
    }
    case ArgD:
        GenARM_STRH_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.D));
        break;
    case ArgDB:
        /* Store the bank byte and its pre-shifted (<< 16) copy. */
        GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), reg);
        GenARM_STRB_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.DB));
        GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedDB));
        break;
    case ArgPB:
        GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), reg);
        GenARM_STRB_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.PB));
        GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedPB));
        break;
    case ArgS:
        if (CheckEmulation())
        {
            /* Emulation mode: store S with its high byte forced to 0x01,
             * built in A3 so `reg` itself is left alone. */
            GenARM_BIC_Imm(CondAL, RegA3, reg, ENCODE_IMM(0xFF, 8));
            GenARM_ORR_Imm(CondAL, RegA3, RegA3, ENCODE_IMM(1, 8));
            GenARM_STRH_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(Registers.S));
        }
        else
        {
            GenARM_STRH_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.S));
        }
        break;
    default:
        fprintf(stderr, "Unhandled SNES Register: %d\n", arg);
        break;
    }
}

/*
 * Reads the instruction's immediate operand at translation time (no code is
 * emitted): one byte from in->PC[1] when n8, otherwise a little-endian word
 * from in->PC[1..2]. Adds one to *dpc and CPU.MemSpeed to *dcycles per byte
 * read. Returns the operand value.
 */
static uint16_t EmitLoadLiteral(Instruction *in, bool n8, int *dpc, int *dcycles)
{
    uint16_t literal;

    *dcycles += CPU.MemSpeed;
    *dpc += 1;
    literal = in->PC[1];

    if (!n8)
    {
        *dcycles += CPU.MemSpeed;
        *dpc += 1;
        literal |= in->PC[2] << 8;
    }

    return literal;
}

/*
 * Reads the instruction's signed relative operand at translation time (8-bit
 * when n8, else 16-bit), accounts for it in *dpc / *dcycles, and returns the
 * displacement from the following instruction to the target. When pc + disp
 * leaves the 16-bit range, the displacement is recomputed against the wrapped
 * target.
 */
static int32_t EmitLoadRelative(Instruction *in, bool n8, int *dpc, int *dcycles)
{
    int32_t disp;
    int32_t pc;
    int32_t target;
    bool overflow;

    if (n8)
    {
        disp = (int8_t)in->PC[1];
        pc = (int32_t)((in->PC + 2) - CPU.PCBase); /* offset of the next instruction */
        (*dpc)++;
        *dcycles += CPU.MemSpeed;
    }
    else
    {
        disp = (int16_t)(in->PC[1] | (in->PC[2] << 8));
        pc = (int32_t)((in->PC + 3) - CPU.PCBase); /* offset of the next instruction */
        *dpc += 2;
        *dcycles += CPU.MemSpeedx2;
    }

    target = (pc + disp) & 0xFFFF;
    overflow = target != (pc +
disp);

    /* Target wrapped around the 16-bit range: use the wrapped distance. */
    if (overflow)
        disp = (target - pc);

    return disp;
}

/* Stores result in A1 for reads, A2 for writes, because that is where
 * S9XRead / S9xWrite look for address. Modifies the other. Reads also
 * write over A4.
 *
 * Emits the direct-page (or, for offsetReg == ArgS, stack-relative) address
 * computation: operand byte in->PC[1], plus X or Y when requested, plus D
 * (or S). Adds one to *dpc and updates *dcycles. */
static void EmitAddrZeroPage(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
{
    enum ARMReg rd = read ? RegA1 : RegA2;
    enum ARMReg scratch = read ? RegA2 : RegA1;

    *dpc += 1;
    *dcycles += CPU.MemSpeed;

    GenARM_MOV_Imm(CondAL, rd, ENCODE_IMM(in->PC[1], 0));

    /* Base register: S for stack-relative, D otherwise. */
    if (offsetReg == ArgS)
        GenARM_LDRH_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(Registers.S));
    else
        GenARM_LDRH_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(Registers.D));

    if (offsetReg == ArgX)
        GenARM_ADD_Reg(CondAL, rd, rd, 0, RegX);
    else if (offsetReg == ArgY)
        GenARM_ADD_Reg(CondAL, rd, rd, 0, RegY);

    if (offsetReg == ArgX || offsetReg == ArgY || offsetReg == ArgS)
        *dcycles += ONE_CYCLE;

    if (read)
    {
        /* Set OpenBus to in->PC[1] */
        GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[1], 0));
        GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
    }

    GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch);

    if (CheckEmulation() && (offsetReg == ArgX || offsetReg == ArgY))
    {
        /* Emulation mode with an index: the result is truncated to 8 bits
         * and one more cycle is charged. */
        GenARM_UXTB_Reg(CondAL, rd, rd);
        *dcycles += ONE_CYCLE;
    }
    else
    {
        /* Otherwise the result is truncated to 16 bits. */
        GenARM_UXTH_Reg(CondAL, rd, rd);
    }
}

/* Stores result in A1 for reads, A2 for writes, because that is where
 * S9XRead / S9xWrite look for address. Modifies the other. Reads also
 * write over A4.
 *
 * Emits the absolute address computation: 16-bit operand in->PC[1..2], plus
 * X or Y when requested, plus ShiftedDB. Adds two to *dpc and
 * CPU.MemSpeedx2 to *dcycles. */
static void EmitAddrAbsolute(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
{
    enum ARMReg rd = read ? RegA1 : RegA2;
    enum ARMReg scratch = read ?
RegA2 : RegA1;

    *dpc += 2;
    *dcycles += CPU.MemSpeedx2;

    /* scratch = 16-bit operand, rd = ShiftedDB */
    GenARM_MOV32_Imm(CondAL, scratch, in->PC[1] + (in->PC[2] << 8));
    GenARM_LDR_Imm(CondAL, rd, RegCPU, ICPU_OFFSET(ShiftedDB));

    if (offsetReg == ArgX)
        GenARM_ADD_Reg(CondAL, scratch, scratch, 0, RegX);
    else if (offsetReg == ArgY)
        GenARM_ADD_Reg(CondAL, scratch, scratch, 0, RegY);

    if (read)
    {
        /* Set OpenBus to in->PC[2] */
        GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0));
        GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
    }

    GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch);
}

/* Stores result in A1 for reads, A2 for writes, because that's where
 * S9XRead / S9xWrite look for address. Reads also write over A4.
 *
 * Emits the long address computation: 24-bit operand in->PC[1..3], plus X
 * when offsetReg == ArgX (with bits 24..31 of the sum cleared). Adds three to
 * *dpc and three memory accesses to *dcycles. */
static void EmitAddrLong(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
{
    enum ARMReg rd = read ? RegA1 : RegA2;

    *dpc += 3;
    *dcycles += CPU.MemSpeedx2 + CPU.MemSpeed;

    GenARM_MOV32_Imm(CondAL, rd, in->PC[1] + (in->PC[2] << 8) + (in->PC[3] << 16));

    if (offsetReg == ArgX)
    {
        GenARM_ADD_Reg(CondAL, rd, rd, 0, RegX);
        GenARM_BIC_Imm(CondAL, rd, rd, ENCODE_IMM(0xFF, 24));
    }

    if (read)
    {
        /* Set OpenBus to in->PC[2] */
        /* NOTE(review): the last operand byte of a long address is
         * in->PC[3]; confirm that PC[2] is the intended open-bus value. */
        GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0));
        GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
    }
}

/* Stores result in A1 for reads, A2 for writes, because that's where
 * S9XRead / S9xWrite look for address. Writes over A4.
 *
 * Emits the indirect address computation: a 16-bit pointer is read from the
 * direct-page / stack-relative address, Y is added when offsetReg == ArgY,
 * then ShiftedDB is added. */
static void EmitAddrIndirect(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
{
    enum ARMReg rd = read ? RegA1 : RegA2;
    enum ARMReg scratch = read ?
RegA2 : RegA1;
    enum SNESRegArg ZPArg = ArgNULL;

    /* X indexes the pointer location; stack-relative modes use S as base. */
    if (offsetReg == ArgX)
        ZPArg = ArgX;
    else if (IS_VADDRMODE(IndirectS))
        ZPArg = ArgS;

    /* Always a read: the pointer itself is fetched from this address. */
    EmitAddrZeroPage(true, in, ZPArg, dpc, dcycles);

    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
    GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));

    if (rd != RegA1)
        GenARM_MOV_Reg(CondAL, rd, 0, RegA1);

    if (offsetReg == ArgY)
    {
        GenARM_ADD_Reg(CondAL, rd, rd, 0, RegY);
    }

    if (IS_VADDRMODE(IndirectS))
        *dcycles += ONE_CYCLE;

    GenARM_LDR_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(ShiftedDB));
    GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch);

    if (read)
    {
        /* Set OpenBus to last byte fetched */
        GenARM_MOV_Reg(CondAL, RegA4, ENCODE_SHIFT(ShiftLSR, 8), rd);
        GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
    }
}

/* Stores result in A1 for reads, A2 for writes, because that's where
 * S9XRead / S9xWrite look for address. Writes over A4.
 *
 * Emits the indirect-long address computation: a 16-bit pointer and then a
 * bank byte are read starting at the direct-page address; Y is added to the
 * 16-bit part when offsetReg == ArgY; the result is pointer + (bank << 16). */
static void EmitAddrIndirectFar(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
{
    enum ARMReg rd = read ? RegA1 : RegA2;

    EmitAddrZeroPage(true, in, ArgNULL, dpc, dcycles);

    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
    GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
    /* GetFast preserves A4 */
    GenARM_MOV_Reg(CondAL, RegA4, 0, RegA1);
    /* NOTE(review): presumably A2 holds the address just read after the call,
     * so A2 + 2 is the bank byte's address - confirm against the helper. */
    GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(2, 0));
    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
    GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));

    if (offsetReg == ArgY)
    {
        GenARM_ADD_Reg(CondAL, RegA4, RegA4, 0, RegY);
    }

    if (read)
    {
        /* Set OpenBus to last byte fetched */
        GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OPEN_BUS_OFFSET);
    }

    /* rd = A4 + (A1 << 16) */
    GenARM_ADD_Reg(CondAL, rd, RegA4, ENCODE_SHIFT(ShiftLSL, 16), RegA1);
}

/*
 * Dispatches on in->AddrMode to the matching EmitAddr* helper, which emits
 * the effective-address computation. Address modes not listed emit nothing.
 */
static void EmitGetAddress(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
{
    switch(in->AddrMode)
    {
    case AddrZeroPage8:
    case AddrZeroPage16:
        EmitAddrZeroPage(read, in, offsetReg, dpc, dcycles);
        break;
    case AddrAbsolute8:
    case AddrAbsolute16:
        EmitAddrAbsolute(read, in, offsetReg, dpc, dcycles);
        break;
    case AddrLong8:
    case AddrLong16:
EmitAddrLong(read, in, offsetReg, dpc, dcycles); break; case AddrIndirect8: case AddrIndirect16: case AddrIndirectX8: case AddrIndirectX16: case AddrIndirectY8: case AddrIndirectY16: case AddrIndirectS8: case AddrIndirectS16: EmitAddrIndirect(read, in, offsetReg, dpc, dcycles); break; case AddrIndirectFar8: case AddrIndirectFar16: EmitAddrIndirectFar(read, in, offsetReg, dpc, dcycles); break; default: break; } } /* Returns loaded register, leaves address in RegA2 for storing */ static enum ARMReg EmitLoadMemRegister(Instruction *in, enum ARMReg RegMEM, bool n8, int *dpc, int *dcycles) { EmitGetAddress(true, in, in->Arg2, dpc, dcycles); GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); if (n8) GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); else GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); GenARM_MOV_Reg(CondAL, RegMEM, 0, RegA1); return RegMEM; } /* Assumes address is in RegA2 */ static void EmitStoreMemRegister(enum ARMReg RegMEM, bool n8) { GenARM_MOV_Reg(CondAL, RegA1, 0, RegMEM); if (n8) GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); else GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast)); } /* Expects Z to be set in ARM flags, HS to be set in ARM if CarryHS is true (SBC, CMP) */ static void EmitSetFlags(enum ARMReg reg, uint32_t flags, bool n8, bool CarryHS) { if (flags == 0) return; GenARM_BIC_Imm(CondAL, RegP, RegP, flags); if (flags & Zero) GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero); if ((flags & Carry) && CarryHS) GenARM_ORR_Imm(CondHS, RegP, RegP, Carry); if (n8) { if (flags & Negative) { GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(2, 6)); GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); } if ((flags & Carry) && !CarryHS) { GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(1, 8)); GenARM_ORR_Imm(CondNE, RegP, RegP, Carry); } } else { if (flags & Negative) { GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(2, 14)); GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); } if ((flags & Carry) && !CarryHS) { GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(1, 16)); GenARM_ORR_Imm(CondNE, RegP, RegP, Carry); } } } static void 
EmitLD(enum ARMReg reg, bool n8, int *dpc, int *dcycles)
{
    /* Emits a byte or word fetch through S9xGetByteFast / S9xGetWordFast and
     * a flag-setting move of the result from R0 into `reg`. The pending
     * PC / cycle deltas are flushed before the call. The caller is expected
     * to have emitted the address computation already. */

    /* Update cycles / cpu for slow lookup */
    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
    EmitFlushCyclesPC(dpc, dcycles);

    if (n8)
    {
        GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
        GenARM_MOVBS_Reg(CondAL, reg, RegR0, RegR0);
    }
    else
    {
        GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
        GenARM_MOVS_Reg(CondAL, reg, 0, RegR0);
    }
}

/*
 * Emits a move of the value to store into A1 (RegA, RegX, RegY, or the
 * constant 0 for ArgZ; nothing for other arguments) followed by a byte or
 * word store through S9xSetByteFast / S9xSetWordFast. The caller is expected
 * to have emitted the address computation already.
 */
static void EmitST(enum SNESRegArg reg, bool n8)
{
    switch(reg)
    {
    case ArgA:
        GenARM_MOV_Reg(CondAL, RegA1, 0, RegA);
        break;
    case ArgX:
        GenARM_MOV_Reg(CondAL, RegA1, 0, RegX);
        break;
    case ArgY:
        GenARM_MOV_Reg(CondAL, RegA1, 0, RegY);
        break;
    case ArgZ:
        GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0));
        break;
    default:
        break;
    }

    if (n8)
    {
        GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
    }
    else
    {
        GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast));
    }
}

/*
 * Emits a conditional 8-bit relative branch on one bit of RegP.
 *
 * flag - the P bit to test
 * set  - true when the branch is taken with the bit set, false when it is
 *        taken with the bit clear
 *
 * The not-taken path leaves through JumpDirectChecks. The taken path charges
 * one extra cycle, adjusts RegCPUPC by the displacement, optionally calls the
 * shutdown handler, emits the event checks and leaves through JumpDirect.
 */
static void EmitBranch(Instruction *in, uint32_t flag, bool set, int *dpc, int *dcycles)
{
    int32_t disp = EmitLoadRelative(in, true, dpc, dcycles);

    EmitFlushCyclesPC(dpc, dcycles);

    /* Interpreter runs BranchCheck here. Only when APU is disabled
     * until next reset. So cost of the load seems not worth it, unless
     * games break.
*/
    GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(flag, 0));

    /* If condition doesn't match, jump to next instruction */
    if (set)
        GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks));
    else
        GenARM_BL(CondNE, PCOFFSET(JumpDirectChecks));

    /* Taken branch costs one more cycle. */
    GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0));

    if (Settings.Shutdown)
    {
        /* Fetch WaitAddress now; it is compared with the target PC below. */
        GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress));
    }

    /* Apply the displacement one byte at a time (low byte, then bits 8..15). */
    if (disp > 0)
    {
        if (disp & 0xFF)
            GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
        if (disp & 0xFF00)
            GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
    }
    else if (disp < 0)
    {
        disp = -disp;
        if (disp & 0xFF)
            GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
        if (disp & 0xFF00)
            GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
    }

    if (Settings.Shutdown)
    {
        /* Branching to WaitAddress: call the shutdown handler. */
        GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC);
        GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown));
    }

    EmitChecks(0);
    GenARM_BL(CondAL, PCOFFSET(JumpDirect));
}

static void EmitNativeCall(Instruction *in)
{
    int dpc = 1;
    int dcycles = CPU.MemSpeed;
    bool didBreak = false;
    bool n8;

    switch(in->Op)
    {
    CASE_VOP(MOV):
    CASE_VOP(MOVSP):
    {
        enum ARMReg RegSRC;
        enum ARMReg RegDST;

        n8 = (in->Op == FnMOV8) || (in->Op == FnMOVSP8);
        dcycles += ONE_CYCLE;

        RegDST = EmitLoadRegister(in, in->Arg1, RegA2, false);
        RegSRC = EmitLoadRegister(in, in->Arg2, RegA3, true);
        EmitFlushCyclesPC(&dpc, &dcycles);

        if (n8)
        {
            GenARM_MOVBS_Reg(CondAL, RegDST, RegA1, RegSRC);
        }
        else
        {
            GenARM_UXTHS_Reg(CondAL, RegDST, RegSRC);
        }

        EmitSetFlags(RegDST, in->OutFlags & (Zero | Negative), n8, false);
        EmitStoreRegister(in, in->Arg1, RegDST, n8);
        EmitChecks(0);
        break;
    }
    CASE_VOP(LD):
    {
        enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true);
        n8 = in->Op == FnLD8;

        if (IS_VADDRMODE(Immediate))
        {
            uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles);
            EmitFlushCyclesPC(&dpc, &dcycles);

            if (n8)
            {
                GenARM_BIC_Imm(CondAL, RegDST, RegDST, ENCODE_IMM(0xFF, 0));
                GenARM_ORR_Imm(CondAL, RegDST, RegDST,
ENCODE_IMM(literal, 0)); if (in->OutFlags & Zero) GenARM_ANDS_Imm(CondAL, RegA1, RegDST, ENCODE_IMM(literal, 0)); } else { GenARM_MOV32_Imm(CondAL, RegDST, literal); if (in->OutFlags & Zero) GenARM_ANDS_Reg(CondAL, RegDST, RegDST, 0, RegDST); } } else { EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); EmitLD(RegDST, n8, &dpc, &dcycles); } EmitSetFlags(RegDST, in->OutFlags & (Zero | Negative), n8, false); EmitChecks(0); break; } CASE_VOP(ST): { n8 = in->Op == FnST8; EmitGetAddress(false, in, in->Arg2, &dpc, &dcycles); EmitFlushCyclesPC(&dpc, &dcycles); EmitST(in->Arg1, n8); EmitChecks(0); break; } CASE_VOP(PUSH): { enum ARMReg RegSRC; n8 = in->Op == FnPUSH8; if (in->Arg1 == ArgNULL) { RegSRC = EmitLoadMemRegister(in, RegA1, n8, &dpc, &dcycles); } else { RegSRC = EmitLoadRegister(in, in->Arg1, RegA1, true); dcycles += ONE_CYCLE; } GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); if (RegSRC != RegA1) GenARM_MOV_Reg(CondAL, RegA1, 0, RegSRC); EmitFlushCyclesPC(&dpc, &dcycles); EmitPush(in, n8); GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); EmitChecks(0); break; } CASE_OP(PEA): { uint16_t literal = EmitLoadLiteral(in, false, &dpc, &dcycles); GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); GenARM_MOV32_Imm(CondAL, RegA1, literal); EmitFlushCyclesPC(&dpc, &dcycles); EmitPush(in, false); GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); EmitChecks(0); break; } CASE_OP(PER): { uint16_t literal = (in->PC + 3 - CPU.PCBase + in->PC[1] + (in->PC[2] << 8)) & 0xffff; dcycles += CPU.MemSpeedx2 + ONE_CYCLE; dpc += 2; GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); GenARM_MOV32_Imm(CondAL, RegA1, literal); EmitFlushCyclesPC(&dpc, &dcycles); EmitPush(in, false); GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); EmitChecks(0); break; } CASE_VOP(POP): { n8 = in->Op == FnPOP8; dcycles += TWO_CYCLES; GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); EmitPop(in, n8); 
EmitFlushCyclesPC(&dpc, &dcycles); if (in->OutFlags & Zero) GenARM_MOVS_Reg(CondAL, RegA1, 0, RegA1); /* Set ZF on A1 */ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false); EmitStoreRegister(in, in->Arg1, RegA1, n8); EmitChecks(0); break; } CASE_OP(MVP): CASE_OP(MVN): { uint8_t *start; dpc += 2; dcycles += CPU.MemSpeedx2 + TWO_CYCLES; /* Load DD */ /* Set DB / ShiftedDB */ GenARM_MOV_Imm(CondAL, RegA3, ENCODE_IMM(in->PC[1], 0)); GenARM_STRB_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(Registers.DB)); GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), RegA3); GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedDB)); /* A3 = DD << 16 */ /* Load SS */ /* Set OpenBus to SS */ GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0)); GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET); /* A4 = SS << 16 */ start = CachePtr; GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(in->PC[2], 16)); GenARM_ORR_Reg(CondAL, RegA1, RegA1, 0, RegX); /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); /* SetByte -- A1 val, A2 addr */ GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[1], 16)); GenARM_ORR_Reg(CondAL, RegA2, RegA2, 0, RegY); GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); if (in->Op == FnMVN) { if (CheckIndex()) { GenARM_ADD_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); GenARM_MOVB_Reg(CondAL, RegX, RegA1); GenARM_ADD_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); GenARM_MOVB_Reg(CondAL, RegY, RegA1); } else { GenARM_ADD_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); GenARM_UXTH_Reg(CondAL, RegX, RegA1); GenARM_ADD_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); GenARM_UXTH_Reg(CondAL, RegY, RegA1); } } else { if (CheckIndex()) { GenARM_SUB_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); GenARM_MOVB_Reg(CondAL, RegX, RegA1); GenARM_SUB_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); 
GenARM_MOVB_Reg(CondAL, RegY, RegA1); } else { GenARM_SUB_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); GenARM_UXTH_Reg(CondAL, RegX, RegA1); GenARM_SUB_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); GenARM_UXTH_Reg(CondAL, RegY, RegA1); } } GenARM_CMP_Imm(CondAL, RegA, ENCODE_IMM(0, 0)); GenARM_SUB_Imm(CondAL, RegA1, RegA, ENCODE_IMM(1, 0)); GenARM_UXTH_Reg(CondAL, RegA, RegA1); GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks)); GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(dpc, 0)); EmitChecks(0); GenARM_B(CondAL, PCOFFSET(start)); didBreak = true; break; } CASE_VOP(OR): { n8 = in->Op == FnOR8; if (IS_VADDRMODE(Immediate)) { uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_ORR_Imm(CondAL, RegA, RegA, ENCODE_IMM(literal, 0)); GenARM_ANDS_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0)); EmitSetFlags(RegA2, in->OutFlags & (Zero | Negative), n8, false); } else { GenARM_MOV32_Imm(CondAL, RegR0, literal); GenARM_ORRS_Reg(CondAL, RegA, RegA, 0, RegR0); EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); } } else { EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegR0); GenARM_ANDS_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0)); EmitSetFlags(RegA2, in->OutFlags & (Zero | Negative), n8, false); } else { GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); GenARM_ORRS_Reg(CondAL, RegA, RegA, 0, RegR0); EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); } } EmitChecks(0); break; } CASE_VOP(AND): { n8 = in->Op == FnAND8; if (IS_VADDRMODE(Immediate)) { uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_ANDS_Imm(CondAL, RegA1, RegA, ENCODE_IMM(literal, 0)); GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); 
GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); } else { GenARM_MOV32_Imm(CondAL, RegA1, literal); GenARM_ANDS_Reg(CondAL, RegA, RegA, 0, RegA1); } } else { EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); GenARM_ANDS_Reg(CondAL, RegA1, RegA, 0, RegA1); GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); } else { GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); GenARM_ANDS_Reg(CondAL, RegA, RegA, 0, RegA1); } } EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); EmitChecks(0); break; } CASE_VOP(EOR): { n8 = in->Op == FnEOR8; if (IS_VADDRMODE(Immediate)) { uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_AND_Imm(CondAL, RegA1, RegA, ENCODE_IMM(0xFF, 0)); GenARM_EORS_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(literal, 0)); GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); } else { GenARM_MOV32_Imm(CondAL, RegA1, literal); GenARM_EORS_Reg(CondAL, RegA, RegA, 0, RegA1); } } else { EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); GenARM_AND_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0)); GenARM_EORS_Reg(CondAL, RegA1, RegA1, 0, RegA2); GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); } else { GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); GenARM_EORS_Reg(CondAL, RegA, RegA, 0, RegA1); } } EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); EmitChecks(0); break; } CASE_VOP(ADC): { enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true); n8 = in->Op == FnADC8; if 
(IS_VADDRMODE(Immediate)) { uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); EmitFlushCyclesPC(&dpc, &dcycles); GenARM_MOV32_Imm(CondAL, RegA1, literal); } else { EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); else GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); } GenARM_AND_Imm(CondAL, RegA3, RegP, ENCODE_IMM(Carry, 0)); if (CheckDecimal()) { uint32_t shift; for (shift = 0; shift < (n8 ? 8 : 16) ; shift += 4) { GenARM_AND_Imm(CondAL, RegA4, RegA1, ENCODE_IMM(0xF, shift)); GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); GenARM_AND_Imm(CondAL, RegA4, RegDST, ENCODE_IMM(0xF, shift)); GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(0xA, shift)); GenARM_ADD_Imm(CondHS, RegA3, RegA3, ENCODE_IMM(0x6, shift)); } if (n8) { GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); } else { GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); } } else { GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA1); if (n8) { GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA2); GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); } else { GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegDST); GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); } } EmitSetFlags(RegA3, in->OutFlags & (Zero | Negative | Carry | Overflow), n8, false); if (in->OutFlags & Overflow) { /* Calculate overflow flag */ if (CheckDecimal()) { GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, n8 ? 8 : 16)); GenARM_SUB_Imm(CondGE, RegA3, RegA3, ENCODE_IMM(0x60, n8 ? 
0 : 8)); } GenARM_EOR_Reg(CondAL, RegA1, RegA2, 0, RegA1); GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1); GenARM_EOR_Reg(CondAL, RegA2, RegA3, 0, RegA2); GenARM_AND_Reg(CondAL, RegA1, RegA1, 0, RegA2); if (n8) { GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0)); } else { GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8)); } GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Overflow, 0)); } EmitChecks(0); break; } CASE_VOP(SBC): { enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true); n8 = in->Op == FnSBC8; if (IS_VADDRMODE(Immediate)) { uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); EmitFlushCyclesPC(&dpc, &dcycles); GenARM_MOV32_Imm(CondAL, RegA1, literal); } else { EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); else GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); } GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1); if (n8) GenARM_UXTB_Reg(CondAL, RegA1, RegA1); else GenARM_UXTH_Reg(CondAL, RegA1, RegA1); GenARM_AND_Imm(CondAL, RegA3, RegP, ENCODE_IMM(Carry, 0)); if (CheckDecimal()) { uint32_t shift; for (shift = 0; shift < (n8 ? 
8 : 16) ; shift += 4) { GenARM_AND_Imm(CondAL, RegA4, RegA1, ENCODE_IMM(0xF, shift)); GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); GenARM_AND_Imm(CondAL, RegA4, RegDST, ENCODE_IMM(0xF, shift)); GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(0x10, shift)); GenARM_SUB_Imm(CondLO, RegA3, RegA3, ENCODE_IMM(0x6, shift)); } if (n8) { GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); } else { GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); } } else { GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA1); if (n8) { GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA2); GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); } else { GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegDST); GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); } } EmitSetFlags(RegA3, in->OutFlags & (Zero | Negative | Overflow), n8, false); if (in->OutFlags & Carry) { GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0)); if (n8) GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, 8)); else GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, 16)); GenARM_ORR_Imm(CondGE, RegP, RegP, ENCODE_IMM(Carry, 0)); } if (in->OutFlags & Overflow) { /* Calculate overflow flag */ GenARM_EOR_Reg(CondAL, RegA1, RegA2, 0, RegA1); GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1); GenARM_EOR_Reg(CondAL, RegA2, RegA3, 0, RegA2); GenARM_AND_Reg(CondAL, RegA1, RegA1, 0, RegA2); if (n8) { GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0)); } else { GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8)); } GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Overflow, 0)); } EmitChecks(0); break; } CASE_VOP(CMP): { enum ARMReg RegW = EmitLoadRegister(in, in->Arg1, RegR0, true); n8 = in->Op == FnCMP8; if (IS_VADDRMODE(Immediate)) { uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { 
GenARM_AND_Imm(CondAL, RegR1, RegW, ENCODE_IMM(0xFF, 0)); GenARM_SUBS_Imm(CondAL, RegR0, RegR1, ENCODE_IMM(literal, 0)); } else { GenARM_MOV32_Imm(CondAL, RegR0, literal); GenARM_SUBS_Reg(CondAL, RegR0, RegW, 0, RegR0); } } else { EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); GenARM_AND_Imm(CondAL, RegR1, RegW, ENCODE_IMM(0xFF, 0)); GenARM_SUBS_Reg(CondAL, RegR0, RegR1, 0, RegR0); } else { GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); GenARM_SUBS_Reg(CondAL, RegR0, RegW, 0, RegR0); } } EmitSetFlags(RegR0, in->OutFlags & (Carry | Zero | Negative), n8, true); EmitChecks(0); break; } CASE_VOP(BIT): { n8 = in->Op == FnBIT8; if (IS_VADDRMODE(Immediate)) { uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_TST_Imm(CondAL, RegA, ENCODE_IMM(literal, 0)); } else { GenARM_MOV32_Imm(CondAL, RegA1, literal); GenARM_TST_Reg(CondAL, RegA, 0, RegA1); } EmitSetFlags(RegA, in->OutFlags & (Zero), n8, false); } else { uint32_t flags; EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); GenARM_TST_Reg(CondAL, RegA, 0, RegA1); } else { GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); GenARM_TST_Reg(CondAL, RegA, 0, RegA1); } flags = in->OutFlags & (Zero | Negative | Overflow); if (flags == 0) return; GenARM_BIC_Imm(CondAL, RegP, RegP, flags); if (flags & Zero) GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero); if (n8) { if (flags & Negative) { GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0)); GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); } if ((flags & Overflow)) { GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x40, 0)); GenARM_ORR_Imm(CondNE, RegP, RegP, Overflow); } } else { if (flags & 
Negative) { GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8)); GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); } if ((flags & Overflow)) { GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x40, 8)); GenARM_ORR_Imm(CondNE, RegP, RegP, Overflow); } } } EmitChecks(0); break; } CASE_VOP(INC): CASE_VOP(DEC): { enum ARMReg RegW; n8 = in->Op == FnINC8 || in->Op == FnDEC8; if (in->Arg1 == ArgNULL) RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); else RegW = EmitLoadRegister(in, in->Arg1, RegA2, true); /* Add cycles */ dcycles += ONE_CYCLE; /* Clear WaitAddress */ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_SHIFT(0, 0)); GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); EmitFlushCyclesPC(&dpc, &dcycles); if (IS_VOP(INC)) GenARM_ADD_Imm(CondAL, RegA1, RegW, ENCODE_IMM(1, 0)); else GenARM_SUB_Imm(CondAL, RegA1, RegW, ENCODE_IMM(1, 0)); if (n8) { GenARM_MOVBS_Reg(CondAL, RegW, RegA1, RegA1); } else { GenARM_UXTHS_Reg(CondAL, RegW, RegA1); } EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false); if (in->Arg1 == ArgNULL) EmitStoreMemRegister(RegW, n8); EmitChecks(0); break; } CASE_VOP(TSB): CASE_VOP(TRB): { n8 = in->Op == FnTSB8 || in->Op == FnTRB8; EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); dcycles += ONE_CYCLE; /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); GenARM_AND_Imm(CondAL, RegA4, RegA, ENCODE_IMM(0xFF, 0)); GenARM_TST_Reg(CondAL, RegA4, 0, RegA1); EmitSetFlags(RegA4, in->OutFlags & (Zero), n8, false); if (IS_VOP(TSB)) { GenARM_ORR_Reg(CondAL, RegA1, RegA4, 0, RegA1); } else { GenARM_MVN_Reg(CondAL, RegA4, 0, RegA4); GenARM_AND_Reg(CondAL, RegA1, RegA4, 0, RegA1); } GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); } else { GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); GenARM_TST_Reg(CondAL, RegA, 0, RegA1); EmitSetFlags(RegA, in->OutFlags & (Zero), n8, false); if (IS_VOP(TSB)) { GenARM_ORR_Reg(CondAL, RegA1, RegA, 0, 
RegA1); } else { GenARM_MVN_Reg(CondAL, RegA4, 0, RegA); GenARM_AND_Reg(CondAL, RegA1, RegA4, 0, RegA1); } GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast)); } EmitChecks(0); break; } CASE_VOP(ASL): { enum ARMReg RegW; n8 = in->Op == FnASL8; if (in->Arg1 == ArgNULL) RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); else RegW = EmitLoadRegister(in, in->Arg1, RegA4, true); /* Add cycles */ dcycles += ONE_CYCLE; EmitFlushCyclesPC(&dpc, &dcycles); GenARM_MOV_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSL, 1), RegW); if (n8) { GenARM_MOVBS_Reg(CondAL, RegW, RegA4, RegA1); } else { GenARM_UXTHS_Reg(CondAL, RegW, RegA1); } EmitSetFlags(RegA1, in->OutFlags & (Carry | Zero | Negative), CheckMemory(), false); if (in->Arg1 == ArgNULL) EmitStoreMemRegister(RegW, n8); EmitChecks(0); break; } CASE_VOP(LSR): { enum ARMReg RegW; n8 = in->Op == FnLSR8; if (in->Arg1 == ArgNULL) RegW = EmitLoadMemRegister(in, RegA3, n8, &dpc, &dcycles); else RegW = EmitLoadRegister(in, in->Arg1, RegA3, true); /* Add cycles */ dcycles += ONE_CYCLE; EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_AND_Imm(CondAL, RegA1, RegW, ENCODE_IMM(0xFF, 0)); GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 1), RegA1); GenARM_BIC_Imm(CondAL, RegW, RegW, ENCODE_IMM(0xFF, 0)); GenARM_ORR_Reg(CondAL, RegW, RegW, 0, RegA1); } else { GenARM_MOVS_Reg(CondAL, RegW, ENCODE_SHIFT(ShiftLSR, 1), RegW); } /* Flags easier to set inline here */ GenARM_BIC_Imm(CondAL, RegP, RegP, in->OutFlags & (Carry | Zero | Negative)); /* Negative is always unset */ if (in->OutFlags & Zero) GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero); if (in->OutFlags & Carry) GenARM_ORR_Imm(CondCS, RegP, RegP, Carry); if (in->Arg1 == ArgNULL) EmitStoreMemRegister(RegW, n8); EmitChecks(0); break; } CASE_VOP(ROL): { enum ARMReg RegW; n8 = in->Op == FnROL8; if (in->Arg1 == ArgNULL) RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); else RegW = EmitLoadRegister(in, in->Arg1, RegA4, true); /* Add cycles */ dcycles += ONE_CYCLE; EmitFlushCyclesPC(&dpc, 
&dcycles); GenARM_MOV_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSL, 1), RegW); GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); GenARM_ORR_Imm(CondNE, RegA1, RegA1, ENCODE_IMM(1, 0)); if (in->OutFlags & Carry) { GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0)); GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(1, n8 ? 8 : 16)); GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Carry, 0)); } if (n8) { GenARM_MOVBS_Reg(CondAL, RegW, RegA4, RegA1); } else { GenARM_UXTHS_Reg(CondAL, RegW, RegA1); } EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false); if (in->Arg1 == ArgNULL) EmitStoreMemRegister(RegW, n8); EmitChecks(0); break; } CASE_VOP(ROR): { enum ARMReg RegW; n8 = in->Op == FnROR8; if (in->Arg1 == ArgNULL) RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); else RegW = EmitLoadRegister(in, in->Arg1, RegA4, true); /* Add cycles */ dcycles += ONE_CYCLE; EmitFlushCyclesPC(&dpc, &dcycles); if (n8) { GenARM_AND_Imm(CondAL, RegA1, RegW, ENCODE_IMM(0xFF, 0)); GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); GenARM_ORR_Imm(CondNE, RegA1, RegA1, ENCODE_IMM(1, 8)); GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 1), RegA1); GenARM_MOVB_Reg(CondAL, RegW, RegA1); } else { GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); GenARM_ORR_Imm(CondNE, RegW, RegW, ENCODE_IMM(1, 16)); GenARM_MOVS_Reg(CondAL, RegW, ENCODE_SHIFT(ShiftLSR, 1), RegW); } if (in->OutFlags & Carry) { GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0)); GenARM_ORR_Imm(CondCS, RegP, RegP, ENCODE_IMM(Carry, 0)); } EmitSetFlags(n8 ? 
RegA1 : RegW, in->OutFlags & (Zero | Negative), n8, false); if (in->Arg1 == ArgNULL) EmitStoreMemRegister(RegW, n8); EmitChecks(0); break; } CASE_OP(BRA): CASE_OP(BRL): { int32_t disp; n8 = in->Op == FnBRA; disp = EmitLoadRelative(in, n8, &dpc, &dcycles); dcycles += ONE_CYCLE; EmitFlushCyclesPC(&dpc, &dcycles); if (in->Op == FnBRA && Settings.Shutdown) { GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); } if (disp > 0) { if (disp & 0xFF) GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); if (disp & 0xFF00) GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); } else if (disp < 0) { disp = -disp; if (disp & 0xFF) GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); if (disp & 0xFF00) GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); } if (in->Op == FnBRA && Settings.Shutdown) { GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); } EmitChecks(0); GenARM_BL(CondAL, PCOFFSET(JumpDirect)); didBreak = true; break; } CASE_OP(JMP): CASE_OP(JML): { switch(in->AddrMode) { case AddrImmediate16: dcycles += CPU.MemSpeedx2; GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8) | ICPU.ShiftedPB); /* Only flush cycles because PC will change */ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); break; case AddrImmediate24: dcycles += CPU.MemSpeedx2 + CPU.MemSpeed; GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8)); GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[3], 0)); EmitStoreRegister(in, ArgPB, RegA2, true); GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(in->PC[3], 16)); /* Only flush cycles because PC will change */ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); break; case AddrAbsolute16: dcycles += CPU.MemSpeedx2; dpc += 2; GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8)); if (in->Arg2 == ArgX) { GenARM_ADD_Reg(CondAL, RegA1, RegA1, 0, RegX); 
GenARM_BIC_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(0xFF, 16)); GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); dcycles += ONE_CYCLE; } /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); if (in->Op == FnJMP) { GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); } else { GenARM_MOV_Reg(CondAL, RegA4, 0, RegA1); GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(2, 0)); GenARM_SUB_Imm(CondAL, RegA2, RegCPUPC, ENCODE_IMM(3, 0)); GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1); GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); GenARM_ORR_Reg(CondAL, RegA1, RegA4, 0, RegA2); } break; default: break; } GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); if (in->AddrMode == AddrImmediate24 || in->Op == FnJML) GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); if (in->AddrMode == AddrImmediate16 && Settings.Shutdown) { GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); } EmitChecks(0); if (in->AddrMode == AddrImmediate16 || in->AddrMode == AddrImmediate24) GenARM_BL(CondAL, PCOFFSET(JumpDirect)); else GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); didBreak = true; break; } CASE_OP(JSR): CASE_OP(JSL): { if (in->Op == FnJSL) { GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(ICPU.Registers.PB, 0)); GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); dcycles += CPU.MemSpeed * 3; dpc += 3; /* Only flush cycles because PC will change */ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); EmitPush(in, true); } else { dcycles += CPU.MemSpeedx2 + ONE_CYCLE; dpc += 2; /* Only flush cycles because PC will change */ 
GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); } GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(PCBase)); GenARM_ADD_Imm(CondAL, RegA3, RegCPUPC, ENCODE_IMM(dpc - 1, 0)); GenARM_SUB_Reg(CondAL, RegA1, RegA3, 0, RegA1); EmitPush(in, false); /* Load new PC */ if (in->Op == FnJSL) { GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[3], 0)); GenARM_STRB_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(Registers.PB)); GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA2); GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] + (in->PC[2] << 8) + (in->PC[3] << 16)); } else if (in->Arg2 == ArgNULL) { GenARM_MOV32_Imm(CondAL, RegA2, in->PC[1] + (in->PC[2] << 8)); GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(ICPU.Registers.PB, 16)); } else { GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8)); /* Set OpenBus to PC[2] */ GenARM_ADD_Reg(CondAL, RegA1, RegA1, 0, RegX); GenARM_BIC_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(0xFF, 16)); GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); /* Update cycles / cpu for slow lookup */ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); EmitFlushCyclesPC(&dpc, &dcycles); GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); } GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); if (in->Op == FnJSL) GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); EmitChecks(0); if (in->Arg2 == ArgNULL) GenARM_BL(CondAL, PCOFFSET(JumpDirect)); else GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); didBreak = true; break; } CASE_OP(RTI): { uint8_t *branch1; dcycles += ONE_CYCLE * 2; GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); 
EmitPop(in, true); GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); EmitStoreRegister(in, ArgP, RegA1, false); GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); EmitPop(in, false); GenARM_UXTH_Reg(CondAL, RegCPUPC, RegA1); /* RegCPU will be overwritten later */ /* Ignore pop PB if Emulation is set */ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Emulation >> 8, 8)); branch1 = CachePtr; GenARM_B(CondNE, PCOFFSET(branch1)); EmitPop(in, true); GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1); GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); PatchJump((uint32_t *)branch1, CachePtr); GenARM_LDR_Imm(CondNE, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); GenARM_ADD_Reg(CondAL, RegA1, RegCPUPC, 0, RegA2); GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); EmitChecks(0); GenARM_B(CondAL, PCOFFSET(JumpIndirect)); didBreak = true; break; } CASE_OP(RTL): CASE_OP(RTS): { dcycles += ONE_CYCLE * 2; if (in->Op == FnRTS) dcycles += ONE_CYCLE; GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); EmitPop(in, false); GenARM_ADD_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(1, 0)); GenARM_UXTH_Reg(CondAL, RegCPUPC, RegA1); /* No need to preserve RegCPU. 
It will be overwritten later, and this should never be a WaitAddress */ if (in->Op == FnRTL) { EmitPop(in, true); GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1); GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); GenARM_ADD_Reg(CondAL, RegA1, RegCPUPC, 0, RegA2); } else { GenARM_ADD_Imm(CondAL, RegA1, RegCPUPC, ENCODE_IMM(ICPU.Registers.PB, 16)); } GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); if (in->Op == FnRTL) GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); EmitChecks(0); GenARM_B(CondAL, PCOFFSET(JumpIndirect)); didBreak = true; break; } CASE_OP(BPL): EmitBranch(in, Negative, false, &dpc, &dcycles); didBreak = true; break; CASE_OP(BMI): EmitBranch(in, Negative, true, &dpc, &dcycles); didBreak = true; break; CASE_OP(BVC): EmitBranch(in, Overflow, false, &dpc, &dcycles); didBreak = true; break; CASE_OP(BVS): EmitBranch(in, Overflow, true, &dpc, &dcycles); didBreak = true; break; CASE_OP(BCC): EmitBranch(in, Carry, false, &dpc, &dcycles); didBreak = true; break; CASE_OP(BCS): EmitBranch(in, Carry, true, &dpc, &dcycles); didBreak = true; break; CASE_OP(BZC): EmitBranch(in, Zero, false, &dpc, &dcycles); didBreak = true; break; CASE_OP(BZS): EmitBranch(in, Zero, true, &dpc, &dcycles); didBreak = true; break; CASE_OP(BRK): CASE_OP(COP): { uint32_t destPC; dcycles += ONE_CYCLE; GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); if (!CheckEmulation()) { GenARM_LDRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); EmitPush(in, true); dcycles += ONE_CYCLE; } EmitFlushCyclesPC(&dpc, &dcycles); GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(PCBase)); if (!CheckEmulation()) { GenARM_ADD_Imm(CondAL, RegA3, RegCPUPC, ENCODE_IMM(1, 0)); } GenARM_SUB_Reg(CondAL, RegA1, RegA3, 0, RegA1); EmitPush(in, false); GenARM_AND_Imm(CondAL, 
RegA1, RegP, ENCODE_IMM(0xFF, 0)); EmitPush(in, true); GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OPEN_BUS_OFFSET); GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Decimal, 0)); GenARM_ORR_Imm(CondAL, RegP, RegP, ENCODE_IMM(IRQ, 0)); GenARM_STRH_Imm(CondAL, RegP, RegCPU, ICPU_OFFSET(Registers.P)); GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0)); GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); GenARM_STR_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(ShiftedPB)); if (in->Op == FnBRK) destPC = CheckEmulation() ? 0xFFFE : 0xFFE6; else destPC = CheckEmulation() ? 0xFFF4 : 0xFFE4; GenARM_MOV32_Imm(CondAL, RegA1, destPC); GenARM_SUB_Imm(CondAL, RegA2, RegCPUPC, ENCODE_IMM(1, 0)); GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); EmitChecks(0); GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); didBreak = true; break; } CASE_OP(CLC): CASE_OP(CLI): CASE_OP(CLD): CASE_OP(CLV): CASE_OP(SEC): CASE_OP(SEI): CASE_OP(SED): { uint16_t flag; bool update = false; switch(in->Op) { CASE_OP(SEC): CASE_OP(CLC): flag = Carry; break; CASE_OP(SEI): CASE_OP(CLI): flag = IRQ; update = true; break; CASE_OP(SED): CASE_OP(CLD): flag = Decimal; update = true; break; CASE_OP(CLV): flag = Overflow; break; default: fprintf(stderr, "Invalid opcode: %X\n", in->Opcode); break; } dcycles += ONE_CYCLE; EmitFlushCyclesPC(&dpc, &dcycles); if (in->Op == FnCLC || in->Op == FnCLD || in->Op == FnCLI || in->Op == FnCLV) GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(flag, 0)); else GenARM_ORR_Imm(CondAL, RegP, RegP, ENCODE_IMM(flag, 0)); if (update) GenARM_STRH_Imm(CondAL, RegP, RegCPU, ICPU_OFFSET(Registers.P)); EmitChecks(0); if (in->SectionUpdate) GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); if (in->ShouldBreak) { GenARM_BL(CondAL, PCOFFSET(JumpDirect)); didBreak = true; } break; } 
CASE_OP(SEP): CASE_OP(REP): { dpc++; dcycles += CPU.MemSpeed + ONE_CYCLE; EmitFlushCyclesPC(&dpc, &dcycles); if (IS_OP(REP)) GenARM_BIC_Imm(CondAL, RegA1, RegP, ENCODE_IMM(in->PC[1], 0)); else GenARM_ORR_Imm(CondAL, RegA1, RegP, ENCODE_IMM(in->PC[1], 0)); EmitStoreRegister(in, ArgP, RegA1, false); EmitChecks(0); GenARM_BL(CondAL, PCOFFSET(JumpDirect)); didBreak = true; break; } CASE_OP(XCE): { dcycles += ONE_CYCLE; EmitFlushCyclesPC(&dpc, &dcycles); GenARM_BIC_Imm(CondAL, RegA2, RegP, ENCODE_IMM(Emulation >> 8, 8)); GenARM_BIC_Imm(CondAL, RegA2, RegA2, ENCODE_IMM(Carry, 0)); GenARM_ORR_Reg(CondAL, RegA2, RegA2, ENCODE_SHIFT(ShiftLSR, 8), RegP); GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); GenARM_ORR_Imm(CondNE, RegA2, RegA2, ENCODE_IMM(Emulation >> 8, 8)); EmitStoreRegister(in, ArgP, RegA2, false); EmitChecks(0); GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); didBreak = true; break; } CASE_OP(STP): { #ifdef NO_SPEEDHACKS dpc--; GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(Flags)); GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(DEBUG_MODE_FLAG, 0)); GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(Flags)); #else int8_t disp = (in->PC[1] & 0x7F) | ((in->PC[1] & 0x40) << 1); uint32_t pc = in->PC + 2 - CPU.PCBase; uint32_t target = (pc + disp) & 0xFFFF; bool overflow = target != (pc + disp); dpc++; if (overflow) disp = (target - pc); GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0)); GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); EmitFlushCyclesPC(&dpc, &dcycles); GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown)); /* Interpreter runs BranchCheck here. Only when APU is disabled * until next reset. So cost of the load seems not worth it, unless * games break. 
*/ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Zero, 0)); if (in->PC[1] & 0x80) GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks)); else GenARM_BL(CondNE, PCOFFSET(JumpDirectChecks)); GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0)); if (Settings.Shutdown) { GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); } if (disp > 0) { if (disp & 0xFF) GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); if (disp & 0xFF00) GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); } else if (disp < 0) { disp = -disp; if (disp & 0xFF) GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); if (disp & 0xFF00) GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); } if (Settings.Shutdown) { GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); } #endif EmitChecks(0); GenARM_BL(CondAL, PCOFFSET(JumpDirect)); didBreak = true; break; } CASE_OP(XBA): { dcycles += TWO_CYCLES; EmitFlushCyclesPC(&dpc, &dcycles); GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 8), RegA); GenARM_MOV_Reg(CondAL, RegA, ENCODE_SHIFT(ShiftLSL, 24), RegA); GenARM_ORR_Reg(CondAL, RegA, RegA1, ENCODE_SHIFT(ShiftLSR, 16), RegA); EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), true, false); EmitChecks(0); break; } CASE_OP(WAI): { uint8_t *loop; GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(1, 0)); GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitingForInterrupt)); dpc--; loop = CachePtr; EmitFlushCyclesPC(&dpc, &dcycles); if (Settings.Shutdown) { GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown)); } EmitChecks(CheckWFI); GenARM_B(CondAL, PCOFFSET(loop)); break; } CASE_OP(WDM): { #ifndef NO_SPEEDHACKS int8_t disp = 0xF0 | (in->PC[1] & 0xF); uint32_t pc = in->PC + 2 - CPU.PCBase; uint32_t target = (pc + disp) & 0xFFFF; bool overflow = target != (pc + disp); uint8_t flag; uint8_t skip; dpc++; if (overflow) disp = (target - pc); GenARM_MOV_Imm(CondAL, RegA1, 
ENCODE_IMM(0, 0)); GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); EmitFlushCyclesPC(&dpc, &dcycles); GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown)); switch(in->PC[1] & 0xF0) { case 0x10: /* BPL */ flag = Negative; skip = CondNE; break; case 0x30: /* BMI */ flag = Negative; skip = CondEQ; break; case 0x50: /* BVC */ flag = Overflow; skip = CondNE; break; case 0x70: /* BVS */ flag = Overflow; skip = CondEQ; break; case 0x80: /* BRA */ flag = 0; break; case 0x90: /* BCC */ flag = Carry; skip = CondNE; break; case 0xB0: /* BCS */ flag = Carry; skip = CondEQ; break; case 0xD0: /* BNE */ flag = Zero; skip = CondNE; break; case 0xF0: /* BEQ */ flag = Zero; skip = CondEQ; break; default: didBreak = true; break; } if (!didBreak) { /* Interpreter runs BranchCheck here. Only when APU is disabled * until next reset. So cost of the load seems not worth it, unless * games break. */ if (flag) { GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(flag, 0)); GenARM_BL(skip, PCOFFSET(JumpDirectChecks)); } GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0)); if (Settings.Shutdown) { GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); } if (disp > 0) { if (disp & 0xFF) GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); if (disp & 0xFF00) GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); } else if (disp < 0) { disp = -disp; if (disp & 0xFF) GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); if (disp & 0xFF00) GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); } if (Settings.Shutdown) { GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); } } #endif EmitChecks(0); GenARM_BL(CondAL, PCOFFSET(JumpDirect)); didBreak = true; break; } CASE_OP(NOP): { dcycles += ONE_CYCLE; EmitFlushCyclesPC(&dpc, &dcycles); EmitChecks(0); break; } } if (in->ShouldBreak && !didBreak) { GenARM_B(CondAL, PCOFFSET(JumpIndirect)); } } static 
void EmitS9xCall(Instruction *in) { /* CPU.PCAtOpcodeStart = CPU.PC; */ /* CPU.Cycles += CPU.MemSpeed; */ /* CPU.PC++; */ GenARM_STR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PCAtOpcodeStart)); GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(1, 0)); GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(CPU.MemSpeed, 0)); GenARM_BL(CondAL, PCOFFSET(S9xRegsToMem)); GenARM_BL(CondAL, PCOFFSET((uintptr_t)in->S9xOpcode)); if (in->SectionUpdate) GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); GenARM_BL(CondAL, PCOFFSET(S9xMemToRegs)); EmitChecks(0); if (in->ShouldBreak) { GenARM_B(CondAL, PCOFFSET(JumpIndirect)); } } static void EmitInterpreterLoop(Instruction *in) { /* CPU.PCAtOpcodeStart = CPU.PC; */ /* CPU.Cycles += CPU.MemSpeed; */ /* CPU.PC++; */ uint8_t *start = CachePtr; GenARM_LDRB_Imm(CondAL, RegA1, RegCPUPC, 0); GenARM_STR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PCAtOpcodeStart)); GenARM_LDR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(S9xOpcodes)); GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(1, 0)); GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(CPU.MemSpeed, 0)); GenARM_LDR_Reg(CondAL, RegA2, RegA2, 1, ENCODE_SHIFT(ShiftLSL, 2), RegA1); GenARM_BL(CondAL, PCOFFSET(S9xRegsToMem)); GenARM_BLX_Reg(CondAL, RegA2); GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); GenARM_BL(CondAL, PCOFFSET(S9xMemToRegs)); EmitChecks(0); GenARM_B(CondAL, PCOFFSET(start)); } static void EmitOne(Instruction *in) { #ifndef NDEBUG GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(in->Opcode, 0)); GenARM_MOV32_Imm(CondAL, RegA2, (uintptr_t)in->PC); GenARM_MOV_Imm(CondAL, RegA3, ENCODE_IMM(in->Emitter == EmitS9xCall, 0)); GenARM_BL(CondAL, PCOFFSET(S9xCallCheckInstruction)); #endif /* Call Opcode */ in->Emitter(in); } #define SizeNone 0 #define SizeRegister8 0 #define SizeRegister16 0 #define SizeImmediate8 1 #define SizeImmediate16 2 #define SizeImmediate24 3 #define SizeZeroPage8 1 #define SizeZeroPage16 1 #define SizeIndirect8 1 #define SizeIndirect16 1 #define 
SizeIndirectX8 1 #define SizeIndirectX16 1 #define SizeIndirectY8 1 #define SizeIndirectY16 1 #define SizeIndirectS8 1 #define SizeIndirectS16 1 #define SizeIndirectFar8 1 #define SizeIndirectFar16 1 #define SizeAbsolute8 2 #define SizeAbsolute16 2 #define SizeLong8 3 #define SizeLong16 3 #define OPSIZE(cond, t, f) cond ? Size##t : Size##f #define OPADDRMODE(cond, t, f) cond ? Addr##t : Addr##f #define OPFN(cond, t, f) cond ? Fn##t : Fn##f #define OP(opcode, addrmode, op, arg1, arg2, gen, used, flags) \ case 0x##opcode: pc += OPSIZE(addrmode); in.Emitter = EmitNativeCall; in.ShouldBreak = (flags) & OFBreak; in.SectionUpdate = (flags) & OFSectionUpdate; in.Op = OPFN(op); in.AddrMode = OPADDRMODE(addrmode); in.Arg1 = Arg##arg1; in.Arg2 = Arg##arg2; in.OutFlags = gen; in.InFlags = used; break; #define OPD(opcode, addrmode, op, arg1, arg2, gen, used, flags) \ case 0x##opcode: pc += OPSIZE(addrmode); in.Emitter = EmitS9xCall; in.S9xOpcode = ICPU.S9xOpcodes[0x##opcode].S9xOpcode; in.SectionUpdate = (flags) & OFSectionUpdate; in.ShouldBreak = (flags) & OFBreak; in.OutFlags = gen; in.InFlags = used; break; static size_t ParseInstructions(uint8_t *pc, Instruction *instructions, size_t max) { bool shouldBreak; int i; for (i = 0, shouldBreak = false; i < max && !shouldBreak; i++) { uint8_t opcode; Instruction in = {0}; in.PC = pc; #include "arm_dynarec/opgen.h" in.Opcode = opcode; instructions[i] = in; shouldBreak = in.ShouldBreak; } if (i > 0) instructions[i - 1].ShouldBreak = true; return i; } static void ScanInstructions(Instruction *instructions, size_t length) { uint16_t CurrentFlags = (Negative | Zero | Carry | Overflow); uint16_t i; if (length < 2) return; /* Dead flag elimination */ for (i = length - 1; i > 0; i--) { Instruction *in = &instructions[i - 1]; Instruction *next = &instructions[i]; CurrentFlags = CurrentFlags | next->InFlags; in->OutFlags = in->OutFlags & CurrentFlags; CurrentFlags = CurrentFlags & ~in->OutFlags; } } static void Emit(uint16_t pc, uint8_t 
**block) { uint8_t *start = CachePtr; uint8_t *StartPC = pc + CPU.PCBase; uint32_t address = pc | ICPU.ShiftedPB; int32_t MemoryBlock = (address >> MEMMAP_SHIFT) & MEMMAP_MASK; Instruction instructions[100]; size_t length; int i; if (Memory.BlockIsRAM[MemoryBlock]) { /* Code in RAM, for now just run interpreter until interrupt */ Instruction in = { 0 }; instructions[0] = in; EmitInterpreterLoop(&instructions[0]); } else { length = ParseInstructions(StartPC, instructions, sizeof(instructions) / sizeof(instructions[0])); ScanInstructions(instructions, length); for (i = 0; i < length; i++) EmitOne(&instructions[i]); } __clear_cache(start, CachePtr); *block = start; } uint8_t **FetchBlock(uint16_t pc) { uint8_t **block = FindBlock(pc); #ifndef NDEBUG Metrics.Finds++; Metrics.InterpretedBlock = false; if (!*block) Metrics.Emits++; #endif if (!*block) Emit(pc, block); #ifndef NDEBUG trace[traceidx++] = (CacheSection << 16) | pc; trace[traceidx++] = (uintptr_t)*block; traceidx %= 128; #endif return block; } void S9xMainLoop_Dyna(void) { UpdateSection(); #ifdef LAGFIX do { #endif uint8_t **block = FetchBlock(CPU.PC - CPU.PCBase); BlockEnter(*block); ICPU.Registers.PC = CPU.PC - CPU.PCBase; #ifndef USE_BLARGG_APU IAPU.Registers.PC = IAPU.PC - IAPU.RAM; #endif #ifdef LAGFIX if(!finishedFrame) { #endif S9xPackStatus(); #ifndef USE_BLARGG_APU S9xAPUPackStatus(); #endif CPU.Flags &= ~SCAN_KEYS_FLAG; #ifdef LAGFIX } else { finishedFrame = false; break; } } while(!finishedFrame); #endif } int DynaInit(void) { static bool inited = false; if (!inited) { inited = true; #ifndef NDEBUG DynaBreak(); #endif Cache = MapRWX((void *)((uintptr_t)DynaInit), BUFFER_SIZE + BUFFER_EXTRA); if (!Cache) return -1; } return 0; } int DynaReset(void) { if (DynaInit()) return -1; printf("Starting dynarec\n"); CacheEmpty(); return 0; }