diff options
Diffstat (limited to 'source/arm_dynarec/dynaexec.c')
-rw-r--r-- | source/arm_dynarec/dynaexec.c | 3049 |
1 files changed, 3049 insertions, 0 deletions
diff --git a/source/arm_dynarec/dynaexec.c b/source/arm_dynarec/dynaexec.c new file mode 100644 index 0000000..d63d710 --- /dev/null +++ b/source/arm_dynarec/dynaexec.c @@ -0,0 +1,3049 @@ +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include "snes9x.h" +#include "cpuexec.h" +#include "sa1.h" +#include "apu.h" +#include "arm_dynarec/armfn.h" +#include "arm_dynarec/armgen.h" +#include "arm_dynarec/dynaexec.h" + +#define BUFFER_SIZE (6 << 20) +#define BUFFER_EXTRA (1 << 20) +#define BLOCK_SIZE 0x1000 + +#ifndef NDEBUG +int trace[128]; +int traceidx; +int BreakPC; +int BreakOpcode; +#endif + +enum SNESRegArg +{ + ArgNULL, + ArgA, + ArgX, + ArgY, + ArgS, + ArgD, + ArgDB, + ArgPB, + ArgP, + ArgZ, +}; + +enum OpcodeFlag +{ + OFNone = 0, + OFBreak = 1 << 0, + OFSectionUpdate = 1 << 1, +}; + +#define V(val) val##8, val##16 + +enum OpAddrMode +{ + AddrNone, + V(AddrRegister), + V(AddrImmediate), + AddrImmediate24, + V(AddrZeroPage), + V(AddrIndirect), + V(AddrIndirectX), + V(AddrIndirectY), + V(AddrIndirectS), + V(AddrIndirectFar), + V(AddrAbsolute), + V(AddrLong), +}; + +enum OpFunction +{ + V(FnMOV), + V(FnMOVSP), + V(FnLD), + V(FnST), + V(FnPUSH), + FnPEA, + FnPER, + V(FnPOP), + FnMVN, + FnMVP, + V(FnOR), + V(FnAND), + V(FnEOR), + V(FnADC), + V(FnSBC), + V(FnCMP), + V(FnBIT), + V(FnINC), + V(FnDEC), + V(FnTSB), + V(FnTRB), + V(FnASL), + V(FnLSR), + V(FnROL), + V(FnROR), + FnBRA, + FnBRL, + FnJMP, + FnJML, + FnJSR, + FnJSL, + FnRTI, + FnRTL, + FnRTS, + FnBPL, + FnBMI, + FnBVC, + FnBVS, + FnBCC, + FnBCS, + FnBZC, + FnBZS, + FnBRK, + FnCOP, + FnCLC, + FnCLI, + FnCLD, + FnCLV, + FnSEC, + FnSEI, + FnSED, + FnREP, + FnSEP, + FnXCE, + FnSTP, + FnXBA, + FnWAI, + FnWDM, + FnNOP, +}; + +#undef V + +#define CHECK_8_16(field, value) (in->field == value##8 || in->field == value##16 ) +#define CHECK_FIELD(field, value) (in->field == value) +#define IS_VOP(value) (CHECK_8_16(Op, Fn##value)) +#define IS_OP(value) (CHECK_FIELD(Op, Fn##value)) 
+#define IS_VADDRMODE(value) (CHECK_8_16(AddrMode, Addr##value)) +#define IS_ADDRMODE(value) (CHECK_FIELD(AddrMode, Addr##value)) + +#define CASE_VOP(value) case Fn##value##8: case Fn##value##16 +#define CASE_OP(value) case Fn##value + +enum ChecksFlags +{ + CheckWFI = 1, +}; + +struct Instruction; + +typedef struct Instruction { + void (*S9xOpcode)(void); + void (*Emitter)(struct Instruction *); + uint8_t *PC; + uint8_t Opcode; + enum OpAddrMode AddrMode; + enum OpFunction Op; + enum SNESRegArg Arg1; + enum SNESRegArg Arg2; + uint16_t OutFlags; + uint16_t InFlags; + bool SectionUpdate; + bool ShouldBreak; +} Instruction; + +uint32_t CacheSection; +static uint8_t *CachePtr; +static uint8_t *Cache; +static uint8_t **Sections[13 * 0x1000]; /* Max: Decimal | Emulation = 13 */ + +#ifndef NDEBUG +struct +{ + uint32_t Emits; + uint32_t Finds; + uint32_t InterpretedFinds; + bool InterpretedBlock; + uint32_t Ops; + uint32_t InterpretedOps; + uint32_t ContextSwitches; + uint32_t GetSets; + uint32_t SlowGetSets; + uint32_t OpCounts[0x100]; + uint32_t InterpretedOpCounts[0x100]; + uint32_t InterpretedBlockStarts[0x100]; +} Metrics; + +void MetricsReset(void) +{ + memset(&Metrics, 0, sizeof(Metrics)); +} + +void MetricsPrint(void) +{ + int i; + printf("Cache hit rate: %f\n", (float)(Metrics.Finds - Metrics.Emits) / Metrics.Finds); + printf("Avg. 
instructions per find: %f\n", (float)Metrics.Ops / Metrics.Finds); + + printf("Counts by instruction: \n"); + for (i = 0; i < sizeof(Metrics.OpCounts) / sizeof(Metrics.OpCounts[0]); i++) + { + printf("0x%X: %d\n", i, Metrics.OpCounts[i]); + } + + printf("Interpreted counts by instruction: \n"); + for (i = 0; i < sizeof(Metrics.InterpretedOpCounts) / sizeof(Metrics.InterpretedOpCounts[0]); i++) + { + if (Metrics.InterpretedOpCounts[i] > 0) + printf("0x%X: %d\n", i, Metrics.InterpretedOpCounts[i]); + } + + printf("Interpreted block starts by instruction: \n"); + for (i = 0; i < sizeof(Metrics.InterpretedBlockStarts) / sizeof(Metrics.InterpretedBlockStarts[0]); i++) + { + if (Metrics.InterpretedBlockStarts[i] > 0) + printf("0x%X: %d\n", i, Metrics.InterpretedBlockStarts[i]); + } +} +#endif + +uint8_t **FetchBlock(uint16_t pc); + +static void *CacheAlloc(size_t size) +{ + uint8_t *prev = CachePtr; + CachePtr += size; + return (void *)prev; +} + +static void CacheEmpty(void) +{ + memset(Cache, 0, BUFFER_SIZE + BUFFER_EXTRA); + memset(Sections, 0, sizeof(Sections)); + CachePtr = Cache; +} + +static uint8_t **FindBlock(uint16_t pc) +{ + /* Section is D/E/X/M flags, PB, and top 4 bits of PC */ + uint32_t section = (CacheSection | ((pc & 0xF000) >> 12)); + uint8_t **block = Sections[section]; + pc &= ~0xF000; + + if (!block || !block[pc]) + { + if (CachePtr >= Cache + BUFFER_SIZE) + { + CacheEmpty(); + printf("Reset translation cache\n"); + block = NULL; + } + + if (!block) + block = Sections[section] = CacheAlloc(BLOCK_SIZE * sizeof(uint8_t *)); + } + + return &block[pc]; +} + + +#ifndef NDEBUG + +void CheckInstruction(uint8_t opcode, uint8_t *pc, bool interpreted) +{ + Metrics.Ops++; + Metrics.OpCounts[opcode]++; + + if (interpreted) + { + Metrics.InterpretedOps++; + Metrics.InterpretedOpCounts[opcode]++; + if (!Metrics.InterpretedBlock) + { + Metrics.InterpretedBlock = true; + Metrics.InterpretedFinds++; + Metrics.InterpretedBlockStarts[opcode]++; + } + } + + if (CPU.PC 
- CPU.PCBase == (BreakPC & 0xffff) && ICPU.Registers.PB == ((BreakPC >> 16) & 0xFF)) + DynaBreak(); + + if (*CPU.PC == BreakOpcode) + DynaBreak(); + + if (pc != CPU.PC) + { + fprintf(stderr, "Incorrect PC: Expected 0x%X, was 0x%X\n", (uintptr_t)pc, (uintptr_t)CPU.PC); + DynaBreak(); + } + + if (opcode != *CPU.PC) + { + fprintf(stderr, "Incorrect opcode: Expected 0x%X, was 0x%X\n", opcode, *CPU.PC); + DynaBreak(); + } +} +#endif + +static void EmitChecks(enum ChecksFlags flags) +{ + if (Settings.SA1) + { + GenARM_MOV32_Imm(CondAL, RegR0, (uintptr_t)&SA1); + GenARM_LDRB_Imm(CondAL, RegR0, RegR0, offsetof(__typeof__(SA1), Executing)); + GenARM_CMP_Imm(CondAL, RegR0, ENCODE_IMM(0, 0)); + GenARM_BL(CondNE, PCOFFSET(S9xCallSA1MainLoop)); + } + + GenARM_ANDS_Imm(CondAL, RegR0, RegChecks, ENCODE_IMM(0x88, 4)); /* 0x880 = NMI_FLAG | IRQ_PENDING_FLAG */ + GenARM_CMP_Reg(CondEQ, RegCycles, ENCODE_SHIFT(ShiftLSR, 16), RegChecks); + GenARM_ORRS_Imm(CondHS, RegR0, RegR0, ENCODE_IMM(1, 0)); + + if (flags & CheckWFI) + GenARM_BL(CondNE, PCOFFSET(S9xCallHandleChecksWFI)); + else + GenARM_BL(CondNE, PCOFFSET(S9xCallHandleChecksNoWFI)); +} + +static void PatchJump(uint32_t *source, uint8_t *target) +{ + *source = ((*source & 0xF0000000) | (0xA << 24) | ((PCREL(source, target) >> 2) & 0xFFFFFF)); +} + +static uint8_t *HandleFlags(bool checkWFI) +{ + uint8_t *NewPC = NULL; + + if (CPU.Flags & NMI_FLAG) + { + if (--CPU.NMICycleCount == 0) + { + CPU.Flags &= ~NMI_FLAG; + if (checkWFI && CPU.WaitingForInterrupt) + { + CPU.WaitingForInterrupt = false; + CPU.PC++; + } + S9xOpcode_NMI(); + UpdateSection(); + NewPC = CPU.PC; + } + } + + if (CPU.Flags & IRQ_PENDING_FLAG) + { + if (CPU.IRQCycleCount == 0) + { + if (checkWFI && CPU.WaitingForInterrupt) + { + CPU.WaitingForInterrupt = false; + NewPC = CPU.PC++; + } + if (CPU.IRQActive && !Settings.DisableIRQ) + { + if (!CheckFlag(IRQ)) + { + S9xOpcode_IRQ(); + UpdateSection(); + NewPC = CPU.PC; + } + } + else + CPU.Flags &= ~IRQ_PENDING_FLAG; + } 
+ else if (--CPU.IRQCycleCount == 0 && CheckFlag(IRQ)) + CPU.IRQCycleCount = 1; + } + + return NewPC; +} + +uint8_t *HandleFlagsNoWFI(void) +{ + return HandleFlags(false); +} + +uint8_t *HandleFlagsWFI(void) +{ + return HandleFlags(true); +} + +static int HandleHBlank(bool SFX) +{ +#ifndef NDEBUG + Metrics.ContextSwitches++; +#endif + +#ifndef USE_BLARGG_APU + if (CPU.WhichEvent == HBLANK_END_EVENT) + while (IAPU.APUExecuting && APU.Cycles <= CPU.Cycles) + APU_EXECUTE1(); +#endif + + if (SFX) + S9xDoHBlankProcessing_SFX(); + else + S9xDoHBlankProcessing_NoSFX(); + +#ifdef LAGFIX + if(finishedFrame) + return 3; +#endif + if (CPU.Flags & SCAN_KEYS_FLAG) + return 1; + + return 0; +} + +int HandleHBlankSFX(void) +{ + return HandleHBlank(true); +} + +int HandleHBlankNoSFX(void) +{ + return HandleHBlank(false); +} + +uint8_t *PatchJumpDirectChecks(uint8_t *PC, uint32_t *source) +{ + uint8_t *dest = *FetchBlock(PC - CPU.PCBase); + uint8_t *checks = CachePtr; + EmitChecks(0); + GenARM_B(CondAL, PCOFFSET(dest)); + __clear_cache(checks, CachePtr); + PatchJump(source, checks); + __clear_cache(source, source+1); + return checks; +} + +uint8_t *PatchJumpDirect(uint8_t *PC, uint32_t *source) +{ + uint8_t *dest = *FetchBlock(PC - CPU.PCBase); + PatchJump(source, dest); + __clear_cache(source, source+1); + return dest; +} + +void DynaCPUShutdown(void) +{ + /* Don't skip cycles with a pending NMI or IRQ - could cause delayed + * interrupt. Interrupts are delayed for a few cycles already, but + * the delay could allow the shutdown code to cycle skip again. + * Was causing screen flashing on Top Gear 3000. 
*/ + if (CPU.WaitCounter == 0 && !(CPU.Flags & (IRQ_PENDING_FLAG | NMI_FLAG))) + { + CPU.WaitAddress = NULL; +#ifndef USE_BLARGG_APU + CPU.Cycles = CPU.NextEvent; + if (IAPU.APUExecuting) + { + ICPU.CPUExecuting = false; + do + { + APU_EXECUTE1(); + } while (APU.Cycles < CPU.NextEvent); + ICPU.CPUExecuting = true; + } +#endif + } + else if (CPU.WaitCounter >= 2) + CPU.WaitCounter = 1; + else + CPU.WaitCounter--; +} + +void DynaWAIShutdown(void) +{ + CPU.Cycles = CPU.NextEvent; +#ifndef USE_BLARGG_APU + if (IAPU.APUExecuting) + { + ICPU.CPUExecuting = false; + do + { + APU_EXECUTE1(); + } while (APU.Cycles < CPU.NextEvent); + ICPU.CPUExecuting = true; + } +#endif +} + +static void EmitFlushCyclesPC(int *dpc, int *dcycles) +{ + if (*dpc != 0) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(*dpc, 0)); + + if (*dcycles != 0) + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(*dcycles, 0)); + + dpc = 0; + dcycles = 0; +} + +/* a1 = value, a4 = sp, preserves a4 */ +static void EmitPush(Instruction *in, bool n8) +{ + enum ARMReg RegS = RegA4; /* Preserved during memory ops */ + + if (n8) + GenARM_SUB_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 0)); + else + GenARM_SUB_Imm(CondAL, RegS, RegS, ENCODE_IMM(2, 0)); + + if (CheckEmulation()) + { + GenARM_BIC_Imm(CondAL, RegS, RegS, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 8)); + } + + GenARM_ADD_Imm(CondAL, RegA2, RegS, ENCODE_IMM(1, 0)); + GenARM_BIC_Imm(CondAL, RegA2, RegA2, ENCODE_IMM(0xFF, 16)); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast)); + } +} + +/* a1 = value, a4 = sp, preserves a4 */ +static void EmitPop(Instruction *in, bool n8) +{ + enum ARMReg RegS = RegA4; /* Preserved during memory ops */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + + if (n8) + GenARM_ADD_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 0)); + else + GenARM_ADD_Imm(CondAL, RegS, RegS, ENCODE_IMM(2, 0)); + + if (CheckEmulation()) + { + 
GenARM_BIC_Imm(CondAL, RegS, RegS, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 8)); + } + + if (n8) + { + GenARM_MOV_Reg(CondAL, RegA1, 0, RegS); + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + } + else + { + GenARM_SUB_Imm(CondAL, RegA1, RegS, ENCODE_IMM(1, 0)); + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + } +} + +static enum ARMReg EmitLoadRegister(Instruction *in, enum SNESRegArg arg, enum ARMReg RegMEM, bool read) +{ + enum ARMReg reg; + + switch(arg) + { + case ArgA: + reg = RegA; + break; + case ArgX: + reg = RegX; + break; + case ArgY: + reg = RegY; + break; + case ArgP: + reg = RegP; + break; + case ArgD: + if (read) + GenARM_LDRH_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.D)); + reg = RegMEM; + break; + case ArgS: + if (read) + GenARM_LDRH_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.S)); + reg = RegMEM; + break; + case ArgDB: + if (read) + GenARM_LDRB_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.DB)); + reg = RegMEM; + break; + case ArgPB: + if (read) + GenARM_LDRB_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.PB)); + reg = RegMEM; + break; + default: + fprintf(stderr, "Unhandled SNES Register: %d\n", arg); + reg = -1; + break; + } + + return reg; +} + +/* Preserves reg, modifies R3 */ +static void EmitStoreRegister(Instruction *in, enum SNESRegArg arg, enum ARMReg reg, bool n8) +{ + switch(arg) + { + case ArgA: + if (reg != RegA) + { + if (n8) + GenARM_MOVB_Reg(CondAL, RegA, reg); + else + GenARM_UXTH_Reg(CondAL, RegA, reg); + } + break; + case ArgX: + if (reg != RegX) + { + if (n8) + GenARM_MOVB_Reg(CondAL, RegX, reg); + else + GenARM_UXTH_Reg(CondAL, RegX, reg); + } + break; + case ArgY: + if (reg != RegY) + { + if (n8) + GenARM_MOVB_Reg(CondAL, RegY, reg); + else + GenARM_UXTH_Reg(CondAL, RegY, reg); + } + break; + case ArgP: + { + uint8_t *branch; + GenARM_EOR_Reg(CondAL, RegP, RegP, 0, reg); + GenARM_EOR_Reg(CondAL, reg, RegP, 0, reg); + GenARM_EOR_Reg(CondAL, RegP, RegP, 0, reg); + + 
GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Emulation >> 8, 8)); + GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(MemoryFlag | IndexFlag, 0)); + GenARM_BL(CondAL, PCOFFSET(S9xCallUnpackStatusFast)); + + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(IndexFlag, 0)); + GenARM_BIC_Imm(CondNE, RegX, RegX, ENCODE_IMM(0xFF, 8)); + GenARM_BIC_Imm(CondNE, RegY, RegY, ENCODE_IMM(0xFF, 8)); + + /* Update interpreter lookup table and CacheSection if processor flags changed */ + GenARM_EOR_Reg(CondAL, reg, reg, 0, RegP); + GenARM_TST_Imm(CondAL, reg, ENCODE_IMM((Emulation | MemoryFlag | IndexFlag | Decimal) >> 2, 2)); + branch = CachePtr; + GenARM_B(CondEQ, PCOFFSET(branch)); + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + GenARM_BL(CondAL, PCOFFSET(S9xCallFixCycles)); + PatchJump((uint32_t *)branch, CachePtr); + break; + } + case ArgD: + GenARM_STRH_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.D)); + break; + case ArgDB: + GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), reg); + GenARM_STRB_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.DB)); + GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedDB)); + break; + case ArgPB: + GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), reg); + GenARM_STRB_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedPB)); + break; + case ArgS: + if (CheckEmulation()) + { + GenARM_BIC_Imm(CondAL, RegA3, reg, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Imm(CondAL, RegA3, RegA3, ENCODE_IMM(1, 8)); + GenARM_STRH_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(Registers.S)); + } + else + { + GenARM_STRH_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.S)); + } + + break; + default: + fprintf(stderr, "Unhandled SNES Register: %d\n", arg); + break; + } +} + +static uint16_t EmitLoadLiteral(Instruction *in, bool n8, int *dpc, int *dcycles) +{ + uint16_t literal; + *dcycles += CPU.MemSpeed; + *dpc += 1; + literal = in->PC[1]; + + if (!n8) + { + *dcycles += CPU.MemSpeed; + *dpc += 1; + literal |= 
in->PC[2] << 8; + } + + return literal; +} + +static int32_t EmitLoadRelative(Instruction *in, bool n8, int *dpc, int *dcycles) +{ + int32_t disp; + int32_t pc; + int32_t target; + bool overflow; + + if (n8) + { + disp = (int8_t)in->PC[1]; + pc = (int32_t)((in->PC + 2) - CPU.PCBase); + (*dpc)++; + *dcycles += CPU.MemSpeed; + } + else + { + disp = (int16_t)(in->PC[1] | (in->PC[2] << 8)); + pc = (int32_t)((in->PC + 3) - CPU.PCBase); + *dpc += 2; + *dcycles += CPU.MemSpeedx2; + } + + target = (pc + disp) & 0xFFFF; + overflow = target != (pc + disp); + + if (overflow) + disp = (target - pc); + + return disp; +} + +/* Stores result in A1 for reads, A2 for writes, because that is where + * S9XRead / S9xWrite look for address. Modifies the other. Reads also + * write over A4. */ +static void EmitAddrZeroPage(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles) +{ + enum ARMReg rd = read ? RegA1 : RegA2; + enum ARMReg scratch = read ? RegA2 : RegA1; + + *dpc += 1; + *dcycles += CPU.MemSpeed; + GenARM_MOV_Imm(CondAL, rd, ENCODE_IMM(in->PC[1], 0)); + + if (offsetReg == ArgS) + GenARM_LDRH_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(Registers.S)); + else + GenARM_LDRH_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(Registers.D)); + + if (offsetReg == ArgX) + GenARM_ADD_Reg(CondAL, rd, rd, 0, RegX); + else if (offsetReg == ArgY) + GenARM_ADD_Reg(CondAL, rd, rd, 0, RegY); + + if (offsetReg == ArgX || offsetReg == ArgY || offsetReg == ArgS) + *dcycles += ONE_CYCLE; + + if (read) + { + /* Set OpenBus to in->PC[1] */ + GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[1], 0)); + GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET); + } + + GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch); + + if (CheckEmulation() && (offsetReg == ArgX || offsetReg == ArgY)) + { + GenARM_UXTB_Reg(CondAL, rd, rd); + *dcycles += ONE_CYCLE; + } + else + { + GenARM_UXTH_Reg(CondAL, rd, rd); + } +} + +/* Stores result in A1 for reads, A2 for writes, because that is where + * S9XRead / 
S9xWrite look for address. Modifies the other. Reads also + * write over A4. */ +static void EmitAddrAbsolute(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles) +{ + enum ARMReg rd = read ? RegA1 : RegA2; + enum ARMReg scratch = read ? RegA2 : RegA1; + *dpc += 2; + *dcycles += CPU.MemSpeedx2; + GenARM_MOV32_Imm(CondAL, scratch, in->PC[1] + (in->PC[2] << 8)); + GenARM_LDR_Imm(CondAL, rd, RegCPU, ICPU_OFFSET(ShiftedDB)); + + if (offsetReg == ArgX) + GenARM_ADD_Reg(CondAL, scratch, scratch, 0, RegX); + else if (offsetReg == ArgY) + GenARM_ADD_Reg(CondAL, scratch, scratch, 0, RegY); + + if (read) + { + /* Set OpenBus to in->PC[2] */ + GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0)); + GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET); + } + + GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch); +} + +/* Stores result in A1 for reads, A2 for writes, because that's where + * S9XRead / S9xWrite look for address. Reads also write over A4. */ +static void EmitAddrLong(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles) +{ + enum ARMReg rd = read ? RegA1 : RegA2; + *dpc += 3; + *dcycles += CPU.MemSpeedx2 + CPU.MemSpeed; + + GenARM_MOV32_Imm(CondAL, rd, in->PC[1] + (in->PC[2] << 8) + (in->PC[3] << 16)); + + if (offsetReg == ArgX) + { + GenARM_ADD_Reg(CondAL, rd, rd, 0, RegX); + GenARM_BIC_Imm(CondAL, rd, rd, ENCODE_IMM(0xFF, 24)); + } + + if (read) + { + /* Set OpenBus to in->PC[2] */ + GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0)); + GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET); + } +} + +/* Stores result in A1 for reads, A2 for writes, because that's where + * S9XRead / S9xWrite look for address. Writes over A4. */ +static void EmitAddrIndirect(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles) +{ + enum ARMReg rd = read ? RegA1 : RegA2; + enum ARMReg scratch = read ? 
RegA2 : RegA1; + enum SNESRegArg ZPArg = ArgNULL; + + if (offsetReg == ArgX) + ZPArg = ArgX; + else if (IS_VADDRMODE(IndirectS)) + ZPArg = ArgS; + + EmitAddrZeroPage(true, in, ZPArg, dpc, dcycles); + + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + + if (rd != RegA1) + GenARM_MOV_Reg(CondAL, rd, 0, RegA1); + + if (offsetReg == ArgY) + { + GenARM_ADD_Reg(CondAL, rd, rd, 0, RegY); + } + + if (IS_VADDRMODE(IndirectS)) + *dcycles += ONE_CYCLE; + + GenARM_LDR_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(ShiftedDB)); + GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch); + + if (read) + { + /* Set OpenBus to last byte fetched */ + GenARM_MOV_Reg(CondAL, RegA4, ENCODE_SHIFT(ShiftLSR, 8), rd); + GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET); + } +} + +/* Stores result in A1 for reads, A2 for writes, because that's where + * S9XRead / S9xWrite look for address. Writes over A4. */ +static void EmitAddrIndirectFar(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles) +{ + enum ARMReg rd = read ? 
RegA1 : RegA2; + + EmitAddrZeroPage(true, in, ArgNULL, dpc, dcycles); + + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + + /* GetFast preserves A4 */ + GenARM_MOV_Reg(CondAL, RegA4, 0, RegA1); + GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(2, 0)); + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + + if (offsetReg == ArgY) + { + GenARM_ADD_Reg(CondAL, RegA4, RegA4, 0, RegY); + } + + if (read) + { + /* Set OpenBus to last byte fetched */ + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OPEN_BUS_OFFSET); + } + + GenARM_ADD_Reg(CondAL, rd, RegA4, ENCODE_SHIFT(ShiftLSL, 16), RegA1); +} + +static void EmitGetAddress(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles) +{ + switch(in->AddrMode) + { + case AddrZeroPage8: + case AddrZeroPage16: + EmitAddrZeroPage(read, in, offsetReg, dpc, dcycles); + break; + case AddrAbsolute8: + case AddrAbsolute16: + EmitAddrAbsolute(read, in, offsetReg, dpc, dcycles); + break; + case AddrLong8: + case AddrLong16: + EmitAddrLong(read, in, offsetReg, dpc, dcycles); + break; + case AddrIndirect8: + case AddrIndirect16: + case AddrIndirectX8: + case AddrIndirectX16: + case AddrIndirectY8: + case AddrIndirectY16: + case AddrIndirectS8: + case AddrIndirectS16: + EmitAddrIndirect(read, in, offsetReg, dpc, dcycles); + break; + case AddrIndirectFar8: + case AddrIndirectFar16: + EmitAddrIndirectFar(read, in, offsetReg, dpc, dcycles); + break; + default: + break; + } +} + +/* Returns loaded register, leaves address in RegA2 for storing */ +static enum ARMReg EmitLoadMemRegister(Instruction *in, enum ARMReg RegMEM, bool n8, int *dpc, int *dcycles) +{ + EmitGetAddress(true, in, in->Arg2, dpc, dcycles); + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + + if (n8) + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + else + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + + GenARM_MOV_Reg(CondAL, RegMEM, 0, RegA1); + + return RegMEM; +} + +/* 
Assumes address is in RegA2 */ +static void EmitStoreMemRegister(enum ARMReg RegMEM, bool n8) +{ + GenARM_MOV_Reg(CondAL, RegA1, 0, RegMEM); + + if (n8) + GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); + else + GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast)); +} + +/* Expects Z to be set in ARM flags, HS to be set in ARM if CarryHS is true (SBC, CMP) */ +static void EmitSetFlags(enum ARMReg reg, uint32_t flags, bool n8, bool CarryHS) +{ + if (flags == 0) + return; + + GenARM_BIC_Imm(CondAL, RegP, RegP, flags); + + if (flags & Zero) + GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero); + + if ((flags & Carry) && CarryHS) + GenARM_ORR_Imm(CondHS, RegP, RegP, Carry); + + if (n8) + { + if (flags & Negative) + { + GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(2, 6)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); + } + + if ((flags & Carry) && !CarryHS) + { + GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(1, 8)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Carry); + } + } + else + { + if (flags & Negative) + { + GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(2, 14)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); + } + + if ((flags & Carry) && !CarryHS) + { + GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(1, 16)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Carry); + } + } +} + +static void EmitLD(enum ARMReg reg, bool n8, int *dpc, int *dcycles) +{ + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(dpc, dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_MOVBS_Reg(CondAL, reg, RegR0, RegR0); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_MOVS_Reg(CondAL, reg, 0, RegR0); + } +} + +static void EmitST(enum SNESRegArg reg, bool n8) +{ + switch(reg) + { + case ArgA: + GenARM_MOV_Reg(CondAL, RegA1, 0, RegA); + break; + case ArgX: + GenARM_MOV_Reg(CondAL, RegA1, 0, RegX); + break; + case ArgY: + GenARM_MOV_Reg(CondAL, RegA1, 0, RegY); + break; + case ArgZ: + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0)); + break; + 
default: + break; + } + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast)); + } +} + +static void EmitBranch(Instruction *in, uint32_t flag, bool set, int *dpc, int *dcycles) +{ + int32_t disp = EmitLoadRelative(in, true, dpc, dcycles); + + EmitFlushCyclesPC(dpc, dcycles); + + /* Interpreter runs BranchCheck here. Only when APU is disabled + * until next reset. So cost of the load seems not worth it, unless + * games break. */ + + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(flag, 0)); + /* If condition doesn't match, jump to next instruction */ + if (set) + GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks)); + else + GenARM_BL(CondNE, PCOFFSET(JumpDirectChecks)); + + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0)); + + if (Settings.Shutdown) + { + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + } + + if (disp > 0) + { + if (disp & 0xFF) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + else if (disp < 0) + { + disp = -disp; + if (disp & 0xFF) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + + if (Settings.Shutdown) { + GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); + GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); + } + + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); +} + +static void EmitNativeCall(Instruction *in) +{ + int dpc = 1; + int dcycles = CPU.MemSpeed; + bool didBreak = false; + bool n8; + + switch(in->Op) + { + CASE_VOP(MOV): + CASE_VOP(MOVSP): + { + enum ARMReg RegSRC; + enum ARMReg RegDST; + n8 = (in->Op == FnMOV8) || (in->Op == FnMOVSP8); + + dcycles += ONE_CYCLE; + RegDST = EmitLoadRegister(in, in->Arg1, RegA2, false); + RegSRC = EmitLoadRegister(in, in->Arg2, RegA3, true); + + 
EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_MOVBS_Reg(CondAL, RegDST, RegA1, RegSRC); + } + else + { + GenARM_UXTHS_Reg(CondAL, RegDST, RegSRC); + } + + EmitSetFlags(RegDST, in->OutFlags & (Zero | Negative), n8, false); + EmitStoreRegister(in, in->Arg1, RegDST, n8); + EmitChecks(0); + break; + } + CASE_VOP(LD): + { + enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true); + n8 = in->Op == FnLD8; + + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BIC_Imm(CondAL, RegDST, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_ORR_Imm(CondAL, RegDST, RegDST, ENCODE_IMM(literal, 0)); + + if (in->OutFlags & Zero) + GenARM_ANDS_Imm(CondAL, RegA1, RegDST, ENCODE_IMM(literal, 0)); + } + else + { + GenARM_MOV32_Imm(CondAL, RegDST, literal); + if (in->OutFlags & Zero) + GenARM_ANDS_Reg(CondAL, RegDST, RegDST, 0, RegDST); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + EmitLD(RegDST, n8, &dpc, &dcycles); + } + + EmitSetFlags(RegDST, in->OutFlags & (Zero | Negative), n8, false); + EmitChecks(0); + break; + } + CASE_VOP(ST): + { + n8 = in->Op == FnST8; + + EmitGetAddress(false, in, in->Arg2, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + EmitST(in->Arg1, n8); + EmitChecks(0); + break; + } + CASE_VOP(PUSH): + { + enum ARMReg RegSRC; + n8 = in->Op == FnPUSH8; + + if (in->Arg1 == ArgNULL) + { + RegSRC = EmitLoadMemRegister(in, RegA1, n8, &dpc, &dcycles); + } + else + { + RegSRC = EmitLoadRegister(in, in->Arg1, RegA1, true); + dcycles += ONE_CYCLE; + } + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + if (RegSRC != RegA1) + GenARM_MOV_Reg(CondAL, RegA1, 0, RegSRC); + + EmitFlushCyclesPC(&dpc, &dcycles); + EmitPush(in, n8); + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitChecks(0); + break; + } + CASE_OP(PEA): + { + uint16_t literal = EmitLoadLiteral(in, false, &dpc, &dcycles); 
+ + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + GenARM_MOV32_Imm(CondAL, RegA1, literal); + + EmitFlushCyclesPC(&dpc, &dcycles); + EmitPush(in, false); + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitChecks(0); + break; + } + CASE_OP(PER): + { + uint16_t literal = (in->PC + 3 - CPU.PCBase + in->PC[1] + (in->PC[2] << 8)) & 0xffff; + dcycles += CPU.MemSpeedx2 + ONE_CYCLE; + dpc += 2; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + GenARM_MOV32_Imm(CondAL, RegA1, literal); + + EmitFlushCyclesPC(&dpc, &dcycles); + EmitPush(in, false); + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitChecks(0); + break; + } + CASE_VOP(POP): + { + n8 = in->Op == FnPOP8; + dcycles += TWO_CYCLES; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitPop(in, n8); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (in->OutFlags & Zero) + GenARM_MOVS_Reg(CondAL, RegA1, 0, RegA1); /* Set ZF on A1 */ + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false); + EmitStoreRegister(in, in->Arg1, RegA1, n8); + EmitChecks(0); + break; + } + CASE_OP(MVP): + CASE_OP(MVN): + { + uint8_t *start; + dpc += 2; + dcycles += CPU.MemSpeedx2 + TWO_CYCLES; + + /* Load DD */ + /* Set DB / ShiftedDB */ + GenARM_MOV_Imm(CondAL, RegA3, ENCODE_IMM(in->PC[1], 0)); + GenARM_STRB_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(Registers.DB)); + GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), RegA3); + GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedDB)); + + /* A3 = DD << 16 */ + + /* Load SS */ + /* Set OpenBus to SS */ + GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0)); + GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET); + + /* A4 = SS << 16 */ + start = CachePtr; + + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(in->PC[2], 16)); + GenARM_ORR_Reg(CondAL, RegA1, RegA1, 0, RegX); + + /* Update 
cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + + /* SetByte -- A1 val, A2 addr */ + GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[1], 16)); + GenARM_ORR_Reg(CondAL, RegA2, RegA2, 0, RegY); + GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); + + if (in->Op == FnMVN) + { + if (CheckIndex()) + { + GenARM_ADD_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); + GenARM_MOVB_Reg(CondAL, RegX, RegA1); + GenARM_ADD_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); + GenARM_MOVB_Reg(CondAL, RegY, RegA1); + } + else + { + GenARM_ADD_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegX, RegA1); + GenARM_ADD_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegY, RegA1); + } + } + else + { + if (CheckIndex()) + { + GenARM_SUB_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); + GenARM_MOVB_Reg(CondAL, RegX, RegA1); + GenARM_SUB_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); + GenARM_MOVB_Reg(CondAL, RegY, RegA1); + } + else + { + GenARM_SUB_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegX, RegA1); + GenARM_SUB_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegY, RegA1); + } + } + + GenARM_CMP_Imm(CondAL, RegA, ENCODE_IMM(0, 0)); + GenARM_SUB_Imm(CondAL, RegA1, RegA, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegA, RegA1); + GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks)); + + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(dpc, 0)); + EmitChecks(0); + GenARM_B(CondAL, PCOFFSET(start)); + didBreak = true; + break; + } + CASE_VOP(OR): + { + n8 = in->Op == FnOR8; + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_ORR_Imm(CondAL, RegA, RegA, ENCODE_IMM(literal, 0)); + GenARM_ANDS_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0)); + EmitSetFlags(RegA2, in->OutFlags & (Zero | Negative), n8, 
false); + } + else + { + GenARM_MOV32_Imm(CondAL, RegR0, literal); + GenARM_ORRS_Reg(CondAL, RegA, RegA, 0, RegR0); + EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegR0); + GenARM_ANDS_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0)); + EmitSetFlags(RegA2, in->OutFlags & (Zero | Negative), n8, false); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_ORRS_Reg(CondAL, RegA, RegA, 0, RegR0); + EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); + } + } + + EmitChecks(0); + break; + } + CASE_VOP(AND): + { + n8 = in->Op == FnAND8; + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_ANDS_Imm(CondAL, RegA1, RegA, ENCODE_IMM(literal, 0)); + GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); + } + else + { + GenARM_MOV32_Imm(CondAL, RegA1, literal); + GenARM_ANDS_Reg(CondAL, RegA, RegA, 0, RegA1); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_ANDS_Reg(CondAL, RegA1, RegA, 0, RegA1); + GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_ANDS_Reg(CondAL, RegA, RegA, 0, RegA1); + } + } + + EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); + EmitChecks(0); + break; + } + CASE_VOP(EOR): + { + n8 = in->Op == 
FnEOR8; + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA1, RegA, ENCODE_IMM(0xFF, 0)); + GenARM_EORS_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(literal, 0)); + GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); + } + else + { + GenARM_MOV32_Imm(CondAL, RegA1, literal); + GenARM_EORS_Reg(CondAL, RegA, RegA, 0, RegA1); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_AND_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0)); + GenARM_EORS_Reg(CondAL, RegA1, RegA1, 0, RegA2); + GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_EORS_Reg(CondAL, RegA, RegA, 0, RegA1); + } + } + + EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); + EmitChecks(0); + break; + } + CASE_VOP(ADC): + { + enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true); + n8 = in->Op == FnADC8; + + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_MOV32_Imm(CondAL, RegA1, literal); + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + else + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + } + + GenARM_AND_Imm(CondAL, RegA3, RegP, ENCODE_IMM(Carry, 0)); + + if (CheckDecimal()) + { + uint32_t shift; + + for (shift = 0; shift < (n8 ? 
8 : 16) ; shift += 4) + { + GenARM_AND_Imm(CondAL, RegA4, RegA1, ENCODE_IMM(0xF, shift)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); + GenARM_AND_Imm(CondAL, RegA4, RegDST, ENCODE_IMM(0xF, shift)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(0xA, shift)); + GenARM_ADD_Imm(CondHS, RegA3, RegA3, ENCODE_IMM(0x6, shift)); + } + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); + } + else + { + GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); + GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); + } + } + else + { + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA1); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA2); + GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); + } + else + { + GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegDST); + GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); + } + } + + EmitSetFlags(RegA3, in->OutFlags & (Zero | Negative | Carry | Overflow), n8, false); + + if (in->OutFlags & Overflow) + { + /* Calculate overflow flag */ + if (CheckDecimal()) { + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, n8 ? 8 : 16)); + GenARM_SUB_Imm(CondGE, RegA3, RegA3, ENCODE_IMM(0x60, n8 ? 
0 : 8)); + } + + GenARM_EOR_Reg(CondAL, RegA1, RegA2, 0, RegA1); + GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1); + GenARM_EOR_Reg(CondAL, RegA2, RegA3, 0, RegA2); + GenARM_AND_Reg(CondAL, RegA1, RegA1, 0, RegA2); + if (n8) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0)); + } + else + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8)); + } + GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Overflow, 0)); + } + + EmitChecks(0); + break; + } + CASE_VOP(SBC): + { + enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true); + n8 = in->Op == FnSBC8; + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_MOV32_Imm(CondAL, RegA1, literal); + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + else + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + } + + GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1); + if (n8) + GenARM_UXTB_Reg(CondAL, RegA1, RegA1); + else + GenARM_UXTH_Reg(CondAL, RegA1, RegA1); + + GenARM_AND_Imm(CondAL, RegA3, RegP, ENCODE_IMM(Carry, 0)); + + if (CheckDecimal()) + { + uint32_t shift; + + for (shift = 0; shift < (n8 ? 
8 : 16) ; shift += 4) + { + GenARM_AND_Imm(CondAL, RegA4, RegA1, ENCODE_IMM(0xF, shift)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); + GenARM_AND_Imm(CondAL, RegA4, RegDST, ENCODE_IMM(0xF, shift)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(0x10, shift)); + GenARM_SUB_Imm(CondLO, RegA3, RegA3, ENCODE_IMM(0x6, shift)); + } + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); + } + else + { + GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); + GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); + } + } + else + { + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA1); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA2); + GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); + } + else + { + GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegDST); + GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); + } + } + + EmitSetFlags(RegA3, in->OutFlags & (Zero | Negative | Overflow), n8, false); + + if (in->OutFlags & Carry) + { + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0)); + if (n8) + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, 8)); + else + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, 16)); + GenARM_ORR_Imm(CondGE, RegP, RegP, ENCODE_IMM(Carry, 0)); + } + + if (in->OutFlags & Overflow) + { + /* Calculate overflow flag */ + GenARM_EOR_Reg(CondAL, RegA1, RegA2, 0, RegA1); + GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1); + GenARM_EOR_Reg(CondAL, RegA2, RegA3, 0, RegA2); + GenARM_AND_Reg(CondAL, RegA1, RegA1, 0, RegA2); + if (n8) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0)); + } + else + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8)); + } + GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Overflow, 0)); + } + + EmitChecks(0); + break; + } + CASE_VOP(CMP): + { + enum ARMReg RegW = EmitLoadRegister(in, in->Arg1, RegR0, true); + n8 = in->Op == FnCMP8; + + if 
(IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegR1, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_SUBS_Imm(CondAL, RegR0, RegR1, ENCODE_IMM(literal, 0)); + } + else + { + GenARM_MOV32_Imm(CondAL, RegR0, literal); + GenARM_SUBS_Reg(CondAL, RegR0, RegW, 0, RegR0); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_AND_Imm(CondAL, RegR1, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_SUBS_Reg(CondAL, RegR0, RegR1, 0, RegR0); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_SUBS_Reg(CondAL, RegR0, RegW, 0, RegR0); + } + } + + EmitSetFlags(RegR0, in->OutFlags & (Carry | Zero | Negative), n8, true); + EmitChecks(0); + break; + } + CASE_VOP(BIT): + { + n8 = in->Op == FnBIT8; + + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_TST_Imm(CondAL, RegA, ENCODE_IMM(literal, 0)); + } + else + { + GenARM_MOV32_Imm(CondAL, RegA1, literal); + GenARM_TST_Reg(CondAL, RegA, 0, RegA1); + } + + EmitSetFlags(RegA, in->OutFlags & (Zero), n8, false); + } + else + { + uint32_t flags; + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_TST_Reg(CondAL, RegA, 0, RegA1); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_TST_Reg(CondAL, RegA, 0, RegA1); + } + + flags = in->OutFlags & (Zero | Negative | Overflow); + + if (flags == 0) + return; + + GenARM_BIC_Imm(CondAL, RegP, RegP, flags); + + if (flags & Zero) + 
GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero); + + if (n8) + { + if (flags & Negative) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); + } + + if ((flags & Overflow)) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x40, 0)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Overflow); + } + } + else + { + if (flags & Negative) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); + } + + if ((flags & Overflow)) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x40, 8)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Overflow); + } + } + } + + EmitChecks(0); + break; + } + CASE_VOP(INC): + CASE_VOP(DEC): + { + enum ARMReg RegW; + n8 = in->Op == FnINC8 || in->Op == FnDEC8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA2, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + + /* Clear WaitAddress */ + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_SHIFT(0, 0)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + + EmitFlushCyclesPC(&dpc, &dcycles); + + if (IS_VOP(INC)) + GenARM_ADD_Imm(CondAL, RegA1, RegW, ENCODE_IMM(1, 0)); + else + GenARM_SUB_Imm(CondAL, RegA1, RegW, ENCODE_IMM(1, 0)); + + if (n8) + { + GenARM_MOVBS_Reg(CondAL, RegW, RegA1, RegA1); + } + else + { + GenARM_UXTHS_Reg(CondAL, RegW, RegA1); + } + + EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_VOP(TSB): + CASE_VOP(TRB): + { + n8 = in->Op == FnTSB8 || in->Op == FnTRB8; + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + dcycles += ONE_CYCLE; + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_AND_Imm(CondAL, RegA4, RegA, ENCODE_IMM(0xFF, 0)); + + 
GenARM_TST_Reg(CondAL, RegA4, 0, RegA1); + EmitSetFlags(RegA4, in->OutFlags & (Zero), n8, false); + + if (IS_VOP(TSB)) + { + GenARM_ORR_Reg(CondAL, RegA1, RegA4, 0, RegA1); + } + else + { + GenARM_MVN_Reg(CondAL, RegA4, 0, RegA4); + GenARM_AND_Reg(CondAL, RegA1, RegA4, 0, RegA1); + } + GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_TST_Reg(CondAL, RegA, 0, RegA1); + EmitSetFlags(RegA, in->OutFlags & (Zero), n8, false); + + if (IS_VOP(TSB)) + { + GenARM_ORR_Reg(CondAL, RegA1, RegA, 0, RegA1); + } + else + { + GenARM_MVN_Reg(CondAL, RegA4, 0, RegA); + GenARM_AND_Reg(CondAL, RegA1, RegA4, 0, RegA1); + } + GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast)); + } + + EmitChecks(0); + break; + } + CASE_VOP(ASL): + { + enum ARMReg RegW; + n8 = in->Op == FnASL8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA4, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_MOV_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSL, 1), RegW); + + if (n8) + { + GenARM_MOVBS_Reg(CondAL, RegW, RegA4, RegA1); + } + else + { + GenARM_UXTHS_Reg(CondAL, RegW, RegA1); + } + + EmitSetFlags(RegA1, in->OutFlags & (Carry | Zero | Negative), CheckMemory(), false); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_VOP(LSR): + { + enum ARMReg RegW; + n8 = in->Op == FnLSR8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA3, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA3, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA1, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 1), RegA1); + GenARM_BIC_Imm(CondAL, RegW, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_ORR_Reg(CondAL, RegW, RegW, 0, RegA1); + } + else + { 
+ GenARM_MOVS_Reg(CondAL, RegW, ENCODE_SHIFT(ShiftLSR, 1), RegW); + } + + /* Flags easier to set inline here */ + GenARM_BIC_Imm(CondAL, RegP, RegP, in->OutFlags & (Carry | Zero | Negative)); + + /* Negative is always unset */ + if (in->OutFlags & Zero) + GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero); + + if (in->OutFlags & Carry) + GenARM_ORR_Imm(CondCS, RegP, RegP, Carry); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_VOP(ROL): + { + enum ARMReg RegW; + n8 = in->Op == FnROL8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA4, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_MOV_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSL, 1), RegW); + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondNE, RegA1, RegA1, ENCODE_IMM(1, 0)); + + if (in->OutFlags & Carry) + { + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0)); + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(1, n8 ? 
8 : 16)); + GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Carry, 0)); + } + + if (n8) + { + GenARM_MOVBS_Reg(CondAL, RegW, RegA4, RegA1); + } + else + { + GenARM_UXTHS_Reg(CondAL, RegW, RegA1); + } + + EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_VOP(ROR): + { + enum ARMReg RegW; + n8 = in->Op == FnROR8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA4, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA1, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondNE, RegA1, RegA1, ENCODE_IMM(1, 8)); + GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 1), RegA1); + GenARM_MOVB_Reg(CondAL, RegW, RegA1); + } + else + { + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondNE, RegW, RegW, ENCODE_IMM(1, 16)); + GenARM_MOVS_Reg(CondAL, RegW, ENCODE_SHIFT(ShiftLSR, 1), RegW); + } + + if (in->OutFlags & Carry) + { + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondCS, RegP, RegP, ENCODE_IMM(Carry, 0)); + } + + EmitSetFlags(n8 ? 
RegA1 : RegW, in->OutFlags & (Zero | Negative), n8, false); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_OP(BRA): + CASE_OP(BRL): + { + int32_t disp; + n8 = in->Op == FnBRA; + disp = EmitLoadRelative(in, n8, &dpc, &dcycles); + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (in->Op == FnBRA && Settings.Shutdown) + { + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + } + + if (disp > 0) + { + if (disp & 0xFF) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + else if (disp < 0) + { + disp = -disp; + if (disp & 0xFF) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + + if (in->Op == FnBRA && Settings.Shutdown) { + GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); + GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); + } + + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + break; + } + + CASE_OP(JMP): + CASE_OP(JML): + { + switch(in->AddrMode) + { + case AddrImmediate16: + dcycles += CPU.MemSpeedx2; + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8) | ICPU.ShiftedPB); + + /* Only flush cycles because PC will change */ + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + + break; + case AddrImmediate24: + dcycles += CPU.MemSpeedx2 + CPU.MemSpeed; + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8)); + GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[3], 0)); + EmitStoreRegister(in, ArgPB, RegA2, true); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(in->PC[3], 16)); + + /* Only flush cycles because PC will change */ + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + + break; + case AddrAbsolute16: + dcycles += CPU.MemSpeedx2; + dpc += 
2; + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8)); + + if (in->Arg2 == ArgX) + { + GenARM_ADD_Reg(CondAL, RegA1, RegA1, 0, RegX); + GenARM_BIC_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(0xFF, 16)); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); + dcycles += ONE_CYCLE; + } + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + + if (in->Op == FnJMP) + { + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); + } + else + { + GenARM_MOV_Reg(CondAL, RegA4, 0, RegA1); + GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(2, 0)); + GenARM_SUB_Imm(CondAL, RegA2, RegCPUPC, ENCODE_IMM(3, 0)); + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + GenARM_ORR_Reg(CondAL, RegA1, RegA4, 0, RegA2); + } + + break; + default: + break; + } + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + if (in->AddrMode == AddrImmediate24 || in->Op == FnJML) + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + if (in->AddrMode == AddrImmediate16 && Settings.Shutdown) + { + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); + GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); + } + + EmitChecks(0); + + if (in->AddrMode == AddrImmediate16 || in->AddrMode == AddrImmediate24) + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + else + GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); + + didBreak = true; + break; + } + CASE_OP(JSR): + CASE_OP(JSL): + { + if (in->Op == FnJSL) + { + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(ICPU.Registers.PB, 0)); + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + dcycles 
+= CPU.MemSpeed * 3; + dpc += 3; + /* Only flush cycles because PC will change */ + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + + EmitPush(in, true); + } + else + { + dcycles += CPU.MemSpeedx2 + ONE_CYCLE; + dpc += 2; + /* Only flush cycles because PC will change */ + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + } + + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(PCBase)); + GenARM_ADD_Imm(CondAL, RegA3, RegCPUPC, ENCODE_IMM(dpc - 1, 0)); + GenARM_SUB_Reg(CondAL, RegA1, RegA3, 0, RegA1); + + EmitPush(in, false); + + /* Load new PC */ + if (in->Op == FnJSL) + { + GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[3], 0)); + GenARM_STRB_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA2); + GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] + (in->PC[2] << 8) + (in->PC[3] << 16)); + } + else if (in->Arg2 == ArgNULL) + { + GenARM_MOV32_Imm(CondAL, RegA2, in->PC[1] + (in->PC[2] << 8)); + GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(ICPU.Registers.PB, 16)); + } + else + { + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8)); + /* Set OpenBus to PC[2] */ + + GenARM_ADD_Reg(CondAL, RegA1, RegA1, 0, RegX); + GenARM_BIC_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(0xFF, 16)); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); + } + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + if (in->Op == FnJSL) + 
GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + EmitChecks(0); + if (in->Arg2 == ArgNULL) + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + else + GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(RTI): + { + uint8_t *branch1; + dcycles += ONE_CYCLE * 2; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + EmitPop(in, true); + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitStoreRegister(in, ArgP, RegA1, false); + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitPop(in, false); + GenARM_UXTH_Reg(CondAL, RegCPUPC, RegA1); /* RegCPU will be overwritten later */ + + /* Ignore pop PB if Emulation is set */ + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Emulation >> 8, 8)); + branch1 = CachePtr; + GenARM_B(CondNE, PCOFFSET(branch1)); + + EmitPop(in, true); + + GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + + PatchJump((uint32_t *)branch1, CachePtr); + + GenARM_LDR_Imm(CondNE, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + GenARM_ADD_Reg(CondAL, RegA1, RegCPUPC, 0, RegA2); + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + EmitChecks(0); + GenARM_B(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(RTL): + CASE_OP(RTS): + { + dcycles += ONE_CYCLE * 2; + if (in->Op == FnRTS) + dcycles += ONE_CYCLE; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + EmitPop(in, false); + + GenARM_ADD_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(1, 0)); + 
GenARM_UXTH_Reg(CondAL, RegCPUPC, RegA1); /* No need to preserve RegCPU. It will be overwritten later, and this should never be a WaitAddress */ + + if (in->Op == FnRTL) + { + EmitPop(in, true); + GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + GenARM_ADD_Reg(CondAL, RegA1, RegCPUPC, 0, RegA2); + } + else + { + GenARM_ADD_Imm(CondAL, RegA1, RegCPUPC, ENCODE_IMM(ICPU.Registers.PB, 16)); + } + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + if (in->Op == FnRTL) + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + EmitChecks(0); + GenARM_B(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(BPL): + EmitBranch(in, Negative, false, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BMI): + EmitBranch(in, Negative, true, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BVC): + EmitBranch(in, Overflow, false, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BVS): + EmitBranch(in, Overflow, true, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BCC): + EmitBranch(in, Carry, false, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BCS): + EmitBranch(in, Carry, true, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BZC): + EmitBranch(in, Zero, false, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BZS): + EmitBranch(in, Zero, true, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BRK): + CASE_OP(COP): + { + uint32_t destPC; + dcycles += ONE_CYCLE; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + if (!CheckEmulation()) + { + GenARM_LDRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + EmitPush(in, true); + dcycles += ONE_CYCLE; + } + EmitFlushCyclesPC(&dpc, &dcycles); + + 
GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(PCBase)); + + if (!CheckEmulation()) + { + GenARM_ADD_Imm(CondAL, RegA3, RegCPUPC, ENCODE_IMM(1, 0)); + } + + GenARM_SUB_Reg(CondAL, RegA1, RegA3, 0, RegA1); + + EmitPush(in, false); + + GenARM_AND_Imm(CondAL, RegA1, RegP, ENCODE_IMM(0xFF, 0)); + EmitPush(in, true); + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OPEN_BUS_OFFSET); + + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Decimal, 0)); + GenARM_ORR_Imm(CondAL, RegP, RegP, ENCODE_IMM(IRQ, 0)); + GenARM_STRH_Imm(CondAL, RegP, RegCPU, ICPU_OFFSET(Registers.P)); + + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0)); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(ShiftedPB)); + + if (in->Op == FnBRK) + destPC = CheckEmulation() ? 0xFFFE : 0xFFE6; + else + destPC = CheckEmulation() ? 0xFFF4 : 0xFFE4; + + GenARM_MOV32_Imm(CondAL, RegA1, destPC); + GenARM_SUB_Imm(CondAL, RegA2, RegCPUPC, ENCODE_IMM(1, 0)); + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(CLC): + CASE_OP(CLI): + CASE_OP(CLD): + CASE_OP(CLV): + CASE_OP(SEC): + CASE_OP(SEI): + CASE_OP(SED): + { + uint16_t flag; + bool update = false; + + switch(in->Op) + { + CASE_OP(SEC): + CASE_OP(CLC): + flag = Carry; + break; + CASE_OP(SEI): + CASE_OP(CLI): + flag = IRQ; + update = true; + break; + CASE_OP(SED): + CASE_OP(CLD): + flag = Decimal; + update = true; + break; + CASE_OP(CLV): + flag = Overflow; + break; + default: + fprintf(stderr, "Invalid opcode: %X\n", in->Opcode); + break; + } + + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (in->Op == FnCLC || in->Op == FnCLD || in->Op == FnCLI 
|| in->Op == FnCLV) + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(flag, 0)); + else + GenARM_ORR_Imm(CondAL, RegP, RegP, ENCODE_IMM(flag, 0)); + + if (update) + GenARM_STRH_Imm(CondAL, RegP, RegCPU, ICPU_OFFSET(Registers.P)); + + EmitChecks(0); + + if (in->SectionUpdate) + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + if (in->ShouldBreak) + { + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + } + break; + } + CASE_OP(SEP): + CASE_OP(REP): + { + dpc++; + dcycles += CPU.MemSpeed + ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (IS_OP(REP)) + GenARM_BIC_Imm(CondAL, RegA1, RegP, ENCODE_IMM(in->PC[1], 0)); + else + GenARM_ORR_Imm(CondAL, RegA1, RegP, ENCODE_IMM(in->PC[1], 0)); + + EmitStoreRegister(in, ArgP, RegA1, false); + + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + break; + } + CASE_OP(XCE): + { + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BIC_Imm(CondAL, RegA2, RegP, ENCODE_IMM(Emulation >> 8, 8)); + GenARM_BIC_Imm(CondAL, RegA2, RegA2, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Reg(CondAL, RegA2, RegA2, ENCODE_SHIFT(ShiftLSR, 8), RegP); + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondNE, RegA2, RegA2, ENCODE_IMM(Emulation >> 8, 8)); + + EmitStoreRegister(in, ArgP, RegA2, false); + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(STP): + { +#ifdef NO_SPEEDHACKS + dpc--; + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(Flags)); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(DEBUG_MODE_FLAG, 0)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(Flags)); +#else + int8_t disp = (in->PC[1] & 0x7F) | ((in->PC[1] & 0x40) << 1); + uint32_t pc = in->PC + 2 - CPU.PCBase; + uint32_t target = (pc + disp) & 0xFFFF; + bool overflow = target != (pc + disp); + dpc++; + + if (overflow) + disp = (target - pc); + + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, 
CPU_OFFSET(WaitAddress)); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown)); + + /* Interpreter runs BranchCheck here. Only when APU is disabled + * until next reset. So cost of the load seems not worth it, unless + * games break. */ + + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Zero, 0)); + + if (in->PC[1] & 0x80) + GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks)); + else + GenARM_BL(CondNE, PCOFFSET(JumpDirectChecks)); + + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0)); + + if (Settings.Shutdown) + { + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + } + + if (disp > 0) + { + if (disp & 0xFF) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + else if (disp < 0) + { + disp = -disp; + if (disp & 0xFF) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + + if (Settings.Shutdown) { + GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); + GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); + } +#endif + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + break; + } + CASE_OP(XBA): + { + dcycles += TWO_CYCLES; + EmitFlushCyclesPC(&dpc, &dcycles); + GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 8), RegA); + GenARM_MOV_Reg(CondAL, RegA, ENCODE_SHIFT(ShiftLSL, 24), RegA); + GenARM_ORR_Reg(CondAL, RegA, RegA1, ENCODE_SHIFT(ShiftLSR, 16), RegA); + + EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), true, false); + EmitChecks(0); + break; + } + CASE_OP(WAI): + { + uint8_t *loop; + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(1, 0)); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitingForInterrupt)); + dpc--; + loop = CachePtr; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (Settings.Shutdown) { + GenARM_BL(CondAL, 
PCOFFSET(S9xCallWAIShutdown)); + } + + EmitChecks(CheckWFI); + GenARM_B(CondAL, PCOFFSET(loop)); + break; + } + CASE_OP(WDM): + { +#ifndef NO_SPEEDHACKS + int8_t disp = 0xF0 | (in->PC[1] & 0xF); + uint32_t pc = in->PC + 2 - CPU.PCBase; + uint32_t target = (pc + disp) & 0xFFFF; + bool overflow = target != (pc + disp); + uint8_t flag; + uint8_t skip; + dpc++; + + if (overflow) + disp = (target - pc); + + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown)); + + switch(in->PC[1] & 0xF0) + { + case 0x10: /* BPL */ + flag = Negative; + skip = CondNE; + break; + case 0x30: /* BMI */ + flag = Negative; + skip = CondEQ; + break; + case 0x50: /* BVC */ + flag = Overflow; + skip = CondNE; + break; + case 0x70: /* BVS */ + flag = Overflow; + skip = CondEQ; + break; + case 0x80: /* BRA */ + flag = 0; + break; + case 0x90: /* BCC */ + flag = Carry; + skip = CondNE; + break; + case 0xB0: /* BCS */ + flag = Carry; + skip = CondEQ; + break; + case 0xD0: /* BNE */ + flag = Zero; + skip = CondNE; + break; + case 0xF0: /* BEQ */ + flag = Zero; + skip = CondEQ; + break; + default: + didBreak = true; + break; + } + + if (!didBreak) + { + /* Interpreter runs BranchCheck here. Only when APU is disabled + * until next reset. So cost of the load seems not worth it, unless + * games break. 
*/ + + if (flag) + { + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(flag, 0)); + GenARM_BL(skip, PCOFFSET(JumpDirectChecks)); + } + + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0)); + + if (Settings.Shutdown) + { + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + } + + if (disp > 0) + { + if (disp & 0xFF) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + else if (disp < 0) + { + disp = -disp; + if (disp & 0xFF) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + + if (Settings.Shutdown) { + GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); + GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); + } + } + +#endif + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + break; + } + CASE_OP(NOP): + { + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + EmitChecks(0); + break; + } + } + + if (in->ShouldBreak && !didBreak) + { + GenARM_B(CondAL, PCOFFSET(JumpIndirect)); + } +} +
/*
 * EmitS9xCall: emit code that runs one instruction through the handler
 * stored in in->S9xOpcode instead of translating it.
 *
 * The emitted sequence stores RegCPUPC to CPU.PCAtOpcodeStart, adds 1 to
 * RegCPUPC and adds CPU.MemSpeed (the value read while generating, encoded
 * as an immediate) to RegCycles. It then emits BL to S9xRegsToMem, to the
 * handler, to S9xCallUpdateSection (only when in->SectionUpdate is set) and
 * to S9xMemToRegs, followed by EmitChecks(0). When in->ShouldBreak is set a
 * final branch to JumpIndirect is emitted.
 */
+static void EmitS9xCall(Instruction *in) +{ + /* CPU.PCAtOpcodeStart = CPU.PC; */ + /* CPU.Cycles += CPU.MemSpeed; */ + /* CPU.PC++; */ + + GenARM_STR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PCAtOpcodeStart)); + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(1, 0)); + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(CPU.MemSpeed, 0)); + + GenARM_BL(CondAL, PCOFFSET(S9xRegsToMem)); + + GenARM_BL(CondAL, PCOFFSET((uintptr_t)in->S9xOpcode)); + + if (in->SectionUpdate) + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + GenARM_BL(CondAL, PCOFFSET(S9xMemToRegs)); + + EmitChecks(0); + + if (in->ShouldBreak) + { + GenARM_B(CondAL, PCOFFSET(JumpIndirect)); + } +} +
/*
 * EmitInterpreterLoop: emit a self-repeating sequence that picks the handler
 * at run time rather than at generation time. The `in` argument is not used.
 *
 * Per iteration the emitted code loads the byte at RegCPUPC into RegA1,
 * stores RegCPUPC to CPU.PCAtOpcodeStart, loads the S9xOpcodes field into
 * RegA2, adds 1 to RegCPUPC and CPU.MemSpeed to RegCycles, and reloads RegA2
 * from RegA2 combined with RegA1 shifted left by 2 (presumably
 * table base + opcode * 4 -- TODO confirm against GenARM_LDR_Reg).
 * It then emits BL S9xRegsToMem, BLX RegA2, BL S9xCallUpdateSection,
 * BL S9xMemToRegs, EmitChecks(0) and an unconditional branch back to the
 * first emitted instruction.
 *
 * NOTE(review): RegA2 is loaded before the BL to S9xRegsToMem and consumed
 * after it, so this relies on that routine leaving RegA2 intact -- confirm.
 */
+static void EmitInterpreterLoop(Instruction *in) +{ + /* CPU.PCAtOpcodeStart = CPU.PC; */ + /*
CPU.Cycles += CPU.MemSpeed; */ + /* CPU.PC++; */ + uint8_t *start = CachePtr; + + GenARM_LDRB_Imm(CondAL, RegA1, RegCPUPC, 0); + GenARM_STR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PCAtOpcodeStart)); + GenARM_LDR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(S9xOpcodes)); + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(1, 0)); + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(CPU.MemSpeed, 0)); + GenARM_LDR_Reg(CondAL, RegA2, RegA2, 1, ENCODE_SHIFT(ShiftLSL, 2), RegA1); + + GenARM_BL(CondAL, PCOFFSET(S9xRegsToMem)); + + GenARM_BLX_Reg(CondAL, RegA2); + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + GenARM_BL(CondAL, PCOFFSET(S9xMemToRegs)); + + EmitChecks(0); + + GenARM_B(CondAL, PCOFFSET(start)); +} +
/*
 * EmitOne: emit the code for a single parsed instruction by calling its
 * in->Emitter callback.
 *
 * When NDEBUG is not defined, a call to S9xCallCheckInstruction is emitted
 * first, with in->Opcode in RegA1, the in->PC pointer in RegA2 and, in
 * RegA3, 1 if the emitter is EmitS9xCall or 0 otherwise.
 */
+static void EmitOne(Instruction *in) +{ + +#ifndef NDEBUG + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(in->Opcode, 0)); + GenARM_MOV32_Imm(CondAL, RegA2, (uintptr_t)in->PC); + GenARM_MOV_Imm(CondAL, RegA3, ENCODE_IMM(in->Emitter == EmitS9xCall, 0)); + GenARM_BL(CondAL, PCOFFSET(S9xCallCheckInstruction)); +#endif + + /* Call Opcode */ + in->Emitter(in); +} +
/*
 * Size<Mode>: amount added to the parse pointer `pc` by OP/OPD (through
 * OPSIZE) for each OpAddrMode name; presumably the operand length in bytes,
 * excluding the opcode byte -- TODO confirm against opgen.h.
 */
+#define SizeNone 0 +#define SizeRegister8 0 +#define SizeRegister16 0 +#define SizeImmediate8 1 +#define SizeImmediate16 2 +#define SizeImmediate24 3 +#define SizeZeroPage8 1 +#define SizeZeroPage16 1 +#define SizeIndirect8 1 +#define SizeIndirect16 1 +#define SizeIndirectX8 1 +#define SizeIndirectX16 1 +#define SizeIndirectY8 1 +#define SizeIndirectY16 1 +#define SizeIndirectS8 1 +#define SizeIndirectS16 1 +#define SizeIndirectFar8 1 +#define SizeIndirectFar16 1 +#define SizeAbsolute8 2 +#define SizeAbsolute16 2 +#define SizeLong8 3 +#define SizeLong16 3 +
/*
 * OPSIZE / OPADDRMODE / OPFN: choose between the `t` and `f` variant of a
 * Size / Addr / Fn name depending on `cond`. The expansions are bare
 * conditional expressions without surrounding parentheses.
 */
+#define OPSIZE(cond, t, f) cond ? Size##t : Size##f +#define OPADDRMODE(cond, t, f) cond ? Addr##t : Addr##f +#define OPFN(cond, t, f) cond ?
Fn##t : Fn##f +
/*
 * OP: case label for opcode 0x<opcode> translated by EmitNativeCall.
 * Expects locals `pc` and `in` at the expansion site: advances pc by the
 * addressing-mode size and fills in.Emitter, in.ShouldBreak and
 * in.SectionUpdate (from `flags`), in.Op, in.AddrMode, in.Arg1, in.Arg2,
 * in.OutFlags (`gen`) and in.InFlags (`used`).
 */
+#define OP(opcode, addrmode, op, arg1, arg2, gen, used, flags) \ + case 0x##opcode: pc += OPSIZE(addrmode); in.Emitter = EmitNativeCall; in.ShouldBreak = (flags) & OFBreak; in.SectionUpdate = (flags) & OFSectionUpdate; in.Op = OPFN(op); in.AddrMode = OPADDRMODE(addrmode); in.Arg1 = Arg##arg1; in.Arg2 = Arg##arg2; in.OutFlags = gen; in.InFlags = used; break; +
/*
 * OPD: case label for opcode 0x<opcode> handled through EmitS9xCall.
 * Same parameter list as OP, but `op`, `arg1` and `arg2` are not used;
 * in.S9xOpcode is taken from ICPU.S9xOpcodes[0x<opcode>] instead.
 */
+#define OPD(opcode, addrmode, op, arg1, arg2, gen, used, flags) \ + case 0x##opcode: pc += OPSIZE(addrmode); in.Emitter = EmitS9xCall; in.S9xOpcode = ICPU.S9xOpcodes[0x##opcode].S9xOpcode; in.SectionUpdate = (flags) & OFSectionUpdate; in.ShouldBreak = (flags) & OFBreak; in.OutFlags = gen; in.InFlags = used; break; +
/*
 * ParseInstructions: decode instructions starting at `pc` into
 * `instructions`, writing at most `max` entries.
 *
 * Each entry starts zeroed with PC set to the current parse pointer; the
 * per-opcode work (setting `opcode`, and presumably a switch made of OP/OPD
 * cases that advances pc -- TODO confirm) comes from the included
 * arm_dynarec/opgen.h. Decoding stops after the first entry whose
 * ShouldBreak is set or once `max` entries exist. The last decoded entry
 * always gets ShouldBreak = true.
 *
 * Returns the number of entries written.
 *
 * NOTE(review): `i` is an int compared against the size_t `max`.
 */
+static size_t ParseInstructions(uint8_t *pc, Instruction *instructions, size_t max) +{ + bool shouldBreak; + int i; + + for (i = 0, shouldBreak = false; i < max && !shouldBreak; i++) + { + uint8_t opcode; + Instruction in = {0}; + + in.PC = pc; + +#include "arm_dynarec/opgen.h" + + in.Opcode = opcode; + + instructions[i] = in; + shouldBreak = in.ShouldBreak; + } + + if (i > 0) + instructions[i - 1].ShouldBreak = true; + + return i; +} +
/*
 * ScanInstructions: backward pass that trims each instruction's OutFlags.
 *
 * CurrentFlags starts as Negative | Zero | Carry | Overflow. Walking from
 * the last instruction towards the first, the InFlags of the following
 * instruction are added to CurrentFlags, the current instruction's OutFlags
 * are reduced to the bits present in CurrentFlags, and those remaining bits
 * are then removed from CurrentFlags. The last instruction's OutFlags are
 * never modified. Lists shorter than two entries are left untouched.
 */
+static void ScanInstructions(Instruction *instructions, size_t length) +{ + uint16_t CurrentFlags = (Negative | Zero | Carry | Overflow); + uint16_t i; + + if (length < 2) + return; + + /* Dead flag elimination */ + for (i = length - 1; i > 0; i--) + { + Instruction *in = &instructions[i - 1]; + Instruction *next = &instructions[i]; + + CurrentFlags = CurrentFlags | next->InFlags; + in->OutFlags = in->OutFlags & CurrentFlags; + CurrentFlags = CurrentFlags & ~in->OutFlags; + } +} +
/*
 * Emit: generate code at CachePtr for the block that starts at `pc` and
 * store the start address of the generated code in *block.
 *
 * If Memory.BlockIsRAM is set for the memory block containing
 * pc | ICPU.ShiftedPB, only the interpreter loop is emitted. Otherwise up
 * to 100 instructions are parsed from pc + CPU.PCBase, passed through
 * ScanInstructions and emitted in order with EmitOne. __clear_cache is
 * called over the range from the start address to CachePtr before the
 * result is stored.
 */
+static void Emit(uint16_t pc, uint8_t **block) +{ + uint8_t *start = CachePtr; + uint8_t *StartPC = pc + CPU.PCBase; + uint32_t address = pc | ICPU.ShiftedPB; + int32_t MemoryBlock = (address >> MEMMAP_SHIFT) & MEMMAP_MASK; + + Instruction instructions[100]; + size_t length; + int i; + + if (Memory.BlockIsRAM[MemoryBlock]) + { + /* Code in RAM, for now just run
interpreter until interrupt */ + Instruction in = { 0 }; + instructions[0] = in; + EmitInterpreterLoop(&instructions[0]); + } + else + { + length = ParseInstructions(StartPC, instructions, sizeof(instructions) / sizeof(instructions[0])); + ScanInstructions(instructions, length); + + for (i = 0; i < length; i++) + EmitOne(&instructions[i]); + } + + __clear_cache(start, CachePtr); + *block = start; +} +
/*
 * FetchBlock: return the slot FindBlock yields for `pc`, calling Emit to
 * fill it first when the slot holds a null pointer.
 *
 * When NDEBUG is not defined this also increments Metrics.Finds (and
 * Metrics.Emits when the slot was empty), clears Metrics.InterpretedBlock,
 * and appends (CacheSection << 16) | pc and the block address to the
 * 128-entry `trace` array, wrapping traceidx modulo 128.
 *
 * NOTE(review): the block address is stored into an int array element, which
 * would truncate where pointers are wider than int.
 */
+uint8_t **FetchBlock(uint16_t pc) +{ + uint8_t **block = FindBlock(pc); +#ifndef NDEBUG + Metrics.Finds++; + Metrics.InterpretedBlock = false; + + if (!*block) + Metrics.Emits++; +#endif + if (!*block) + Emit(pc, block); + +#ifndef NDEBUG + trace[traceidx++] = (CacheSection << 16) | pc; + trace[traceidx++] = (uintptr_t)*block; + traceidx %= 128; +#endif + + return block; +} +
/*
 * S9xMainLoop_Dyna: main loop entry for the dynarec.
 *
 * Calls UpdateSection, fetches the block for CPU.PC - CPU.PCBase and enters
 * it with BlockEnter. Afterwards it writes CPU.PC - CPU.PCBase to
 * ICPU.Registers.PC (and IAPU.PC - IAPU.RAM to IAPU.Registers.PC unless
 * USE_BLARGG_APU is defined), calls S9xPackStatus (and S9xAPUPackStatus
 * unless USE_BLARGG_APU is defined) and clears SCAN_KEYS_FLAG in CPU.Flags.
 *
 * With LAGFIX defined, the fetch/enter step repeats until finishedFrame is
 * set; on that iteration finishedFrame is reset to false and the loop exits
 * without the pack-status / flag-clearing step.
 */
+void S9xMainLoop_Dyna(void) +{ + UpdateSection(); + +#ifdef LAGFIX + do + { +#endif + uint8_t **block = FetchBlock(CPU.PC - CPU.PCBase); + BlockEnter(*block); + + ICPU.Registers.PC = CPU.PC - CPU.PCBase; +#ifndef USE_BLARGG_APU + IAPU.Registers.PC = IAPU.PC - IAPU.RAM; +#endif + +#ifdef LAGFIX + if(!finishedFrame) + { +#endif + S9xPackStatus(); +#ifndef USE_BLARGG_APU + S9xAPUPackStatus(); +#endif + CPU.Flags &= ~SCAN_KEYS_FLAG; +#ifdef LAGFIX + } + else + { + finishedFrame = false; + break; + } + } while(!finishedFrame); +#endif +} +
/*
 * DynaInit: one-time setup of the code cache.
 *
 * On the first call (guarded by a function-local static flag) it calls
 * DynaBreak when NDEBUG is not defined, then assigns Cache from MapRWX,
 * passing DynaInit's own address and BUFFER_SIZE + BUFFER_EXTRA.
 *
 * Returns -1 when MapRWX yields a null pointer, 0 otherwise.
 *
 * NOTE(review): `inited` is set to true before the mapping is attempted, so
 * after a failed first call every later call skips the mapping and returns 0
 * while Cache is still null. Looks like a bug; consider setting the flag
 * only once MapRWX has succeeded.
 */
+int DynaInit(void) +{ + static bool inited = false; + + if (!inited) + { + inited = true; +#ifndef NDEBUG + DynaBreak(); +#endif + + Cache = MapRWX((void *)((uintptr_t)DynaInit), BUFFER_SIZE + BUFFER_EXTRA); + + if (!Cache) + return -1; + } + + return 0; +} +
/*
 * DynaReset: make sure the dynarec is initialised, then empty the cache.
 *
 * Returns -1 if DynaInit reports failure; otherwise prints a start-up
 * message, calls CacheEmpty and returns 0.
 */
+int DynaReset(void) +{ + if (DynaInit()) + return -1; + + printf("Starting dynarec\n"); + CacheEmpty(); + return 0; +} |