path: root/source/arm_dynarec
author     neonloop  2022-02-09 07:15:14 +0000
committer  neonloop  2022-02-09 07:15:14 +0000
commit     ce34e879e348cecd4e21329be5974cc4162fa6c4 (patch)
tree       6bc241b726fc554a31d778642f45a2d1ea2309b4 /source/arm_dynarec
parent     874c431fdaff24413886416ed3ffa3455681ac01 (diff)
download   snes9x2005-ce34e879e348cecd4e21329be5974cc4162fa6c4.tar.gz
           snes9x2005-ce34e879e348cecd4e21329be5974cc4162fa6c4.tar.bz2
           snes9x2005-ce34e879e348cecd4e21329be5974cc4162fa6c4.zip
Adds experimental ARM dynarec
Supports ARMv5 and higher, enable with USE_DYNAREC Makefile variable
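
A build enabling it would look something like this (the exact value the
Makefile tests for is an assumption here):

    make USE_DYNAREC=1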
Diffstat (limited to 'source/arm_dynarec')
-rw-r--r--  source/arm_dynarec/armfn.S      501
-rw-r--r--  source/arm_dynarec/armfn.h       50
-rw-r--r--  source/arm_dynarec/armgen.h     576
-rw-r--r--  source/arm_dynarec/dynaexec.c  3049
-rw-r--r--  source/arm_dynarec/dynaexec.h    14
-rw-r--r--  source/arm_dynarec/map_rwx.c     16
-rw-r--r--  source/arm_dynarec/opdef.h      281
-rw-r--r--  source/arm_dynarec/opgen.h       26
8 files changed, 4513 insertions, 0 deletions
diff --git a/source/arm_dynarec/armfn.S b/source/arm_dynarec/armfn.S
new file mode 100644
index 0000000..c5d1b3f
--- /dev/null
+++ b/source/arm_dynarec/armfn.S
@@ -0,0 +1,501 @@
+#define RegA r4
+#define RegX r5
+#define RegY r6
+#define RegP r7
+#define RegCPU r8
+#define RegCPUPC r9
+#define RegCycles r10
+#define RegChecks ip
+
+#ifndef NDEBUG
+#define METRICS_GETSETS_OFF 28
+#define METRICS_SLOWGETSETS_OFF 32
+#endif
+
+#define CPU_FLAGS_OFF 0
+#define CPU_PC_OFF 12
+#define CPU_PCBASE_OFF 16
+#define CPU_PC_AT_OPCODE_START_OFF 20
+#define CPU_WAIT_ADDRESS_OFF 24
+#define CPU_CYCLES_OFF 32
+#define CPU_NEXT_EVENT_OFF 36
+#define CPU_ICPU_OFF 0x80
+#define CPU_MEMORY_MAP_OFF 0x100
+
+#define ICPU_S9X_OPCODES_OFF (CPU_ICPU_OFF + 4)
+#define ICPU_REG_PB_OFF (CPU_ICPU_OFF + 8)
+#define ICPU_REG_DB_OFF (CPU_ICPU_OFF + 9)
+#define ICPU_REG_P_OFF (CPU_ICPU_OFF + 10)
+#define ICPU_REG_A_OFF (CPU_ICPU_OFF + 12)
+#define ICPU_REG_D_OFF (CPU_ICPU_OFF + 14)
+#define ICPU_REG_S_OFF (CPU_ICPU_OFF + 16)
+#define ICPU_REG_X_OFF (CPU_ICPU_OFF + 18)
+#define ICPU_REG_Y_OFF (CPU_ICPU_OFF + 20)
+#define ICPU_REG_PC_OFF (CPU_ICPU_OFF + 22)
+#define ICPU_CARRY_OFF (CPU_ICPU_OFF + 24)
+#define ICPU_ZERO_OFF (CPU_ICPU_OFF + 25)
+#define ICPU_NEGATIVE_OFF (CPU_ICPU_OFF + 26)
+#define ICPU_OVERFLOW_OFF (CPU_ICPU_OFF + 27)
+
+#define MEMORY_MAP_OFF 0x0024
+#define MEMORY_MAP_WRITE_MAP_OFF 0x4000
+#define MEMORY_MAP_SPEED_OFF 0x8000
+#define MEMORY_MAP_BLOCK_IS_RAM_OFF 0x9000
+#define MEMORY_WRITE_MAP_SPEED_OFF 0x4000
+#define MEMORY_WRITE_MAP_BLOCK_IS_RAM_OFF 0x5000
+
+#define SA1_OPCODES_OFF 0
+#define SA1_EXECUTING_OFF 24
+#define SA1_WAIT_COUNTER_OFF 52
+#define SA1_WAIT_BYTE_ADDRESS_1_OFF 56
+#define SA1_WAIT_BYTE_ADDRESS_2_OFF 60
+
+#define FLAG_DECIMAL 0x08
+#define FLAG_INDEX 0x10
+#define FLAG_MEMORY 0x20
+#define FLAG_EMULATION 0x100
+
+#define FUNC(name) \
+ .global name ; \
+name:
+
+.macro S9xCall func
+ push { ip, lr }
+ bl S9xRegsToMem
+ bl \func
+ bl S9xMemToRegs
+ pop { ip, pc }
+.endm
+
+ .text
+ .align 2
+
+#ifndef NDEBUG
+.macro MetricsIncOffset off
+ push { r0, r1 }
+ ldr r0, = Metrics
+ ldr r1, [r0, #\off]
+ add r1, r1, #1
+ str r1, [r0, #\off]
+ pop { r0, r1 }
+.endm
+
+FUNC(DynaBreak)
+ bx lr
+#endif
+
+FUNC(BlockEnter)
+ push { r4-r11, ip, lr }
+ ldr RegCPU, = CPU
+ bl S9xMemToRegs
+ bx r0
+
+FUNC(BlockReturn)
+ bl S9xRegsToMem
+ pop { r4-r11, ip, pc }
+
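+ @@ RegChecks, as built by the macro below, packs two per-instruction
+ @@ checks into one register: CPU.Flags in the low half and CPU.NextEvent
+ @@ in the high half, so generated code can test pending NMI/IRQ bits and
+ @@ compare RegCycles against the next event without extra loads.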
+.macro LoadChecks rs
+ ldr \rs, [RegCPU, #CPU_NEXT_EVENT_OFF]
+ ldr RegChecks, [RegCPU, #CPU_FLAGS_OFF]
+ orr RegChecks, RegChecks, \rs, lsl #16
+.endm
+
+.macro PushCPUState
+ str RegCPUPC, [RegCPU, #CPU_PC_OFF]
+ str RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+.endm
+
+.macro PopCPUState rs
+ ldr RegCPUPC, [RegCPU, #CPU_PC_OFF]
+ ldr RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ LoadChecks \rs
+.endm
+
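+ @@ The two macros below mirror the interpreter's split flag storage:
+ @@ unpacking writes RegP to ICPU.Registers.P and derives the separate
+ @@ Zero/Negative/Carry/Overflow bytes from it, packing rebuilds the
+ @@ N/V/Z/C bits of RegP (mask 0xc3) from those bytes again.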
+.macro S9xUnpackStatusFast
+ strh RegP, [RegCPU, #ICPU_REG_P_OFF]
+ mov r3, #1
+ and r2, r3, RegP, lsr #1
+ sub r2, r2, #1
+ strb r2, [RegCPU, #ICPU_ZERO_OFF]
+ and r2, RegP, #0x80
+ strb r2, [RegCPU, #ICPU_NEGATIVE_OFF]
+ and r2, RegP, r3
+ strb r2, [RegCPU, #ICPU_CARRY_OFF]
+ and r2, r3, RegP, lsr #6
+ strb r2, [RegCPU, #ICPU_OVERFLOW_OFF]
+.endm
+
+.macro S9xPackStatusFast
+ ldrh RegP, [RegCPU, #ICPU_REG_P_OFF]
+ ldrb r2, [RegCPU, #ICPU_ZERO_OFF]
+ ldrb r3, [RegCPU, #ICPU_NEGATIVE_OFF]
+ bic RegP, RegP, #0xc3
+ cmp r2, #0
+ ldrb r2, [RegCPU, #ICPU_CARRY_OFF]
+ orreq RegP, RegP, #0x2
+ tst r3, #0x80
+ ldrb r3, [RegCPU, #ICPU_OVERFLOW_OFF]
+ orrne RegP, RegP, #0x80
+ orr RegP, RegP, r2
+ orr RegP, RegP, r3, lsl #6
+ strh RegP, [RegCPU, #ICPU_REG_P_OFF]
+.endm
+
+ @@ Callers assume r0 is not touched
+FUNC(S9xCallUnpackStatusFast)
+ S9xUnpackStatusFast
+ bx lr
+
+ @@ Preserves scratch and r0, can be used as a wrapper
+FUNC(S9xRegsToMem)
+ push { r0, r1, r2, r3, ip, lr }
+ str RegCPUPC, [RegCPU, #CPU_PC_OFF]
+ str RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ strh RegA, [RegCPU, #ICPU_REG_A_OFF]
+ strh RegX, [RegCPU, #ICPU_REG_X_OFF]
+ strh RegY, [RegCPU, #ICPU_REG_Y_OFF]
+ S9xUnpackStatusFast
+ pop { r0, r1, r2, r3, ip, pc }
+
+
+ @@ Preserves scratch and r0, can be used as a wrapper
+FUNC(S9xMemToRegs)
+ push { r0, r1, r2, r3, ip, lr }
+ ldr RegCPUPC, [RegCPU, #CPU_PC_OFF]
+ ldr RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ S9xPackStatusFast
+ ldrh RegA, [RegCPU, #ICPU_REG_A_OFF]
+ ldrh RegX, [RegCPU, #ICPU_REG_X_OFF]
+ ldrh RegY, [RegCPU, #ICPU_REG_Y_OFF]
+ pop { r0, r1, r2, r3, ip, lr }
+ LoadChecks r2
+ bx lr
+
+FUNC(UpdateSection)
+ push { RegP, RegCPU, lr }
+ ldr RegCPU, =CPU
+ ldrh RegP, [RegCPU, #ICPU_REG_P_OFF]
+ bl S9xCallUpdateSection
+ pop { RegP, RegCPU, pc }
+
+FUNC(S9xCallUpdateSection)
+ ldr r1, =CacheSection
+ ldrb r0, [RegCPU, #ICPU_REG_PB_OFF]
+ tst RegP, #FLAG_DECIMAL
+ lsl r0, #4
+ orrne r0, r0, #0x8000
+ tst RegP, #FLAG_EMULATION
+ orrne r0, r0, #0x4000
+ bxne lr
+ tst RegP, #FLAG_INDEX
+ orrne r0, r0, #0x1000
+ tst RegP, #FLAG_MEMORY
+ orrne r0, r0, #0x2000
+ str r0, [r1]
+ bx lr
+
+FUNC(LoadChecks)
+ LoadChecks r2
+ bx lr
+
+FUNC(S9xCallSA1MainLoop)
+ push { ip, lr }
+ bl S9xSA1MainLoop
+ pop { ip, pc }
+
+FUNC(S9xCallSetPCBase)
+ push { ip, lr }
+ bl S9xSetPCBase
+ pop { ip, pc }
+
+.macro S9xCallHandleHBlank type
+FUNC(S9xCallHandleHBlank\type)
+ @ changes cycles, maybe PC in IRQ
+ push { ip, lr }
+ str RegCPUPC, [RegCPU, #CPU_PC_OFF]
+ str RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ bl HandleHBlank\type
+ ldr RegCPUPC, [RegCPU, #CPU_PC_OFF]
+ ldr RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ pop { ip, lr }
+ LoadChecks r2
+ bx lr
+.endm
+
+S9xCallHandleHBlank SFX
+S9xCallHandleHBlank NoSFX
+
+.macro S9xCallHandleFlags WFI
+FUNC(S9xCallHandleFlags\WFI)
+ @ needs S, changes flags (unpack status needed), changes PC, changes cycles
+ push { ip, lr }
+ str RegCPUPC, [RegCPU, #CPU_PC_OFF]
+ str RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ S9xUnpackStatusFast
+ bl HandleFlags\WFI
+ ldr RegCPUPC, [RegCPU, #CPU_PC_OFF]
+ ldr RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ S9xPackStatusFast
+ pop { ip, lr }
+ LoadChecks r2
+ bx lr
+.endm
+
+S9xCallHandleFlags WFI
+S9xCallHandleFlags NoWFI
+
+.macro S9xCallHandleChecks WFI, SFX
+FUNC(S9xCallHandleChecks\WFI\SFX)
+ push { r1, lr }
+ mov r0, #0
+ cmp RegCycles, RegChecks, lsr #16
+ blhs S9xCallHandleHBlank\SFX
+ cmp r0, #0
+ popne { r1, lr }
+ bne BlockReturn
+
+ tst RegChecks, #0x880
+ blne S9xCallHandleFlags\WFI
+ cmp r0, #0
+ movne RegCPUPC, r0
+ popne { r1, lr }
+ bne JumpIndirect
+
+ pop { r1, pc }
+.endm
+
+S9xCallHandleChecks WFI, SFX
+S9xCallHandleChecks WFI, NoSFX
+S9xCallHandleChecks NoWFI, SFX
+S9xCallHandleChecks NoWFI, NoSFX
+
+#ifndef NDEBUG
+FUNC(S9xCallCheckInstruction)
+ S9xCall CheckInstruction
+#endif
+
+FUNC(JumpIndirect)
+ push { r1, ip }
+ ldr r1, [RegCPU, #CPU_PCBASE_OFF]
+ sub r0, RegCPUPC, r1
+ bl FetchBlock
+ pop { r1, ip }
+ ldr r0, [r0]
+ bx r0
+
+FUNC(JumpDirectChecks)
+ push { r1, ip }
+ mov r0, RegCPUPC
+ sub r1, lr, #4 @ Patch the opcode at the address of this caller
+ bl PatchJumpDirectChecks
+ pop { r1, ip }
+ bx r0
+
+FUNC(JumpDirect)
+ push { r1, ip }
+ mov r0, RegCPUPC
+ sub r1, lr, #4 @ Patch the opcode at the address of this caller
+ bl PatchJumpDirect
+ pop { r1, ip }
+ bx r0
+
+.macro GetMemFast Type
+FUNC(S9xGet\Type\()Fast)
+ push { r0, r3, ip, lr }
+#ifndef NDEBUG
+ MetricsIncOffset METRICS_GETSETS_OFF
+#endif
+ ldr r2, =0x0fff @ MEMMAP_MASK
+ .ifc \Type,Word
+ and r3, r0, r2
+ cmp r3, r2
+ beq 9f
+ .endif
+
+ add ip, RegCPU, #CPU_MEMORY_MAP_OFF
+ and r3, r2, r0, lsr #12 @ MEMMAP_SHIFT
+ ldr r2, [ip, r3, lsl #2]
+ add lr, ip, #MEMORY_MAP_BLOCK_IS_RAM_OFF
+ cmp r2, #18 @ MAP_LAST
+ blo 9f
+
+ ldrb lr, [lr, r3]
+ add ip, ip, #MEMORY_MAP_SPEED_OFF
+ ldrb ip, [ip, r3]
+ cmp lr, #1
+ streq r1, [RegCPU, #CPU_WAIT_ADDRESS_OFF]
+
+ mov r1, r0, lsl #16
+ mov r1, r1, lsr #16
+ ldrb r0, [r2, r1]
+ .ifc \Type,Word
+ add r1, r1, #1
+ ldrb r1, [r2, r1]
+ .endif
+ .ifc \Type,Byte
+ add RegCycles, RegCycles, ip
+ .else
+ add RegCycles, RegCycles, ip, lsl #1 @ * 2 for GetWord
+ .endif
+ .ifc \Type,Word
+ orr r0, r0, r1, lsl #8
+ .endif
+ pop { r1, r3, ip, pc }
+9:
+#ifndef NDEBUG
+ MetricsIncOffset METRICS_SLOWGETSETS_OFF
+#endif
+ str r1, [RegCPU, #CPU_PC_AT_OPCODE_START_OFF]
+ PushCPUState
+ bl S9xGet\Type
+ pop { r1, r3, ip, lr }
+ PopCPUState r2
+ bx lr
+.endm
+
+GetMemFast Byte
+GetMemFast Word
+
+ .macro SetMemFast Type, SA1
+FUNC(S9xSet\Type\()Fast\SA1)
+ push { r1, r3, ip, lr}
+#ifndef NDEBUG
+ MetricsIncOffset METRICS_GETSETS_OFF
+#endif
+ ldr r2, =0x0fff @ MEMMAP_MASK
+ mov ip, #0
+ .ifc \Type,Word
+ and r3, r1, r2
+ cmp r3, r2
+ beq 9f
+ .endif
+
+ str ip, [RegCPU, #CPU_WAIT_ADDRESS_OFF]
+
+ and r3, r2, r1, lsr #12 @ MEMMAP_SHIFT
+ add ip, RegCPU, #(CPU_MEMORY_MAP_OFF + MEMORY_MAP_WRITE_MAP_OFF)
+ ldr r2, [ip, r3, lsl #2]
+ cmp r2, #18 @ MAP_LAST
+ blo 9f
+
+ add ip, ip, #MEMORY_WRITE_MAP_SPEED_OFF
+ ldrb ip, [ip, r3]
+ .ifc \Type,Byte
+ add RegCycles, RegCycles, ip
+ .else
+	add RegCycles, RegCycles, ip, lsl #1 @ * 2 for SetWord
+ .endif
+
+
+ mov r1, r1, lsl #16
+ mov r1, r1, lsr #16
+
+ @@ Handle SA1 WaitAddress if needed
+ .ifc \SA1,SA1
+ push { r1 }
+ ldr ip, =SA1
+ add r1, r1, r2
+ ldr r3, [ip, #SA1_WAIT_BYTE_ADDRESS_1_OFF]
+ cmp r3, r1
+ beq 8f
+ ldr r3, [ip, #SA1_WAIT_BYTE_ADDRESS_2_OFF]
+ cmp r3, r1
+ beq 8f
+2:
+ pop { r1 }
+ .endif
+
+ strb r0, [r2, r1]
+ .ifc \Type,Word
+ add r1, r1, #1
+ mov r0, r0, lsr #8
+ strb r0, [r2, r1]
+ .endif
+ pop { r1, r3, ip, pc }
+
+ .ifc \SA1,SA1
+8:
+ ldr r1, [ip, #SA1_OPCODES_OFF]
+ mov r3, #0
+ str r3, [ip, #SA1_WAIT_COUNTER_OFF]
+ cmp r1, #0
+ movne r3, #1
+ strb r3, [ip, #SA1_EXECUTING_OFF]
+ b 2b
+ .endif
+9:
+#ifndef NDEBUG
+ MetricsIncOffset METRICS_SLOWGETSETS_OFF
+#endif
+ PushCPUState
+ .ifc \Type,Word
+ mov r0, r0, lsl #16
+ mov r0, r0, lsr #16
+ .else
+ and r0, r0, #0xFF
+ .endif
+ bl S9xSet\Type
+ pop { r1, r3, ip, lr }
+ PopCPUState r2
+ bx lr
+.endm
+
+SetMemFast Byte, NoSA1
+SetMemFast Word, NoSA1
+
+SetMemFast Byte, SA1
+SetMemFast Word, SA1
+
+FUNC(S9xCallFixCycles)
+ tst RegP, #FLAG_EMULATION
+ ldrne r0, =S9xOpcodesE1
+ bne 9f
+
+ tst RegP, #FLAG_MEMORY
+ beq 2f
+ tst RegP, #FLAG_INDEX
+ ldrne r0, =S9xOpcodesM1X1
+ ldreq r0, =S9xOpcodesM1X0
+ b 9f
+2:
+ tst RegP, #FLAG_INDEX
+ ldrne r0, =S9xOpcodesM0X1
+ ldreq r0, =S9xOpcodesM0X0
+9:
+ str r0, [RegCPU, #ICPU_S9X_OPCODES_OFF]
+ bx lr
+
+FUNC(S9xCallCPUShutdown)
+ push { ip, lr }
+ str RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ bl DynaCPUShutdown
+ ldr RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ pop { ip, pc }
+
+FUNC(S9xCallWAIShutdown)
+ push { ip, lr }
+ str RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ bl DynaWAIShutdown
+ ldr RegCycles, [RegCPU, #CPU_CYCLES_OFF]
+ pop { ip, pc }
+
+ .bss
+ .align 4
+ .global CPU
+CPU:
+ .space 0x80 @ Actual: 0x50
+
+ .global ICPU
+ICPU:
+ .space 0x40 @ Actual: 0x34
+
+ .global OpenBus
+OpenBus:
+ .space 0x1
+
+ @ padding so Memory.Map is at 0x100
+ .space 0x3F - 0x24
+
+ .global Memory
+Memory:
+ .space 0xb468
diff --git a/source/arm_dynarec/armfn.h b/source/arm_dynarec/armfn.h
new file mode 100644
index 0000000..4b859f9
--- /dev/null
+++ b/source/arm_dynarec/armfn.h
@@ -0,0 +1,50 @@
+#ifndef ARMFN_H
+#define ARMFN_H
+
+#include <stdint.h>
+
+int BlockEnter(uint8_t *addr);
+void UpdateSection(void);
+
+/* Below can only be called from generated code */
+
+void S9xRegsToMem(void);
+void S9xMemToRegs(void);
+
+void JumpIndirect(uint8_t *pc);
+void JumpDirect(uint8_t *pc);
+void JumpDirectChecks(uint8_t *pc);
+
+uint16_t S9xGetByteFast(uint32_t Address, uint8_t *StartPC);
+uint16_t S9xGetWordFast(uint32_t Address, uint8_t *StartPC);
+void S9xSetByteFastSA1(uint16_t Word, uint32_t Address);
+void S9xSetWordFastSA1(uint16_t Word, uint32_t Address);
+void S9xSetByteFastNoSA1(uint16_t Word, uint32_t Address);
+void S9xSetWordFastNoSA1(uint16_t Word, uint32_t Address);
+
+#define S9xSetByteFast (Settings.SA1 ? S9xSetByteFastSA1 : S9xSetByteFastNoSA1)
+#define S9xSetWordFast (Settings.SA1 ? S9xSetWordFastSA1 : S9xSetWordFastNoSA1)
+
+void S9xCallUnpackStatusFast(void);
+void S9xCallUpdateSection(void);
+
+void S9xCallSA1MainLoop(void);
+void S9xCallSetPCBase(uint32_t Address);
+
+void S9xCallHandleChecksWFISFX(void);
+void S9xCallHandleChecksWFINoSFX(void);
+void S9xCallHandleChecksNoWFISFX(void);
+void S9xCallHandleChecksNoWFINoSFX(void);
+
+#define S9xCallHandleChecksWFI (Settings.SuperFX ? S9xCallHandleChecksWFISFX : S9xCallHandleChecksWFINoSFX)
+#define S9xCallHandleChecksNoWFI (Settings.SuperFX ? S9xCallHandleChecksNoWFISFX : S9xCallHandleChecksNoWFINoSFX)
+
+void S9xCallFixCycles(void);
+void S9xCallCPUShutdown(void);
+void S9xCallWAIShutdown(void);
+
+#ifndef NDEBUG
+void S9xCallCheckInstruction(uint8_t opcode, uint8_t *pc);
+#endif
+
+#endif /* ARMFN_H */
diff --git a/source/arm_dynarec/armgen.h b/source/arm_dynarec/armgen.h
new file mode 100644
index 0000000..bec7a58
--- /dev/null
+++ b/source/arm_dynarec/armgen.h
@@ -0,0 +1,576 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+static uint8_t *CachePtr;
+
+enum ARMCond
+{
+ CondEQ,
+ CondNE,
+ CondCS,
+ CondHS = CondCS,
+ CondCC,
+ CondLO = CondCC,
+ CondMI,
+ CondPL,
+ CondVS,
+ CondVC,
+ CondHI,
+ CondLS,
+ CondGE,
+ CondLT,
+ CondGT,
+ CondLE,
+ CondAL,
+};
+
+enum ARMShift
+{
+ ShiftLSL,
+ ShiftLSR,
+ ShiftASR,
+ ShiftROR,
+ ShiftRRX
+};
+
+enum ARMReg
+{
+ RegR0,
+ RegR1,
+ RegR2,
+ RegR3,
+ RegR4,
+ RegR5,
+ RegR6,
+ RegR7,
+ RegR8,
+ RegR9,
+ RegR10,
+ RegR11,
+ RegR12,
+ RegR13,
+ RegR14,
+ RegR15,
+
+ RegA1 = RegR0,
+ RegA2,
+ RegA3,
+ RegA4,
+ RegV1 = RegR4,
+ RegV2,
+ RegV3,
+ RegV4,
+ RegV5,
+ RegV6,
+ RegV7,
+ RegV8,
+ RegIP = RegR12,
+ RegSP,
+ RegLR,
+ RegPC,
+
+ RegA = RegR4,
+ RegX = RegR5,
+ RegY = RegR6,
+ RegP = RegR7,
+ RegCPU = RegR8,
+ RegCPUPC = RegR9,
+ RegCycles = RegR10,
+
+ RegChecks = RegR12,
+};
+
+#define ASCOND(cond) (cond << 28)
+#define ASLIST(reg) (1 << reg)
+#define PCREL(pc, target) ((intptr_t)(target) - (intptr_t)(pc) - 8)
+#define PCOFFSET(function) ((PCREL(CachePtr, function) >> 2) & 0xFFFFFF)
+#define ENCODE_IMM(imm, shl) (((((32 - (shl)) & 0x1F) >> 1) << 8) | ((imm) & 0xFF))
+#define DECODE_IMM(imm) (((imm) & 0xFF) << (32 - (((imm) >> 8) << 1)));
+#define ENCODE_SHIFT(type, imm) ((type) | (((imm) >= 32 ? 0 : (imm)) << 2))
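+
+/* Worked example: ENCODE_IMM(0x88, 4) produces the ARM operand-2 encoding of
+ * 0x880 (8-bit value 0x88, rotate field 14, i.e. rotate right by 28), which is
+ * the constant EmitChecks uses for NMI_FLAG | IRQ_PENDING_FLAG. DECODE_IMM
+ * maps that encoding back to 0x880. */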
+
+#define CPU_ICPU_OFFSET ((uintptr_t)&ICPU - (uintptr_t)&CPU)
+#define CPU_OPEN_BUS_OFFSET ((uintptr_t)&OpenBus - (uintptr_t)&CPU)
+
+#define CPU_OFFSET(field) (offsetof(__typeof__(CPU), field))
+#define ICPU_OFFSET(field) (CPU_ICPU_OFFSET + offsetof(__typeof__(ICPU), field))
+
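+/* Splits imm32 into ARM operand-2 immediates, each an 8-bit chunk placed at an
+ * even bit position, so GenARM_MOV32_Imm below can materialise the constant as
+ * one MOV followed by ORRs. For example, 0x12345678 decomposes into
+ * 0x278 + 0x5400 + 0x2340000 + 0x10000000, i.e. one MOV and three ORRs. */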
+static size_t DecomposeImm32(uint32_t imm32, uint32_t *result)
+{
+ uint8_t shift = 0;
+ size_t count = 0;
+
+ if (!imm32) {
+ result[0] = 0;
+ return 1;
+ }
+
+ while (1)
+ {
+ while (shift < 32 && !(imm32 >> shift & 0x03))
+ shift += 2;
+
+ if (shift >= 32)
+ break;
+
+ if (shift > 24)
+ {
+ uint8_t remaining = (1 << (shift - 24));
+ uint32_t firstStore = count == 0 ? 0 : DECODE_IMM(result[0]);
+
+ if (firstStore && firstStore < remaining)
+ {
+ uint8_t bottom = imm32 >> shift;
+ uint8_t top = (imm32 & (remaining - 1)) << (32 - shift);
+ result[0] = ENCODE_IMM(bottom | top, shift);
+ break;
+ }
+ }
+
+ result[count++] = ENCODE_IMM(imm32 >> shift, shift);
+ shift += 8;
+ }
+
+ return count;
+}
+
+static inline void ARMEmit(uint32_t value)
+{
+ *(uint32_t *)CachePtr = value;
+ CachePtr += 4;
+}
+
+enum ARMDPIOp
+{
+ DPIOpAND,
+ DPIOpEOR,
+ DPIOpSUB,
+ DPIOpRSB,
+ DPIOpADD,
+ DPIOpADC,
+ DPIOpSBC,
+ DPIOpRSC,
+ DPIOpTST,
+ DPIOpTEQ,
+ DPIOpCMP,
+ DPIOpCMN,
+ DPIOpORR,
+ DPIOpMOV,
+ DPIOpBIC,
+ DPIOpNOT,
+};
+
+static void ARMEmitDPI_Imm(enum ARMCond cond, uint8_t op, bool s, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmit(
+ ASCOND(cond) |
+ 0 << 26 | /* 0b00 */
+ 1 << 25 |
+ op << 21 |
+ s << 20 |
+ rn << 16 |
+ rd << 12 |
+ imm
+ );
+}
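+
+/* For example, ARMEmitDPI_Imm(CondAL, DPIOpADD, 0, RegR0, RegR0, ENCODE_IMM(1, 0))
+ * emits 0xE2800001, i.e. "add r0, r0, #1". */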
+
+static void ARMEmitDPI_Reg(enum ARMCond cond, uint8_t op, bool s, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmit(
+ ASCOND(cond) |
+ 0 << 26 | /* 0b00 */
+ 0 << 25 |
+ op << 21 |
+ s << 20 |
+ rn << 16 |
+ rd << 12 |
+ shift << 5 |
+ 0 << 4 |
+ rm
+ );
+}
+
+static void ARMEmitDPI_RegShift(enum ARMCond cond, uint8_t op, bool s, enum ARMReg rd, enum ARMReg rn, enum ARMReg rs, uint8_t type, enum ARMReg rm)
+{
+ ARMEmit(
+ ASCOND(cond) |
+ 0 << 26 | /* 0b00 */
+ 0 << 25 |
+ op << 21 |
+ s << 20 |
+ rn << 16 |
+ rd << 12 |
+ rs << 8 |
+ 0 << 7 |
+ type << 5 |
+ 1 << 4 |
+ rm
+ );
+}
+
+static void GenARM_CMP_Imm(enum ARMCond cond, enum ARMReg reg, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpCMP, 1, 0, reg, imm);
+}
+
+static void GenARM_CMP_Reg(enum ARMCond cond, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpCMP, 1, 0, rn, shift, rm);
+}
+
+static void GenARM_SUB_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpSUB, 0, rd, rn, imm);
+}
+
+static void GenARM_SUBS_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpSUB, 1, rd, rn, imm);
+}
+
+static void GenARM_SUB_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpSUB, 0, rd, rn, shift, rm);
+}
+
+static void GenARM_SUBS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpSUB, 1, rd, rn, shift, rm);
+}
+
+static void GenARM_ADD_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpADD, 0, rd, rn, imm);
+}
+
+static void GenARM_ADD_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpADD, 0, rd, rn, shift, rm);
+}
+
+static void GenARM_ADDS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpADD, 1, rd, rn, shift, rm);
+}
+
+static void GenARM_MOV_Imm(enum ARMCond cond, enum ARMReg rd, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpMOV, 0, rd, 0, imm);
+}
+
+static void GenARM_MOV_Reg(enum ARMCond cond, enum ARMReg rd, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpMOV, 0, rd, 0, shift, rm);
+}
+
+static void GenARM_MOVS_Reg(enum ARMCond cond, enum ARMReg rd, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpMOV, 1, rd, 0, shift, rm);
+}
+
+static void GenARM_AND_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpAND, 0, rd, rn, imm);
+}
+
+static void GenARM_ANDS_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpAND, 1, rd, rn, imm);
+}
+
+static void GenARM_AND_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpAND, 0, rd, rn, shift, rm);
+}
+
+static void GenARM_ANDS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpAND, 1, rd, rn, shift, rm);
+}
+
+static void GenARM_EOR_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpEOR, 0, rd, rn, imm);
+}
+
+static void GenARM_EORS_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpEOR, 1, rd, rn, imm);
+}
+
+static void GenARM_EOR_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpEOR, 0, rd, rn, shift, rm);
+}
+
+static void GenARM_EORS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpEOR, 1, rd, rn, shift, rm);
+}
+
+static void GenARM_ORR_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpORR, 0, rd, rn, imm);
+}
+
+static void GenARM_ORRS_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpORR, 1, rd, rn, imm);
+}
+
+static void GenARM_ORR_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpORR, 0, rd, rn, shift, rm);
+}
+
+static void GenARM_ORRS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpORR, 1, rd, rn, shift, rm);
+}
+
+static void GenARM_TST_Imm(enum ARMCond cond, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpTST, 1, 0, rn, imm);
+}
+
+static void GenARM_TST_Reg(enum ARMCond cond, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpTST, 1, 0, rn, shift, rm);
+}
+
+static void GenARM_BIC_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm)
+{
+ ARMEmitDPI_Imm(cond, DPIOpBIC, 0, rd, rn, imm);
+}
+
+static void GenARM_MVN_Reg(enum ARMCond cond, enum ARMReg rd, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitDPI_Reg(cond, DPIOpNOT, 0, rd, 0, shift, rm);
+}
+
+static void ARMEmitLS_Imm(enum ARMCond cond, bool p, bool w, uint8_t op, enum ARMReg rt, enum ARMReg rn, int16_t imm)
+{
+ bool u = imm >= 0 ? 1 : 0;
+ imm = (u ? imm : -imm) & 0xFFF;
+
+ ARMEmit(
+ ASCOND(cond) |
+ 1 << 26 | /* 0b01 */
+ 0 << 25 |
+ p << 24 |
+ u << 23 |
+ w << 21 |
+ op << 20 |
+ rn << 16 |
+ rt << 12 |
+ imm
+ );
+}
+
+static void ARMEmitLS_Reg(enum ARMCond cond, bool p, bool u, bool w, uint8_t op, enum ARMReg rt, enum ARMReg rn, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmit(
+ ASCOND(cond) |
+ 1 << 26 | /* 0b01 */
+ 1 << 25 |
+ p << 24 |
+ u << 23 |
+ w << 21 |
+ op << 20 |
+ rn << 16 |
+ rt << 12 |
+ shift << 5 |
+ 0 << 4 |
+ rm
+ );
+}
+
+static void ARMEmitLSH_Imm(enum ARMCond cond, bool p, bool w, uint8_t op, enum ARMReg rt, enum ARMReg rn, int16_t imm)
+{
+ bool u = imm >= 0 ? 1 : 0;
+ imm = (u ? imm : -imm) & 0xFF;
+
+ ARMEmit(
+ ASCOND(cond) |
+ 0 << 25 | /* 0b000 */
+ p << 24 |
+ u << 23 |
+ 1 << 22 |
+ w << 21 |
+ op << 20 |
+ rn << 16 |
+ rt << 12 |
+ ((imm & 0xF0) << 4) |
+ 0xb << 4 | /* 0b1011 */
+ (imm & 0x0F)
+ );
+}
+
+static void ARMEmitLSB_Imm(enum ARMCond cond, bool p, bool w, uint8_t op, enum ARMReg rt, enum ARMReg rn, int16_t imm)
+{
+ bool u = imm >= 0 ? 1 : 0;
+ imm = (u ? imm : -imm) & 0xFF;
+
+ ARMEmit(
+ ASCOND(cond) |
+ 0x2 << 25 | /* 0b010 */
+ p << 24 |
+ u << 23 |
+ 1 << 22 |
+ w << 21 |
+ op << 20 |
+ rn << 16 |
+ rt << 12 |
+ imm
+ );
+}
+
+static void GenARM_LDR_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm)
+{
+ ARMEmitLS_Imm(cond, 1, 0, 1, rt, rn, imm);
+}
+
+static void GenARM_LDR_Reg(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, bool add, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitLS_Reg(cond, 1, add, 0, 1, rt, rn, shift, rm);
+}
+
+static void GenARM_LDRH_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm)
+{
+ ARMEmitLSH_Imm(cond, 1, 0, 1, rt, rn, imm);
+}
+
+static void GenARM_LDRB_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm)
+{
+ ARMEmitLSB_Imm(cond, 1, 0, 1, rt, rn, imm);
+}
+
+static void GenARM_STR_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm)
+{
+ ARMEmitLS_Imm(cond, 1, 0, 0, rt, rn, imm);
+}
+
+static void GenARM_STRH_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm)
+{
+ ARMEmitLSH_Imm(cond, 1, 0, 0, rt, rn, imm);
+}
+
+static void GenARM_STRB_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm)
+{
+ ARMEmitLSB_Imm(cond, 1, 0, 0, rt, rn, imm);
+}
+
+static void GenARM_STR_Reg(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, bool add, uint8_t shift, enum ARMReg rm)
+{
+ ARMEmitLS_Reg(cond, 1, add, 0, 0, rt, rn, shift, rm);
+}
+
+static void GenARM_PUSH(uint16_t regs)
+{
+ ARMEmit(
+ ASCOND(CondAL) |
+ (0x92 << 20) | /* 0b10010010 */
+ (RegSP << 16) |
+ (regs)
+ );
+}
+
+static void GenARM_POP(uint16_t regs)
+{
+ ARMEmit(
+ ASCOND(CondAL) |
+ (0x8B << 20) | /* 0b10001011 */
+ (RegSP << 16) |
+ (regs)
+ );
+}
+
+static void GenARM_B(enum ARMCond cond, uint32_t offset)
+{
+ ARMEmit(
+ ASCOND(cond) |
+ (0xA << 24) | /* 0b1010 */
+ (offset)
+ );
+}
+
+static void GenARM_BL(enum ARMCond cond, uint32_t offset)
+{
+ ARMEmit(
+ ASCOND(cond) |
+ (0xB << 24) | /* 0b1011 */
+ (offset)
+ );
+}
+
+static void GenARM_BX_Reg(enum ARMCond cond, enum ARMReg reg)
+{
+ ARMEmit(
+ ASCOND(cond) |
+ (0x12 << 20) | /* 0b 0001 0010 */
+ (0xFFF1 << 4) | /* 0b 1111 1111 1111 0001 */
+ (reg)
+ );
+}
+
+static void GenARM_BLX_Reg(enum ARMCond cond, enum ARMReg reg)
+{
+ ARMEmit(
+ ASCOND(cond) |
+ (0x12 << 20) | /* 0b 0001 0010 */
+ (0xFFF3 << 4) | /* 0b 1111 1111 1111 0011 */
+ (reg)
+ );
+}
+
+static void GenARM_UXTB_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rm)
+{
+ GenARM_AND_Imm(cond, rd, rm, ENCODE_IMM(0xFF, 0));
+}
+
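+/* Replaces the low byte of rd with the low byte of rm while keeping rd's upper
+ * 24 bits, using only MOV/ORR with shifts and a rotate, so it stays within the
+ * ARMv5 instruction set this dynarec targets. */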
+static void GenARM_MOVB_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rm)
+{
+ GenARM_MOV_Reg(cond, rd, ENCODE_SHIFT(ShiftLSR, 8), rd);
+ GenARM_ORR_Reg(cond, rd, rd, ENCODE_SHIFT(ShiftLSL, 24), rm);
+ GenARM_MOV_Reg(cond, rd, ENCODE_SHIFT(ShiftROR, 24), rd);
+}
+
+static void GenARM_MOVBS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg scratch, enum ARMReg rm)
+{
+ GenARM_AND_Imm(cond, rd, rd, ENCODE_IMM(0xFF, 8));
+ GenARM_ANDS_Imm(CondAL, scratch, rm, ENCODE_IMM(0xFF, 0));
+ GenARM_ORR_Reg(CondAL, rd, rd, 0, scratch);
+}
+
+static void GenARM_UXTHS_Reg_(enum ARMCond cond, bool s, enum ARMReg rd, enum ARMReg rm)
+{
+ GenARM_MOV_Reg(cond, rd, ENCODE_SHIFT(ShiftLSL, 16), rm);
+ if (s)
+ GenARM_MOVS_Reg(cond, rd, ENCODE_SHIFT(ShiftLSR, 16), rd);
+ else
+ GenARM_MOV_Reg(cond, rd, ENCODE_SHIFT(ShiftLSR, 16), rd);
+}
+
+static void GenARM_UXTH_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rm)
+{
+ GenARM_UXTHS_Reg_(cond, 0, rd, rm);
+}
+
+static void GenARM_UXTHS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rm)
+{
+ GenARM_UXTHS_Reg_(cond, 1, rd, rm);
+}
+
+static void GenARM_MOV32_Imm(enum ARMCond cond, enum ARMReg reg, uint32_t imm)
+{
+ uint32_t values[4];
+ uint32_t count = DecomposeImm32(imm, values);
+ uint32_t i;
+
+ GenARM_MOV_Imm(cond, reg, values[0]);
+
+ for (i = 1; i < count; i++)
+ {
+ GenARM_ORR_Imm(cond, reg, reg, values[i]);
+ }
+}
diff --git a/source/arm_dynarec/dynaexec.c b/source/arm_dynarec/dynaexec.c
new file mode 100644
index 0000000..d63d710
--- /dev/null
+++ b/source/arm_dynarec/dynaexec.c
@@ -0,0 +1,3049 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "snes9x.h"
+#include "cpuexec.h"
+#include "sa1.h"
+#include "apu.h"
+#include "arm_dynarec/armfn.h"
+#include "arm_dynarec/armgen.h"
+#include "arm_dynarec/dynaexec.h"
+
+#define BUFFER_SIZE (6 << 20)
+#define BUFFER_EXTRA (1 << 20)
+#define BLOCK_SIZE 0x1000
+
+#ifndef NDEBUG
+int trace[128];
+int traceidx;
+int BreakPC;
+int BreakOpcode;
+#endif
+
+enum SNESRegArg
+{
+ ArgNULL,
+ ArgA,
+ ArgX,
+ ArgY,
+ ArgS,
+ ArgD,
+ ArgDB,
+ ArgPB,
+ ArgP,
+ ArgZ,
+};
+
+enum OpcodeFlag
+{
+ OFNone = 0,
+ OFBreak = 1 << 0,
+ OFSectionUpdate = 1 << 1,
+};
+
+#define V(val) val##8, val##16
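+/* Entries wrapped in V() below get 8- and 16-bit variants, e.g. V(FnLD)
+ * expands to FnLD8, FnLD16. */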
+
+enum OpAddrMode
+{
+ AddrNone,
+ V(AddrRegister),
+ V(AddrImmediate),
+ AddrImmediate24,
+ V(AddrZeroPage),
+ V(AddrIndirect),
+ V(AddrIndirectX),
+ V(AddrIndirectY),
+ V(AddrIndirectS),
+ V(AddrIndirectFar),
+ V(AddrAbsolute),
+ V(AddrLong),
+};
+
+enum OpFunction
+{
+ V(FnMOV),
+ V(FnMOVSP),
+ V(FnLD),
+ V(FnST),
+ V(FnPUSH),
+ FnPEA,
+ FnPER,
+ V(FnPOP),
+ FnMVN,
+ FnMVP,
+ V(FnOR),
+ V(FnAND),
+ V(FnEOR),
+ V(FnADC),
+ V(FnSBC),
+ V(FnCMP),
+ V(FnBIT),
+ V(FnINC),
+ V(FnDEC),
+ V(FnTSB),
+ V(FnTRB),
+ V(FnASL),
+ V(FnLSR),
+ V(FnROL),
+ V(FnROR),
+ FnBRA,
+ FnBRL,
+ FnJMP,
+ FnJML,
+ FnJSR,
+ FnJSL,
+ FnRTI,
+ FnRTL,
+ FnRTS,
+ FnBPL,
+ FnBMI,
+ FnBVC,
+ FnBVS,
+ FnBCC,
+ FnBCS,
+ FnBZC,
+ FnBZS,
+ FnBRK,
+ FnCOP,
+ FnCLC,
+ FnCLI,
+ FnCLD,
+ FnCLV,
+ FnSEC,
+ FnSEI,
+ FnSED,
+ FnREP,
+ FnSEP,
+ FnXCE,
+ FnSTP,
+ FnXBA,
+ FnWAI,
+ FnWDM,
+ FnNOP,
+};
+
+#undef V
+
+#define CHECK_8_16(field, value) (in->field == value##8 || in->field == value##16 )
+#define CHECK_FIELD(field, value) (in->field == value)
+#define IS_VOP(value) (CHECK_8_16(Op, Fn##value))
+#define IS_OP(value) (CHECK_FIELD(Op, Fn##value))
+#define IS_VADDRMODE(value) (CHECK_8_16(AddrMode, Addr##value))
+#define IS_ADDRMODE(value) (CHECK_FIELD(AddrMode, Addr##value))
+
+#define CASE_VOP(value) case Fn##value##8: case Fn##value##16
+#define CASE_OP(value) case Fn##value
+
+enum ChecksFlags
+{
+ CheckWFI = 1,
+};
+
+struct Instruction;
+
+typedef struct Instruction {
+ void (*S9xOpcode)(void);
+ void (*Emitter)(struct Instruction *);
+ uint8_t *PC;
+ uint8_t Opcode;
+ enum OpAddrMode AddrMode;
+ enum OpFunction Op;
+ enum SNESRegArg Arg1;
+ enum SNESRegArg Arg2;
+ uint16_t OutFlags;
+ uint16_t InFlags;
+ bool SectionUpdate;
+ bool ShouldBreak;
+} Instruction;
+
+uint32_t CacheSection;
+static uint8_t *CachePtr;
+static uint8_t *Cache;
+static uint8_t **Sections[13 * 0x1000]; /* Max: Decimal | Emulation = 13 */
+
+#ifndef NDEBUG
+struct
+{
+ uint32_t Emits;
+ uint32_t Finds;
+ uint32_t InterpretedFinds;
+ bool InterpretedBlock;
+ uint32_t Ops;
+ uint32_t InterpretedOps;
+ uint32_t ContextSwitches;
+ uint32_t GetSets;
+ uint32_t SlowGetSets;
+ uint32_t OpCounts[0x100];
+ uint32_t InterpretedOpCounts[0x100];
+ uint32_t InterpretedBlockStarts[0x100];
+} Metrics;
+
+void MetricsReset(void)
+{
+ memset(&Metrics, 0, sizeof(Metrics));
+}
+
+void MetricsPrint(void)
+{
+ int i;
+ printf("Cache hit rate: %f\n", (float)(Metrics.Finds - Metrics.Emits) / Metrics.Finds);
+ printf("Avg. instructions per find: %f\n", (float)Metrics.Ops / Metrics.Finds);
+
+ printf("Counts by instruction: \n");
+ for (i = 0; i < sizeof(Metrics.OpCounts) / sizeof(Metrics.OpCounts[0]); i++)
+ {
+ printf("0x%X: %d\n", i, Metrics.OpCounts[i]);
+ }
+
+ printf("Interpreted counts by instruction: \n");
+ for (i = 0; i < sizeof(Metrics.InterpretedOpCounts) / sizeof(Metrics.InterpretedOpCounts[0]); i++)
+ {
+ if (Metrics.InterpretedOpCounts[i] > 0)
+ printf("0x%X: %d\n", i, Metrics.InterpretedOpCounts[i]);
+ }
+
+ printf("Interpreted block starts by instruction: \n");
+ for (i = 0; i < sizeof(Metrics.InterpretedBlockStarts) / sizeof(Metrics.InterpretedBlockStarts[0]); i++)
+ {
+ if (Metrics.InterpretedBlockStarts[i] > 0)
+ printf("0x%X: %d\n", i, Metrics.InterpretedBlockStarts[i]);
+ }
+}
+#endif
+
+uint8_t **FetchBlock(uint16_t pc);
+
+static void *CacheAlloc(size_t size)
+{
+ uint8_t *prev = CachePtr;
+ CachePtr += size;
+ return (void *)prev;
+}
+
+static void CacheEmpty(void)
+{
+ memset(Cache, 0, BUFFER_SIZE + BUFFER_EXTRA);
+ memset(Sections, 0, sizeof(Sections));
+ CachePtr = Cache;
+}
+
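+/* Illustrative example of the section indexing below: with PB = 0x12, a PC
+ * offset of 0x8123 and native mode with M, X, decimal and emulation all clear,
+ * UpdateSection leaves CacheSection = 0x12 << 4 = 0x120, so FindBlock uses
+ * section 0x120 | 0x8 = 0x128 and block entry 0x123 within it. */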
+static uint8_t **FindBlock(uint16_t pc)
+{
+ /* Section is D/E/X/M flags, PB, and top 4 bits of PC */
+ uint32_t section = (CacheSection | ((pc & 0xF000) >> 12));
+ uint8_t **block = Sections[section];
+ pc &= ~0xF000;
+
+ if (!block || !block[pc])
+ {
+ if (CachePtr >= Cache + BUFFER_SIZE)
+ {
+ CacheEmpty();
+ printf("Reset translation cache\n");
+ block = NULL;
+ }
+
+ if (!block)
+ block = Sections[section] = CacheAlloc(BLOCK_SIZE * sizeof(uint8_t *));
+ }
+
+ return &block[pc];
+}
+
+
+#ifndef NDEBUG
+
+void CheckInstruction(uint8_t opcode, uint8_t *pc, bool interpreted)
+{
+ Metrics.Ops++;
+ Metrics.OpCounts[opcode]++;
+
+ if (interpreted)
+ {
+ Metrics.InterpretedOps++;
+ Metrics.InterpretedOpCounts[opcode]++;
+ if (!Metrics.InterpretedBlock)
+ {
+ Metrics.InterpretedBlock = true;
+ Metrics.InterpretedFinds++;
+ Metrics.InterpretedBlockStarts[opcode]++;
+ }
+ }
+
+ if (CPU.PC - CPU.PCBase == (BreakPC & 0xffff) && ICPU.Registers.PB == ((BreakPC >> 16) & 0xFF))
+ DynaBreak();
+
+ if (*CPU.PC == BreakOpcode)
+ DynaBreak();
+
+ if (pc != CPU.PC)
+ {
+ fprintf(stderr, "Incorrect PC: Expected 0x%X, was 0x%X\n", (uintptr_t)pc, (uintptr_t)CPU.PC);
+ DynaBreak();
+ }
+
+ if (opcode != *CPU.PC)
+ {
+ fprintf(stderr, "Incorrect opcode: Expected 0x%X, was 0x%X\n", opcode, *CPU.PC);
+ DynaBreak();
+ }
+}
+#endif
+
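+/* Emits the inline event check used after most instructions: when SA1 support
+ * is enabled, call the SA1 main loop while SA1.Executing is set, then branch
+ * to the matching S9xCallHandleChecks* helper if an NMI or IRQ is pending
+ * (0x880 in CPU.Flags) or RegCycles has reached the NextEvent value kept in
+ * the upper half of RegChecks. */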
+static void EmitChecks(enum ChecksFlags flags)
+{
+ if (Settings.SA1)
+ {
+ GenARM_MOV32_Imm(CondAL, RegR0, (uintptr_t)&SA1);
+ GenARM_LDRB_Imm(CondAL, RegR0, RegR0, offsetof(__typeof__(SA1), Executing));
+ GenARM_CMP_Imm(CondAL, RegR0, ENCODE_IMM(0, 0));
+ GenARM_BL(CondNE, PCOFFSET(S9xCallSA1MainLoop));
+ }
+
+ GenARM_ANDS_Imm(CondAL, RegR0, RegChecks, ENCODE_IMM(0x88, 4)); /* 0x880 = NMI_FLAG | IRQ_PENDING_FLAG */
+ GenARM_CMP_Reg(CondEQ, RegCycles, ENCODE_SHIFT(ShiftLSR, 16), RegChecks);
+ GenARM_ORRS_Imm(CondHS, RegR0, RegR0, ENCODE_IMM(1, 0));
+
+ if (flags & CheckWFI)
+ GenARM_BL(CondNE, PCOFFSET(S9xCallHandleChecksWFI));
+ else
+ GenARM_BL(CondNE, PCOFFSET(S9xCallHandleChecksNoWFI));
+}
+
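+/* Back-patches the 32-bit instruction at *source in place: the condition field
+ * is preserved, the opcode is forced to B (0xA) and the 24-bit PC-relative
+ * offset is recomputed for the new target. PatchJumpDirect and
+ * PatchJumpDirectChecks flush the patched word with __clear_cache afterwards. */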
+static void PatchJump(uint32_t *source, uint8_t *target)
+{
+ *source = ((*source & 0xF0000000) | (0xA << 24) | ((PCREL(source, target) >> 2) & 0xFFFFFF));
+}
+
+static uint8_t *HandleFlags(bool checkWFI)
+{
+ uint8_t *NewPC = NULL;
+
+ if (CPU.Flags & NMI_FLAG)
+ {
+ if (--CPU.NMICycleCount == 0)
+ {
+ CPU.Flags &= ~NMI_FLAG;
+ if (checkWFI && CPU.WaitingForInterrupt)
+ {
+ CPU.WaitingForInterrupt = false;
+ CPU.PC++;
+ }
+ S9xOpcode_NMI();
+ UpdateSection();
+ NewPC = CPU.PC;
+ }
+ }
+
+ if (CPU.Flags & IRQ_PENDING_FLAG)
+ {
+ if (CPU.IRQCycleCount == 0)
+ {
+ if (checkWFI && CPU.WaitingForInterrupt)
+ {
+ CPU.WaitingForInterrupt = false;
+ NewPC = CPU.PC++;
+ }
+ if (CPU.IRQActive && !Settings.DisableIRQ)
+ {
+ if (!CheckFlag(IRQ))
+ {
+ S9xOpcode_IRQ();
+ UpdateSection();
+ NewPC = CPU.PC;
+ }
+ }
+ else
+ CPU.Flags &= ~IRQ_PENDING_FLAG;
+ }
+ else if (--CPU.IRQCycleCount == 0 && CheckFlag(IRQ))
+ CPU.IRQCycleCount = 1;
+ }
+
+ return NewPC;
+}
+
+uint8_t *HandleFlagsNoWFI(void)
+{
+ return HandleFlags(false);
+}
+
+uint8_t *HandleFlagsWFI(void)
+{
+ return HandleFlags(true);
+}
+
+static int HandleHBlank(bool SFX)
+{
+#ifndef NDEBUG
+ Metrics.ContextSwitches++;
+#endif
+
+#ifndef USE_BLARGG_APU
+ if (CPU.WhichEvent == HBLANK_END_EVENT)
+ while (IAPU.APUExecuting && APU.Cycles <= CPU.Cycles)
+ APU_EXECUTE1();
+#endif
+
+ if (SFX)
+ S9xDoHBlankProcessing_SFX();
+ else
+ S9xDoHBlankProcessing_NoSFX();
+
+#ifdef LAGFIX
+ if(finishedFrame)
+ return 3;
+#endif
+ if (CPU.Flags & SCAN_KEYS_FLAG)
+ return 1;
+
+ return 0;
+}
+
+int HandleHBlankSFX(void)
+{
+ return HandleHBlank(true);
+}
+
+int HandleHBlankNoSFX(void)
+{
+ return HandleHBlank(false);
+}
+
+uint8_t *PatchJumpDirectChecks(uint8_t *PC, uint32_t *source)
+{
+ uint8_t *dest = *FetchBlock(PC - CPU.PCBase);
+ uint8_t *checks = CachePtr;
+ EmitChecks(0);
+ GenARM_B(CondAL, PCOFFSET(dest));
+ __clear_cache(checks, CachePtr);
+ PatchJump(source, checks);
+ __clear_cache(source, source+1);
+ return checks;
+}
+
+uint8_t *PatchJumpDirect(uint8_t *PC, uint32_t *source)
+{
+ uint8_t *dest = *FetchBlock(PC - CPU.PCBase);
+ PatchJump(source, dest);
+ __clear_cache(source, source+1);
+ return dest;
+}
+
+void DynaCPUShutdown(void)
+{
+ /* Don't skip cycles with a pending NMI or IRQ - could cause delayed
+ * interrupt. Interrupts are delayed for a few cycles already, but
+ * the delay could allow the shutdown code to cycle skip again.
+ * Was causing screen flashing on Top Gear 3000. */
+ if (CPU.WaitCounter == 0 && !(CPU.Flags & (IRQ_PENDING_FLAG | NMI_FLAG)))
+ {
+ CPU.WaitAddress = NULL;
+#ifndef USE_BLARGG_APU
+ CPU.Cycles = CPU.NextEvent;
+ if (IAPU.APUExecuting)
+ {
+ ICPU.CPUExecuting = false;
+ do
+ {
+ APU_EXECUTE1();
+ } while (APU.Cycles < CPU.NextEvent);
+ ICPU.CPUExecuting = true;
+ }
+#endif
+ }
+ else if (CPU.WaitCounter >= 2)
+ CPU.WaitCounter = 1;
+ else
+ CPU.WaitCounter--;
+}
+
+void DynaWAIShutdown(void)
+{
+ CPU.Cycles = CPU.NextEvent;
+#ifndef USE_BLARGG_APU
+ if (IAPU.APUExecuting)
+ {
+ ICPU.CPUExecuting = false;
+ do
+ {
+ APU_EXECUTE1();
+ } while (APU.Cycles < CPU.NextEvent);
+ ICPU.CPUExecuting = true;
+ }
+#endif
+}
+
+static void EmitFlushCyclesPC(int *dpc, int *dcycles)
+{
+ if (*dpc != 0)
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(*dpc, 0));
+
+ if (*dcycles != 0)
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(*dcycles, 0));
+
+ dpc = 0;
+ dcycles = 0;
+}
+
+/* a1 = value, a4 = sp, preserves a4 */
+static void EmitPush(Instruction *in, bool n8)
+{
+ enum ARMReg RegS = RegA4; /* Preserved during memory ops */
+
+ if (n8)
+ GenARM_SUB_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 0));
+ else
+ GenARM_SUB_Imm(CondAL, RegS, RegS, ENCODE_IMM(2, 0));
+
+ if (CheckEmulation())
+ {
+ GenARM_BIC_Imm(CondAL, RegS, RegS, ENCODE_IMM(0xFF, 8));
+ GenARM_ORR_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 8));
+ }
+
+ GenARM_ADD_Imm(CondAL, RegA2, RegS, ENCODE_IMM(1, 0));
+ GenARM_BIC_Imm(CondAL, RegA2, RegA2, ENCODE_IMM(0xFF, 16));
+
+ if (n8)
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
+ }
+ else
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast));
+ }
+}
+
+/* a1 = value, a4 = sp, preserves a4 */
+static void EmitPop(Instruction *in, bool n8)
+{
+ enum ARMReg RegS = RegA4; /* Preserved during memory ops */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+
+ if (n8)
+ GenARM_ADD_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 0));
+ else
+ GenARM_ADD_Imm(CondAL, RegS, RegS, ENCODE_IMM(2, 0));
+
+ if (CheckEmulation())
+ {
+ GenARM_BIC_Imm(CondAL, RegS, RegS, ENCODE_IMM(0xFF, 8));
+ GenARM_ORR_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 8));
+ }
+
+ if (n8)
+ {
+ GenARM_MOV_Reg(CondAL, RegA1, 0, RegS);
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ }
+ else
+ {
+ GenARM_SUB_Imm(CondAL, RegA1, RegS, ENCODE_IMM(1, 0));
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ }
+}
+
+static enum ARMReg EmitLoadRegister(Instruction *in, enum SNESRegArg arg, enum ARMReg RegMEM, bool read)
+{
+ enum ARMReg reg;
+
+ switch(arg)
+ {
+ case ArgA:
+ reg = RegA;
+ break;
+ case ArgX:
+ reg = RegX;
+ break;
+ case ArgY:
+ reg = RegY;
+ break;
+ case ArgP:
+ reg = RegP;
+ break;
+ case ArgD:
+ if (read)
+ GenARM_LDRH_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.D));
+ reg = RegMEM;
+ break;
+ case ArgS:
+ if (read)
+ GenARM_LDRH_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.S));
+ reg = RegMEM;
+ break;
+ case ArgDB:
+ if (read)
+ GenARM_LDRB_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.DB));
+ reg = RegMEM;
+ break;
+ case ArgPB:
+ if (read)
+ GenARM_LDRB_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.PB));
+ reg = RegMEM;
+ break;
+ default:
+ fprintf(stderr, "Unhandled SNES Register: %d\n", arg);
+ reg = -1;
+ break;
+ }
+
+ return reg;
+}
+
+/* Preserves reg, modifies R3 */
+static void EmitStoreRegister(Instruction *in, enum SNESRegArg arg, enum ARMReg reg, bool n8)
+{
+ switch(arg)
+ {
+ case ArgA:
+ if (reg != RegA)
+ {
+ if (n8)
+ GenARM_MOVB_Reg(CondAL, RegA, reg);
+ else
+ GenARM_UXTH_Reg(CondAL, RegA, reg);
+ }
+ break;
+ case ArgX:
+ if (reg != RegX)
+ {
+ if (n8)
+ GenARM_MOVB_Reg(CondAL, RegX, reg);
+ else
+ GenARM_UXTH_Reg(CondAL, RegX, reg);
+ }
+ break;
+ case ArgY:
+ if (reg != RegY)
+ {
+ if (n8)
+ GenARM_MOVB_Reg(CondAL, RegY, reg);
+ else
+ GenARM_UXTH_Reg(CondAL, RegY, reg);
+ }
+ break;
+ case ArgP:
+ {
+ uint8_t *branch;
+ GenARM_EOR_Reg(CondAL, RegP, RegP, 0, reg);
+ GenARM_EOR_Reg(CondAL, reg, RegP, 0, reg);
+ GenARM_EOR_Reg(CondAL, RegP, RegP, 0, reg);
+
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Emulation >> 8, 8));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(MemoryFlag | IndexFlag, 0));
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUnpackStatusFast));
+
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(IndexFlag, 0));
+ GenARM_BIC_Imm(CondNE, RegX, RegX, ENCODE_IMM(0xFF, 8));
+ GenARM_BIC_Imm(CondNE, RegY, RegY, ENCODE_IMM(0xFF, 8));
+
+ /* Update interpreter lookup table and CacheSection if processor flags changed */
+ GenARM_EOR_Reg(CondAL, reg, reg, 0, RegP);
+ GenARM_TST_Imm(CondAL, reg, ENCODE_IMM((Emulation | MemoryFlag | IndexFlag | Decimal) >> 2, 2));
+ branch = CachePtr;
+ GenARM_B(CondEQ, PCOFFSET(branch));
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+ GenARM_BL(CondAL, PCOFFSET(S9xCallFixCycles));
+ PatchJump((uint32_t *)branch, CachePtr);
+ break;
+ }
+ case ArgD:
+ GenARM_STRH_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.D));
+ break;
+ case ArgDB:
+ GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), reg);
+ GenARM_STRB_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.DB));
+ GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedDB));
+ break;
+ case ArgPB:
+ GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), reg);
+ GenARM_STRB_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.PB));
+ GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedPB));
+ break;
+ case ArgS:
+ if (CheckEmulation())
+ {
+ GenARM_BIC_Imm(CondAL, RegA3, reg, ENCODE_IMM(0xFF, 8));
+ GenARM_ORR_Imm(CondAL, RegA3, RegA3, ENCODE_IMM(1, 8));
+ GenARM_STRH_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(Registers.S));
+ }
+ else
+ {
+ GenARM_STRH_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.S));
+ }
+
+ break;
+ default:
+ fprintf(stderr, "Unhandled SNES Register: %d\n", arg);
+ break;
+ }
+}
+
+static uint16_t EmitLoadLiteral(Instruction *in, bool n8, int *dpc, int *dcycles)
+{
+ uint16_t literal;
+ *dcycles += CPU.MemSpeed;
+ *dpc += 1;
+ literal = in->PC[1];
+
+ if (!n8)
+ {
+ *dcycles += CPU.MemSpeed;
+ *dpc += 1;
+ literal |= in->PC[2] << 8;
+ }
+
+ return literal;
+}
+
+static int32_t EmitLoadRelative(Instruction *in, bool n8, int *dpc, int *dcycles)
+{
+ int32_t disp;
+ int32_t pc;
+ int32_t target;
+ bool overflow;
+
+ if (n8)
+ {
+ disp = (int8_t)in->PC[1];
+ pc = (int32_t)((in->PC + 2) - CPU.PCBase);
+ (*dpc)++;
+ *dcycles += CPU.MemSpeed;
+ }
+ else
+ {
+ disp = (int16_t)(in->PC[1] | (in->PC[2] << 8));
+ pc = (int32_t)((in->PC + 3) - CPU.PCBase);
+ *dpc += 2;
+ *dcycles += CPU.MemSpeedx2;
+ }
+
+ target = (pc + disp) & 0xFFFF;
+ overflow = target != (pc + disp);
+
+ if (overflow)
+ disp = (target - pc);
+
+ return disp;
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that is where
+ * S9XRead / S9xWrite look for address. Modifies the other. Reads also
+ * write over A4. */
+static void EmitAddrZeroPage(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+ enum ARMReg rd = read ? RegA1 : RegA2;
+ enum ARMReg scratch = read ? RegA2 : RegA1;
+
+ *dpc += 1;
+ *dcycles += CPU.MemSpeed;
+ GenARM_MOV_Imm(CondAL, rd, ENCODE_IMM(in->PC[1], 0));
+
+ if (offsetReg == ArgS)
+ GenARM_LDRH_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(Registers.S));
+ else
+ GenARM_LDRH_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(Registers.D));
+
+ if (offsetReg == ArgX)
+ GenARM_ADD_Reg(CondAL, rd, rd, 0, RegX);
+ else if (offsetReg == ArgY)
+ GenARM_ADD_Reg(CondAL, rd, rd, 0, RegY);
+
+ if (offsetReg == ArgX || offsetReg == ArgY || offsetReg == ArgS)
+ *dcycles += ONE_CYCLE;
+
+ if (read)
+ {
+ /* Set OpenBus to in->PC[1] */
+ GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[1], 0));
+ GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
+ }
+
+ GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch);
+
+ if (CheckEmulation() && (offsetReg == ArgX || offsetReg == ArgY))
+ {
+ GenARM_UXTB_Reg(CondAL, rd, rd);
+ *dcycles += ONE_CYCLE;
+ }
+ else
+ {
+ GenARM_UXTH_Reg(CondAL, rd, rd);
+ }
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that is where
+ * S9XRead / S9xWrite look for address. Modifies the other. Reads also
+ * write over A4. */
+static void EmitAddrAbsolute(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+ enum ARMReg rd = read ? RegA1 : RegA2;
+ enum ARMReg scratch = read ? RegA2 : RegA1;
+ *dpc += 2;
+ *dcycles += CPU.MemSpeedx2;
+ GenARM_MOV32_Imm(CondAL, scratch, in->PC[1] + (in->PC[2] << 8));
+ GenARM_LDR_Imm(CondAL, rd, RegCPU, ICPU_OFFSET(ShiftedDB));
+
+ if (offsetReg == ArgX)
+ GenARM_ADD_Reg(CondAL, scratch, scratch, 0, RegX);
+ else if (offsetReg == ArgY)
+ GenARM_ADD_Reg(CondAL, scratch, scratch, 0, RegY);
+
+ if (read)
+ {
+ /* Set OpenBus to in->PC[2] */
+ GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0));
+ GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
+ }
+
+ GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch);
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that's where
+ * S9XRead / S9xWrite look for address. Reads also write over A4. */
+static void EmitAddrLong(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+ enum ARMReg rd = read ? RegA1 : RegA2;
+ *dpc += 3;
+ *dcycles += CPU.MemSpeedx2 + CPU.MemSpeed;
+
+ GenARM_MOV32_Imm(CondAL, rd, in->PC[1] + (in->PC[2] << 8) + (in->PC[3] << 16));
+
+ if (offsetReg == ArgX)
+ {
+ GenARM_ADD_Reg(CondAL, rd, rd, 0, RegX);
+ GenARM_BIC_Imm(CondAL, rd, rd, ENCODE_IMM(0xFF, 24));
+ }
+
+ if (read)
+ {
+ /* Set OpenBus to in->PC[2] */
+ GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0));
+ GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
+ }
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that's where
+ * S9XRead / S9xWrite look for address. Writes over A4. */
+static void EmitAddrIndirect(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+ enum ARMReg rd = read ? RegA1 : RegA2;
+ enum ARMReg scratch = read ? RegA2 : RegA1;
+ enum SNESRegArg ZPArg = ArgNULL;
+
+ if (offsetReg == ArgX)
+ ZPArg = ArgX;
+ else if (IS_VADDRMODE(IndirectS))
+ ZPArg = ArgS;
+
+ EmitAddrZeroPage(true, in, ZPArg, dpc, dcycles);
+
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+
+ if (rd != RegA1)
+ GenARM_MOV_Reg(CondAL, rd, 0, RegA1);
+
+ if (offsetReg == ArgY)
+ {
+ GenARM_ADD_Reg(CondAL, rd, rd, 0, RegY);
+ }
+
+ if (IS_VADDRMODE(IndirectS))
+ *dcycles += ONE_CYCLE;
+
+ GenARM_LDR_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(ShiftedDB));
+ GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch);
+
+ if (read)
+ {
+ /* Set OpenBus to last byte fetched */
+ GenARM_MOV_Reg(CondAL, RegA4, ENCODE_SHIFT(ShiftLSR, 8), rd);
+ GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
+ }
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that's where
+ * S9XRead / S9xWrite look for address. Writes over A4. */
+static void EmitAddrIndirectFar(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+ enum ARMReg rd = read ? RegA1 : RegA2;
+
+ EmitAddrZeroPage(true, in, ArgNULL, dpc, dcycles);
+
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+
+ /* GetFast preserves A4 */
+ GenARM_MOV_Reg(CondAL, RegA4, 0, RegA1);
+ GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(2, 0));
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+
+ if (offsetReg == ArgY)
+ {
+ GenARM_ADD_Reg(CondAL, RegA4, RegA4, 0, RegY);
+ }
+
+ if (read)
+ {
+ /* Set OpenBus to last byte fetched */
+ GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OPEN_BUS_OFFSET);
+ }
+
+ GenARM_ADD_Reg(CondAL, rd, RegA4, ENCODE_SHIFT(ShiftLSL, 16), RegA1);
+}
+
+static void EmitGetAddress(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+ switch(in->AddrMode)
+ {
+ case AddrZeroPage8:
+ case AddrZeroPage16:
+ EmitAddrZeroPage(read, in, offsetReg, dpc, dcycles);
+ break;
+ case AddrAbsolute8:
+ case AddrAbsolute16:
+ EmitAddrAbsolute(read, in, offsetReg, dpc, dcycles);
+ break;
+ case AddrLong8:
+ case AddrLong16:
+ EmitAddrLong(read, in, offsetReg, dpc, dcycles);
+ break;
+ case AddrIndirect8:
+ case AddrIndirect16:
+ case AddrIndirectX8:
+ case AddrIndirectX16:
+ case AddrIndirectY8:
+ case AddrIndirectY16:
+ case AddrIndirectS8:
+ case AddrIndirectS16:
+ EmitAddrIndirect(read, in, offsetReg, dpc, dcycles);
+ break;
+ case AddrIndirectFar8:
+ case AddrIndirectFar16:
+ EmitAddrIndirectFar(read, in, offsetReg, dpc, dcycles);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Returns loaded register, leaves address in RegA2 for storing */
+static enum ARMReg EmitLoadMemRegister(Instruction *in, enum ARMReg RegMEM, bool n8, int *dpc, int *dcycles)
+{
+ EmitGetAddress(true, in, in->Arg2, dpc, dcycles);
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+
+ if (n8)
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ else
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+
+ GenARM_MOV_Reg(CondAL, RegMEM, 0, RegA1);
+
+ return RegMEM;
+}
+
+/* Assumes address is in RegA2 */
+static void EmitStoreMemRegister(enum ARMReg RegMEM, bool n8)
+{
+ GenARM_MOV_Reg(CondAL, RegA1, 0, RegMEM);
+
+ if (n8)
+ GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
+ else
+ GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast));
+}
+
+/* Expects Z to be set in ARM flags, HS to be set in ARM if CarryHS is true (SBC, CMP) */
+static void EmitSetFlags(enum ARMReg reg, uint32_t flags, bool n8, bool CarryHS)
+{
+ if (flags == 0)
+ return;
+
+ GenARM_BIC_Imm(CondAL, RegP, RegP, flags);
+
+ if (flags & Zero)
+ GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero);
+
+ if ((flags & Carry) && CarryHS)
+ GenARM_ORR_Imm(CondHS, RegP, RegP, Carry);
+
+ if (n8)
+ {
+ if (flags & Negative)
+ {
+ GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(2, 6));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, Negative);
+ }
+
+ if ((flags & Carry) && !CarryHS)
+ {
+ GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(1, 8));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, Carry);
+ }
+ }
+ else
+ {
+ if (flags & Negative)
+ {
+ GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(2, 14));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, Negative);
+ }
+
+ if ((flags & Carry) && !CarryHS)
+ {
+ GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(1, 16));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, Carry);
+ }
+ }
+}
+
+static void EmitLD(enum ARMReg reg, bool n8, int *dpc, int *dcycles)
+{
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(dpc, dcycles);
+
+ if (n8)
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ GenARM_MOVBS_Reg(CondAL, reg, RegR0, RegR0);
+ }
+ else
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ GenARM_MOVS_Reg(CondAL, reg, 0, RegR0);
+ }
+}
+
+static void EmitST(enum SNESRegArg reg, bool n8)
+{
+ switch(reg)
+ {
+ case ArgA:
+ GenARM_MOV_Reg(CondAL, RegA1, 0, RegA);
+ break;
+ case ArgX:
+ GenARM_MOV_Reg(CondAL, RegA1, 0, RegX);
+ break;
+ case ArgY:
+ GenARM_MOV_Reg(CondAL, RegA1, 0, RegY);
+ break;
+ case ArgZ:
+ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0));
+ break;
+ default:
+ break;
+ }
+
+ if (n8)
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
+ }
+ else
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast));
+ }
+}
+
+static void EmitBranch(Instruction *in, uint32_t flag, bool set, int *dpc, int *dcycles)
+{
+ int32_t disp = EmitLoadRelative(in, true, dpc, dcycles);
+
+ EmitFlushCyclesPC(dpc, dcycles);
+
+ /* Interpreter runs BranchCheck here. Only when APU is disabled
+ * until next reset. So cost of the load seems not worth it, unless
+ * games break. */
+
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(flag, 0));
+ /* If condition doesn't match, jump to next instruction */
+ if (set)
+ GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks));
+ else
+ GenARM_BL(CondNE, PCOFFSET(JumpDirectChecks));
+
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0));
+
+ if (Settings.Shutdown)
+ {
+ GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress));
+ }
+
+ if (disp > 0)
+ {
+ if (disp & 0xFF)
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+ if (disp & 0xFF00)
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+ }
+ else if (disp < 0)
+ {
+ disp = -disp;
+ if (disp & 0xFF)
+ GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+ if (disp & 0xFF00)
+ GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+ }
+
+ if (Settings.Shutdown) {
+ GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC);
+ GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown));
+ }
+
+ EmitChecks(0);
+ GenARM_BL(CondAL, PCOFFSET(JumpDirect));
+}
+
+static void EmitNativeCall(Instruction *in)
+{
+ int dpc = 1;
+ int dcycles = CPU.MemSpeed;
+ bool didBreak = false;
+ bool n8;
+
+ switch(in->Op)
+ {
+ CASE_VOP(MOV):
+ CASE_VOP(MOVSP):
+ {
+ enum ARMReg RegSRC;
+ enum ARMReg RegDST;
+ n8 = (in->Op == FnMOV8) || (in->Op == FnMOVSP8);
+
+ dcycles += ONE_CYCLE;
+ RegDST = EmitLoadRegister(in, in->Arg1, RegA2, false);
+ RegSRC = EmitLoadRegister(in, in->Arg2, RegA3, true);
+
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_MOVBS_Reg(CondAL, RegDST, RegA1, RegSRC);
+ }
+ else
+ {
+ GenARM_UXTHS_Reg(CondAL, RegDST, RegSRC);
+ }
+
+ EmitSetFlags(RegDST, in->OutFlags & (Zero | Negative), n8, false);
+ EmitStoreRegister(in, in->Arg1, RegDST, n8);
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(LD):
+ {
+ enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true);
+ n8 = in->Op == FnLD8;
+
+ if (IS_VADDRMODE(Immediate))
+ {
+ uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_BIC_Imm(CondAL, RegDST, RegDST, ENCODE_IMM(0xFF, 0));
+ GenARM_ORR_Imm(CondAL, RegDST, RegDST, ENCODE_IMM(literal, 0));
+
+ if (in->OutFlags & Zero)
+ GenARM_ANDS_Imm(CondAL, RegA1, RegDST, ENCODE_IMM(literal, 0));
+ }
+ else
+ {
+ GenARM_MOV32_Imm(CondAL, RegDST, literal);
+ if (in->OutFlags & Zero)
+ GenARM_ANDS_Reg(CondAL, RegDST, RegDST, 0, RegDST);
+ }
+ }
+ else
+ {
+ EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles);
+ EmitLD(RegDST, n8, &dpc, &dcycles);
+ }
+
+ EmitSetFlags(RegDST, in->OutFlags & (Zero | Negative), n8, false);
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(ST):
+ {
+ n8 = in->Op == FnST8;
+
+ EmitGetAddress(false, in, in->Arg2, &dpc, &dcycles);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+ EmitST(in->Arg1, n8);
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(PUSH):
+ {
+ enum ARMReg RegSRC;
+ n8 = in->Op == FnPUSH8;
+
+ if (in->Arg1 == ArgNULL)
+ {
+ RegSRC = EmitLoadMemRegister(in, RegA1, n8, &dpc, &dcycles);
+ }
+ else
+ {
+ RegSRC = EmitLoadRegister(in, in->Arg1, RegA1, true);
+ dcycles += ONE_CYCLE;
+ }
+
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ if (RegSRC != RegA1)
+ GenARM_MOV_Reg(CondAL, RegA1, 0, RegSRC);
+
+ EmitFlushCyclesPC(&dpc, &dcycles);
+ EmitPush(in, n8);
+
+ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_OP(PEA):
+ {
+ uint16_t literal = EmitLoadLiteral(in, false, &dpc, &dcycles);
+
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+ GenARM_MOV32_Imm(CondAL, RegA1, literal);
+
+ EmitFlushCyclesPC(&dpc, &dcycles);
+ EmitPush(in, false);
+
+ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_OP(PER):
+ {
+ uint16_t literal = (in->PC + 3 - CPU.PCBase + in->PC[1] + (in->PC[2] << 8)) & 0xffff;
+ dcycles += CPU.MemSpeedx2 + ONE_CYCLE;
+ dpc += 2;
+
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+ GenARM_MOV32_Imm(CondAL, RegA1, literal);
+
+ EmitFlushCyclesPC(&dpc, &dcycles);
+ EmitPush(in, false);
+
+ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(POP):
+ {
+ n8 = in->Op == FnPOP8;
+ dcycles += TWO_CYCLES;
+
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ EmitPop(in, n8);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (in->OutFlags & Zero)
+ GenARM_MOVS_Reg(CondAL, RegA1, 0, RegA1); /* Set ZF on A1 */
+
+ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false);
+ EmitStoreRegister(in, in->Arg1, RegA1, n8);
+ EmitChecks(0);
+ break;
+ }
+ CASE_OP(MVP):
+ CASE_OP(MVN):
+ {
+ uint8_t *start;
+ dpc += 2;
+ dcycles += CPU.MemSpeedx2 + TWO_CYCLES;
+
+ /* Load DD */
+ /* Set DB / ShiftedDB */
+ GenARM_MOV_Imm(CondAL, RegA3, ENCODE_IMM(in->PC[1], 0));
+ GenARM_STRB_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(Registers.DB));
+ GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), RegA3);
+ GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedDB));
+
+ /* A3 = DD << 16 */
+
+ /* Load SS */
+ /* Set OpenBus to SS */
+ GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0));
+ GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
+
+ /* A4 = SS << 16 */
+ start = CachePtr;
+
+ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(in->PC[2], 16));
+ GenARM_ORR_Reg(CondAL, RegA1, RegA1, 0, RegX);
+
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+
+ /* SetByte -- A1 val, A2 addr */
+ GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[1], 16));
+ GenARM_ORR_Reg(CondAL, RegA2, RegA2, 0, RegY);
+ GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
+
+ if (in->Op == FnMVN)
+ {
+ if (CheckIndex())
+ {
+ GenARM_ADD_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0));
+ GenARM_MOVB_Reg(CondAL, RegX, RegA1);
+ GenARM_ADD_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0));
+ GenARM_MOVB_Reg(CondAL, RegY, RegA1);
+ }
+ else
+ {
+ GenARM_ADD_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0));
+ GenARM_UXTH_Reg(CondAL, RegX, RegA1);
+ GenARM_ADD_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0));
+ GenARM_UXTH_Reg(CondAL, RegY, RegA1);
+ }
+ }
+ else
+ {
+ if (CheckIndex())
+ {
+ GenARM_SUB_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0));
+ GenARM_MOVB_Reg(CondAL, RegX, RegA1);
+ GenARM_SUB_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0));
+ GenARM_MOVB_Reg(CondAL, RegY, RegA1);
+ }
+ else
+ {
+ GenARM_SUB_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0));
+ GenARM_UXTH_Reg(CondAL, RegX, RegA1);
+ GenARM_SUB_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0));
+ GenARM_UXTH_Reg(CondAL, RegY, RegA1);
+ }
+ }
+
+ GenARM_CMP_Imm(CondAL, RegA, ENCODE_IMM(0, 0));
+ GenARM_SUB_Imm(CondAL, RegA1, RegA, ENCODE_IMM(1, 0));
+ GenARM_UXTH_Reg(CondAL, RegA, RegA1);
+ GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks));
+
+ GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(dpc, 0));
+ EmitChecks(0);
+ GenARM_B(CondAL, PCOFFSET(start));
+ didBreak = true;
+ break;
+ }
+ CASE_VOP(OR):
+ {
+ n8 = in->Op == FnOR8;
+ if (IS_VADDRMODE(Immediate))
+ {
+ uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_ORR_Imm(CondAL, RegA, RegA, ENCODE_IMM(literal, 0));
+ GenARM_ANDS_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0));
+ EmitSetFlags(RegA2, in->OutFlags & (Zero | Negative), n8, false);
+ }
+ else
+ {
+ GenARM_MOV32_Imm(CondAL, RegR0, literal);
+ GenARM_ORRS_Reg(CondAL, RegA, RegA, 0, RegR0);
+ EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false);
+ }
+ }
+ else
+ {
+ EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles);
+
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegR0);
+ GenARM_ANDS_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0));
+ EmitSetFlags(RegA2, in->OutFlags & (Zero | Negative), n8, false);
+ }
+ else
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ GenARM_ORRS_Reg(CondAL, RegA, RegA, 0, RegR0);
+ EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false);
+ }
+ }
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(AND):
+ {
+ n8 = in->Op == FnAND8;
+ if (IS_VADDRMODE(Immediate))
+ {
+ uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_ANDS_Imm(CondAL, RegA1, RegA, ENCODE_IMM(literal, 0));
+ GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8));
+ GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1);
+ }
+ else
+ {
+ GenARM_MOV32_Imm(CondAL, RegA1, literal);
+ GenARM_ANDS_Reg(CondAL, RegA, RegA, 0, RegA1);
+ }
+ }
+ else
+ {
+ EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles);
+
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ GenARM_ANDS_Reg(CondAL, RegA1, RegA, 0, RegA1);
+ GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8));
+ GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1);
+ }
+ else
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ GenARM_ANDS_Reg(CondAL, RegA, RegA, 0, RegA1);
+ }
+ }
+
+ EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false);
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(EOR):
+ {
+ n8 = in->Op == FnEOR8;
+ if (IS_VADDRMODE(Immediate))
+ {
+ uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_AND_Imm(CondAL, RegA1, RegA, ENCODE_IMM(0xFF, 0));
+ GenARM_EORS_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(literal, 0));
+ GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8));
+ GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1);
+ }
+ else
+ {
+ GenARM_MOV32_Imm(CondAL, RegA1, literal);
+ GenARM_EORS_Reg(CondAL, RegA, RegA, 0, RegA1);
+ }
+ }
+ else
+ {
+ EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles);
+
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ GenARM_AND_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0));
+ GenARM_EORS_Reg(CondAL, RegA1, RegA1, 0, RegA2);
+ GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8));
+ GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1);
+ }
+ else
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ GenARM_EORS_Reg(CondAL, RegA, RegA, 0, RegA1);
+ }
+ }
+
+ EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false);
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(ADC):
+ {
+ enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true);
+ n8 = in->Op == FnADC8;
+
+ if (IS_VADDRMODE(Immediate))
+ {
+ uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_MOV32_Imm(CondAL, RegA1, literal);
+ }
+ else
+ {
+ EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles);
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ else
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ }
+
+ GenARM_AND_Imm(CondAL, RegA3, RegP, ENCODE_IMM(Carry, 0));
+
+ if (CheckDecimal())
+ {
+ uint32_t shift;
+
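+ /* Decimal mode: add operand and accumulator one 4-bit BCD digit at a
+ * time (plus carry-in); whenever the running sum's current digit
+ * reaches 0xA, add 6 so the excess carries into the next digit. */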
+ for (shift = 0; shift < (n8 ? 8 : 16) ; shift += 4)
+ {
+ GenARM_AND_Imm(CondAL, RegA4, RegA1, ENCODE_IMM(0xF, shift));
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4);
+ GenARM_AND_Imm(CondAL, RegA4, RegDST, ENCODE_IMM(0xF, shift));
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4);
+ GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(0xA, shift));
+ GenARM_ADD_Imm(CondHS, RegA3, RegA3, ENCODE_IMM(0x6, shift));
+ }
+
+ if (n8)
+ {
+ GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0));
+ GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3);
+ }
+ else
+ {
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST);
+ GenARM_UXTHS_Reg(CondAL, RegDST, RegA3);
+ }
+ }
+ else
+ {
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA1);
+
+ if (n8)
+ {
+ GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0));
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA2);
+ GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3);
+ }
+ else
+ {
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST);
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegDST);
+ GenARM_UXTHS_Reg(CondAL, RegDST, RegA3);
+ }
+ }
+
+ EmitSetFlags(RegA3, in->OutFlags & (Zero | Negative | Carry | Overflow), n8, false);
+
+ if (in->OutFlags & Overflow)
+ {
+ /* Calculate overflow flag */
+ if (CheckDecimal()) {
+ GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, n8 ? 8 : 16));
+ GenARM_SUB_Imm(CondGE, RegA3, RegA3, ENCODE_IMM(0x60, n8 ? 0 : 8));
+ }
+
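+ /* Overflow = sign bit of (~(old A ^ operand) & (result ^ old A)):
+ * set when both inputs share a sign that differs from the result. */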
+ GenARM_EOR_Reg(CondAL, RegA1, RegA2, 0, RegA1);
+ GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1);
+ GenARM_EOR_Reg(CondAL, RegA2, RegA3, 0, RegA2);
+ GenARM_AND_Reg(CondAL, RegA1, RegA1, 0, RegA2);
+ if (n8)
+ {
+ GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0));
+ }
+ else
+ {
+ GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8));
+ }
+ GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Overflow, 0));
+ }
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(SBC):
+ {
+ enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true);
+ n8 = in->Op == FnSBC8;
+ if (IS_VADDRMODE(Immediate))
+ {
+ uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_MOV32_Imm(CondAL, RegA1, literal);
+ }
+ else
+ {
+ EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles);
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ else
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ }
+
+ GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1);
+ if (n8)
+ GenARM_UXTB_Reg(CondAL, RegA1, RegA1);
+ else
+ GenARM_UXTH_Reg(CondAL, RegA1, RegA1);
+
+ GenARM_AND_Imm(CondAL, RegA3, RegP, ENCODE_IMM(Carry, 0));
+
+ if (CheckDecimal())
+ {
+ uint32_t shift;
+
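+ /* Decimal mode: the operand was complemented above, so this is a
+ * digit-wise add; when a digit fails to carry (sum below 0x10),
+ * subtract 6 to apply the BCD borrow correction. */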
+ for (shift = 0; shift < (n8 ? 8 : 16) ; shift += 4)
+ {
+ GenARM_AND_Imm(CondAL, RegA4, RegA1, ENCODE_IMM(0xF, shift));
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4);
+ GenARM_AND_Imm(CondAL, RegA4, RegDST, ENCODE_IMM(0xF, shift));
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4);
+ GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(0x10, shift));
+ GenARM_SUB_Imm(CondLO, RegA3, RegA3, ENCODE_IMM(0x6, shift));
+ }
+
+ if (n8)
+ {
+ GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0));
+ GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3);
+ }
+ else
+ {
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST);
+ GenARM_UXTHS_Reg(CondAL, RegDST, RegA3);
+ }
+ }
+ else
+ {
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA1);
+
+ if (n8)
+ {
+ GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0));
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA2);
+ GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3);
+ }
+ else
+ {
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST);
+ GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegDST);
+ GenARM_UXTHS_Reg(CondAL, RegDST, RegA3);
+ }
+ }
+
+ EmitSetFlags(RegA3, in->OutFlags & (Zero | Negative | Overflow), n8, false);
+
+ if (in->OutFlags & Carry)
+ {
+ GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0));
+ if (n8)
+ GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, 8));
+ else
+ GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, 16));
+ GenARM_ORR_Imm(CondGE, RegP, RegP, ENCODE_IMM(Carry, 0));
+ }
+
+ if (in->OutFlags & Overflow)
+ {
+ /* Calculate overflow flag */
+ GenARM_EOR_Reg(CondAL, RegA1, RegA2, 0, RegA1);
+ GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1);
+ GenARM_EOR_Reg(CondAL, RegA2, RegA3, 0, RegA2);
+ GenARM_AND_Reg(CondAL, RegA1, RegA1, 0, RegA2);
+ if (n8)
+ {
+ GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0));
+ }
+ else
+ {
+ GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8));
+ }
+ GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Overflow, 0));
+ }
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(CMP):
+ {
+ enum ARMReg RegW = EmitLoadRegister(in, in->Arg1, RegR0, true);
+ n8 = in->Op == FnCMP8;
+
+ if (IS_VADDRMODE(Immediate))
+ {
+ uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_AND_Imm(CondAL, RegR1, RegW, ENCODE_IMM(0xFF, 0));
+ GenARM_SUBS_Imm(CondAL, RegR0, RegR1, ENCODE_IMM(literal, 0));
+ }
+ else
+ {
+ GenARM_MOV32_Imm(CondAL, RegR0, literal);
+ GenARM_SUBS_Reg(CondAL, RegR0, RegW, 0, RegR0);
+ }
+ }
+ else
+ {
+ EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles);
+
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ GenARM_AND_Imm(CondAL, RegR1, RegW, ENCODE_IMM(0xFF, 0));
+ GenARM_SUBS_Reg(CondAL, RegR0, RegR1, 0, RegR0);
+ }
+ else
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ GenARM_SUBS_Reg(CondAL, RegR0, RegW, 0, RegR0);
+ }
+ }
+
+ EmitSetFlags(RegR0, in->OutFlags & (Carry | Zero | Negative), n8, true);
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(BIT):
+ {
+ n8 = in->Op == FnBIT8;
+
+ if (IS_VADDRMODE(Immediate))
+ {
+ uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_TST_Imm(CondAL, RegA, ENCODE_IMM(literal, 0));
+ }
+ else
+ {
+ GenARM_MOV32_Imm(CondAL, RegA1, literal);
+ GenARM_TST_Reg(CondAL, RegA, 0, RegA1);
+ }
+
+ EmitSetFlags(RegA, in->OutFlags & (Zero), n8, false);
+ }
+ else
+ {
+ uint32_t flags;
+ EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles);
+
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ GenARM_TST_Reg(CondAL, RegA, 0, RegA1);
+ }
+ else
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ GenARM_TST_Reg(CondAL, RegA, 0, RegA1);
+ }
+
+ flags = in->OutFlags & (Zero | Negative | Overflow);
+
+ if (flags == 0)
+ return;
+
+ GenARM_BIC_Imm(CondAL, RegP, RegP, flags);
+
+ if (flags & Zero)
+ GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero);
+
+ if (n8)
+ {
+ if (flags & Negative)
+ {
+ GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, Negative);
+ }
+
+ if ((flags & Overflow))
+ {
+ GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x40, 0));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, Overflow);
+ }
+ }
+ else
+ {
+ if (flags & Negative)
+ {
+ GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, Negative);
+ }
+
+ if ((flags & Overflow))
+ {
+ GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x40, 8));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, Overflow);
+ }
+ }
+ }
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(INC):
+ CASE_VOP(DEC):
+ {
+ enum ARMReg RegW;
+ n8 = in->Op == FnINC8 || in->Op == FnDEC8;
+
+ if (in->Arg1 == ArgNULL)
+ RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles);
+ else
+ RegW = EmitLoadRegister(in, in->Arg1, RegA2, true);
+
+ /* Add cycles */
+ dcycles += ONE_CYCLE;
+
+ /* Clear WaitAddress */
+ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0));
+ GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress));
+
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (IS_VOP(INC))
+ GenARM_ADD_Imm(CondAL, RegA1, RegW, ENCODE_IMM(1, 0));
+ else
+ GenARM_SUB_Imm(CondAL, RegA1, RegW, ENCODE_IMM(1, 0));
+
+ if (n8)
+ {
+ GenARM_MOVBS_Reg(CondAL, RegW, RegA1, RegA1);
+ }
+ else
+ {
+ GenARM_UXTHS_Reg(CondAL, RegW, RegA1);
+ }
+
+ EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false);
+
+ if (in->Arg1 == ArgNULL)
+ EmitStoreMemRegister(RegW, n8);
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(TSB):
+ CASE_VOP(TRB):
+ {
+ n8 = in->Op == FnTSB8 || in->Op == FnTRB8;
+ EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles);
+
+ dcycles += ONE_CYCLE;
+
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ GenARM_AND_Imm(CondAL, RegA4, RegA, ENCODE_IMM(0xFF, 0));
+
+ GenARM_TST_Reg(CondAL, RegA4, 0, RegA1);
+ EmitSetFlags(RegA4, in->OutFlags & (Zero), n8, false);
+
+ if (IS_VOP(TSB))
+ {
+ GenARM_ORR_Reg(CondAL, RegA1, RegA4, 0, RegA1);
+ }
+ else
+ {
+ GenARM_MVN_Reg(CondAL, RegA4, 0, RegA4);
+ GenARM_AND_Reg(CondAL, RegA1, RegA4, 0, RegA1);
+ }
+ GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
+ }
+ else
+ {
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ GenARM_TST_Reg(CondAL, RegA, 0, RegA1);
+ EmitSetFlags(RegA, in->OutFlags & (Zero), n8, false);
+
+ if (IS_VOP(TSB))
+ {
+ GenARM_ORR_Reg(CondAL, RegA1, RegA, 0, RegA1);
+ }
+ else
+ {
+ GenARM_MVN_Reg(CondAL, RegA4, 0, RegA);
+ GenARM_AND_Reg(CondAL, RegA1, RegA4, 0, RegA1);
+ }
+ GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast));
+ }
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(ASL):
+ {
+ enum ARMReg RegW;
+ n8 = in->Op == FnASL8;
+
+ if (in->Arg1 == ArgNULL)
+ RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles);
+ else
+ RegW = EmitLoadRegister(in, in->Arg1, RegA4, true);
+
+ /* Add cycles */
+ dcycles += ONE_CYCLE;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_MOV_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSL, 1), RegW);
+
+ if (n8)
+ {
+ GenARM_MOVBS_Reg(CondAL, RegW, RegA4, RegA1);
+ }
+ else
+ {
+ GenARM_UXTHS_Reg(CondAL, RegW, RegA1);
+ }
+
+ EmitSetFlags(RegA1, in->OutFlags & (Carry | Zero | Negative), CheckMemory(), false);
+
+ if (in->Arg1 == ArgNULL)
+ EmitStoreMemRegister(RegW, n8);
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(LSR):
+ {
+ enum ARMReg RegW;
+ n8 = in->Op == FnLSR8;
+
+ if (in->Arg1 == ArgNULL)
+ RegW = EmitLoadMemRegister(in, RegA3, n8, &dpc, &dcycles);
+ else
+ RegW = EmitLoadRegister(in, in->Arg1, RegA3, true);
+
+ /* Add cycles */
+ dcycles += ONE_CYCLE;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_AND_Imm(CondAL, RegA1, RegW, ENCODE_IMM(0xFF, 0));
+ GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 1), RegA1);
+ GenARM_BIC_Imm(CondAL, RegW, RegW, ENCODE_IMM(0xFF, 0));
+ GenARM_ORR_Reg(CondAL, RegW, RegW, 0, RegA1);
+ }
+ else
+ {
+ GenARM_MOVS_Reg(CondAL, RegW, ENCODE_SHIFT(ShiftLSR, 1), RegW);
+ }
+
+ /* Flags easier to set inline here */
+ GenARM_BIC_Imm(CondAL, RegP, RegP, in->OutFlags & (Carry | Zero | Negative));
+
+ /* Negative is always unset */
+ if (in->OutFlags & Zero)
+ GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero);
+
+ if (in->OutFlags & Carry)
+ GenARM_ORR_Imm(CondCS, RegP, RegP, Carry);
+
+ if (in->Arg1 == ArgNULL)
+ EmitStoreMemRegister(RegW, n8);
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(ROL):
+ {
+ enum ARMReg RegW;
+ n8 = in->Op == FnROL8;
+
+ if (in->Arg1 == ArgNULL)
+ RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles);
+ else
+ RegW = EmitLoadRegister(in, in->Arg1, RegA4, true);
+
+ /* Add cycles */
+ dcycles += ONE_CYCLE;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_MOV_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSL, 1), RegW);
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0));
+ GenARM_ORR_Imm(CondNE, RegA1, RegA1, ENCODE_IMM(1, 0));
+
+ if (in->OutFlags & Carry)
+ {
+ GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0));
+ GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(1, n8 ? 8 : 16));
+ GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Carry, 0));
+ }
+
+ if (n8)
+ {
+ GenARM_MOVBS_Reg(CondAL, RegW, RegA4, RegA1);
+ }
+ else
+ {
+ GenARM_UXTHS_Reg(CondAL, RegW, RegA1);
+ }
+
+ EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false);
+
+ if (in->Arg1 == ArgNULL)
+ EmitStoreMemRegister(RegW, n8);
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_VOP(ROR):
+ {
+ enum ARMReg RegW;
+ n8 = in->Op == FnROR8;
+
+ if (in->Arg1 == ArgNULL)
+ RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles);
+ else
+ RegW = EmitLoadRegister(in, in->Arg1, RegA4, true);
+
+ /* Add cycles */
+ dcycles += ONE_CYCLE;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (n8)
+ {
+ GenARM_AND_Imm(CondAL, RegA1, RegW, ENCODE_IMM(0xFF, 0));
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0));
+ GenARM_ORR_Imm(CondNE, RegA1, RegA1, ENCODE_IMM(1, 8));
+ GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 1), RegA1);
+ GenARM_MOVB_Reg(CondAL, RegW, RegA1);
+ }
+ else
+ {
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0));
+ GenARM_ORR_Imm(CondNE, RegW, RegW, ENCODE_IMM(1, 16));
+ GenARM_MOVS_Reg(CondAL, RegW, ENCODE_SHIFT(ShiftLSR, 1), RegW);
+ }
+
+ if (in->OutFlags & Carry)
+ {
+ GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0));
+ GenARM_ORR_Imm(CondCS, RegP, RegP, ENCODE_IMM(Carry, 0));
+ }
+
+ EmitSetFlags(n8 ? RegA1 : RegW, in->OutFlags & (Zero | Negative), n8, false);
+
+ if (in->Arg1 == ArgNULL)
+ EmitStoreMemRegister(RegW, n8);
+
+ EmitChecks(0);
+ break;
+ }
+ CASE_OP(BRA):
+ CASE_OP(BRL):
+ {
+ int32_t disp;
+ n8 = in->Op == FnBRA;
+ disp = EmitLoadRelative(in, n8, &dpc, &dcycles);
+ dcycles += ONE_CYCLE;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (in->Op == FnBRA && Settings.Shutdown)
+ {
+ GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress));
+ }
+
+ if (disp > 0)
+ {
+ if (disp & 0xFF)
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+ if (disp & 0xFF00)
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+ }
+ else if (disp < 0)
+ {
+ disp = -disp;
+ if (disp & 0xFF)
+ GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+ if (disp & 0xFF00)
+ GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+ }
+
+ if (in->Op == FnBRA && Settings.Shutdown) {
+ GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC);
+ GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown));
+ }
+
+ EmitChecks(0);
+ GenARM_BL(CondAL, PCOFFSET(JumpDirect));
+ didBreak = true;
+ break;
+ }
+
+ CASE_OP(JMP):
+ CASE_OP(JML):
+ {
+ switch(in->AddrMode)
+ {
+ case AddrImmediate16:
+ dcycles += CPU.MemSpeedx2;
+ GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8) | ICPU.ShiftedPB);
+
+ /* Only flush cycles because PC will change */
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0));
+
+ break;
+ case AddrImmediate24:
+ dcycles += CPU.MemSpeedx2 + CPU.MemSpeed;
+ GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8));
+ GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[3], 0));
+ EmitStoreRegister(in, ArgPB, RegA2, true);
+ GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(in->PC[3], 16));
+
+ /* Only flush cycles because PC will change */
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0));
+
+ break;
+ case AddrAbsolute16:
+ dcycles += CPU.MemSpeedx2;
+ dpc += 2;
+ GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8));
+
+ if (in->Arg2 == ArgX)
+ {
+ GenARM_ADD_Reg(CondAL, RegA1, RegA1, 0, RegX);
+ GenARM_BIC_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(0xFF, 16));
+ GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16));
+ dcycles += ONE_CYCLE;
+ }
+
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+
+ if (in->Op == FnJMP)
+ {
+ GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16));
+ }
+ else
+ {
+ GenARM_MOV_Reg(CondAL, RegA4, 0, RegA1);
+ GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(2, 0));
+ GenARM_SUB_Imm(CondAL, RegA2, RegCPUPC, ENCODE_IMM(3, 0));
+ GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+ GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1);
+ GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB));
+ GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB));
+ GenARM_ORR_Reg(CondAL, RegA1, RegA4, 0, RegA2);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase));
+ GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC));
+
+ if (in->AddrMode == AddrImmediate24 || in->Op == FnJML)
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+
+ if (in->AddrMode == AddrImmediate16 && Settings.Shutdown)
+ {
+ GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress));
+ GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC);
+ GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown));
+ }
+
+ EmitChecks(0);
+
+ if (in->AddrMode == AddrImmediate16 || in->AddrMode == AddrImmediate24)
+ GenARM_BL(CondAL, PCOFFSET(JumpDirect));
+ else
+ GenARM_BL(CondAL, PCOFFSET(JumpIndirect));
+
+ didBreak = true;
+ break;
+ }
+ CASE_OP(JSR):
+ CASE_OP(JSL):
+ {
+ if (in->Op == FnJSL)
+ {
+ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(ICPU.Registers.PB, 0));
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ dcycles += CPU.MemSpeed * 3;
+ dpc += 3;
+ /* Only flush cycles because PC will change */
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0));
+
+ EmitPush(in, true);
+ }
+ else
+ {
+ dcycles += CPU.MemSpeedx2 + ONE_CYCLE;
+ dpc += 2;
+ /* Only flush cycles because PC will change */
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0));
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+ }
+
+ GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(PCBase));
+ GenARM_ADD_Imm(CondAL, RegA3, RegCPUPC, ENCODE_IMM(dpc - 1, 0));
+ GenARM_SUB_Reg(CondAL, RegA1, RegA3, 0, RegA1);
+
+ EmitPush(in, false);
+
+ /* Load new PC */
+ if (in->Op == FnJSL)
+ {
+ GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[3], 0));
+ GenARM_STRB_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(Registers.PB));
+ GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA2);
+ GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB));
+
+ GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] + (in->PC[2] << 8) + (in->PC[3] << 16));
+ }
+ else if (in->Arg2 == ArgNULL)
+ {
+ GenARM_MOV32_Imm(CondAL, RegA2, in->PC[1] + (in->PC[2] << 8));
+ GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(ICPU.Registers.PB, 16));
+ }
+ else
+ {
+ GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8));
+ /* Set OpenBus to PC[2] */
+
+ GenARM_ADD_Reg(CondAL, RegA1, RegA1, 0, RegX);
+ GenARM_BIC_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(0xFF, 16));
+ GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16));
+
+ /* Update cycles / cpu for slow lookup */
+ GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+ GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16));
+ }
+
+ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase));
+ GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC));
+
+ if (in->Op == FnJSL)
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+
+ EmitChecks(0);
+ if (in->Arg2 == ArgNULL)
+ GenARM_BL(CondAL, PCOFFSET(JumpDirect));
+ else
+ GenARM_BL(CondAL, PCOFFSET(JumpIndirect));
+ didBreak = true;
+ break;
+ }
+ CASE_OP(RTI):
+ {
+ uint8_t *branch1;
+ dcycles += ONE_CYCLE * 2;
+
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0));
+ EmitPop(in, true);
+ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ EmitStoreRegister(in, ArgP, RegA1, false);
+
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ EmitPop(in, false);
+ GenARM_UXTH_Reg(CondAL, RegCPUPC, RegA1); /* RegCPUPC will be overwritten later */
+
+ /* Ignore pop PB if Emulation is set */
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Emulation >> 8, 8));
+ branch1 = CachePtr;
+ GenARM_B(CondNE, PCOFFSET(branch1));
+
+ EmitPop(in, true);
+
+ GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1);
+ GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB));
+ GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB));
+
+ PatchJump((uint32_t *)branch1, CachePtr);
+
+ GenARM_LDR_Imm(CondNE, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB));
+ GenARM_ADD_Reg(CondAL, RegA1, RegCPUPC, 0, RegA2);
+
+ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase));
+ GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC));
+
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+
+ EmitChecks(0);
+ GenARM_B(CondAL, PCOFFSET(JumpIndirect));
+ didBreak = true;
+ break;
+ }
+ CASE_OP(RTL):
+ CASE_OP(RTS):
+ {
+ dcycles += ONE_CYCLE * 2;
+ if (in->Op == FnRTS)
+ dcycles += ONE_CYCLE;
+
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0));
+ EmitPop(in, false);
+
+ GenARM_ADD_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(1, 0));
+ GenARM_UXTH_Reg(CondAL, RegCPUPC, RegA1); /* No need to preserve RegCPUPC: it will be overwritten later, and this should never be a WaitAddress */
+
+ if (in->Op == FnRTL)
+ {
+ EmitPop(in, true);
+ GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1);
+ GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB));
+ GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB));
+ GenARM_ADD_Reg(CondAL, RegA1, RegCPUPC, 0, RegA2);
+ }
+ else
+ {
+ GenARM_ADD_Imm(CondAL, RegA1, RegCPUPC, ENCODE_IMM(ICPU.Registers.PB, 16));
+ }
+
+ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase));
+ GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC));
+
+ if (in->Op == FnRTL)
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+
+ EmitChecks(0);
+ GenARM_B(CondAL, PCOFFSET(JumpIndirect));
+ didBreak = true;
+ break;
+ }
+ CASE_OP(BPL):
+ EmitBranch(in, Negative, false, &dpc, &dcycles);
+ didBreak = true;
+ break;
+ CASE_OP(BMI):
+ EmitBranch(in, Negative, true, &dpc, &dcycles);
+ didBreak = true;
+ break;
+ CASE_OP(BVC):
+ EmitBranch(in, Overflow, false, &dpc, &dcycles);
+ didBreak = true;
+ break;
+ CASE_OP(BVS):
+ EmitBranch(in, Overflow, true, &dpc, &dcycles);
+ didBreak = true;
+ break;
+ CASE_OP(BCC):
+ EmitBranch(in, Carry, false, &dpc, &dcycles);
+ didBreak = true;
+ break;
+ CASE_OP(BCS):
+ EmitBranch(in, Carry, true, &dpc, &dcycles);
+ didBreak = true;
+ break;
+ CASE_OP(BZC):
+ EmitBranch(in, Zero, false, &dpc, &dcycles);
+ didBreak = true;
+ break;
+ CASE_OP(BZS):
+ EmitBranch(in, Zero, true, &dpc, &dcycles);
+ didBreak = true;
+ break;
+ CASE_OP(BRK):
+ CASE_OP(COP):
+ {
+ uint32_t destPC;
+ dcycles += ONE_CYCLE;
+
+ GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ if (!CheckEmulation())
+ {
+ GenARM_LDRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB));
+ EmitPush(in, true);
+ dcycles += ONE_CYCLE;
+ }
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(PCBase));
+
+ if (!CheckEmulation())
+ {
+ GenARM_ADD_Imm(CondAL, RegA3, RegCPUPC, ENCODE_IMM(1, 0));
+ }
+
+ GenARM_SUB_Reg(CondAL, RegA1, RegA3, 0, RegA1);
+
+ EmitPush(in, false);
+
+ GenARM_AND_Imm(CondAL, RegA1, RegP, ENCODE_IMM(0xFF, 0));
+ EmitPush(in, true);
+ GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S));
+
+ GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OPEN_BUS_OFFSET);
+
+ GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Decimal, 0));
+ GenARM_ORR_Imm(CondAL, RegP, RegP, ENCODE_IMM(IRQ, 0));
+ GenARM_STRH_Imm(CondAL, RegP, RegCPU, ICPU_OFFSET(Registers.P));
+
+ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0));
+ GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB));
+ GenARM_STR_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(ShiftedPB));
+
+ if (in->Op == FnBRK)
+ destPC = CheckEmulation() ? 0xFFFE : 0xFFE6;
+ else
+ destPC = CheckEmulation() ? 0xFFF4 : 0xFFE4;
+
+ GenARM_MOV32_Imm(CondAL, RegA1, destPC);
+ GenARM_SUB_Imm(CondAL, RegA2, RegCPUPC, ENCODE_IMM(1, 0));
+ GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+
+ GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase));
+ GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC));
+
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+
+ EmitChecks(0);
+ GenARM_BL(CondAL, PCOFFSET(JumpIndirect));
+ didBreak = true;
+ break;
+ }
+ CASE_OP(CLC):
+ CASE_OP(CLI):
+ CASE_OP(CLD):
+ CASE_OP(CLV):
+ CASE_OP(SEC):
+ CASE_OP(SEI):
+ CASE_OP(SED):
+ {
+ uint16_t flag;
+ bool update = false;
+
+ switch(in->Op)
+ {
+ CASE_OP(SEC):
+ CASE_OP(CLC):
+ flag = Carry;
+ break;
+ CASE_OP(SEI):
+ CASE_OP(CLI):
+ flag = IRQ;
+ update = true;
+ break;
+ CASE_OP(SED):
+ CASE_OP(CLD):
+ flag = Decimal;
+ update = true;
+ break;
+ CASE_OP(CLV):
+ flag = Overflow;
+ break;
+ default:
+ fprintf(stderr, "Invalid opcode: %X\n", in->Opcode);
+ break;
+ }
+
+ dcycles += ONE_CYCLE;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (in->Op == FnCLC || in->Op == FnCLD || in->Op == FnCLI || in->Op == FnCLV)
+ GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(flag, 0));
+ else
+ GenARM_ORR_Imm(CondAL, RegP, RegP, ENCODE_IMM(flag, 0));
+
+ if (update)
+ GenARM_STRH_Imm(CondAL, RegP, RegCPU, ICPU_OFFSET(Registers.P));
+
+ EmitChecks(0);
+
+ if (in->SectionUpdate)
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+
+ if (in->ShouldBreak)
+ {
+ GenARM_BL(CondAL, PCOFFSET(JumpDirect));
+ didBreak = true;
+ }
+ break;
+ }
+ CASE_OP(SEP):
+ CASE_OP(REP):
+ {
+ dpc++;
+ dcycles += CPU.MemSpeed + ONE_CYCLE;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (IS_OP(REP))
+ GenARM_BIC_Imm(CondAL, RegA1, RegP, ENCODE_IMM(in->PC[1], 0));
+ else
+ GenARM_ORR_Imm(CondAL, RegA1, RegP, ENCODE_IMM(in->PC[1], 0));
+
+ EmitStoreRegister(in, ArgP, RegA1, false);
+
+ EmitChecks(0);
+ GenARM_BL(CondAL, PCOFFSET(JumpDirect));
+ didBreak = true;
+ break;
+ }
+ CASE_OP(XCE):
+ {
+ dcycles += ONE_CYCLE;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
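+ /* Exchange the Carry flag with the Emulation bit (kept in bit 8 of RegP). */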
+ GenARM_BIC_Imm(CondAL, RegA2, RegP, ENCODE_IMM(Emulation >> 8, 8));
+ GenARM_BIC_Imm(CondAL, RegA2, RegA2, ENCODE_IMM(Carry, 0));
+ GenARM_ORR_Reg(CondAL, RegA2, RegA2, ENCODE_SHIFT(ShiftLSR, 8), RegP);
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0));
+ GenARM_ORR_Imm(CondNE, RegA2, RegA2, ENCODE_IMM(Emulation >> 8, 8));
+
+ EmitStoreRegister(in, ArgP, RegA2, false);
+ EmitChecks(0);
+ GenARM_BL(CondAL, PCOFFSET(JumpIndirect));
+ didBreak = true;
+ break;
+ }
+ CASE_OP(STP):
+ {
+#ifdef NO_SPEEDHACKS
+ dpc--;
+ GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(Flags));
+ GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(DEBUG_MODE_FLAG, 0));
+ GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(Flags));
+#else
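+ /* Speedhack-patched STP (assumed encoding): PC[1] appears to hold a
+ * 7-bit branch displacement (bit 6 is copied into bit 7 to sign-extend
+ * it), and bit 7 of PC[1] selects whether the emulated branch is taken
+ * on Zero set or clear in the test below. */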
+ int8_t disp = (in->PC[1] & 0x7F) | ((in->PC[1] & 0x40) << 1);
+ uint32_t pc = in->PC + 2 - CPU.PCBase;
+ uint32_t target = (pc + disp) & 0xFFFF;
+ bool overflow = target != (pc + disp);
+ dpc++;
+
+ if (overflow)
+ disp = (target - pc);
+
+ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0));
+ GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress));
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown));
+
+ /* The interpreter runs BranchCheck here, but only when the APU is
+ * disabled until the next reset, so the cost of the load does not seem
+ * worth it unless games break. */
+
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Zero, 0));
+
+ if (in->PC[1] & 0x80)
+ GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks));
+ else
+ GenARM_BL(CondNE, PCOFFSET(JumpDirectChecks));
+
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0));
+
+ if (Settings.Shutdown)
+ {
+ GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress));
+ }
+
+ if (disp > 0)
+ {
+ if (disp & 0xFF)
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+ if (disp & 0xFF00)
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+ }
+ else if (disp < 0)
+ {
+ disp = -disp;
+ if (disp & 0xFF)
+ GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+ if (disp & 0xFF00)
+ GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+ }
+
+ if (Settings.Shutdown) {
+ GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC);
+ GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown));
+ }
+#endif
+ EmitChecks(0);
+ GenARM_BL(CondAL, PCOFFSET(JumpDirect));
+ didBreak = true;
+ break;
+ }
+ CASE_OP(XBA):
+ {
+ dcycles += TWO_CYCLES;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+ GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 8), RegA);
+ GenARM_MOV_Reg(CondAL, RegA, ENCODE_SHIFT(ShiftLSL, 24), RegA);
+ GenARM_ORR_Reg(CondAL, RegA, RegA1, ENCODE_SHIFT(ShiftLSR, 16), RegA);
+
+ EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), true, false);
+ EmitChecks(0);
+ break;
+ }
+ CASE_OP(WAI):
+ {
+ uint8_t *loop;
+ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(1, 0));
+ GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitingForInterrupt));
+ dpc--;
+ loop = CachePtr;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ if (Settings.Shutdown) {
+ GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown));
+ }
+
+ EmitChecks(CheckWFI);
+ GenARM_B(CondAL, PCOFFSET(loop));
+ break;
+ }
+ CASE_OP(WDM):
+ {
+#ifndef NO_SPEEDHACKS
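+ /* Speedhack branch encoded in WDM's operand: the high nibble selects
+ * the branch condition (see the switch below) and the low nibble is a
+ * short negative displacement (0xF0 | n, i.e. -16..-1). */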
+ int8_t disp = 0xF0 | (in->PC[1] & 0xF);
+ uint32_t pc = in->PC + 2 - CPU.PCBase;
+ uint32_t target = (pc + disp) & 0xFFFF;
+ bool overflow = target != (pc + disp);
+ uint8_t flag;
+ uint8_t skip;
+ dpc++;
+
+ if (overflow)
+ disp = (target - pc);
+
+ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0));
+ GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress));
+ EmitFlushCyclesPC(&dpc, &dcycles);
+
+ GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown));
+
+ switch(in->PC[1] & 0xF0)
+ {
+ case 0x10: /* BPL */
+ flag = Negative;
+ skip = CondNE;
+ break;
+ case 0x30: /* BMI */
+ flag = Negative;
+ skip = CondEQ;
+ break;
+ case 0x50: /* BVC */
+ flag = Overflow;
+ skip = CondNE;
+ break;
+ case 0x70: /* BVS */
+ flag = Overflow;
+ skip = CondEQ;
+ break;
+ case 0x80: /* BRA */
+ flag = 0;
+ break;
+ case 0x90: /* BCC */
+ flag = Carry;
+ skip = CondNE;
+ break;
+ case 0xB0: /* BCS */
+ flag = Carry;
+ skip = CondEQ;
+ break;
+ case 0xD0: /* BNE */
+ flag = Zero;
+ skip = CondNE;
+ break;
+ case 0xF0: /* BEQ */
+ flag = Zero;
+ skip = CondEQ;
+ break;
+ default:
+ didBreak = true;
+ break;
+ }
+
+ if (!didBreak)
+ {
+ /* The interpreter runs BranchCheck here, but only when the APU is
+ * disabled until the next reset, so the cost of the load does not seem
+ * worth it unless games break. */
+
+ if (flag)
+ {
+ GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(flag, 0));
+ GenARM_BL(skip, PCOFFSET(JumpDirectChecks));
+ }
+
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0));
+
+ if (Settings.Shutdown)
+ {
+ GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress));
+ }
+
+ if (disp > 0)
+ {
+ if (disp & 0xFF)
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+ if (disp & 0xFF00)
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+ }
+ else if (disp < 0)
+ {
+ disp = -disp;
+ if (disp & 0xFF)
+ GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+ if (disp & 0xFF00)
+ GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+ }
+
+ if (Settings.Shutdown) {
+ GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC);
+ GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown));
+ }
+ }
+
+#endif
+ EmitChecks(0);
+ GenARM_BL(CondAL, PCOFFSET(JumpDirect));
+ didBreak = true;
+ break;
+ }
+ CASE_OP(NOP):
+ {
+ dcycles += ONE_CYCLE;
+ EmitFlushCyclesPC(&dpc, &dcycles);
+ EmitChecks(0);
+ break;
+ }
+ }
+
+ if (in->ShouldBreak && !didBreak)
+ {
+ GenARM_B(CondAL, PCOFFSET(JumpIndirect));
+ }
+}
+
+static void EmitS9xCall(Instruction *in)
+{
+ /* CPU.PCAtOpcodeStart = CPU.PC; */
+ /* CPU.Cycles += CPU.MemSpeed; */
+ /* CPU.PC++; */
+
+ GenARM_STR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PCAtOpcodeStart));
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(1, 0));
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(CPU.MemSpeed, 0));
+
+ GenARM_BL(CondAL, PCOFFSET(S9xRegsToMem));
+
+ GenARM_BL(CondAL, PCOFFSET((uintptr_t)in->S9xOpcode));
+
+ if (in->SectionUpdate)
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+
+ GenARM_BL(CondAL, PCOFFSET(S9xMemToRegs));
+
+ EmitChecks(0);
+
+ if (in->ShouldBreak)
+ {
+ GenARM_B(CondAL, PCOFFSET(JumpIndirect));
+ }
+}
+
+static void EmitInterpreterLoop(Instruction *in)
+{
+ /* CPU.PCAtOpcodeStart = CPU.PC; */
+ /* CPU.Cycles += CPU.MemSpeed; */
+ /* CPU.PC++; */
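+ /* Then fetch the opcode byte at CPU.PC, look up its handler in
+ * ICPU.S9xOpcodes, call it with the cached registers spilled to memory
+ * around the call, re-run the event checks, and loop. */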
+ uint8_t *start = CachePtr;
+
+ GenARM_LDRB_Imm(CondAL, RegA1, RegCPUPC, 0);
+ GenARM_STR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PCAtOpcodeStart));
+ GenARM_LDR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(S9xOpcodes));
+ GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(1, 0));
+ GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(CPU.MemSpeed, 0));
+ GenARM_LDR_Reg(CondAL, RegA2, RegA2, 1, ENCODE_SHIFT(ShiftLSL, 2), RegA1);
+
+ GenARM_BL(CondAL, PCOFFSET(S9xRegsToMem));
+
+ GenARM_BLX_Reg(CondAL, RegA2);
+ GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+
+ GenARM_BL(CondAL, PCOFFSET(S9xMemToRegs));
+
+ EmitChecks(0);
+
+ GenARM_B(CondAL, PCOFFSET(start));
+}
+
+static void EmitOne(Instruction *in)
+{
+
+#ifndef NDEBUG
+ GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(in->Opcode, 0));
+ GenARM_MOV32_Imm(CondAL, RegA2, (uintptr_t)in->PC);
+ GenARM_MOV_Imm(CondAL, RegA3, ENCODE_IMM(in->Emitter == EmitS9xCall, 0));
+ GenARM_BL(CondAL, PCOFFSET(S9xCallCheckInstruction));
+#endif
+
+ /* Call Opcode */
+ in->Emitter(in);
+}
+
+#define SizeNone 0
+#define SizeRegister8 0
+#define SizeRegister16 0
+#define SizeImmediate8 1
+#define SizeImmediate16 2
+#define SizeImmediate24 3
+#define SizeZeroPage8 1
+#define SizeZeroPage16 1
+#define SizeIndirect8 1
+#define SizeIndirect16 1
+#define SizeIndirectX8 1
+#define SizeIndirectX16 1
+#define SizeIndirectY8 1
+#define SizeIndirectY16 1
+#define SizeIndirectS8 1
+#define SizeIndirectS16 1
+#define SizeIndirectFar8 1
+#define SizeIndirectFar16 1
+#define SizeAbsolute8 2
+#define SizeAbsolute16 2
+#define SizeLong8 3
+#define SizeLong16 3
+
+#define OPSIZE(cond, t, f) cond ? Size##t : Size##f
+#define OPADDRMODE(cond, t, f) cond ? Addr##t : Addr##f
+#define OPFN(cond, t, f) cond ? Fn##t : Fn##f
+
+#define OP(opcode, addrmode, op, arg1, arg2, gen, used, flags) \
+ case 0x##opcode: pc += OPSIZE(addrmode); in.Emitter = EmitNativeCall; in.ShouldBreak = (flags) & OFBreak; in.SectionUpdate = (flags) & OFSectionUpdate; in.Op = OPFN(op); in.AddrMode = OPADDRMODE(addrmode); in.Arg1 = Arg##arg1; in.Arg2 = Arg##arg2; in.OutFlags = gen; in.InFlags = used; break;
+
+#define OPD(opcode, addrmode, op, arg1, arg2, gen, used, flags) \
+ case 0x##opcode: pc += OPSIZE(addrmode); in.Emitter = EmitS9xCall; in.S9xOpcode = ICPU.S9xOpcodes[0x##opcode].S9xOpcode; in.SectionUpdate = (flags) & OFSectionUpdate; in.ShouldBreak = (flags) & OFBreak; in.OutFlags = gen; in.InFlags = used; break;
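+
+/* Illustrative hand-expansion of one opdef.h row, e.g.
+ * OP(A9, MF(Immediate), MF(LD), A, NULL, NZ, 0, OFNone). The MF() and
+ * flag-alias macros come from opgen.h; the Size/Addr/Fn names are formed
+ * by token pasting in OPSIZE/OPADDRMODE/OPFN above. Roughly:
+ *
+ * case 0xA9:
+ *    pc += (CheckEmulation() || CheckMemory()) ? SizeImmediate8 : SizeImmediate16;
+ *    in.Emitter = EmitNativeCall;
+ *    in.Op = (CheckEmulation() || CheckMemory()) ? FnLD8 : FnLD16;
+ *    in.AddrMode = (CheckEmulation() || CheckMemory()) ? AddrImmediate8 : AddrImmediate16;
+ *    in.Arg1 = ArgA; in.Arg2 = ArgNULL;
+ *    in.OutFlags = (Negative | Zero); in.InFlags = 0;
+ *    break;
+ */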
+
+static size_t ParseInstructions(uint8_t *pc, Instruction *instructions, size_t max)
+{
+ bool shouldBreak;
+ int i;
+
+ for (i = 0, shouldBreak = false; i < max && !shouldBreak; i++)
+ {
+ uint8_t opcode;
+ Instruction in = {0};
+
+ in.PC = pc;
+
+#include "arm_dynarec/opgen.h"
+
+ in.Opcode = opcode;
+
+ instructions[i] = in;
+ shouldBreak = in.ShouldBreak;
+ }
+
+ if (i > 0)
+ instructions[i - 1].ShouldBreak = true;
+
+ return i;
+}
+
+static void ScanInstructions(Instruction *instructions, size_t length)
+{
+ uint16_t CurrentFlags = (Negative | Zero | Carry | Overflow);
+ uint16_t i;
+
+ if (length < 2)
+ return;
+
+ /* Dead flag elimination */
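+ /* Walk the block backwards: CurrentFlags tracks which status flags some
+ * later instruction (or the block exit) still consumes. Flags an
+ * instruction generates but nothing later reads are dropped from its
+ * OutFlags so the emitter can skip computing them. */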
+ for (i = length - 1; i > 0; i--)
+ {
+ Instruction *in = &instructions[i - 1];
+ Instruction *next = &instructions[i];
+
+ CurrentFlags = CurrentFlags | next->InFlags;
+ in->OutFlags = in->OutFlags & CurrentFlags;
+ CurrentFlags = CurrentFlags & ~in->OutFlags;
+ }
+}
+
+static void Emit(uint16_t pc, uint8_t **block)
+{
+ uint8_t *start = CachePtr;
+ uint8_t *StartPC = pc + CPU.PCBase;
+ uint32_t address = pc | ICPU.ShiftedPB;
+ int32_t MemoryBlock = (address >> MEMMAP_SHIFT) & MEMMAP_MASK;
+
+ Instruction instructions[100];
+ size_t length;
+ int i;
+
+ if (Memory.BlockIsRAM[MemoryBlock])
+ {
+ /* Code is in RAM; for now, just run the interpreter until an interrupt */
+ Instruction in = { 0 };
+ instructions[0] = in;
+ EmitInterpreterLoop(&instructions[0]);
+ }
+ else
+ {
+ length = ParseInstructions(StartPC, instructions, sizeof(instructions) / sizeof(instructions[0]));
+ ScanInstructions(instructions, length);
+
+ for (i = 0; i < length; i++)
+ EmitOne(&instructions[i]);
+ }
+
+ __clear_cache(start, CachePtr);
+ *block = start;
+}
+
+uint8_t **FetchBlock(uint16_t pc)
+{
+ uint8_t **block = FindBlock(pc);
+#ifndef NDEBUG
+ Metrics.Finds++;
+ Metrics.InterpretedBlock = false;
+
+ if (!*block)
+ Metrics.Emits++;
+#endif
+ if (!*block)
+ Emit(pc, block);
+
+#ifndef NDEBUG
+ trace[traceidx++] = (CacheSection << 16) | pc;
+ trace[traceidx++] = (uintptr_t)*block;
+ traceidx %= 128;
+#endif
+
+ return block;
+}
+
+void S9xMainLoop_Dyna(void)
+{
+ UpdateSection();
+
+#ifdef LAGFIX
+ do
+ {
+#endif
+ uint8_t **block = FetchBlock(CPU.PC - CPU.PCBase);
+ BlockEnter(*block);
+
+ ICPU.Registers.PC = CPU.PC - CPU.PCBase;
+#ifndef USE_BLARGG_APU
+ IAPU.Registers.PC = IAPU.PC - IAPU.RAM;
+#endif
+
+#ifdef LAGFIX
+ if(!finishedFrame)
+ {
+#endif
+ S9xPackStatus();
+#ifndef USE_BLARGG_APU
+ S9xAPUPackStatus();
+#endif
+ CPU.Flags &= ~SCAN_KEYS_FLAG;
+#ifdef LAGFIX
+ }
+ else
+ {
+ finishedFrame = false;
+ break;
+ }
+ } while(!finishedFrame);
+#endif
+}
+
+int DynaInit(void)
+{
+ static bool inited = false;
+
+ if (!inited)
+ {
+ inited = true;
+#ifndef NDEBUG
+ DynaBreak();
+#endif
+
+ Cache = MapRWX((void *)((uintptr_t)DynaInit), BUFFER_SIZE + BUFFER_EXTRA);
+
+ if (!Cache)
+ return -1;
+ }
+
+ return 0;
+}
+
+int DynaReset(void)
+{
+ if (DynaInit())
+ return -1;
+
+ printf("Starting dynarec\n");
+ CacheEmpty();
+ return 0;
+}
diff --git a/source/arm_dynarec/dynaexec.h b/source/arm_dynarec/dynaexec.h
new file mode 100644
index 0000000..b8a807d
--- /dev/null
+++ b/source/arm_dynarec/dynaexec.h
@@ -0,0 +1,14 @@
+#ifndef DYNAEXEC_H
+#define DYNAEXEC_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+void S9xMainLoop_Dyna(void);
+int DynaInit(void);
+int DynaReset(void);
+void DynaBreak(void);
+
+void *MapRWX(void *target, size_t size);
+
+#endif /* DYNAEXEC_H */
diff --git a/source/arm_dynarec/map_rwx.c b/source/arm_dynarec/map_rwx.c
new file mode 100644
index 0000000..d4373f7
--- /dev/null
+++ b/source/arm_dynarec/map_rwx.c
@@ -0,0 +1,16 @@
+#define _GNU_SOURCE /* MAP_ANONYMOUS */
+
+#include <sys/mman.h>
+#include <stdlib.h>
+
+void *MapRWX(void *target, size_t size) {
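+ /* Note: without MAP_FIXED, target is only a placement hint; the kernel
+ * is free to return a different address. */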
+ void *buf = mmap(target, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
+
+ if (buf == MAP_FAILED)
+ {
+ perror("mmap");
+ return NULL;
+ }
+
+ return buf;
+}
diff --git a/source/arm_dynarec/opdef.h b/source/arm_dynarec/opdef.h
new file mode 100644
index 0000000..80606d8
--- /dev/null
+++ b/source/arm_dynarec/opdef.h
@@ -0,0 +1,281 @@
+OP (A8, XF(Register), XF(MOV), Y, A, NZ, 0, OFNone);
+OP (AA, XF(Register), XF(MOV), X, A, NZ, 0, OFNone);
+OP (BA, XF(Register), XF(MOV), X, S, NZ, 0, OFNone);
+OP (98, MF(Register), MF(MOV), A, Y, NZ, 0, OFNone);
+OP (8A, MF(Register), MF(MOV), A, X, NZ, 0, OFNone);
+OP (9A, EF(Register), EF(MOVSP), S, X, 0, 0, OFNone);
+OP (9B, XF(Register), XF(MOV), Y, X, NZ, 0, OFNone);
+OP (BB, XF(Register), XF(MOV), X, Y, NZ, 0, OFNone);
+OP (7B, NF16(Register), NF16(MOV), A, D, NZ, 0, OFNone);
+OP (5B, NF16(Register), NF16(MOV), D, A, NZ, 0, OFNone);
+OP (3B, NF16(Register), NF16(MOV), A, S, NZ, 0, OFNone);
+OP (1B, EF(Register), EF(MOVSP), S, A, 0, 0, OFNone);
+
+OP (A9, MF(Immediate), MF(LD), A, NULL, NZ, 0, OFNone);
+OP (A5, MF(ZeroPage), MF(LD), A, NULL, NZ, 0, OFNone);
+OP (B5, MF(ZeroPage), MF(LD), A, X, NZ, 0, OFNone);
+OP (A3, MF(ZeroPage), MF(LD), A, S, NZ, 0, OFNone);
+OP (AD, MF(Absolute), MF(LD), A, NULL, NZ, 0, OFNone);
+OP (BD, MF(Absolute), MF(LD), A, X, NZ, 0, OFNone);
+OP (B9, MF(Absolute), MF(LD), A, Y, NZ, 0, OFNone);
+OP (AF, MF(Long), MF(LD), A, NULL, NZ, 0, OFNone);
+OP (BF, MF(Long), MF(LD), A, X, NZ, 0, OFNone);
+OP (B2, MF(Indirect), MF(LD), A, NULL, NZ, 0, OFNone);
+OP (A1, MF(IndirectX), MF(LD), A, X, NZ, 0, OFNone);
+OP (B1, MF(IndirectY), MF(LD), A, Y, NZ, 0, OFNone);
+OP (B3, MF(IndirectS), MF(LD), A, Y, NZ, 0, OFNone);
+OP (A7, MF(IndirectFar), MF(LD), A, NULL, NZ, 0, OFNone);
+OP (B7, MF(IndirectFar), MF(LD), A, Y, NZ, 0, OFNone);
+OP (A2, XF(Immediate), XF(LD), X, NULL, NZ, 0, OFNone);
+OP (A6, XF(ZeroPage), XF(LD), X, NULL, NZ, 0, OFNone);
+OP (B6, XF(ZeroPage), XF(LD), X, Y, NZ, 0, OFNone);
+OP (AE, XF(Absolute), XF(LD), X, NULL, NZ, 0, OFNone);
+OP (BE, XF(Absolute), XF(LD), X, Y, NZ, 0, OFNone);
+OP (A0, XF(Immediate), XF(LD), Y, NULL, NZ, 0, OFNone);
+OP (A4, XF(ZeroPage), XF(LD), Y, NULL, NZ, 0, OFNone);
+OP (B4, XF(ZeroPage), XF(LD), Y, X, NZ, 0, OFNone);
+OP (AC, XF(Absolute), XF(LD), Y, NULL, NZ, 0, OFNone);
+OP (BC, XF(Absolute), XF(LD), Y, X, NZ, 0, OFNone);
+
+OP (64, MF(ZeroPage), MF(ST), Z, NULL, 0, 0, OFNone);
+OP (74, MF(ZeroPage), MF(ST), Z, X, 0, 0, OFNone);
+OP (9C, MF(Absolute), MF(ST), Z, NULL, 0, 0, OFNone);
+OP (9E, MF(Absolute), MF(ST), Z, X, 0, 0, OFNone);
+OP (85, MF(ZeroPage), MF(ST), A, NULL, 0, 0, OFNone);
+OP (95, MF(ZeroPage), MF(ST), A, X, 0, 0, OFNone);
+OP (83, MF(ZeroPage), MF(ST), A, S, 0, 0, OFNone);
+OP (8D, MF(Absolute), MF(ST), A, NULL, 0, 0, OFNone);
+OP (9D, MF(Absolute), MF(ST), A, X, 0, 0, OFNone);
+OP (99, MF(Absolute), MF(ST), A, Y, 0, 0, OFNone);
+OP (8F, MF(Long), MF(ST), A, NULL, 0, 0, OFNone);
+OP (9F, MF(Long), MF(ST), A, X, 0, 0, OFNone);
+OP (81, MF(IndirectX), MF(ST), A, X, 0, 0, OFNone);
+OP (91, MF(IndirectY), MF(ST), A, Y, 0, 0, OFNone);
+OP (92, MF(Indirect), MF(ST), A, NULL, 0, 0, OFNone);
+OP (93, MF(IndirectS), MF(ST), A, Y, 0, 0, OFNone);
+OP (87, MF(IndirectFar), MF(ST), A, NULL, 0, 0, OFNone);
+OP (97, MF(IndirectFar), MF(ST), A, Y, 0, 0, OFNone);
+OP (86, XF(ZeroPage), XF(ST), X, NULL, 0, 0, OFNone);
+OP (96, XF(ZeroPage), XF(ST), X, Y, 0, 0, OFNone);
+OP (8E, XF(Absolute), XF(ST), X, NULL, 0, 0, OFNone);
+OP (84, XF(ZeroPage), XF(ST), Y, NULL, 0, 0, OFNone);
+OP (94, XF(ZeroPage), XF(ST), Y, X, 0, 0, OFNone);
+OP (8C, XF(Absolute), XF(ST), Y, NULL, 0, 0, OFNone);
+
+OP (48, MF(Register), MF(PUSH), A, NULL, 0, 0, OFNone);
+OP (DA, XF(Register), XF(PUSH), X, NULL, 0, 0, OFNone);
+OP (5A, XF(Register), XF(PUSH), Y, NULL, 0, 0, OFNone);
+OP (08, NF8(Register), NF8(PUSH), P, NULL, 0, NZCV, OFNone);
+OP (8B, NF8(Register), NF8(PUSH), DB, NULL, 0, 0, OFNone);
+OP (4B, NF8(Register), NF8(PUSH), PB, NULL, 0, 0, OFNone);
+OP (0B, NF16(Register), NF16(PUSH), D, NULL, 0, 0, OFNone);
+OP (D4, NF16(ZeroPage), NF16(PUSH), NULL, NULL, 0, 0, OFNone);
+OP (F4, NF16(Immediate), NF(PEA), NULL, NULL, 0, 0, OFNone);
+OP (62, NF16(Immediate), NF(PER), NULL, NULL, 0, 0, OFNone);
+OP (68, MF(Register), MF(POP), A, NULL, NZ, 0, OFNone);
+OP (FA, XF(Register), XF(POP), X, NULL, NZ, 0, OFNone);
+OP (7A, XF(Register), XF(POP), Y, NULL, NZ, 0, OFNone);
+OP (2B, NF16(Register), NF16(POP), D, NULL, NZ, 0, OFNone);
+OP (AB, NF8(Register), NF8(POP), DB, NULL, NZ, 0, OFNone);
+OP (28, NF8(Register), NF8(POP), P, NULL, NZCV, 0, OFBreak | OFSectionUpdate);
+
+OP (44, NF16(Immediate), NF(MVP), NULL, NULL, 0, NZCV, OFBreak);
+OP (54, NF16(Immediate), NF(MVN), NULL, NULL, 0, NZCV, OFBreak);
+
+OP (09, MF(Immediate), MF(OR), A, NULL, NZ, 0, OFNone);
+OP (05, MF(ZeroPage), MF(OR), A, NULL, NZ, 0, OFNone);
+OP (15, MF(ZeroPage), MF(OR), A, X, NZ, 0, OFNone);
+OP (0D, MF(Absolute), MF(OR), A, NULL, NZ, 0, OFNone);
+OP (1D, MF(Absolute), MF(OR), A, X, NZ, 0, OFNone);
+OP (19, MF(Absolute), MF(OR), A, Y, NZ, 0, OFNone);
+OP (01, MF(IndirectX), MF(OR), A, X, NZ, 0, OFNone);
+OP (11, MF(IndirectY), MF(OR), A, Y, NZ, 0, OFNone);
+OP (12, MF(Indirect), MF(OR), A, NULL, NZ, 0, OFNone);
+OP (03, MF(ZeroPage), MF(OR), A, S, NZ, 0, OFNone);
+OP (13, MF(IndirectS), MF(OR), A, Y, NZ, 0, OFNone);
+OP (07, MF(IndirectFar), MF(OR), A, NULL, NZ, 0, OFNone);
+OP (17, MF(IndirectFar), MF(OR), A, Y, NZ, 0, OFNone);
+OP (0F, MF(Long), MF(OR), A, NULL, NZ, 0, OFNone);
+OP (1F, MF(Long), MF(OR), A, X, NZ, 0, OFNone);
+
+OP (29, MF(Immediate), MF(AND), A, NULL, NZ, 0, OFNone);
+OP (25, MF(ZeroPage), MF(AND), A, NULL, NZ, 0, OFNone);
+OP (35, MF(ZeroPage), MF(AND), A, X, NZ, 0, OFNone);
+OP (2D, MF(Absolute), MF(AND), A, NULL, NZ, 0, OFNone);
+OP (3D, MF(Absolute), MF(AND), A, X, NZ, 0, OFNone);
+OP (39, MF(Absolute), MF(AND), A, Y, NZ, 0, OFNone);
+OP (21, MF(IndirectX), MF(AND), A, X, NZ, 0, OFNone);
+OP (31, MF(IndirectY), MF(AND), A, Y, NZ, 0, OFNone);
+OP (32, MF(Indirect), MF(AND), A, NULL, NZ, 0, OFNone);
+OP (23, MF(ZeroPage), MF(AND), A, S, NZ, 0, OFNone);
+OP (33, MF(IndirectS), MF(AND), A, Y, NZ, 0, OFNone);
+OP (27, MF(IndirectFar), MF(AND), A, NULL, NZ, 0, OFNone);
+OP (37, MF(IndirectFar), MF(AND), A, Y, NZ, 0, OFNone);
+OP (2F, MF(Long), MF(AND), A, NULL, NZ, 0, OFNone);
+OP (3F, MF(Long), MF(AND), A, X, NZ, 0, OFNone);
+
+OP (49, MF(Immediate), MF(EOR), A, NULL, NZ, 0, OFNone);
+OP (45, MF(ZeroPage), MF(EOR), A, NULL, NZ, 0, OFNone);
+OP (55, MF(ZeroPage), MF(EOR), A, X, NZ, 0, OFNone);
+OP (4D, MF(Absolute), MF(EOR), A, NULL, NZ, 0, OFNone);
+OP (5D, MF(Absolute), MF(EOR), A, X, NZ, 0, OFNone);
+OP (59, MF(Absolute), MF(EOR), A, Y, NZ, 0, OFNone);
+OP (41, MF(IndirectX), MF(EOR), A, X, NZ, 0, OFNone);
+OP (51, MF(IndirectY), MF(EOR), A, Y, NZ, 0, OFNone);
+OP (52, MF(Indirect), MF(EOR), A, NULL, NZ, 0, OFNone);
+OP (43, MF(ZeroPage), MF(EOR), A, S, NZ, 0, OFNone);
+OP (53, MF(IndirectS), MF(EOR), A, Y, NZ, 0, OFNone);
+OP (47, MF(IndirectFar), MF(EOR), A, NULL, NZ, 0, OFNone);
+OP (57, MF(IndirectFar), MF(EOR), A, Y, NZ, 0, OFNone);
+OP (4F, MF(Long), MF(EOR), A, NULL, NZ, 0, OFNone);
+OP (5F, MF(Long), MF(EOR), A, X, NZ, 0, OFNone);
+
+OP (69, MF(Immediate), MF(ADC), A, NULL, NZCV, C, OFNone);
+OP (65, MF(ZeroPage), MF(ADC), A, NULL, NZCV, C, OFNone);
+OP (75, MF(ZeroPage), MF(ADC), A, X, NZCV, C, OFNone);
+OP (6D, MF(Absolute), MF(ADC), A, NULL, NZCV, C, OFNone);
+OP (7D, MF(Absolute), MF(ADC), A, X, NZCV, C, OFNone);
+OP (79, MF(Absolute), MF(ADC), A, Y, NZCV, C, OFNone);
+OP (61, MF(IndirectX), MF(ADC), A, X, NZCV, C, OFNone);
+OP (71, MF(IndirectY), MF(ADC), A, Y, NZCV, C, OFNone);
+OP (72, MF(Indirect), MF(ADC), A, NULL, NZCV, C, OFNone);
+OP (63, MF(ZeroPage), MF(ADC), A, S, NZCV, C, OFNone);
+OP (73, MF(IndirectS), MF(ADC), A, Y, NZCV, C, OFNone);
+OP (67, MF(IndirectFar), MF(ADC), A, NULL, NZCV, C, OFNone);
+OP (77, MF(IndirectFar), MF(ADC), A, Y, NZCV, C, OFNone);
+OP (6F, MF(Long), MF(ADC), A, NULL, NZCV, C, OFNone);
+OP (7F, MF(Long), MF(ADC), A, X, NZCV, C, OFNone);
+
+OP (E9, MF(Immediate), MF(SBC), A, NULL, NZCV, C, OFNone);
+OP (E5, MF(ZeroPage), MF(SBC), A, NULL, NZCV, C, OFNone);
+OP (F5, MF(ZeroPage), MF(SBC), A, X, NZCV, C, OFNone);
+OP (ED, MF(Absolute), MF(SBC), A, NULL, NZCV, C, OFNone);
+OP (FD, MF(Absolute), MF(SBC), A, X, NZCV, C, OFNone);
+OP (F9, MF(Absolute), MF(SBC), A, Y, NZCV, C, OFNone);
+OP (E1, MF(IndirectX), MF(SBC), A, X, NZCV, C, OFNone);
+OP (F1, MF(IndirectY), MF(SBC), A, Y, NZCV, C, OFNone);
+OP (F2, MF(Indirect), MF(SBC), A, NULL, NZCV, C, OFNone);
+OP (E3, MF(ZeroPage), MF(SBC), A, S, NZCV, C, OFNone);
+OP (F3, MF(IndirectS), MF(SBC), A, Y, NZCV, C, OFNone);
+OP (E7, MF(IndirectFar), MF(SBC), A, NULL, NZCV, C, OFNone);
+OP (F7, MF(IndirectFar), MF(SBC), A, Y, NZCV, C, OFNone);
+OP (EF, MF(Long), MF(SBC), A, NULL, NZCV, C, OFNone);
+OP (FF, MF(Long), MF(SBC), A, X, NZCV, C, OFNone);
+
+OP (C9, MF(Immediate), MF(CMP), A, NULL, NZC, 0, OFNone);
+OP (C5, MF(ZeroPage), MF(CMP), A, NULL, NZC, 0, OFNone);
+OP (D5, MF(ZeroPage), MF(CMP), A, X, NZC, 0, OFNone);
+OP (CD, MF(Absolute), MF(CMP), A, NULL, NZC, 0, OFNone);
+OP (DD, MF(Absolute), MF(CMP), A, X, NZC, 0, OFNone);
+OP (D9, MF(Absolute), MF(CMP), A, Y, NZC, 0, OFNone);
+OP (C1, MF(IndirectX), MF(CMP), A, X, NZC, 0, OFNone);
+OP (D1, MF(IndirectY), MF(CMP), A, Y, NZC, 0, OFNone);
+OP (D2, MF(Indirect), MF(CMP), A, NULL, NZC, 0, OFNone);
+OP (C3, MF(ZeroPage), MF(CMP), A, S, NZC, 0, OFNone);
+OP (D3, MF(IndirectS), MF(CMP), A, Y, NZC, 0, OFNone);
+OP (C7, MF(IndirectFar), MF(CMP), A, NULL, NZC, 0, OFNone);
+OP (D7, MF(IndirectFar), MF(CMP), A, Y, NZC, 0, OFNone);
+OP (CF, MF(Long), MF(CMP), A, NULL, NZC, 0, OFNone);
+OP (DF, MF(Long), MF(CMP), A, X, NZC, 0, OFNone);
+
+OP (E0, XF(Immediate), XF(CMP), X, NULL, NZC, 0, OFNone);
+OP (E4, XF(ZeroPage), XF(CMP), X, NULL, NZC, 0, OFNone);
+OP (EC, XF(Absolute), XF(CMP), X, NULL, NZC, 0, OFNone);
+
+OP (C0, XF(Immediate), XF(CMP), Y, NULL, NZC, 0, OFNone);
+OP (C4, XF(ZeroPage), XF(CMP), Y, NULL, NZC, 0, OFNone);
+OP (CC, XF(Absolute), XF(CMP), Y, NULL, NZC, 0, OFNone);
+
+OP (24, MF(ZeroPage), MF(BIT), A, NULL, NZV, 0, OFNone);
+OP (2C, MF(Absolute), MF(BIT), A, NULL, NZV, 0, OFNone);
+OP (34, MF(ZeroPage), MF(BIT), A, X, NZV, 0, OFNone);
+OP (3C, MF(Absolute), MF(BIT), A, X, NZV, 0, OFNone);
+OP (89, MF(Immediate), MF(BIT), A, NULL, Z, 0, OFNone);
+
+OP (E6, MF(ZeroPage), MF(INC), NULL, NULL, NZ, 0, OFNone);
+OP (F6, MF(ZeroPage), MF(INC), NULL, X, NZ, 0, OFNone);
+OP (EE, MF(Absolute), MF(INC), NULL, NULL, NZ, 0, OFNone);
+OP (FE, MF(Absolute), MF(INC), NULL, X, NZ, 0, OFNone);
+OP (E8, XF(Register), XF(INC), X, NULL, NZ, 0, OFNone);
+OP (C8, XF(Register), XF(INC), Y, NULL, NZ, 0, OFNone);
+OP (1A, MF(Register), MF(INC), A, NULL, NZ, 0, OFNone);
+
+OP (C6, MF(ZeroPage), MF(DEC), NULL, NULL, NZ, 0, OFNone);
+OP (D6, MF(ZeroPage), MF(DEC), NULL, X, NZ, 0, OFNone);
+OP (CE, MF(Absolute), MF(DEC), NULL, NULL, NZ, 0, OFNone);
+OP (DE, MF(Absolute), MF(DEC), NULL, X, NZ, 0, OFNone);
+OP (CA, XF(Register), XF(DEC), X, NULL, NZ, 0, OFNone);
+OP (88, XF(Register), XF(DEC), Y, NULL, NZ, 0, OFNone);
+OP (3A, MF(Register), MF(DEC), A, NULL, NZ, 0, OFNone);
+
+OP (04, MF(ZeroPage), MF(TSB), A, NULL, Z, 0, OFNone);
+OP (0C, MF(Absolute), MF(TSB), A, NULL, Z, 0, OFNone);
+OP (14, MF(ZeroPage), MF(TRB), A, NULL, Z, 0, OFNone);
+OP (1C, MF(Absolute), MF(TRB), A, NULL, Z, 0, OFNone);
+
+OP (0A, MF(Register), MF(ASL), A, NULL, NZC, 0, OFNone);
+OP (06, MF(ZeroPage), MF(ASL), NULL, NULL, NZC, 0, OFNone);
+OP (16, MF(ZeroPage), MF(ASL), NULL, X, NZC, 0, OFNone);
+OP (0E, MF(Absolute), MF(ASL), NULL, NULL, NZC, 0, OFNone);
+OP (1E, MF(Absolute), MF(ASL), NULL, X, NZC, 0, OFNone);
+
+OP (4A, MF(Register), MF(LSR), A, NULL, NZC, 0, OFNone);
+OP (46, MF(ZeroPage), MF(LSR), NULL, NULL, NZC, 0, OFNone);
+OP (56, MF(ZeroPage), MF(LSR), NULL, X, NZC, 0, OFNone);
+OP (4E, MF(Absolute), MF(LSR), NULL, NULL, NZC, 0, OFNone);
+OP (5E, MF(Absolute), MF(LSR), NULL, X, NZC, 0, OFNone);
+
+OP (2A, MF(Register), MF(ROL), A, NULL, NZC, C, OFNone);
+OP (26, MF(ZeroPage), MF(ROL), NULL, NULL, NZC, C, OFNone);
+OP (36, MF(ZeroPage), MF(ROL), NULL, X, NZC, C, OFNone);
+OP (2E, MF(Absolute), MF(ROL), NULL, NULL, NZC, C, OFNone);
+OP (3E, MF(Absolute), MF(ROL), NULL, X, NZC, C, OFNone);
+
+OP (6A, MF(Register), MF(ROR), A, NULL, NZC, C, OFNone);
+OP (66, MF(ZeroPage), MF(ROR), NULL, NULL, NZC, C, OFNone);
+OP (76, MF(ZeroPage), MF(ROR), NULL, X, NZC, C, OFNone);
+OP (6E, MF(Absolute), MF(ROR), NULL, NULL, NZC, C, OFNone);
+OP (7E, MF(Absolute), MF(ROR), NULL, X, NZC, C, OFNone);
+
+OP (80, NF8(Immediate), NF(BRA), NULL, NULL, 0, NZCV, OFBreak);
+OP (82, NF16(Immediate), NF(BRL), NULL, NULL, 0, NZCV, OFBreak);
+OP (4C, NF16(Immediate), NF(JMP), NULL, NULL, 0, NZCV, OFBreak);
+OP (5C, NF(Immediate24), NF(JMP), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate);
+OP (6C, NF16(Absolute), NF(JMP), NULL, NULL, 0, NZCV, OFBreak);
+OP (7C, NF16(Absolute), NF(JMP), NULL, X, 0, NZCV, OFBreak);
+OP (DC, NF16(Absolute), NF(JML), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate);
+OP (20, NF16(Immediate), NF(JSR), NULL, NULL, 0, NZCV, OFBreak);
+OP (22, NF(Immediate24), NF(JSL), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate);
+OP (FC, NF16(IndirectX), NF(JSR), NULL, X, 0, NZCV, OFBreak);
+OP (40, NF(None), NF(RTI), NULL, NULL, NZCV, NZCV, OFBreak | OFSectionUpdate);
+OP (6B, NF(None), NF(RTL), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate);
+OP (60, NF(None), NF(RTS), NULL, NULL, 0, NZCV, OFBreak);
+
+OP (10, NF8(Immediate), NF(BPL), NULL, NULL, 0, NZCV, OFBreak);
+OP (30, NF8(Immediate), NF(BMI), NULL, NULL, 0, NZCV, OFBreak);
+OP (50, NF8(Immediate), NF(BVC), NULL, NULL, 0, NZCV, OFBreak);
+OP (70, NF8(Immediate), NF(BVS), NULL, NULL, 0, NZCV, OFBreak);
+OP (90, NF8(Immediate), NF(BCC), NULL, NULL, 0, NZCV, OFBreak);
+OP (B0, NF8(Immediate), NF(BCS), NULL, NULL, 0, NZCV, OFBreak);
+OP (D0, NF8(Immediate), NF(BZC), NULL, NULL, 0, NZCV, OFBreak);
+OP (F0, NF8(Immediate), NF(BZS), NULL, NULL, 0, NZCV, OFBreak);
+
+OP (00, NF(None), NF(BRK), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate);
+OP (02, NF(None), NF(COP), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate);
+
+OP (18, NF(None), NF(CLC), NULL, NULL, C, 0, OFNone);
+OP (58, NF(None), NF(CLI), NULL, NULL, 0, 0, OFNone);
+OP (D8, NF(None), NF(CLD), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate);
+OP (B8, NF(None), NF(CLV), NULL, NULL, V, 0, OFNone);
+OP (38, NF(None), NF(SEC), NULL, NULL, C, 0, OFNone);
+OP (78, NF(None), NF(SEI), NULL, NULL, 0, 0, OFNone);
+OP (F8, NF(None), NF(SED), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate);
+OP (C2, NF8(Immediate), NF(REP), NULL, NULL, NZCV, NZCV, OFBreak | OFSectionUpdate);
+OP (E2, NF8(Immediate), NF(SEP), NULL, NULL, NZCV, NZCV, OFBreak | OFSectionUpdate);
+OP (FB, NF(None), NF(XCE), NULL, NULL, C, NZCV, OFBreak | OFSectionUpdate);
+
+OP (DB, NF(None), NF(STP), NULL, NULL, 0, NZCV, OFBreak);
+OP (EB, NF(None), NF(XBA), NULL, NULL, NZ, 0, OFNone);
+OP (CB, NF(None), NF(WAI), NULL, NULL, 0, NZCV, OFBreak);
+OP (42, NF8(Immediate), NF(WDM), NULL, NULL, 0, NZCV, OFBreak); /* Speedhacks, might change PC */
+OP (EA, NF(None), NF(NOP), NULL, NULL, 0, 0, OFNone);
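A note on reading the table above: the field layout is not documented in the source itself, so the annotation below is inferred from the patterns in this file and from the helper macros in opgen.h; treat the field descriptions (and the reading of OFBreak/OFSectionUpdate) as assumptions, not an authoritative specification.

    /* One row of opdef.h, annotated (inferred reading):
     *
     * OP (E7, MF(IndirectFar), MF(SBC), A, NULL, NZCV, C, OFNone);
     *
     *   E7              - 65C816 opcode byte (SBC [dp])
     *   MF(IndirectFar) - addressing-mode helper, chosen per memory width (M flag)
     *   MF(SBC)         - operation helper, chosen per memory width (M flag)
     *   A, NULL         - register operand and index register (none here)
     *   NZCV            - processor flags the operation produces
     *   C               - processor flags the operation consumes
     *   OFNone          - generator flags; OFBreak appears to end the translated
     *                     block, OFSectionUpdate to force a bank/mode re-resolve
     */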
diff --git a/source/arm_dynarec/opgen.h b/source/arm_dynarec/opgen.h
new file mode 100644
index 0000000..aaa5556
--- /dev/null
+++ b/source/arm_dynarec/opgen.h
@@ -0,0 +1,26 @@
+/* Macros that turn the opcode table in opdef.h into code */
+
+#define F8(F) F##8
+#define F16(F) F##16
+#define EF(F) 0, F16(F), F16(F)
+#define NF(F) 0, F, F
+#define NF8(F) 0, F8(F), F8(F)
+#define NF16(F) 0, F16(F), F16(F)
+#define MF(F) (CheckEmulation() || CheckMemory()), F8(F), F16(F)
+#define XF(F) (CheckEmulation() || CheckIndex()), F8(F), F16(F)
+
+#define C (Carry)
+#define Z (Zero)
+#define V (Overflow)
+#define NZ (Negative | Zero)
+#define NZC (Negative | Zero | Carry)
+#define NZV (Negative | Zero | Overflow)
+#define NZCV (Negative | Zero | Carry | Overflow)
+
+switch(opcode = *pc++) {
+
+#include "opdef.h"
+
+default:
+ printf("Invalid opcode : 0x%X\n", opcode);
+}
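To make the opgen.h helpers concrete, the sketch below shows how one opdef.h row expands textually before OP() itself is applied. OP() is defined by the file that includes this header (presumably dynaexec.c) and is not part of this hunk, so only the argument-level expansion is shown; IndirectFar8/IndirectFar16 and SBC8/SBC16 are names the includer is assumed to provide.

    /* OP (E7, MF(IndirectFar), MF(SBC), A, NULL, NZCV, C, OFNone);
     *
     * with MF(), F8()/F16() and the flag macros expanded becomes:
     *
     * OP (E7,
     *     (CheckEmulation() || CheckMemory()), IndirectFar8, IndirectFar16,
     *     (CheckEmulation() || CheckMemory()), SBC8, SBC16,
     *     A, NULL,
     *     (Negative | Zero | Carry | Overflow),
     *     (Carry),
     *     OFNone);
     */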