diff options
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | Makefile.common | 8 | ||||
-rw-r--r-- | libretro.c | 39 | ||||
-rw-r--r-- | libretro_core_options.h | 16 | ||||
-rw-r--r-- | source/arm_dynarec/armfn.S | 501 | ||||
-rw-r--r-- | source/arm_dynarec/armfn.h | 50 | ||||
-rw-r--r-- | source/arm_dynarec/armgen.h | 576 | ||||
-rw-r--r-- | source/arm_dynarec/dynaexec.c | 3049 | ||||
-rw-r--r-- | source/arm_dynarec/dynaexec.h | 14 | ||||
-rw-r--r-- | source/arm_dynarec/map_rwx.c | 16 | ||||
-rw-r--r-- | source/arm_dynarec/opdef.h | 281 | ||||
-rw-r--r-- | source/arm_dynarec/opgen.h | 26 | ||||
-rw-r--r-- | source/cpuexec.c | 8 | ||||
-rw-r--r-- | source/globals.c | 4 | ||||
-rw-r--r-- | source/ppu.c | 9 | ||||
-rw-r--r-- | source/snes9x.h | 1 |
16 files changed, 4601 insertions, 0 deletions
@@ -293,6 +293,7 @@ else ifeq ($(platform), trimui) CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar ARM_ASM := 1 + USE_DYNAREC := 1 SHARED := -shared -Wl,--version-script=link.T -Wl,--no-undefined CFLAGS += -fsingle-precision-constant -fno-PIC -flto CFLAGS += -DLSB_FIRST -DFAST_ALIGNED_LSB_WORD_ACCESS -DRIGHTSHIFT_IS_SAR @@ -631,9 +632,11 @@ ifeq ($(platform), psp1) endif OBJECTS := $(SOURCES_C:.c=.o) +OBJECTS := $(OBJECTS:.S=.o) CXXFLAGS += $(FLAGS) CFLAGS += $(FLAGS) +ASFLAGS += $(FLAGS) OBJOUT = -o LINKOUT = -o diff --git a/Makefile.common b/Makefile.common index 2c015a8..15c4a8e 100644 --- a/Makefile.common +++ b/Makefile.common @@ -52,6 +52,14 @@ ifeq ($(ARM_ASM),1) FLAGS += -DARM_ASM endif +ifeq ($(USE_DYNAREC),1) +SOURCES_C += \ + $(CORE_DIR)/arm_dynarec/dynaexec.c \ + $(CORE_DIR)/arm_dynarec/map_rwx.c \ + $(CORE_DIR)/arm_dynarec/armfn.S +FLAGS += -DUSE_DYNAREC +endif + ifeq ($(LOAD_FROM_MEMORY),1) FLAGS += -DLOAD_FROM_MEMORY else ifneq ($(STATIC_LINKING), 1) @@ -12,6 +12,10 @@ #include "srtc.h" #include "sa1.h" +#ifdef USE_DYNAREC +#include "arm_dynarec/dynaexec.h" +#endif + #ifdef PSP #include <pspkernel.h> #include <pspgu.h> @@ -456,6 +460,12 @@ void retro_init(void) #endif CPU.SaveStateVersion = 0; +#ifdef USE_DYNAREC + /* Init before loading ROM to have a better chance of mmaping close to code */ + if (DynaInit()) + Settings.EnableDynarec = false; +#endif + if (environ_cb(RETRO_ENVIRONMENT_GET_INPUT_BITMASKS, NULL)) libretro_supports_bitmasks = true; } @@ -535,6 +545,7 @@ static void check_variables(bool first_run) { struct retro_variable var; bool prev_frameskip_type; + bool prev_dynarec; if (first_run) { @@ -632,6 +643,27 @@ static void check_variables(bool first_run) if (strcmp(var.value, "enabled") == 0) reduce_sprite_flicker = true; +#ifdef USE_DYNAREC + prev_dynarec = Settings.EnableDynarec; + + var.key = "snes9x_2005_dynarec"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + if (strcmp(var.value, 
"enabled") == 0) + Settings.EnableDynarec = true; + else + Settings.EnableDynarec = false; + } + + if (Settings.EnableDynarec && Settings.EnableDynarec != prev_dynarec) { + if (DynaReset()) + Settings.EnableDynarec = false; + } + +#endif /* USE_DYNAREC */ + /* Reinitialise frameskipping, if required */ if (!first_run && (frameskip_type != prev_frameskip_type)) @@ -827,6 +859,13 @@ void retro_reset(void) { CPU.Flags = 0; S9xSoftReset(); +#ifdef USE_DYNAREC + if (Settings.EnableDynarec) + { + if (DynaReset()) + Settings.EnableDynarec = false; + } +#endif /* USE_DYNAREC */ } size_t retro_serialize_size(void) diff --git a/libretro_core_options.h b/libretro_core_options.h index 409590c..154dc74 100644 --- a/libretro_core_options.h +++ b/libretro_core_options.h @@ -221,6 +221,22 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled" }, +#ifdef USE_DYNAREC + { + "snes9x_2005_dynarec", + "Dynamic Recompiler", + NULL, + "Enables experimental dynamic recompiler.", + NULL, + NULL, + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "disabled" + }, +#endif { NULL, NULL, NULL, NULL, NULL, NULL, {{0}}, NULL }, }; diff --git a/source/arm_dynarec/armfn.S b/source/arm_dynarec/armfn.S new file mode 100644 index 0000000..c5d1b3f --- /dev/null +++ b/source/arm_dynarec/armfn.S @@ -0,0 +1,501 @@ +#define RegA r4 +#define RegX r5 +#define RegY r6 +#define RegP r7 +#define RegCPU r8 +#define RegCPUPC r9 +#define RegCycles r10 +#define RegChecks ip + +#ifndef NDEBUG +#define METRICS_GETSETS_OFF 28 +#define METRICS_SLOWGETSETS_OFF 32 +#endif + +#define CPU_FLAGS_OFF 0 +#define CPU_PC_OFF 12 +#define CPU_PCBASE_OFF 16 +#define CPU_PC_AT_OPCODE_START_OFF 20 +#define CPU_WAIT_ADDRESS_OFF 24 +#define CPU_CYCLES_OFF 32 +#define CPU_NEXT_EVENT_OFF 36 +#define CPU_ICPU_OFF 0x80 +#define CPU_MEMORY_MAP_OFF 0x100 + +#define ICPU_S9X_OPCODES_OFF (CPU_ICPU_OFF + 4) +#define ICPU_REG_PB_OFF (CPU_ICPU_OFF + 8) +#define ICPU_REG_DB_OFF (CPU_ICPU_OFF + 9) 
	@@ C entry point into generated code, declared in armfn.h as
	@@ int BlockEnter(uint8_t *addr). r0 holds the address to jump to.
FUNC(BlockEnter)
	push { r4-r11, ip, lr }		@ BlockReturn pops this same list, with pc in place of lr
	ldr RegCPU, = CPU		@ RegCPU = address of the CPU symbol
	bl S9xMemToRegs			@ loads the register-held emulator state; it saves and restores r0
	bx r0				@ plain branch, lr is not updated, so control does not come back here
	@@ Builds the cache section key from the program bank byte and the
	@@ status flags held in RegP, then stores it to CacheSection.
	@@ Expects RegCPU and RegP to be loaded. Clobbers r0 and r1.
	@@ Key bits as assembled below: bank << 4, 0x8000 decimal flag,
	@@ 0x4000 emulation flag, 0x2000 memory flag, 0x1000 index flag.
FUNC(S9xCallUpdateSection)
	ldr r1, =CacheSection
	ldrb r0, [RegCPU, #ICPU_REG_PB_OFF]
	tst RegP, #FLAG_DECIMAL
	lsl r0, #4			@ no S suffix, so the tst result is still live for orrne
	orrne r0, r0, #0x8000
	tst RegP, #FLAG_EMULATION
	orrne r0, r0, #0x4000
	@ NOTE(review): this early return happens before the str below, so
	@ with the emulation flag set the key is left in r0 but CacheSection
	@ is not written. Confirm that this is intended.
	bxne lr
	tst RegP, #FLAG_INDEX
	orrne r0, r0, #0x1000
	tst RegP, #FLAG_MEMORY
	orrne r0, r0, #0x2000
	str r0, [r1]
	bx lr
	@@ Emits S9xGetByteFast or S9xGetWordFast, depending on Type.
	@@ In:  r0 = address, r1 = second argument (named StartPC in armfn.h).
	@@ Out: r0 = value read. r2 is clobbered. r1 comes back holding the
	@@      incoming r0, because the slot pushed from r0 is popped into r1.
	@@ Fast path: the map entry is used as a base pointer and read directly.
	@@ Slow path (label 9): state is written back and the C reader is called.
.macro GetMemFast Type
FUNC(S9xGet\Type\()Fast)
	push { r0, r3, ip, lr }
#ifndef NDEBUG
	MetricsIncOffset METRICS_GETSETS_OFF
#endif
	ldr r2, =0x0fff @ MEMMAP_MASK
	.ifc \Type,Word
	@ Low 12 address bits all set: the second byte has a different
	@ block index (address >> 12), so take the slow path.
	and r3, r0, r2
	cmp r3, r2
	beq 9f
	.endif

	add ip, RegCPU, #CPU_MEMORY_MAP_OFF
	and r3, r2, r0, lsr #12 @ MEMMAP_SHIFT
	ldr r2, [ip, r3, lsl #2]	@ r2 = map entry for block index r3
	add lr, ip, #MEMORY_MAP_BLOCK_IS_RAM_OFF
	cmp r2, #18 @ MAP_LAST
	blo 9f				@ entries below MAP_LAST are not used as a base pointer

	ldrb lr, [lr, r3]		@ lr = byte at the BLOCK_IS_RAM table for this block
	add ip, ip, #MEMORY_MAP_SPEED_OFF
	ldrb ip, [ip, r3]		@ ip = byte at the SPEED table, added to RegCycles below
	cmp lr, #1
	streq r1, [RegCPU, #CPU_WAIT_ADDRESS_OFF]

	mov r1, r0, lsl #16		@ r1 = low 16 bits of the address
	mov r1, r1, lsr #16
	ldrb r0, [r2, r1]
	.ifc \Type,Word
	add r1, r1, #1
	ldrb r1, [r2, r1]		@ second byte, merged into r0 after the cycle update
	.endif
	.ifc \Type,Byte
	add RegCycles, RegCycles, ip
	.else
	add RegCycles, RegCycles, ip, lsl #1 @ * 2 for GetWord
	.endif
	.ifc \Type,Word
	orr r0, r0, r1, lsl #8
	.endif
	pop { r1, r3, ip, pc }
9:
#ifndef NDEBUG
	MetricsIncOffset METRICS_SLOWGETSETS_OFF
#endif
	str r1, [RegCPU, #CPU_PC_AT_OPCODE_START_OFF]
	PushCPUState			@ store RegCPUPC and RegCycles before calling into C
	bl S9xGet\Type
	pop { r1, r3, ip, lr }
	PopCPUState r2			@ reload RegCPUPC, RegCycles and RegChecks; uses r2 as scratch
	bx lr
.endm
	@@ Picks the opcode table matching the flags in RegP and stores its
	@@ address at ICPU_S9X_OPCODES_OFF. Expects RegP and RegCPU to be
	@@ loaded. Clobbers r0.
FUNC(S9xCallFixCycles)
	tst RegP, #FLAG_EMULATION
	ldrne r0, =S9xOpcodesE1		@ emulation flag set: E1 table, other flags are not tested
	bne 9f

	tst RegP, #FLAG_MEMORY
	beq 2f
	tst RegP, #FLAG_INDEX		@ memory flag set
	ldrne r0, =S9xOpcodesM1X1
	ldreq r0, =S9xOpcodesM1X0
	b 9f
2:
	tst RegP, #FLAG_INDEX		@ memory flag clear
	ldrne r0, =S9xOpcodesM0X1
	ldreq r0, =S9xOpcodesM0X0
9:
	str r0, [RegCPU, #ICPU_S9X_OPCODES_OFF]
	bx lr
Memory +Memory: + .space 0xb468 diff --git a/source/arm_dynarec/armfn.h b/source/arm_dynarec/armfn.h new file mode 100644 index 0000000..4b859f9 --- /dev/null +++ b/source/arm_dynarec/armfn.h @@ -0,0 +1,50 @@ +#ifndef ARMFN_H +#define ARMFN_H + +#include <stdint.h> + +int BlockEnter(uint8_t *addr); +void UpdateSection(void); + +/* Below can only be called from generated code */ + +void S9xRegsToMem(void); +void S9xMemToRegs(void); + +void JumpIndirect(uint8_t *pc); +void JumpDirect(uint8_t *pc); +void JumpDirectChecks(uint8_t *pc); + +uint16_t S9xGetByteFast(uint32_t Address, uint8_t *StartPC); +uint16_t S9xGetWordFast(uint32_t Address, uint8_t *StartPC); +void S9xSetByteFastSA1(uint16_t Word, uint32_t Address); +void S9xSetWordFastSA1(uint16_t Word, uint32_t Address); +void S9xSetByteFastNoSA1(uint16_t Word, uint32_t Address); +void S9xSetWordFastNoSA1(uint16_t Word, uint32_t Address); + +#define S9xSetByteFast (Settings.SA1 ? S9xSetByteFastSA1 : S9xSetByteFastNoSA1) +#define S9xSetWordFast (Settings.SA1 ? S9xSetWordFastSA1 : S9xSetWordFastNoSA1) + +void S9xCallUnpackStatusFast(void); +void S9xCallUpdateSection(void); + +void S9xCallSA1MainLoop(void); +void S9xCallSetPCBase(uint32_t Address); + +void S9xCallHandleChecksWFISFX(void); +void S9xCallHandleChecksWFINoSFX(void); +void S9xCallHandleChecksNoWFISFX(void); +void S9xCallHandleChecksNoWFINoSFX(void); + +#define S9xCallHandleChecksWFI (Settings.SuperFX ? S9xCallHandleChecksWFISFX : S9xCallHandleChecksWFINoSFX) +#define S9xCallHandleChecksNoWFI (Settings.SuperFX ? 
/* Condition code for bits 31:28. The shift is done on an unsigned value
 * because CondAL (14) << 28 does not fit in a signed int. */
#define ASCOND(cond) ((uint32_t)(cond) << 28)
/* Bit for one register in a PUSH/POP register list. */
#define ASLIST(reg) (1 << (reg))
/* Byte distance from pc to target, less the 8 bytes the ARM PC reads ahead. */
#define PCREL(pc, target) ((intptr_t)(target) - (intptr_t)(pc) - 8)
/* 24-bit word offset for a branch emitted at the current CachePtr. */
#define PCOFFSET(function) ((PCREL(CachePtr, function) >> 2) & 0xFFFFFF)
/* Packs an 8-bit value that sits shl bits up (shl even) into the 12-bit
 * immediate operand form: rotate field in bits 11:8, value in bits 7:0. */
#define ENCODE_IMM(imm, shl) (((((32 - (shl)) & 0x1F) >> 1) << 8) | ((imm) & 0xFF))
/* Inverse of ENCODE_IMM: the 8-bit value rotated right by twice the rotate
 * field. Written as a true rotate so that a rotate field of 0 is well
 * defined (no shift by 32), and without a trailing semicolon so it can be
 * used inside expressions. */
#define DECODE_IMM(imm) \
   ((uint32_t)(((uint32_t)((imm) & 0xFF) >> ((((imm) >> 8) & 0xF) << 1)) | \
               ((uint32_t)((imm) & 0xFF) << ((32 - ((((imm) >> 8) & 0xF) << 1)) & 31))))
/* Shift operand field: type in bits 1:0, amount in bits 6:2 (32 encodes as 0). */
#define ENCODE_SHIFT(type, imm) ((type) | (((imm) >= 32 ? 0 : (imm)) << 2))

#define CPU_ICPU_OFFSET ((uintptr_t)&ICPU - (uintptr_t)&CPU)
#define CPU_OPEN_BUS_OFFSET ((uintptr_t)&OpenBus - (uintptr_t)&CPU)

#define CPU_OFFSET(field) (offsetof(__typeof__(CPU), field))
#define ICPU_OFFSET(field) (CPU_ICPU_OFFSET + offsetof(__typeof__(ICPU), field))

/*
 * Splits imm32 into operands in ENCODE_IMM form whose decoded values OR
 * together to imm32.
 *
 * result receives the operands and needs room for up to 4 entries.
 * Returns the number of entries written; zero yields one operand of 0.
 *
 * When the highest chunk starts above bit 24 and the first (lowest) chunk
 * fits in the bits that an 8-bit window starting there would wrap onto,
 * both are folded into result[0] as a single wrapped operand.
 */
static size_t DecomposeImm32(uint32_t imm32, uint32_t *result)
{
   uint8_t shift = 0;
   size_t count = 0;

   if (!imm32) {
      result[0] = 0;
      return 1;
   }

   for (;;)
   {
      /* Skip empty 2-bit groups: the rotation is always even. */
      while (shift < 32 && !(imm32 >> shift & 0x03))
         shift += 2;

      if (shift >= 32)
         break;

      if (shift > 24)
      {
         /* Number of low values that the wrapped part of the window covers. */
         uint8_t remaining = (1 << (shift - 24));
         uint32_t firstStore = count == 0 ? 0 : DECODE_IMM(result[0]);

         if (firstStore && firstStore < remaining)
         {
            uint8_t bottom = imm32 >> shift;
            uint8_t top = (imm32 & (remaining - 1)) << (32 - shift);
            result[0] = ENCODE_IMM(bottom | top, shift);
            break;
         }
      }

      result[count++] = ENCODE_IMM(imm32 >> shift, shift);
      shift += 8;
   }

   return count;
}
ARMReg rm) +{ + ARMEmit( + ASCOND(cond) | + 0 << 26 | /* 0b00 */ + 0 << 25 | + op << 21 | + s << 20 | + rn << 16 | + rd << 12 | + rs << 8 | + 0 << 7 | + type << 5 | + 1 << 4 | + rm + ); +} + +static void GenARM_CMP_Imm(enum ARMCond cond, enum ARMReg reg, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpCMP, 1, 0, reg, imm); +} + +static void GenARM_CMP_Reg(enum ARMCond cond, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpCMP, 1, 0, rn, shift, rm); +} + +static void GenARM_SUB_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpSUB, 0, rd, rn, imm); +} + +static void GenARM_SUBS_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpSUB, 1, rd, rn, imm); +} + +static void GenARM_SUB_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpSUB, 0, rd, rn, shift, rm); +} + +static void GenARM_SUBS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpSUB, 1, rd, rn, shift, rm); +} + +static void GenARM_ADD_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpADD, 0, rd, rn, imm); +} + +static void GenARM_ADD_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpADD, 0, rd, rn, shift, rm); +} + +static void GenARM_ADDS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpADD, 1, rd, rn, shift, rm); +} + +static void GenARM_MOV_Imm(enum ARMCond cond, enum ARMReg rd, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpMOV, 0, rd, 0, imm); +} + +static void GenARM_MOV_Reg(enum ARMCond cond, enum ARMReg rd, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpMOV, 0, rd, 0, shift, rm); +} + +static void GenARM_MOVS_Reg(enum ARMCond cond, enum ARMReg rd, uint8_t 
shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpMOV, 1, rd, 0, shift, rm); +} + +static void GenARM_AND_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpAND, 0, rd, rn, imm); +} + +static void GenARM_ANDS_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpAND, 1, rd, rn, imm); +} + +static void GenARM_AND_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpAND, 0, rd, rn, shift, rm); +} + +static void GenARM_ANDS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpAND, 1, rd, rn, shift, rm); +} + +static void GenARM_EOR_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpEOR, 0, rd, rn, imm); +} + +static void GenARM_EORS_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpEOR, 1, rd, rn, imm); +} + +static void GenARM_EOR_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpEOR, 0, rd, rn, shift, rm); +} + +static void GenARM_EORS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpEOR, 1, rd, rn, shift, rm); +} + +static void GenARM_ORR_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpORR, 0, rd, rn, imm); +} + +static void GenARM_ORRS_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpORR, 1, rd, rn, imm); +} + +static void GenARM_ORR_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpORR, 0, rd, rn, shift, rm); +} + +static void GenARM_ORRS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + 
ARMEmitDPI_Reg(cond, DPIOpORR, 1, rd, rn, shift, rm); +} + +static void GenARM_TST_Imm(enum ARMCond cond, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpTST, 1, 0, rn, imm); +} + +static void GenARM_TST_Reg(enum ARMCond cond, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpTST, 1, 0, rn, shift, rm); +} + +static void GenARM_BIC_Imm(enum ARMCond cond, enum ARMReg rd, enum ARMReg rn, uint16_t imm) +{ + ARMEmitDPI_Imm(cond, DPIOpBIC, 0, rd, rn, imm); +} + +static void GenARM_MVN_Reg(enum ARMCond cond, enum ARMReg rd, uint8_t shift, enum ARMReg rm) +{ + ARMEmitDPI_Reg(cond, DPIOpNOT, 0, rd, 0, shift, rm); +} + +static void ARMEmitLS_Imm(enum ARMCond cond, bool p, bool w, uint8_t op, enum ARMReg rt, enum ARMReg rn, int16_t imm) +{ + bool u = imm >= 0 ? 1 : 0; + imm = (u ? imm : -imm) & 0xFFF; + + ARMEmit( + ASCOND(cond) | + 1 << 26 | /* 0b01 */ + 0 << 25 | + p << 24 | + u << 23 | + w << 21 | + op << 20 | + rn << 16 | + rt << 12 | + imm + ); +} + +static void ARMEmitLS_Reg(enum ARMCond cond, bool p, bool u, bool w, uint8_t op, enum ARMReg rt, enum ARMReg rn, uint8_t shift, enum ARMReg rm) +{ + ARMEmit( + ASCOND(cond) | + 1 << 26 | /* 0b01 */ + 1 << 25 | + p << 24 | + u << 23 | + w << 21 | + op << 20 | + rn << 16 | + rt << 12 | + shift << 5 | + 0 << 4 | + rm + ); +} + +static void ARMEmitLSH_Imm(enum ARMCond cond, bool p, bool w, uint8_t op, enum ARMReg rt, enum ARMReg rn, int16_t imm) +{ + bool u = imm >= 0 ? 1 : 0; + imm = (u ? imm : -imm) & 0xFF; + + ARMEmit( + ASCOND(cond) | + 0 << 25 | /* 0b000 */ + p << 24 | + u << 23 | + 1 << 22 | + w << 21 | + op << 20 | + rn << 16 | + rt << 12 | + ((imm & 0xF0) << 4) | + 0xb << 4 | /* 0b1011 */ + (imm & 0x0F) + ); +} + +static void ARMEmitLSB_Imm(enum ARMCond cond, bool p, bool w, uint8_t op, enum ARMReg rt, enum ARMReg rn, int16_t imm) +{ + bool u = imm >= 0 ? 1 : 0; + imm = (u ? 
imm : -imm) & 0xFF; + + ARMEmit( + ASCOND(cond) | + 0x2 << 25 | /* 0b010 */ + p << 24 | + u << 23 | + 1 << 22 | + w << 21 | + op << 20 | + rn << 16 | + rt << 12 | + imm + ); +} + +static void GenARM_LDR_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm) +{ + ARMEmitLS_Imm(cond, 1, 0, 1, rt, rn, imm); +} + +static void GenARM_LDR_Reg(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, bool add, uint8_t shift, enum ARMReg rm) +{ + ARMEmitLS_Reg(cond, 1, add, 0, 1, rt, rn, shift, rm); +} + +static void GenARM_LDRH_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm) +{ + ARMEmitLSH_Imm(cond, 1, 0, 1, rt, rn, imm); +} + +static void GenARM_LDRB_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm) +{ + ARMEmitLSB_Imm(cond, 1, 0, 1, rt, rn, imm); +} + +static void GenARM_STR_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm) +{ + ARMEmitLS_Imm(cond, 1, 0, 0, rt, rn, imm); +} + +static void GenARM_STRH_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm) +{ + ARMEmitLSH_Imm(cond, 1, 0, 0, rt, rn, imm); +} + +static void GenARM_STRB_Imm(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, int16_t imm) +{ + ARMEmitLSB_Imm(cond, 1, 0, 0, rt, rn, imm); +} + +static void GenARM_STR_Reg(enum ARMCond cond, enum ARMReg rt, enum ARMReg rn, bool add, uint8_t shift, enum ARMReg rm) +{ + ARMEmitLS_Reg(cond, 1, add, 0, 0, rt, rn, shift, rm); +} + +static void GenARM_PUSH(uint16_t regs) +{ + ARMEmit( + ASCOND(CondAL) | + (0x92 << 20) | /* 0b10010010 */ + (RegSP << 16) | + (regs) + ); +} + +static void GenARM_POP(uint16_t regs) +{ + ARMEmit( + ASCOND(CondAL) | + (0x8B << 20) | /* 0b10001011 */ + (RegSP << 16) | + (regs) + ); +} + +static void GenARM_B(enum ARMCond cond, uint32_t offset) +{ + ARMEmit( + ASCOND(cond) | + (0xA << 24) | /* 0b1010 */ + (offset) + ); +} + +static void GenARM_BL(enum ARMCond cond, uint32_t offset) +{ + ARMEmit( + ASCOND(cond) | + (0xB << 24) | /* 0b1011 */ + (offset) + ); +} + +static 
void GenARM_BX_Reg(enum ARMCond cond, enum ARMReg reg)
+{ /* Emits BX reg (branch to the address held in reg). */
+    ARMEmit(
+        ASCOND(cond) |
+        (0x12 << 20) | /* 0b 0001 0010 */
+        (0xFFF1 << 4) | /* 0b 1111 1111 1111 0001 */
+        (reg)
+    );
+}
+
+static void GenARM_BLX_Reg(enum ARMCond cond, enum ARMReg reg)
+{ /* Emits BLX reg; identical to GenARM_BX_Reg except for the 0011 low nibble of the fixed field. */
+    ARMEmit(
+        ASCOND(cond) |
+        (0x12 << 20) | /* 0b 0001 0010 */
+        (0xFFF3 << 4) | /* 0b 1111 1111 1111 0011 */
+        (reg)
+    );
+}
+
+static void GenARM_UXTB_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rm)
+{ /* Zero-extends the low byte of rm into rd, implemented as AND with 0xFF rather than a native UXTB. */
+    GenARM_AND_Imm(cond, rd, rm, ENCODE_IMM(0xFF, 0));
+}
+
+static void GenARM_MOVB_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rm)
+{ /* Replaces the low byte of rd with the low byte of rm and keeps rd bits 8-31: rd = rd >> 8; rd |= rm << 24; rd = ror(rd, 24). Three instructions, all under cond. Assumes ENCODE_SHIFT(type, n) shifts the last operand by n -- TODO confirm. */
+    GenARM_MOV_Reg(cond, rd, ENCODE_SHIFT(ShiftLSR, 8), rd);
+    GenARM_ORR_Reg(cond, rd, rd, ENCODE_SHIFT(ShiftLSL, 24), rm);
+    GenARM_MOV_Reg(cond, rd, ENCODE_SHIFT(ShiftROR, 24), rd);
+}
+
+static void GenARM_MOVBS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg scratch, enum ARMReg rm)
+{ /* Flag-setting byte move: masks rd with the (0xFF, 8) immediate, puts rm & 0xFF into scratch with ANDS (this is what sets the ARM flags), then ORs scratch into rd. Clobbers scratch. NOTE(review): only the first instruction honours cond, the other two are emitted as CondAL; the callers visible in this chunk all pass CondAL -- confirm before using another condition. */
+    GenARM_AND_Imm(cond, rd, rd, ENCODE_IMM(0xFF, 8));
+    GenARM_ANDS_Imm(CondAL, scratch, rm, ENCODE_IMM(0xFF, 0));
+    GenARM_ORR_Reg(CondAL, rd, rd, 0, scratch);
+}
+
+static void GenARM_UXTHS_Reg_(enum ARMCond cond, bool s, enum ARMReg rd, enum ARMReg rm)
+{ /* Zero-extends the low 16 bits of rm into rd via LSL 16 followed by LSR 16; when s is true the second shift is the flag-setting MOVS form. */
+    GenARM_MOV_Reg(cond, rd, ENCODE_SHIFT(ShiftLSL, 16), rm);
+    if (s)
+        GenARM_MOVS_Reg(cond, rd, ENCODE_SHIFT(ShiftLSR, 16), rd);
+    else
+        GenARM_MOV_Reg(cond, rd, ENCODE_SHIFT(ShiftLSR, 16), rd);
+}
+
+static void GenARM_UXTH_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rm)
+{ /* Non-flag-setting variant of GenARM_UXTHS_Reg_. */
+    GenARM_UXTHS_Reg_(cond, 0, rd, rm);
+}
+
+static void GenARM_UXTHS_Reg(enum ARMCond cond, enum ARMReg rd, enum ARMReg rm)
+{ /* Flag-setting variant of GenARM_UXTHS_Reg_. */
+    GenARM_UXTHS_Reg_(cond, 1, rd, rm);
+}
+
+static void GenARM_MOV32_Imm(enum ARMCond cond, enum ARMReg reg, uint32_t imm)
+{ /* Loads a 32-bit constant: DecomposeImm32 splits imm into at most 4 encoded operands, the first is MOVed and the rest are ORRed in. */
+    uint32_t values[4];
+    uint32_t count = DecomposeImm32(imm, values);
+    uint32_t i;
+
+    GenARM_MOV_Imm(cond, reg, values[0]); /* emitted even when count is 0: assumes DecomposeImm32 always fills values[0] -- TODO confirm */
+
+    for (i = 1; i < count; i++)
+    {
+        GenARM_ORR_Imm(cond, reg, reg, values[i]);
+    }
+}
diff --git a/source/arm_dynarec/dynaexec.c b/source/arm_dynarec/dynaexec.c
new file mode 
100644
index 0000000..d63d710
--- /dev/null
+++ b/source/arm_dynarec/dynaexec.c
@@ -0,0 +1,3049 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "snes9x.h"
+#include "cpuexec.h"
+#include "sa1.h"
+#include "apu.h"
+#include "arm_dynarec/armfn.h"
+#include "arm_dynarec/armgen.h"
+#include "arm_dynarec/dynaexec.h"
+
+#define BUFFER_SIZE (6 << 20) /* 6 MiB: once CachePtr reaches Cache + BUFFER_SIZE, FindBlock resets the whole translation cache */
+#define BUFFER_EXTRA (1 << 20) /* 1 MiB of slack past BUFFER_SIZE; CacheEmpty clears BUFFER_SIZE + BUFFER_EXTRA bytes */
+#define BLOCK_SIZE 0x1000 /* pointer slots allocated per section by FindBlock, indexed by the low 12 bits of the PC */
+
+#ifndef NDEBUG
+int trace[128];
+int traceidx;
+int BreakPC; /* CheckInstruction calls DynaBreak when PB:PC equals this (bank in bits 16-23) */
+int BreakOpcode; /* CheckInstruction calls DynaBreak when the current opcode byte equals this */
+#endif
+
+enum SNESRegArg
+{
+    ArgNULL,
+    ArgA,
+    ArgX,
+    ArgY,
+    ArgS,
+    ArgD,
+    ArgDB,
+    ArgPB,
+    ArgP,
+    ArgZ, /* EmitST stores the constant 0 for this argument */
+};
+
+enum OpcodeFlag
+{
+    OFNone = 0,
+    OFBreak = 1 << 0,
+    OFSectionUpdate = 1 << 1,
+};
+
+#define V(val) val##8, val##16 /* expands one name into its 8-bit and 16-bit enumerators, e.g. V(FnLD) gives FnLD8, FnLD16 */
+
+enum OpAddrMode
+{
+    AddrNone,
+    V(AddrRegister),
+    V(AddrImmediate),
+    AddrImmediate24,
+    V(AddrZeroPage),
+    V(AddrIndirect),
+    V(AddrIndirectX),
+    V(AddrIndirectY),
+    V(AddrIndirectS),
+    V(AddrIndirectFar),
+    V(AddrAbsolute),
+    V(AddrLong),
+};
+
+enum OpFunction
+{
+    V(FnMOV),
+    V(FnMOVSP),
+    V(FnLD),
+    V(FnST),
+    V(FnPUSH),
+    FnPEA,
+    FnPER,
+    V(FnPOP),
+    FnMVN,
+    FnMVP,
+    V(FnOR),
+    V(FnAND),
+    V(FnEOR),
+    V(FnADC),
+    V(FnSBC),
+    V(FnCMP),
+    V(FnBIT),
+    V(FnINC),
+    V(FnDEC),
+    V(FnTSB),
+    V(FnTRB),
+    V(FnASL),
+    V(FnLSR),
+    V(FnROL),
+    V(FnROR),
+    FnBRA,
+    FnBRL,
+    FnJMP,
+    FnJML,
+    FnJSR,
+    FnJSL,
+    FnRTI,
+    FnRTL,
+    FnRTS,
+    FnBPL,
+    FnBMI,
+    FnBVC,
+    FnBVS,
+    FnBCC,
+    FnBCS,
+    FnBZC,
+    FnBZS,
+    FnBRK,
+    FnCOP,
+    FnCLC,
+    FnCLI,
+    FnCLD,
+    FnCLV,
+    FnSEC,
+    FnSEI,
+    FnSED,
+    FnREP,
+    FnSEP,
+    FnXCE,
+    FnSTP,
+    FnXBA,
+    FnWAI,
+    FnWDM,
+    FnNOP,
+};
+
+#undef V
+
+#define CHECK_8_16(field, value) (in->field == value##8 || in->field == value##16 ) /* true for either width variant; like the macros below it reads a variable named `in` from the enclosing scope */
+#define CHECK_FIELD(field, value) (in->field == value)
+#define IS_VOP(value) (CHECK_8_16(Op, Fn##value)) /* width-variant operation test, e.g. IS_VOP(LD) */
+#define IS_OP(value) (CHECK_FIELD(Op, Fn##value)) /* single-enumerator operation test, e.g. IS_OP(PEA) */
+#define IS_VADDRMODE(value) (CHECK_8_16(AddrMode, Addr##value)) /* width-variant addressing-mode test, e.g. IS_VADDRMODE(Immediate) */
+#define IS_ADDRMODE(value) 
(CHECK_FIELD(AddrMode, Addr##value))
+
+#define CASE_VOP(value) case Fn##value##8: case Fn##value##16 /* two case labels (8- and 16-bit variant); the use site supplies the final colon */
+#define CASE_OP(value) case Fn##value /* single case label; the use site supplies the colon */
+
+enum ChecksFlags
+{
+    CheckWFI = 1, /* makes EmitChecks call S9xCallHandleChecksWFI instead of the NoWFI handler */
+};
+
+struct Instruction;
+
+typedef struct Instruction {
+    void (*S9xOpcode)(void);
+    void (*Emitter)(struct Instruction *);
+    uint8_t *PC; /* operand bytes are read as PC[1], PC[2], PC[3]; PC[0] is presumably the opcode byte -- TODO confirm */
+    uint8_t Opcode;
+    enum OpAddrMode AddrMode;
+    enum OpFunction Op;
+    enum SNESRegArg Arg1;
+    enum SNESRegArg Arg2;
+    uint16_t OutFlags; /* masked with Zero | Negative etc. before being handed to EmitSetFlags */
+    uint16_t InFlags;
+    bool SectionUpdate;
+    bool ShouldBreak;
+} Instruction;
+
+uint32_t CacheSection; /* upper part of the Sections index; FindBlock ORs the top 4 PC bits into it */
+static uint8_t *CachePtr; /* bump pointer: next free byte of the translation cache */
+static uint8_t *Cache; /* start of the translation cache buffer */
+static uint8_t **Sections[13 * 0x1000]; /* Max: Decimal | Emulation = 13 */
+
+#ifndef NDEBUG
+struct
+{
+    uint32_t Emits;
+    uint32_t Finds;
+    uint32_t InterpretedFinds;
+    bool InterpretedBlock;
+    uint32_t Ops;
+    uint32_t InterpretedOps;
+    uint32_t ContextSwitches;
+    uint32_t GetSets;
+    uint32_t SlowGetSets;
+    uint32_t OpCounts[0x100];
+    uint32_t InterpretedOpCounts[0x100];
+    uint32_t InterpretedBlockStarts[0x100];
+} Metrics;
+
+void MetricsReset(void)
+{ /* Zeroes every debug counter. memset needs string.h, which is not among the standard headers included directly by this file; presumably it arrives through a project header -- TODO confirm. */
+    memset(&Metrics, 0, sizeof(Metrics));
+}
+
+void MetricsPrint(void)
+{ /* Dumps the debug counters to stdout. NOTE(review): both ratios divide by Metrics.Finds without a zero check, so they print NaN or inf before the first find; the uint32_t counters are printed with %d. */
+    int i;
+    printf("Cache hit rate: %f\n", (float)(Metrics.Finds - Metrics.Emits) / Metrics.Finds);
+    printf("Avg. 
instructions per find: %f\n", (float)Metrics.Ops / Metrics.Finds);
+
+    printf("Counts by instruction: \n");
+    for (i = 0; i < sizeof(Metrics.OpCounts) / sizeof(Metrics.OpCounts[0]); i++)
+    {
+        printf("0x%X: %d\n", i, Metrics.OpCounts[i]);
+    }
+
+    printf("Interpreted counts by instruction: \n");
+    for (i = 0; i < sizeof(Metrics.InterpretedOpCounts) / sizeof(Metrics.InterpretedOpCounts[0]); i++)
+    {
+        if (Metrics.InterpretedOpCounts[i] > 0)
+            printf("0x%X: %d\n", i, Metrics.InterpretedOpCounts[i]);
+    }
+
+    printf("Interpreted block starts by instruction: \n");
+    for (i = 0; i < sizeof(Metrics.InterpretedBlockStarts) / sizeof(Metrics.InterpretedBlockStarts[0]); i++)
+    {
+        if (Metrics.InterpretedBlockStarts[i] > 0)
+            printf("0x%X: %d\n", i, Metrics.InterpretedBlockStarts[i]);
+    }
+}
+#endif
+
+uint8_t **FetchBlock(uint16_t pc);
+
+static void *CacheAlloc(size_t size)
+{ /* Bump allocator: returns the current CachePtr and advances it by size. No bounds check and no zeroing here; FindBlock tests CachePtr against BUFFER_SIZE before allocating, and CacheEmpty is what zeroes the buffer. */
+    uint8_t *prev = CachePtr;
+    CachePtr += size;
+    return (void *)prev;
+}
+
+static void CacheEmpty(void)
+{ /* Discards all translated code: zeroes the buffer including the slack area, clears every section table pointer and rewinds CachePtr. */
+    memset(Cache, 0, BUFFER_SIZE + BUFFER_EXTRA);
+    memset(Sections, 0, sizeof(Sections));
+    CachePtr = Cache;
+}
+
+static uint8_t **FindBlock(uint16_t pc)
+{
+    /* Section is D/E/X/M flags, PB, and top 4 bits of PC.
+     * Returns the address of the slot for pc inside the section table; the
+     * slot is NULL while no code has been stored for that address. A missing
+     * table is allocated on demand with BLOCK_SIZE slots. */
+    uint32_t section = (CacheSection | ((pc & 0xF000) >> 12));
+    uint8_t **block = Sections[section];
+    pc &= ~0xF000; /* keep only the low 12 bits as the slot index */
+
+    if (!block || !block[pc])
+    {
+        if (CachePtr >= Cache + BUFFER_SIZE) /* cache full: drop everything, including the table found above */
+        {
+            CacheEmpty();
+            printf("Reset translation cache\n");
+            block = NULL;
+        }
+
+        if (!block)
+            block = Sections[section] = CacheAlloc(BLOCK_SIZE * sizeof(uint8_t *));
+    }
+
+    return &block[pc];
+}
+
+
+#ifndef NDEBUG
+
+void CheckInstruction(uint8_t opcode, uint8_t *pc, bool interpreted)
+{ /* Debug-only hook: updates the per-opcode counters, then (in the remainder of the function) compares the expected pc and opcode with the CPU state and calls DynaBreak on a breakpoint or mismatch. */
+    Metrics.Ops++;
+    Metrics.OpCounts[opcode]++;
+
+    if (interpreted)
+    {
+        Metrics.InterpretedOps++;
+        Metrics.InterpretedOpCounts[opcode]++;
+        if (!Metrics.InterpretedBlock) /* count only the first interpreted op of a run as a block start */
+        {
+            Metrics.InterpretedBlock = true;
+            Metrics.InterpretedFinds++;
+            Metrics.InterpretedBlockStarts[opcode]++;
+        }
+    }
+
+    if (CPU.PC 
- CPU.PCBase == (BreakPC & 0xffff) && ICPU.Registers.PB == ((BreakPC >> 16) & 0xFF))
+        DynaBreak();
+
+    if (*CPU.PC == BreakOpcode)
+        DynaBreak();
+
+    if (pc != CPU.PC)
+    {
+        fprintf(stderr, "Incorrect PC: Expected 0x%X, was 0x%X\n", (uintptr_t)pc, (uintptr_t)CPU.PC); /* NOTE(review): %X expects unsigned int; this matches uintptr_t only where pointers are 32 bits wide */
+        DynaBreak();
+    }
+
+    if (opcode != *CPU.PC)
+    {
+        fprintf(stderr, "Incorrect opcode: Expected 0x%X, was 0x%X\n", opcode, *CPU.PC);
+        DynaBreak();
+    }
+}
+#endif
+
+static void EmitChecks(enum ChecksFlags flags)
+{ /* Emits the per-instruction event check. With SA1 enabled it first emits a call to S9xCallSA1MainLoop, taken when the byte SA1.Executing is non-zero. It then emits a flag test followed by a conditional call to one of the two S9xCallHandleChecks* stubs. */
+    if (Settings.SA1)
+    {
+        GenARM_MOV32_Imm(CondAL, RegR0, (uintptr_t)&SA1);
+        GenARM_LDRB_Imm(CondAL, RegR0, RegR0, offsetof(__typeof__(SA1), Executing));
+        GenARM_CMP_Imm(CondAL, RegR0, ENCODE_IMM(0, 0));
+        GenARM_BL(CondNE, PCOFFSET(S9xCallSA1MainLoop));
+    }
+
+    GenARM_ANDS_Imm(CondAL, RegR0, RegChecks, ENCODE_IMM(0x88, 4)); /* 0x880 = NMI_FLAG | IRQ_PENDING_FLAG */
+    GenARM_CMP_Reg(CondEQ, RegCycles, ENCODE_SHIFT(ShiftLSR, 16), RegChecks); /* runs only when neither flag is set; compares against RegChecks >> 16, presumably the next event cycle count -- TODO confirm */
+    GenARM_ORRS_Imm(CondHS, RegR0, RegR0, ENCODE_IMM(1, 0)); /* forces a non-zero result so that the BL below is taken */
+
+    if (flags & CheckWFI)
+        GenARM_BL(CondNE, PCOFFSET(S9xCallHandleChecksWFI));
+    else
+        GenARM_BL(CondNE, PCOFFSET(S9xCallHandleChecksNoWFI));
+}
+
+static void PatchJump(uint32_t *source, uint8_t *target)
+{ /* Rewrites the instruction at source into a branch (opcode nibble 0xA) to target: the condition nibble already present is kept and the 24-bit field becomes PCREL(source, target) >> 2. Callers flush the instruction cache themselves. */
+    *source = ((*source & 0xF0000000) | (0xA << 24) | ((PCREL(source, target) >> 2) & 0xFFFFFF));
+}
+
+static uint8_t *HandleFlags(bool checkWFI)
+{ /* Services a pending NMI and IRQ. Returns the PC to continue at, or NULL when no path below assigned one. checkWFI enables leaving the waiting-for-interrupt state. */
+    uint8_t *NewPC = NULL;
+
+    if (CPU.Flags & NMI_FLAG)
+    {
+        if (--CPU.NMICycleCount == 0)
+        {
+            CPU.Flags &= ~NMI_FLAG;
+            if (checkWFI && CPU.WaitingForInterrupt)
+            {
+                CPU.WaitingForInterrupt = false;
+                CPU.PC++;
+            }
+            S9xOpcode_NMI();
+            UpdateSection();
+            NewPC = CPU.PC;
+        }
+    }
+
+    if (CPU.Flags & IRQ_PENDING_FLAG)
+    {
+        if (CPU.IRQCycleCount == 0)
+        {
+            if (checkWFI && CPU.WaitingForInterrupt)
+            {
+                CPU.WaitingForInterrupt = false;
+                NewPC = CPU.PC++; /* NOTE(review): post-increment stores the PC from before the increment, unlike the NMI path above; if the IRQ is not taken below, the returned pointer is one byte behind CPU.PC -- confirm this is intended */
+            }
+            if (CPU.IRQActive && !Settings.DisableIRQ)
+            {
+                if (!CheckFlag(IRQ))
+                {
+                    S9xOpcode_IRQ();
+                    UpdateSection();
+                    NewPC = CPU.PC;
+                }
+            }
+            else
+                CPU.Flags &= ~IRQ_PENDING_FLAG;
+        }
+        else if (--CPU.IRQCycleCount == 0 && CheckFlag(IRQ))
+            CPU.IRQCycleCount = 1; /* IRQ flag set: hold the countdown at 1 so it is re-tested on the next call */
+    }
+
+    return NewPC;
+}
+
+uint8_t *HandleFlagsNoWFI(void)
+{ /* Entry point without waiting-for-interrupt handling. */
+    return HandleFlags(false);
+}
+
+uint8_t *HandleFlagsWFI(void)
+{ /* Entry point with waiting-for-interrupt handling. */
+    return HandleFlags(true);
+}
+
+static int HandleHBlank(bool SFX)
+{ /* Runs the H-blank processing (SFX or non-SFX variant). Returns 3 when LAGFIX is compiled in and finishedFrame is set, 1 when SCAN_KEYS_FLAG is set in CPU.Flags, otherwise 0. */
+#ifndef NDEBUG
+    Metrics.ContextSwitches++;
+#endif
+
+#ifndef USE_BLARGG_APU
+    if (CPU.WhichEvent == HBLANK_END_EVENT)
+        while (IAPU.APUExecuting && APU.Cycles <= CPU.Cycles)
+            APU_EXECUTE1();
+#endif
+
+    if (SFX)
+        S9xDoHBlankProcessing_SFX();
+    else
+        S9xDoHBlankProcessing_NoSFX();
+
+#ifdef LAGFIX
+    if(finishedFrame)
+        return 3;
+#endif
+    if (CPU.Flags & SCAN_KEYS_FLAG)
+        return 1;
+
+    return 0;
+}
+
+int HandleHBlankSFX(void)
+{ /* SFX variant of HandleHBlank. */
+    return HandleHBlank(true);
+}
+
+int HandleHBlankNoSFX(void)
+{ /* Non-SFX variant of HandleHBlank. */
+    return HandleHBlank(false);
+}
+
+uint8_t *PatchJumpDirectChecks(uint8_t *PC, uint32_t *source)
+{ /* Emits a stub at CachePtr (event checks followed by a branch to the block for PC), patches the instruction at source to branch to that stub, and returns the stub address. NOTE(review): if FetchBlock ends up resetting the translation cache, source may point into discarded code -- TODO confirm how that case is handled. */
+    uint8_t *dest = *FetchBlock(PC - CPU.PCBase);
+    uint8_t *checks = CachePtr; /* taken after FetchBlock, which may itself emit code */
+    EmitChecks(0);
+    GenARM_B(CondAL, PCOFFSET(dest));
+    __clear_cache(checks, CachePtr);
+    PatchJump(source, checks);
+    __clear_cache(source, source+1);
+    return checks;
+}
+
+uint8_t *PatchJumpDirect(uint8_t *PC, uint32_t *source)
+{ /* Patches the instruction at source to branch straight to the block for PC (no checks stub) and returns that block address. */
+    uint8_t *dest = *FetchBlock(PC - CPU.PCBase);
+    PatchJump(source, dest);
+    __clear_cache(source, source+1);
+    return dest;
+}
+
+void DynaCPUShutdown(void)
+{
+    /* Don't skip cycles with a pending NMI or IRQ - could cause delayed
+     * interrupt. Interrupts are delayed for a few cycles already, but
+     * the delay could allow the shutdown code to cycle skip again.
+     * Was causing screen flashing on Top Gear 3000. 
*/
+    if (CPU.WaitCounter == 0 && !(CPU.Flags & (IRQ_PENDING_FLAG | NMI_FLAG)))
+    {
+        CPU.WaitAddress = NULL;
+#ifndef USE_BLARGG_APU
+        CPU.Cycles = CPU.NextEvent; /* NOTE(review): this skip sits inside the USE_BLARGG_APU guard, whereas DynaWAIShutdown below performs it unconditionally -- confirm the difference is intended */
+        if (IAPU.APUExecuting)
+        {
+            ICPU.CPUExecuting = false;
+            do
+            {
+                APU_EXECUTE1();
+            } while (APU.Cycles < CPU.NextEvent);
+            ICPU.CPUExecuting = true;
+        }
+#endif
+    }
+    else if (CPU.WaitCounter >= 2)
+        CPU.WaitCounter = 1;
+    else
+        CPU.WaitCounter--;
+}
+
+void DynaWAIShutdown(void)
+{ /* Skips the CPU clock ahead to the next event and, without the blargg APU, runs the APU until it has caught up. */
+    CPU.Cycles = CPU.NextEvent;
+#ifndef USE_BLARGG_APU
+    if (IAPU.APUExecuting)
+    {
+        ICPU.CPUExecuting = false;
+        do
+        {
+            APU_EXECUTE1();
+        } while (APU.Cycles < CPU.NextEvent);
+        ICPU.CPUExecuting = true;
+    }
+#endif
+}
+
+static void EmitFlushCyclesPC(int *dpc, int *dcycles)
+{ /* Emits ADD instructions that apply the accumulated PC and cycle deltas to RegCPUPC and RegCycles; nothing is emitted for a zero delta. The deltas are passed as rotation-0 immediates, so they presumably must fit in 8 bits -- TODO confirm. */
+    if (*dpc != 0)
+        GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(*dpc, 0));
+
+    if (*dcycles != 0)
+        GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(*dcycles, 0));
+
+    dpc = 0; /* NOTE(review): assigns the local pointer, not *dpc, so the caller's counter is NOT reset. Do not simply change this to *dpc = 0: the MVN/MVP emitter reads dpc after the flush to rewind RegCPUPC. */
+    dcycles = 0; /* same: no effect on the caller's cycle counter */
+}
+
+/* a1 = value, a4 = sp, preserves a4
+ * Emits a push of the value in A1: decrements the stack pointer held in A4
+ * by 1 (n8) or 2, forces its high byte to 0x01 in emulation mode, puts
+ * (S + 1) with bits 16-23 cleared into A2 as the address and calls the fast
+ * byte/word store. The caller writes A4 back to Registers.S. `in` is unused. */
+static void EmitPush(Instruction *in, bool n8)
+{
+    enum ARMReg RegS = RegA4; /* Preserved during memory ops */
+
+    if (n8)
+        GenARM_SUB_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 0));
+    else
+        GenARM_SUB_Imm(CondAL, RegS, RegS, ENCODE_IMM(2, 0));
+
+    if (CheckEmulation())
+    {
+        GenARM_BIC_Imm(CondAL, RegS, RegS, ENCODE_IMM(0xFF, 8));
+        GenARM_ORR_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 8));
+    }
+
+    GenARM_ADD_Imm(CondAL, RegA2, RegS, ENCODE_IMM(1, 0));
+    GenARM_BIC_Imm(CondAL, RegA2, RegA2, ENCODE_IMM(0xFF, 16));
+
+    if (n8)
+    {
+        GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
+    }
+    else
+    {
+        GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast));
+    }
+}
+
+/* a1 = value, a4 = sp, preserves a4
+ * Pop counterpart of EmitPush: here A1 receives the address for the fast
+ * byte/word load (S after the increment for bytes, S - 1 for words) and,
+ * judging by the callers, holds the loaded value afterwards. A2 is loaded
+ * with RegCPUPC before the call. `in` is unused. */
+static void EmitPop(Instruction *in, bool n8)
+{
+    enum ARMReg RegS = RegA4; /* Preserved during memory ops */
+    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+
+    if (n8)
+        GenARM_ADD_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 0));
+    else
+        GenARM_ADD_Imm(CondAL, RegS, RegS, ENCODE_IMM(2, 0));
+
+    if (CheckEmulation())
+    {
+        
GenARM_BIC_Imm(CondAL, RegS, RegS, ENCODE_IMM(0xFF, 8));
+        GenARM_ORR_Imm(CondAL, RegS, RegS, ENCODE_IMM(1, 8));
+    }
+
+    if (n8)
+    {
+        GenARM_MOV_Reg(CondAL, RegA1, 0, RegS);
+        GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+    }
+    else
+    {
+        GenARM_SUB_Imm(CondAL, RegA1, RegS, ENCODE_IMM(1, 0));
+        GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+    }
+}
+
+static enum ARMReg EmitLoadRegister(Instruction *in, enum SNESRegArg arg, enum ARMReg RegMEM, bool read)
+{ /* Returns the ARM register that holds the SNES register arg. A, X, Y and P map to dedicated registers and emit nothing. D, S, DB and PB are kept in memory: they are returned in RegMEM, and a load from the CPU structure is emitted only when read is true. `in` is unused. */
+    enum ARMReg reg;
+
+    switch(arg)
+    {
+        case ArgA:
+            reg = RegA;
+            break;
+        case ArgX:
+            reg = RegX;
+            break;
+        case ArgY:
+            reg = RegY;
+            break;
+        case ArgP:
+            reg = RegP;
+            break;
+        case ArgD:
+            if (read)
+                GenARM_LDRH_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.D));
+            reg = RegMEM;
+            break;
+        case ArgS:
+            if (read)
+                GenARM_LDRH_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.S));
+            reg = RegMEM;
+            break;
+        case ArgDB:
+            if (read)
+                GenARM_LDRB_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.DB));
+            reg = RegMEM;
+            break;
+        case ArgPB:
+            if (read)
+                GenARM_LDRB_Imm(CondAL, RegMEM, RegCPU, ICPU_OFFSET(Registers.PB));
+            reg = RegMEM;
+            break;
+        default:
+            fprintf(stderr, "Unhandled SNES Register: %d\n", arg);
+            reg = -1; /* out-of-range marker; callers in this chunk use the result without checking it */
+            break;
+    }
+
+    return reg;
+}
+
+/* Preserves reg, modifies R3
+ * Emits code that writes the value in reg to the SNES register arg; n8
+ * selects a low-byte-only update for A, X and Y.
+ * NOTE(review): the ArgP path does not preserve reg: it swaps reg with RegP
+ * and later overwrites reg with the XOR of the old and new status. */
+static void EmitStoreRegister(Instruction *in, enum SNESRegArg arg, enum ARMReg reg, bool n8)
+{
+    switch(arg)
+    {
+        case ArgA:
+            if (reg != RegA)
+            {
+                if (n8)
+                    GenARM_MOVB_Reg(CondAL, RegA, reg);
+                else
+                    GenARM_UXTH_Reg(CondAL, RegA, reg);
+            }
+            break;
+        case ArgX:
+            if (reg != RegX)
+            {
+                if (n8)
+                    GenARM_MOVB_Reg(CondAL, RegX, reg);
+                else
+                    GenARM_UXTH_Reg(CondAL, RegX, reg);
+            }
+            break;
+        case ArgY:
+            if (reg != RegY)
+            {
+                if (n8)
+                    GenARM_MOVB_Reg(CondAL, RegY, reg);
+                else
+                    GenARM_UXTH_Reg(CondAL, RegY, reg);
+            }
+            break;
+        case ArgP:
+        {
+            uint8_t *branch;
+            GenARM_EOR_Reg(CondAL, RegP, RegP, 0, reg); /* three EORs: swap RegP and reg, so reg now holds the previous status */
+            GenARM_EOR_Reg(CondAL, reg, RegP, 0, reg);
+            GenARM_EOR_Reg(CondAL, RegP, RegP, 0, reg);
+
+            
GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Emulation >> 8, 8));
+            GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(MemoryFlag | IndexFlag, 0)); /* emulation bit set: force the M and X flags on */
+            GenARM_BL(CondAL, PCOFFSET(S9xCallUnpackStatusFast));
+
+            GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(IndexFlag, 0));
+            GenARM_BIC_Imm(CondNE, RegX, RegX, ENCODE_IMM(0xFF, 8)); /* index flag set: clear the high byte of X and Y */
+            GenARM_BIC_Imm(CondNE, RegY, RegY, ENCODE_IMM(0xFF, 8));
+
+            /* Update interpreter lookup table and CacheSection if processor flags changed */
+            GenARM_EOR_Reg(CondAL, reg, reg, 0, RegP); /* reg = old status XOR new status */
+            GenARM_TST_Imm(CondAL, reg, ENCODE_IMM((Emulation | MemoryFlag | IndexFlag | Decimal) >> 2, 2));
+            branch = CachePtr;
+            GenARM_B(CondEQ, PCOFFSET(branch)); /* placeholder branch to itself; PatchJump below retargets it past the two calls */
+            GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection));
+            GenARM_BL(CondAL, PCOFFSET(S9xCallFixCycles));
+            PatchJump((uint32_t *)branch, CachePtr);
+            break;
+        }
+        case ArgD:
+            GenARM_STRH_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.D));
+            break;
+        case ArgDB:
+            GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), reg); /* A3 = reg << 16, stored below as ShiftedDB */
+            GenARM_STRB_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.DB));
+            GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedDB));
+            break;
+        case ArgPB:
+            GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), reg); /* A3 = reg << 16, stored below as ShiftedPB */
+            GenARM_STRB_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.PB));
+            GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedPB));
+            break;
+        case ArgS:
+            if (CheckEmulation()) /* decided at translation time: in emulation mode the stored high byte is forced to 0x01 */
+            {
+                GenARM_BIC_Imm(CondAL, RegA3, reg, ENCODE_IMM(0xFF, 8));
+                GenARM_ORR_Imm(CondAL, RegA3, RegA3, ENCODE_IMM(1, 8));
+                GenARM_STRH_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(Registers.S));
+            }
+            else
+            {
+                GenARM_STRH_Imm(CondAL, reg, RegCPU, ICPU_OFFSET(Registers.S));
+            }
+
+            break;
+        default:
+            fprintf(stderr, "Unhandled SNES Register: %d\n", arg);
+            break;
+    }
+}
+
+static uint16_t EmitLoadLiteral(Instruction *in, bool n8, int *dpc, int *dcycles)
+{ /* Reads the immediate operand at translation time: one byte when n8, otherwise a little-endian 16-bit value. Adds 1 to *dpc and CPU.MemSpeed to *dcycles per byte read. Emits no code itself. */
+    uint16_t literal;
+    *dcycles += CPU.MemSpeed;
+    *dpc += 1;
+    literal = in->PC[1];
+
+    if (!n8)
+    {
+        *dcycles += CPU.MemSpeed;
+        *dpc += 1;
+        literal |= 
in->PC[2] << 8;
+    }
+
+    return literal;
+}
+
+static int32_t EmitLoadRelative(Instruction *in, bool n8, int *dpc, int *dcycles)
+{ /* Decodes a signed branch displacement (8-bit when n8, else 16-bit) at translation time and returns it, adjusted so that pc + disp stays inside the 16-bit range when the target wraps. pc is the offset of the following instruction from CPU.PCBase. Also accounts for the operand bytes in *dpc and *dcycles. Emits no code. */
+    int32_t disp;
+    int32_t pc;
+    int32_t target;
+    bool overflow;
+
+    if (n8)
+    {
+        disp = (int8_t)in->PC[1];
+        pc = (int32_t)((in->PC + 2) - CPU.PCBase);
+        (*dpc)++;
+        *dcycles += CPU.MemSpeed;
+    }
+    else
+    {
+        disp = (int16_t)(in->PC[1] | (in->PC[2] << 8));
+        pc = (int32_t)((in->PC + 3) - CPU.PCBase);
+        *dpc += 2;
+        *dcycles += CPU.MemSpeedx2;
+    }
+
+    target = (pc + disp) & 0xFFFF;
+    overflow = target != (pc + disp);
+
+    if (overflow)
+        disp = (target - pc); /* re-express the wrapped target as a displacement from pc */
+
+    return disp;
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that is where
+ * S9XRead / S9xWrite look for address. Modifies the other. Reads also
+ * write over A4.
+ * The emitted address is operand byte + base (+ X or Y), where the base is
+ * Registers.S when offsetReg is ArgS and Registers.D otherwise. The sum is
+ * truncated to 16 bits, or to 8 bits in emulation mode with an X/Y index.
+ * Adds the operand byte and any indexing cycles to *dpc and *dcycles. */
+static void EmitAddrZeroPage(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+    enum ARMReg rd = read ? RegA1 : RegA2;
+    enum ARMReg scratch = read ? RegA2 : RegA1;
+
+    *dpc += 1;
+    *dcycles += CPU.MemSpeed;
+    GenARM_MOV_Imm(CondAL, rd, ENCODE_IMM(in->PC[1], 0));
+
+    if (offsetReg == ArgS)
+        GenARM_LDRH_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(Registers.S));
+    else
+        GenARM_LDRH_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(Registers.D));
+
+    if (offsetReg == ArgX)
+        GenARM_ADD_Reg(CondAL, rd, rd, 0, RegX);
+    else if (offsetReg == ArgY)
+        GenARM_ADD_Reg(CondAL, rd, rd, 0, RegY);
+
+    if (offsetReg == ArgX || offsetReg == ArgY || offsetReg == ArgS)
+        *dcycles += ONE_CYCLE;
+
+    if (read)
+    {
+        /* Set OpenBus to in->PC[1] */
+        GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[1], 0));
+        GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
+    }
+
+    GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch);
+
+    if (CheckEmulation() && (offsetReg == ArgX || offsetReg == ArgY))
+    {
+        GenARM_UXTB_Reg(CondAL, rd, rd); /* note: applied after the base register was added, so the whole sum is reduced to 8 bits */
+        *dcycles += ONE_CYCLE;
+    }
+    else
+    {
+        GenARM_UXTH_Reg(CondAL, rd, rd);
+    }
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that is where
+ * S9XRead / 
S9xWrite look for address. Modifies the other. Reads also
+ * write over A4. */
+static void EmitAddrAbsolute(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{ /* Emitted address = ShiftedDB + 16-bit operand (+ X or Y). The index is added without masking, so a carry out of bit 15 moves into the bank bits. Accounts for two operand bytes in *dpc and *dcycles. */
+    enum ARMReg rd = read ? RegA1 : RegA2;
+    enum ARMReg scratch = read ? RegA2 : RegA1;
+    *dpc += 2;
+    *dcycles += CPU.MemSpeedx2;
+    GenARM_MOV32_Imm(CondAL, scratch, in->PC[1] + (in->PC[2] << 8));
+    GenARM_LDR_Imm(CondAL, rd, RegCPU, ICPU_OFFSET(ShiftedDB));
+
+    if (offsetReg == ArgX)
+        GenARM_ADD_Reg(CondAL, scratch, scratch, 0, RegX);
+    else if (offsetReg == ArgY)
+        GenARM_ADD_Reg(CondAL, scratch, scratch, 0, RegY);
+
+    if (read)
+    {
+        /* Set OpenBus to in->PC[2] */
+        GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0));
+        GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
+    }
+
+    GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch);
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that's where
+ * S9XRead / S9xWrite look for address. Reads also write over A4.
+ * The address is the 24-bit operand, optionally plus X with bits 24-31
+ * cleared afterwards. Only ArgX is handled as an index here. */
+static void EmitAddrLong(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+    enum ARMReg rd = read ? RegA1 : RegA2;
+    *dpc += 3;
+    *dcycles += CPU.MemSpeedx2 + CPU.MemSpeed;
+
+    GenARM_MOV32_Imm(CondAL, rd, in->PC[1] + (in->PC[2] << 8) + (in->PC[3] << 16));
+
+    if (offsetReg == ArgX)
+    {
+        GenARM_ADD_Reg(CondAL, rd, rd, 0, RegX);
+        GenARM_BIC_Imm(CondAL, rd, rd, ENCODE_IMM(0xFF, 24));
+    }
+
+    if (read)
+    {
+        /* Set OpenBus to in->PC[2]
+         * NOTE(review): the last operand byte of a long address is
+         * in->PC[3]; confirm that PC[2] matches the interpreter here. */
+        GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0));
+        GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
+    }
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that's where
+ * S9XRead / S9xWrite look for address. Writes over A4.
+ * Emits a zero-page (or stack-relative) pointer fetch followed by the
+ * construction of the final address from the fetched 16-bit pointer. */
+static void EmitAddrIndirect(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+    enum ARMReg rd = read ? RegA1 : RegA2;
+    enum ARMReg scratch = read ? 
RegA2 : RegA1;
+    enum SNESRegArg ZPArg = ArgNULL; /* index applied to the pointer location: X for the X-indexed form, S for the stack-relative modes, otherwise none */
+
+    if (offsetReg == ArgX)
+        ZPArg = ArgX;
+    else if (IS_VADDRMODE(IndirectS))
+        ZPArg = ArgS;
+
+    EmitAddrZeroPage(true, in, ZPArg, dpc, dcycles); /* always the read form: the pointer location goes to A1 for the word fetch below */
+
+    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+    GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+
+    if (rd != RegA1)
+        GenARM_MOV_Reg(CondAL, rd, 0, RegA1);
+
+    if (offsetReg == ArgY) /* Y is added to the fetched pointer, not to the pointer location */
+    {
+        GenARM_ADD_Reg(CondAL, rd, rd, 0, RegY);
+    }
+
+    if (IS_VADDRMODE(IndirectS))
+        *dcycles += ONE_CYCLE;
+
+    GenARM_LDR_Imm(CondAL, scratch, RegCPU, ICPU_OFFSET(ShiftedDB));
+    GenARM_ADD_Reg(CondAL, rd, rd, 0, scratch);
+
+    if (read)
+    {
+        /* Set OpenBus to last byte fetched
+         * (what is stored is the low byte of rd >> 8, taken after Y and
+         * ShiftedDB have been added) */
+        GenARM_MOV_Reg(CondAL, RegA4, ENCODE_SHIFT(ShiftLSR, 8), rd);
+        GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET);
+    }
+}
+
+/* Stores result in A1 for reads, A2 for writes, because that's where
+ * S9XRead / S9xWrite look for address. Writes over A4.
+ * Emits a fetch of a 24-bit pointer from the zero page: a word for the low
+ * 16 bits and a byte at pointer location + 2 for the bank. */
+static void EmitAddrIndirectFar(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{
+    enum ARMReg rd = read ? 
RegA1 : RegA2;
+
+    EmitAddrZeroPage(true, in, ArgNULL, dpc, dcycles);
+
+    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+    GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+
+    /* GetFast preserves A4 */
+    GenARM_MOV_Reg(CondAL, RegA4, 0, RegA1); /* A4 = low 16 bits of the pointer */
+    GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(2, 0)); /* presumably relies on S9xGetWordFast leaving the address it read from in A2 -- TODO confirm in armfn.S */
+    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+    GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+
+    if (offsetReg == ArgY)
+    {
+        GenARM_ADD_Reg(CondAL, RegA4, RegA4, 0, RegY);
+    }
+
+    if (read)
+    {
+        /* Set OpenBus to last byte fetched */
+        GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OPEN_BUS_OFFSET);
+    }
+
+    GenARM_ADD_Reg(CondAL, rd, RegA4, ENCODE_SHIFT(ShiftLSL, 16), RegA1); /* rd = A4 + (bank << 16); a carry from the Y addition moves into the bank */
+}
+
+static void EmitGetAddress(bool read, Instruction *in, enum SNESRegArg offsetReg, int *dpc, int *dcycles)
+{ /* Dispatches on in->AddrMode to the matching address emitter. Modes without an effective address (register, immediate, none) fall through the default and emit nothing. */
+    switch(in->AddrMode)
+    {
+        case AddrZeroPage8:
+        case AddrZeroPage16:
+            EmitAddrZeroPage(read, in, offsetReg, dpc, dcycles);
+            break;
+        case AddrAbsolute8:
+        case AddrAbsolute16:
+            EmitAddrAbsolute(read, in, offsetReg, dpc, dcycles);
+            break;
+        case AddrLong8:
+        case AddrLong16:
+            EmitAddrLong(read, in, offsetReg, dpc, dcycles);
+            break;
+        case AddrIndirect8:
+        case AddrIndirect16:
+        case AddrIndirectX8:
+        case AddrIndirectX16:
+        case AddrIndirectY8:
+        case AddrIndirectY16:
+        case AddrIndirectS8:
+        case AddrIndirectS16:
+            EmitAddrIndirect(read, in, offsetReg, dpc, dcycles);
+            break;
+        case AddrIndirectFar8:
+        case AddrIndirectFar16:
+            EmitAddrIndirectFar(read, in, offsetReg, dpc, dcycles);
+            break;
+        default:
+            break;
+    }
+}
+
+/* Returns loaded register, leaves address in RegA2 for storing
+ * (A2 is loaded with RegCPUPC right before the call, so the address can only
+ * be left there by the Get*Fast routine itself -- TODO confirm in armfn.S).
+ * Emits the address computation for in->Arg2, a byte or word fetch, and a
+ * move of the fetched value from A1 into RegMEM. */
+static enum ARMReg EmitLoadMemRegister(Instruction *in, enum ARMReg RegMEM, bool n8, int *dpc, int *dcycles)
+{
+    EmitGetAddress(true, in, in->Arg2, dpc, dcycles);
+    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+
+    if (n8)
+        GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+    else
+        GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+
+    GenARM_MOV_Reg(CondAL, RegMEM, 0, RegA1);
+
+    return RegMEM;
+}
+
+/* 
Assumes address is in RegA2 */
+static void EmitStoreMemRegister(enum ARMReg RegMEM, bool n8)
+{ /* Emits a move of RegMEM into A1 followed by a call to the fast byte or word store. */
+    GenARM_MOV_Reg(CondAL, RegA1, 0, RegMEM);
+
+    if (n8)
+        GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
+    else
+        GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast));
+}
+
+/* Expects Z to be set in ARM flags, HS to be set in ARM if CarryHS is true (SBC, CMP)
+ * Emits code that copies result flags into the SNES status register RegP.
+ * The bits named in flags are cleared first; Zero is set from the ARM Z
+ * flag, Negative from the top bit of the 8- or 16-bit value in reg, and
+ * Carry either from the ARM HS condition (CarryHS) or from the bit just
+ * above the value (bit 8 or bit 16). flags is passed straight through as
+ * an immediate operand, so it presumably has to fit the rotation-0
+ * encoding -- TODO confirm. Emits nothing when flags is 0. */
+static void EmitSetFlags(enum ARMReg reg, uint32_t flags, bool n8, bool CarryHS)
+{
+    if (flags == 0)
+        return;
+
+    GenARM_BIC_Imm(CondAL, RegP, RegP, flags);
+
+    if (flags & Zero)
+        GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero);
+
+    if ((flags & Carry) && CarryHS)
+        GenARM_ORR_Imm(CondHS, RegP, RegP, Carry);
+
+    if (n8)
+    {
+        if (flags & Negative)
+        {
+            GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(2, 6)); /* bit 7, assuming ENCODE_IMM(v, s) stands for v << s as the 0x880 comment in EmitChecks suggests */
+            GenARM_ORR_Imm(CondNE, RegP, RegP, Negative);
+        }
+
+        if ((flags & Carry) && !CarryHS)
+        {
+            GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(1, 8)); /* bit 8: carry out of an 8-bit result */
+            GenARM_ORR_Imm(CondNE, RegP, RegP, Carry);
+        }
+    }
+    else
+    {
+        if (flags & Negative)
+        {
+            GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(2, 14)); /* bit 15 */
+            GenARM_ORR_Imm(CondNE, RegP, RegP, Negative);
+        }
+
+        if ((flags & Carry) && !CarryHS)
+        {
+            GenARM_TST_Imm(CondAL, reg, ENCODE_IMM(1, 16)); /* bit 16: carry out of a 16-bit result */
+            GenARM_ORR_Imm(CondNE, RegP, RegP, Carry);
+        }
+    }
+}
+
+static void EmitLD(enum ARMReg reg, bool n8, int *dpc, int *dcycles)
+{ /* Emits the memory fetch for a load whose address was already emitted: flushes the pending PC and cycle deltas, calls the fast byte or word read, and moves the result from R0 into reg with a flag-setting move (low byte only when n8). */
+    /* Update cycles / cpu for slow lookup */
+    GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC);
+    EmitFlushCyclesPC(dpc, dcycles);
+
+    if (n8)
+    {
+        GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast));
+        GenARM_MOVBS_Reg(CondAL, reg, RegR0, RegR0);
+    }
+    else
+    {
+        GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast));
+        GenARM_MOVS_Reg(CondAL, reg, 0, RegR0);
+    }
+}
+
+static void EmitST(enum SNESRegArg reg, bool n8)
+{ /* Emits a store of A, X, Y or the constant zero (ArgZ): the value is moved into A1, then the fast byte or word write is called. The address is expected to have been emitted already. */
+    switch(reg)
+    {
+        case ArgA:
+            GenARM_MOV_Reg(CondAL, RegA1, 0, RegA);
+            break;
+        case ArgX:
+            GenARM_MOV_Reg(CondAL, RegA1, 0, RegX);
+            break;
+        case ArgY:
+            GenARM_MOV_Reg(CondAL, RegA1, 0, RegY);
+            break;
+        case ArgZ:
+            GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0));
+            break;
+        
default:
+            break; /* any other argument: A1 is left untouched and the store call is still emitted */
+    }
+
+    if (n8)
+    {
+        GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast));
+    }
+    else
+    {
+        GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast));
+    }
+}
+
+static void EmitBranch(Instruction *in, uint32_t flag, bool set, int *dpc, int *dcycles)
+{ /* Emits a conditional 8-bit relative branch on one status flag of RegP. `set` selects whether the branch is taken when the flag is set or when it is clear. The not-taken path leaves through JumpDirectChecks; the taken path adds one cycle, applies the displacement to RegCPUPC in up to two 8-bit immediates, optionally runs the shutdown check, then the event checks and JumpDirect. */
+    int32_t disp = EmitLoadRelative(in, true, dpc, dcycles);
+
+    EmitFlushCyclesPC(dpc, dcycles);
+
+    /* Interpreter runs BranchCheck here. Only when APU is disabled
+     * until next reset. So cost of the load seems not worth it, unless
+     * games break. */
+
+    GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(flag, 0));
+    /* If condition doesn't match, jump to next instruction */
+    if (set)
+        GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks));
+    else
+        GenARM_BL(CondNE, PCOFFSET(JumpDirectChecks));
+
+    GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0));
+
+    if (Settings.Shutdown)
+    {
+        GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); /* loaded before the PC update and compared with the branch target below */
+    }
+
+    if (disp > 0)
+    {
+        if (disp & 0xFF)
+            GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+        if (disp & 0xFF00)
+            GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+    }
+    else if (disp < 0)
+    {
+        disp = -disp;
+        if (disp & 0xFF)
+            GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0));
+
+        if (disp & 0xFF00)
+            GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8));
+    }
+
+    if (Settings.Shutdown) {
+        GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC);
+        GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown));
+    }
+
+    EmitChecks(0);
+    GenARM_BL(CondAL, PCOFFSET(JumpDirect));
+}
+
+static void EmitNativeCall(Instruction *in)
+{
+    int dpc = 1;
+    int dcycles = CPU.MemSpeed;
+    bool didBreak = false;
+    bool n8;
+
+    switch(in->Op)
+    {
+        CASE_VOP(MOV):
+        CASE_VOP(MOVSP):
+        {
+            enum ARMReg RegSRC;
+            enum ARMReg RegDST;
+            n8 = (in->Op == FnMOV8) || (in->Op == FnMOVSP8);
+
+            dcycles += ONE_CYCLE;
+            RegDST = EmitLoadRegister(in, in->Arg1, RegA2, false);
+            RegSRC = EmitLoadRegister(in, in->Arg2, RegA3, true);
+
+            
EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_MOVBS_Reg(CondAL, RegDST, RegA1, RegSRC); + } + else + { + GenARM_UXTHS_Reg(CondAL, RegDST, RegSRC); + } + + EmitSetFlags(RegDST, in->OutFlags & (Zero | Negative), n8, false); + EmitStoreRegister(in, in->Arg1, RegDST, n8); + EmitChecks(0); + break; + } + CASE_VOP(LD): + { + enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true); + n8 = in->Op == FnLD8; + + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BIC_Imm(CondAL, RegDST, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_ORR_Imm(CondAL, RegDST, RegDST, ENCODE_IMM(literal, 0)); + + if (in->OutFlags & Zero) + GenARM_ANDS_Imm(CondAL, RegA1, RegDST, ENCODE_IMM(literal, 0)); + } + else + { + GenARM_MOV32_Imm(CondAL, RegDST, literal); + if (in->OutFlags & Zero) + GenARM_ANDS_Reg(CondAL, RegDST, RegDST, 0, RegDST); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + EmitLD(RegDST, n8, &dpc, &dcycles); + } + + EmitSetFlags(RegDST, in->OutFlags & (Zero | Negative), n8, false); + EmitChecks(0); + break; + } + CASE_VOP(ST): + { + n8 = in->Op == FnST8; + + EmitGetAddress(false, in, in->Arg2, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + EmitST(in->Arg1, n8); + EmitChecks(0); + break; + } + CASE_VOP(PUSH): + { + enum ARMReg RegSRC; + n8 = in->Op == FnPUSH8; + + if (in->Arg1 == ArgNULL) + { + RegSRC = EmitLoadMemRegister(in, RegA1, n8, &dpc, &dcycles); + } + else + { + RegSRC = EmitLoadRegister(in, in->Arg1, RegA1, true); + dcycles += ONE_CYCLE; + } + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + if (RegSRC != RegA1) + GenARM_MOV_Reg(CondAL, RegA1, 0, RegSRC); + + EmitFlushCyclesPC(&dpc, &dcycles); + EmitPush(in, n8); + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitChecks(0); + break; + } + CASE_OP(PEA): + { + uint16_t literal = EmitLoadLiteral(in, false, &dpc, &dcycles); 
+ + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + GenARM_MOV32_Imm(CondAL, RegA1, literal); + + EmitFlushCyclesPC(&dpc, &dcycles); + EmitPush(in, false); + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitChecks(0); + break; + } + CASE_OP(PER): + { + uint16_t literal = (in->PC + 3 - CPU.PCBase + in->PC[1] + (in->PC[2] << 8)) & 0xffff; + dcycles += CPU.MemSpeedx2 + ONE_CYCLE; + dpc += 2; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + GenARM_MOV32_Imm(CondAL, RegA1, literal); + + EmitFlushCyclesPC(&dpc, &dcycles); + EmitPush(in, false); + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitChecks(0); + break; + } + CASE_VOP(POP): + { + n8 = in->Op == FnPOP8; + dcycles += TWO_CYCLES; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitPop(in, n8); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (in->OutFlags & Zero) + GenARM_MOVS_Reg(CondAL, RegA1, 0, RegA1); /* Set ZF on A1 */ + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false); + EmitStoreRegister(in, in->Arg1, RegA1, n8); + EmitChecks(0); + break; + } + CASE_OP(MVP): + CASE_OP(MVN): + { + uint8_t *start; + dpc += 2; + dcycles += CPU.MemSpeedx2 + TWO_CYCLES; + + /* Load DD */ + /* Set DB / ShiftedDB */ + GenARM_MOV_Imm(CondAL, RegA3, ENCODE_IMM(in->PC[1], 0)); + GenARM_STRB_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(Registers.DB)); + GenARM_MOV_Reg(CondAL, RegA3, ENCODE_SHIFT(ShiftLSL, 16), RegA3); + GenARM_STR_Imm(CondAL, RegA3, RegCPU, ICPU_OFFSET(ShiftedDB)); + + /* A3 = DD << 16 */ + + /* Load SS */ + /* Set OpenBus to SS */ + GenARM_MOV_Imm(CondAL, RegA4, ENCODE_IMM(in->PC[2], 0)); + GenARM_STRB_Imm(CondAL, RegA4, RegCPU, CPU_OPEN_BUS_OFFSET); + + /* A4 = SS << 16 */ + start = CachePtr; + + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(in->PC[2], 16)); + GenARM_ORR_Reg(CondAL, RegA1, RegA1, 0, RegX); + + /* Update 
cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + + /* SetByte -- A1 val, A2 addr */ + GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[1], 16)); + GenARM_ORR_Reg(CondAL, RegA2, RegA2, 0, RegY); + GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); + + if (in->Op == FnMVN) + { + if (CheckIndex()) + { + GenARM_ADD_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); + GenARM_MOVB_Reg(CondAL, RegX, RegA1); + GenARM_ADD_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); + GenARM_MOVB_Reg(CondAL, RegY, RegA1); + } + else + { + GenARM_ADD_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegX, RegA1); + GenARM_ADD_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegY, RegA1); + } + } + else + { + if (CheckIndex()) + { + GenARM_SUB_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); + GenARM_MOVB_Reg(CondAL, RegX, RegA1); + GenARM_SUB_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); + GenARM_MOVB_Reg(CondAL, RegY, RegA1); + } + else + { + GenARM_SUB_Imm(CondAL, RegA1, RegX, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegX, RegA1); + GenARM_SUB_Imm(CondAL, RegA1, RegY, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegY, RegA1); + } + } + + GenARM_CMP_Imm(CondAL, RegA, ENCODE_IMM(0, 0)); + GenARM_SUB_Imm(CondAL, RegA1, RegA, ENCODE_IMM(1, 0)); + GenARM_UXTH_Reg(CondAL, RegA, RegA1); + GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks)); + + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(dpc, 0)); + EmitChecks(0); + GenARM_B(CondAL, PCOFFSET(start)); + didBreak = true; + break; + } + CASE_VOP(OR): + { + n8 = in->Op == FnOR8; + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_ORR_Imm(CondAL, RegA, RegA, ENCODE_IMM(literal, 0)); + GenARM_ANDS_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0)); + EmitSetFlags(RegA2, in->OutFlags & (Zero | Negative), n8, 
false); + } + else + { + GenARM_MOV32_Imm(CondAL, RegR0, literal); + GenARM_ORRS_Reg(CondAL, RegA, RegA, 0, RegR0); + EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegR0); + GenARM_ANDS_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0)); + EmitSetFlags(RegA2, in->OutFlags & (Zero | Negative), n8, false); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_ORRS_Reg(CondAL, RegA, RegA, 0, RegR0); + EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); + } + } + + EmitChecks(0); + break; + } + CASE_VOP(AND): + { + n8 = in->Op == FnAND8; + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_ANDS_Imm(CondAL, RegA1, RegA, ENCODE_IMM(literal, 0)); + GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); + } + else + { + GenARM_MOV32_Imm(CondAL, RegA1, literal); + GenARM_ANDS_Reg(CondAL, RegA, RegA, 0, RegA1); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_ANDS_Reg(CondAL, RegA1, RegA, 0, RegA1); + GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_ANDS_Reg(CondAL, RegA, RegA, 0, RegA1); + } + } + + EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); + EmitChecks(0); + break; + } + CASE_VOP(EOR): + { + n8 = in->Op == 
FnEOR8; + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA1, RegA, ENCODE_IMM(0xFF, 0)); + GenARM_EORS_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(literal, 0)); + GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); + } + else + { + GenARM_MOV32_Imm(CondAL, RegA1, literal); + GenARM_EORS_Reg(CondAL, RegA, RegA, 0, RegA1); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_AND_Imm(CondAL, RegA2, RegA, ENCODE_IMM(0xFF, 0)); + GenARM_EORS_Reg(CondAL, RegA1, RegA1, 0, RegA2); + GenARM_AND_Imm(CondAL, RegA, RegA, ENCODE_IMM(0xFF, 8)); + GenARM_ORR_Reg(CondAL, RegA, RegA, 0, RegA1); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_EORS_Reg(CondAL, RegA, RegA, 0, RegA1); + } + } + + EmitSetFlags(RegA, in->OutFlags & (Zero | Negative), n8, false); + EmitChecks(0); + break; + } + CASE_VOP(ADC): + { + enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true); + n8 = in->Op == FnADC8; + + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_MOV32_Imm(CondAL, RegA1, literal); + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + else + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + } + + GenARM_AND_Imm(CondAL, RegA3, RegP, ENCODE_IMM(Carry, 0)); + + if (CheckDecimal()) + { + uint32_t shift; + + for (shift = 0; shift < (n8 ? 
8 : 16) ; shift += 4) + { + GenARM_AND_Imm(CondAL, RegA4, RegA1, ENCODE_IMM(0xF, shift)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); + GenARM_AND_Imm(CondAL, RegA4, RegDST, ENCODE_IMM(0xF, shift)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(0xA, shift)); + GenARM_ADD_Imm(CondHS, RegA3, RegA3, ENCODE_IMM(0x6, shift)); + } + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); + } + else + { + GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); + GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); + } + } + else + { + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA1); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA2); + GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); + } + else + { + GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegDST); + GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); + } + } + + EmitSetFlags(RegA3, in->OutFlags & (Zero | Negative | Carry | Overflow), n8, false); + + if (in->OutFlags & Overflow) + { + /* Calculate overflow flag */ + if (CheckDecimal()) { + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, n8 ? 8 : 16)); + GenARM_SUB_Imm(CondGE, RegA3, RegA3, ENCODE_IMM(0x60, n8 ? 
0 : 8)); + } + + GenARM_EOR_Reg(CondAL, RegA1, RegA2, 0, RegA1); + GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1); + GenARM_EOR_Reg(CondAL, RegA2, RegA3, 0, RegA2); + GenARM_AND_Reg(CondAL, RegA1, RegA1, 0, RegA2); + if (n8) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0)); + } + else + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8)); + } + GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Overflow, 0)); + } + + EmitChecks(0); + break; + } + CASE_VOP(SBC): + { + enum ARMReg RegDST = EmitLoadRegister(in, in->Arg1, RegR0, true); + n8 = in->Op == FnSBC8; + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_MOV32_Imm(CondAL, RegA1, literal); + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + else + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + } + + GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1); + if (n8) + GenARM_UXTB_Reg(CondAL, RegA1, RegA1); + else + GenARM_UXTH_Reg(CondAL, RegA1, RegA1); + + GenARM_AND_Imm(CondAL, RegA3, RegP, ENCODE_IMM(Carry, 0)); + + if (CheckDecimal()) + { + uint32_t shift; + + for (shift = 0; shift < (n8 ? 
8 : 16) ; shift += 4) + { + GenARM_AND_Imm(CondAL, RegA4, RegA1, ENCODE_IMM(0xF, shift)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); + GenARM_AND_Imm(CondAL, RegA4, RegDST, ENCODE_IMM(0xF, shift)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA4); + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(0x10, shift)); + GenARM_SUB_Imm(CondLO, RegA3, RegA3, ENCODE_IMM(0x6, shift)); + } + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); + } + else + { + GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); + GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); + } + } + else + { + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA1); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA2, RegDST, ENCODE_IMM(0xFF, 0)); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegA2); + GenARM_MOVBS_Reg(CondAL, RegDST, RegA4, RegA3); + } + else + { + GenARM_MOV_Reg(CondAL, RegA2, 0, RegDST); + GenARM_ADD_Reg(CondAL, RegA3, RegA3, 0, RegDST); + GenARM_UXTHS_Reg(CondAL, RegDST, RegA3); + } + } + + EmitSetFlags(RegA3, in->OutFlags & (Zero | Negative | Overflow), n8, false); + + if (in->OutFlags & Carry) + { + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0)); + if (n8) + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, 8)); + else + GenARM_CMP_Imm(CondAL, RegA3, ENCODE_IMM(1, 16)); + GenARM_ORR_Imm(CondGE, RegP, RegP, ENCODE_IMM(Carry, 0)); + } + + if (in->OutFlags & Overflow) + { + /* Calculate overflow flag */ + GenARM_EOR_Reg(CondAL, RegA1, RegA2, 0, RegA1); + GenARM_MVN_Reg(CondAL, RegA1, 0, RegA1); + GenARM_EOR_Reg(CondAL, RegA2, RegA3, 0, RegA2); + GenARM_AND_Reg(CondAL, RegA1, RegA1, 0, RegA2); + if (n8) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0)); + } + else + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8)); + } + GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Overflow, 0)); + } + + EmitChecks(0); + break; + } + CASE_VOP(CMP): + { + enum ARMReg RegW = EmitLoadRegister(in, in->Arg1, RegR0, true); + n8 = in->Op == FnCMP8; + + if 
(IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegR1, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_SUBS_Imm(CondAL, RegR0, RegR1, ENCODE_IMM(literal, 0)); + } + else + { + GenARM_MOV32_Imm(CondAL, RegR0, literal); + GenARM_SUBS_Reg(CondAL, RegR0, RegW, 0, RegR0); + } + } + else + { + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_AND_Imm(CondAL, RegR1, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_SUBS_Reg(CondAL, RegR0, RegR1, 0, RegR0); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_SUBS_Reg(CondAL, RegR0, RegW, 0, RegR0); + } + } + + EmitSetFlags(RegR0, in->OutFlags & (Carry | Zero | Negative), n8, true); + EmitChecks(0); + break; + } + CASE_VOP(BIT): + { + n8 = in->Op == FnBIT8; + + if (IS_VADDRMODE(Immediate)) + { + uint16_t literal = EmitLoadLiteral(in, n8, &dpc, &dcycles); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_TST_Imm(CondAL, RegA, ENCODE_IMM(literal, 0)); + } + else + { + GenARM_MOV32_Imm(CondAL, RegA1, literal); + GenARM_TST_Reg(CondAL, RegA, 0, RegA1); + } + + EmitSetFlags(RegA, in->OutFlags & (Zero), n8, false); + } + else + { + uint32_t flags; + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_TST_Reg(CondAL, RegA, 0, RegA1); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_TST_Reg(CondAL, RegA, 0, RegA1); + } + + flags = in->OutFlags & (Zero | Negative | Overflow); + + if (flags == 0) + return; + + GenARM_BIC_Imm(CondAL, RegP, RegP, flags); + + if (flags & Zero) + 
GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero); + + if (n8) + { + if (flags & Negative) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 0)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); + } + + if ((flags & Overflow)) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x40, 0)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Overflow); + } + } + else + { + if (flags & Negative) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x80, 8)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Negative); + } + + if ((flags & Overflow)) + { + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(0x40, 8)); + GenARM_ORR_Imm(CondNE, RegP, RegP, Overflow); + } + } + } + + EmitChecks(0); + break; + } + CASE_VOP(INC): + CASE_VOP(DEC): + { + enum ARMReg RegW; + n8 = in->Op == FnINC8 || in->Op == FnDEC8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA2, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + + /* Clear WaitAddress */ + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_SHIFT(0, 0)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + + EmitFlushCyclesPC(&dpc, &dcycles); + + if (IS_VOP(INC)) + GenARM_ADD_Imm(CondAL, RegA1, RegW, ENCODE_IMM(1, 0)); + else + GenARM_SUB_Imm(CondAL, RegA1, RegW, ENCODE_IMM(1, 0)); + + if (n8) + { + GenARM_MOVBS_Reg(CondAL, RegW, RegA1, RegA1); + } + else + { + GenARM_UXTHS_Reg(CondAL, RegW, RegA1); + } + + EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_VOP(TSB): + CASE_VOP(TRB): + { + n8 = in->Op == FnTSB8 || in->Op == FnTRB8; + EmitGetAddress(true, in, in->Arg2, &dpc, &dcycles); + + dcycles += ONE_CYCLE; + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_AND_Imm(CondAL, RegA4, RegA, ENCODE_IMM(0xFF, 0)); + + 
GenARM_TST_Reg(CondAL, RegA4, 0, RegA1); + EmitSetFlags(RegA4, in->OutFlags & (Zero), n8, false); + + if (IS_VOP(TSB)) + { + GenARM_ORR_Reg(CondAL, RegA1, RegA4, 0, RegA1); + } + else + { + GenARM_MVN_Reg(CondAL, RegA4, 0, RegA4); + GenARM_AND_Reg(CondAL, RegA1, RegA4, 0, RegA1); + } + GenARM_BL(CondAL, PCOFFSET(S9xSetByteFast)); + } + else + { + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_TST_Reg(CondAL, RegA, 0, RegA1); + EmitSetFlags(RegA, in->OutFlags & (Zero), n8, false); + + if (IS_VOP(TSB)) + { + GenARM_ORR_Reg(CondAL, RegA1, RegA, 0, RegA1); + } + else + { + GenARM_MVN_Reg(CondAL, RegA4, 0, RegA); + GenARM_AND_Reg(CondAL, RegA1, RegA4, 0, RegA1); + } + GenARM_BL(CondAL, PCOFFSET(S9xSetWordFast)); + } + + EmitChecks(0); + break; + } + CASE_VOP(ASL): + { + enum ARMReg RegW; + n8 = in->Op == FnASL8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA4, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_MOV_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSL, 1), RegW); + + if (n8) + { + GenARM_MOVBS_Reg(CondAL, RegW, RegA4, RegA1); + } + else + { + GenARM_UXTHS_Reg(CondAL, RegW, RegA1); + } + + EmitSetFlags(RegA1, in->OutFlags & (Carry | Zero | Negative), CheckMemory(), false); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_VOP(LSR): + { + enum ARMReg RegW; + n8 = in->Op == FnLSR8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA3, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA3, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA1, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 1), RegA1); + GenARM_BIC_Imm(CondAL, RegW, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_ORR_Reg(CondAL, RegW, RegW, 0, RegA1); + } + else + { 
+ GenARM_MOVS_Reg(CondAL, RegW, ENCODE_SHIFT(ShiftLSR, 1), RegW); + } + + /* Flags easier to set inline here */ + GenARM_BIC_Imm(CondAL, RegP, RegP, in->OutFlags & (Carry | Zero | Negative)); + + /* Negative is always unset */ + if (in->OutFlags & Zero) + GenARM_ORR_Imm(CondEQ, RegP, RegP, Zero); + + if (in->OutFlags & Carry) + GenARM_ORR_Imm(CondCS, RegP, RegP, Carry); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_VOP(ROL): + { + enum ARMReg RegW; + n8 = in->Op == FnROL8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA4, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_MOV_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSL, 1), RegW); + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondNE, RegA1, RegA1, ENCODE_IMM(1, 0)); + + if (in->OutFlags & Carry) + { + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0)); + GenARM_TST_Imm(CondAL, RegA1, ENCODE_IMM(1, n8 ? 
8 : 16)); + GenARM_ORR_Imm(CondNE, RegP, RegP, ENCODE_IMM(Carry, 0)); + } + + if (n8) + { + GenARM_MOVBS_Reg(CondAL, RegW, RegA4, RegA1); + } + else + { + GenARM_UXTHS_Reg(CondAL, RegW, RegA1); + } + + EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), n8, false); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_VOP(ROR): + { + enum ARMReg RegW; + n8 = in->Op == FnROR8; + + if (in->Arg1 == ArgNULL) + RegW = EmitLoadMemRegister(in, RegA4, n8, &dpc, &dcycles); + else + RegW = EmitLoadRegister(in, in->Arg1, RegA4, true); + + /* Add cycles */ + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (n8) + { + GenARM_AND_Imm(CondAL, RegA1, RegW, ENCODE_IMM(0xFF, 0)); + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondNE, RegA1, RegA1, ENCODE_IMM(1, 8)); + GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 1), RegA1); + GenARM_MOVB_Reg(CondAL, RegW, RegA1); + } + else + { + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondNE, RegW, RegW, ENCODE_IMM(1, 16)); + GenARM_MOVS_Reg(CondAL, RegW, ENCODE_SHIFT(ShiftLSR, 1), RegW); + } + + if (in->OutFlags & Carry) + { + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondCS, RegP, RegP, ENCODE_IMM(Carry, 0)); + } + + EmitSetFlags(n8 ? 
RegA1 : RegW, in->OutFlags & (Zero | Negative), n8, false); + + if (in->Arg1 == ArgNULL) + EmitStoreMemRegister(RegW, n8); + + EmitChecks(0); + break; + } + CASE_OP(BRA): + CASE_OP(BRL): + { + int32_t disp; + n8 = in->Op == FnBRA; + disp = EmitLoadRelative(in, n8, &dpc, &dcycles); + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (in->Op == FnBRA && Settings.Shutdown) + { + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + } + + if (disp > 0) + { + if (disp & 0xFF) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + else if (disp < 0) + { + disp = -disp; + if (disp & 0xFF) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + + if (in->Op == FnBRA && Settings.Shutdown) { + GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); + GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); + } + + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + break; + } + + CASE_OP(JMP): + CASE_OP(JML): + { + switch(in->AddrMode) + { + case AddrImmediate16: + dcycles += CPU.MemSpeedx2; + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8) | ICPU.ShiftedPB); + + /* Only flush cycles because PC will change */ + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + + break; + case AddrImmediate24: + dcycles += CPU.MemSpeedx2 + CPU.MemSpeed; + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8)); + GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[3], 0)); + EmitStoreRegister(in, ArgPB, RegA2, true); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(in->PC[3], 16)); + + /* Only flush cycles because PC will change */ + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + + break; + case AddrAbsolute16: + dcycles += CPU.MemSpeedx2; + dpc += 
2; + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8)); + + if (in->Arg2 == ArgX) + { + GenARM_ADD_Reg(CondAL, RegA1, RegA1, 0, RegX); + GenARM_BIC_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(0xFF, 16)); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); + dcycles += ONE_CYCLE; + } + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + + if (in->Op == FnJMP) + { + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); + } + else + { + GenARM_MOV_Reg(CondAL, RegA4, 0, RegA1); + GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(2, 0)); + GenARM_SUB_Imm(CondAL, RegA2, RegCPUPC, ENCODE_IMM(3, 0)); + GenARM_BL(CondAL, PCOFFSET(S9xGetByteFast)); + GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + GenARM_ORR_Reg(CondAL, RegA1, RegA4, 0, RegA2); + } + + break; + default: + break; + } + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + if (in->AddrMode == AddrImmediate24 || in->Op == FnJML) + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + if (in->AddrMode == AddrImmediate16 && Settings.Shutdown) + { + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); + GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); + } + + EmitChecks(0); + + if (in->AddrMode == AddrImmediate16 || in->AddrMode == AddrImmediate24) + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + else + GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); + + didBreak = true; + break; + } + CASE_OP(JSR): + CASE_OP(JSL): + { + if (in->Op == FnJSL) + { + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(ICPU.Registers.PB, 0)); + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + dcycles 
+= CPU.MemSpeed * 3; + dpc += 3; + /* Only flush cycles because PC will change */ + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + + EmitPush(in, true); + } + else + { + dcycles += CPU.MemSpeedx2 + ONE_CYCLE; + dpc += 2; + /* Only flush cycles because PC will change */ + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + } + + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(PCBase)); + GenARM_ADD_Imm(CondAL, RegA3, RegCPUPC, ENCODE_IMM(dpc - 1, 0)); + GenARM_SUB_Reg(CondAL, RegA1, RegA3, 0, RegA1); + + EmitPush(in, false); + + /* Load new PC */ + if (in->Op == FnJSL) + { + GenARM_MOV_Imm(CondAL, RegA2, ENCODE_IMM(in->PC[3], 0)); + GenARM_STRB_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA2); + GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] + (in->PC[2] << 8) + (in->PC[3] << 16)); + } + else if (in->Arg2 == ArgNULL) + { + GenARM_MOV32_Imm(CondAL, RegA2, in->PC[1] + (in->PC[2] << 8)); + GenARM_ADD_Imm(CondAL, RegA1, RegA2, ENCODE_IMM(ICPU.Registers.PB, 16)); + } + else + { + GenARM_MOV32_Imm(CondAL, RegA1, in->PC[1] | (in->PC[2] << 8)); + /* Set OpenBus to PC[2] */ + + GenARM_ADD_Reg(CondAL, RegA1, RegA1, 0, RegX); + GenARM_BIC_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(0xFF, 16)); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); + + /* Update cycles / cpu for slow lookup */ + GenARM_MOV_Reg(CondAL, RegA2, 0, RegCPUPC); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(ICPU.Registers.PB, 16)); + } + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + if (in->Op == FnJSL) + 
GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + EmitChecks(0); + if (in->Arg2 == ArgNULL) + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + else + GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(RTI): + { + uint8_t *branch1; + dcycles += ONE_CYCLE * 2; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + EmitPop(in, true); + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitStoreRegister(in, ArgP, RegA1, false); + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + EmitPop(in, false); + GenARM_UXTH_Reg(CondAL, RegCPUPC, RegA1); /* RegCPU will be overwritten later */ + + /* Ignore pop PB if Emulation is set */ + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Emulation >> 8, 8)); + branch1 = CachePtr; + GenARM_B(CondNE, PCOFFSET(branch1)); + + EmitPop(in, true); + + GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + + PatchJump((uint32_t *)branch1, CachePtr); + + GenARM_LDR_Imm(CondNE, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + GenARM_ADD_Reg(CondAL, RegA1, RegCPUPC, 0, RegA2); + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + EmitChecks(0); + GenARM_B(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(RTL): + CASE_OP(RTS): + { + dcycles += ONE_CYCLE * 2; + if (in->Op == FnRTS) + dcycles += ONE_CYCLE; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(dcycles, 0)); + EmitPop(in, false); + + GenARM_ADD_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(1, 0)); + 
GenARM_UXTH_Reg(CondAL, RegCPUPC, RegA1); /* No need to preserve RegCPU. It will be overwritten later, and this should never be a WaitAddress */ + + if (in->Op == FnRTL) + { + EmitPop(in, true); + GenARM_MOV_Reg(CondAL, RegA2, ENCODE_SHIFT(ShiftLSL, 16), RegA1); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_STR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(ShiftedPB)); + GenARM_ADD_Reg(CondAL, RegA1, RegCPUPC, 0, RegA2); + } + else + { + GenARM_ADD_Imm(CondAL, RegA1, RegCPUPC, ENCODE_IMM(ICPU.Registers.PB, 16)); + } + + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + if (in->Op == FnRTL) + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + EmitChecks(0); + GenARM_B(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(BPL): + EmitBranch(in, Negative, false, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BMI): + EmitBranch(in, Negative, true, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BVC): + EmitBranch(in, Overflow, false, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BVS): + EmitBranch(in, Overflow, true, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BCC): + EmitBranch(in, Carry, false, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BCS): + EmitBranch(in, Carry, true, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BZC): + EmitBranch(in, Zero, false, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BZS): + EmitBranch(in, Zero, true, &dpc, &dcycles); + didBreak = true; + break; + CASE_OP(BRK): + CASE_OP(COP): + { + uint32_t destPC; + dcycles += ONE_CYCLE; + + GenARM_LDRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + if (!CheckEmulation()) + { + GenARM_LDRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + EmitPush(in, true); + dcycles += ONE_CYCLE; + } + EmitFlushCyclesPC(&dpc, &dcycles); + + 
GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(PCBase)); + + if (!CheckEmulation()) + { + GenARM_ADD_Imm(CondAL, RegA3, RegCPUPC, ENCODE_IMM(1, 0)); + } + + GenARM_SUB_Reg(CondAL, RegA1, RegA3, 0, RegA1); + + EmitPush(in, false); + + GenARM_AND_Imm(CondAL, RegA1, RegP, ENCODE_IMM(0xFF, 0)); + EmitPush(in, true); + GenARM_STRH_Imm(CondAL, RegA4, RegCPU, ICPU_OFFSET(Registers.S)); + + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OPEN_BUS_OFFSET); + + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(Decimal, 0)); + GenARM_ORR_Imm(CondAL, RegP, RegP, ENCODE_IMM(IRQ, 0)); + GenARM_STRH_Imm(CondAL, RegP, RegCPU, ICPU_OFFSET(Registers.P)); + + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0)); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(Registers.PB)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, ICPU_OFFSET(ShiftedPB)); + + if (in->Op == FnBRK) + destPC = CheckEmulation() ? 0xFFFE : 0xFFE6; + else + destPC = CheckEmulation() ? 0xFFF4 : 0xFFE4; + + GenARM_MOV32_Imm(CondAL, RegA1, destPC); + GenARM_SUB_Imm(CondAL, RegA2, RegCPUPC, ENCODE_IMM(1, 0)); + GenARM_BL(CondAL, PCOFFSET(S9xGetWordFast)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallSetPCBase)); + GenARM_LDR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PC)); + + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(CLC): + CASE_OP(CLI): + CASE_OP(CLD): + CASE_OP(CLV): + CASE_OP(SEC): + CASE_OP(SEI): + CASE_OP(SED): + { + uint16_t flag; + bool update = false; + + switch(in->Op) + { + CASE_OP(SEC): + CASE_OP(CLC): + flag = Carry; + break; + CASE_OP(SEI): + CASE_OP(CLI): + flag = IRQ; + update = true; + break; + CASE_OP(SED): + CASE_OP(CLD): + flag = Decimal; + update = true; + break; + CASE_OP(CLV): + flag = Overflow; + break; + default: + fprintf(stderr, "Invalid opcode: %X\n", in->Opcode); + break; + } + + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (in->Op == FnCLC || in->Op == FnCLD || in->Op == FnCLI 
|| in->Op == FnCLV) + GenARM_BIC_Imm(CondAL, RegP, RegP, ENCODE_IMM(flag, 0)); + else + GenARM_ORR_Imm(CondAL, RegP, RegP, ENCODE_IMM(flag, 0)); + + if (update) + GenARM_STRH_Imm(CondAL, RegP, RegCPU, ICPU_OFFSET(Registers.P)); + + EmitChecks(0); + + if (in->SectionUpdate) + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + if (in->ShouldBreak) + { + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + } + break; + } + CASE_OP(SEP): + CASE_OP(REP): + { + dpc++; + dcycles += CPU.MemSpeed + ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (IS_OP(REP)) + GenARM_BIC_Imm(CondAL, RegA1, RegP, ENCODE_IMM(in->PC[1], 0)); + else + GenARM_ORR_Imm(CondAL, RegA1, RegP, ENCODE_IMM(in->PC[1], 0)); + + EmitStoreRegister(in, ArgP, RegA1, false); + + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + break; + } + CASE_OP(XCE): + { + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BIC_Imm(CondAL, RegA2, RegP, ENCODE_IMM(Emulation >> 8, 8)); + GenARM_BIC_Imm(CondAL, RegA2, RegA2, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Reg(CondAL, RegA2, RegA2, ENCODE_SHIFT(ShiftLSR, 8), RegP); + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Carry, 0)); + GenARM_ORR_Imm(CondNE, RegA2, RegA2, ENCODE_IMM(Emulation >> 8, 8)); + + EmitStoreRegister(in, ArgP, RegA2, false); + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpIndirect)); + didBreak = true; + break; + } + CASE_OP(STP): + { +#ifdef NO_SPEEDHACKS + dpc--; + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(Flags)); + GenARM_ORR_Imm(CondAL, RegA1, RegA1, ENCODE_IMM(DEBUG_MODE_FLAG, 0)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(Flags)); +#else + int8_t disp = (in->PC[1] & 0x7F) | ((in->PC[1] & 0x40) << 1); + uint32_t pc = in->PC + 2 - CPU.PCBase; + uint32_t target = (pc + disp) & 0xFFFF; + bool overflow = target != (pc + disp); + dpc++; + + if (overflow) + disp = (target - pc); + + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, 
CPU_OFFSET(WaitAddress)); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown)); + + /* Interpreter runs BranchCheck here. Only when APU is disabled + * until next reset. So cost of the load seems not worth it, unless + * games break. */ + + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(Zero, 0)); + + if (in->PC[1] & 0x80) + GenARM_BL(CondEQ, PCOFFSET(JumpDirectChecks)); + else + GenARM_BL(CondNE, PCOFFSET(JumpDirectChecks)); + + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0)); + + if (Settings.Shutdown) + { + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + } + + if (disp > 0) + { + if (disp & 0xFF) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + else if (disp < 0) + { + disp = -disp; + if (disp & 0xFF) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + + if (Settings.Shutdown) { + GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); + GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); + } +#endif + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + break; + } + CASE_OP(XBA): + { + dcycles += TWO_CYCLES; + EmitFlushCyclesPC(&dpc, &dcycles); + GenARM_MOVS_Reg(CondAL, RegA1, ENCODE_SHIFT(ShiftLSR, 8), RegA); + GenARM_MOV_Reg(CondAL, RegA, ENCODE_SHIFT(ShiftLSL, 24), RegA); + GenARM_ORR_Reg(CondAL, RegA, RegA1, ENCODE_SHIFT(ShiftLSR, 16), RegA); + + EmitSetFlags(RegA1, in->OutFlags & (Zero | Negative), true, false); + EmitChecks(0); + break; + } + CASE_OP(WAI): + { + uint8_t *loop; + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(1, 0)); + GenARM_STRB_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitingForInterrupt)); + dpc--; + loop = CachePtr; + EmitFlushCyclesPC(&dpc, &dcycles); + + if (Settings.Shutdown) { + GenARM_BL(CondAL, 
PCOFFSET(S9xCallWAIShutdown)); + } + + EmitChecks(CheckWFI); + GenARM_B(CondAL, PCOFFSET(loop)); + break; + } + CASE_OP(WDM): + { +#ifndef NO_SPEEDHACKS + int8_t disp = 0xF0 | (in->PC[1] & 0xF); + uint32_t pc = in->PC + 2 - CPU.PCBase; + uint32_t target = (pc + disp) & 0xFFFF; + bool overflow = target != (pc + disp); + uint8_t flag; + uint8_t skip; + dpc++; + + if (overflow) + disp = (target - pc); + + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(0, 0)); + GenARM_STR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + EmitFlushCyclesPC(&dpc, &dcycles); + + GenARM_BL(CondAL, PCOFFSET(S9xCallWAIShutdown)); + + switch(in->PC[1] & 0xF0) + { + case 0x10: /* BPL */ + flag = Negative; + skip = CondNE; + break; + case 0x30: /* BMI */ + flag = Negative; + skip = CondEQ; + break; + case 0x50: /* BVC */ + flag = Overflow; + skip = CondNE; + break; + case 0x70: /* BVS */ + flag = Overflow; + skip = CondEQ; + break; + case 0x80: /* BRA */ + flag = 0; + break; + case 0x90: /* BCC */ + flag = Carry; + skip = CondNE; + break; + case 0xB0: /* BCS */ + flag = Carry; + skip = CondEQ; + break; + case 0xD0: /* BNE */ + flag = Zero; + skip = CondNE; + break; + case 0xF0: /* BEQ */ + flag = Zero; + skip = CondEQ; + break; + default: + didBreak = true; + break; + } + + if (!didBreak) + { + /* Interpreter runs BranchCheck here. Only when APU is disabled + * until next reset. So cost of the load seems not worth it, unless + * games break. 
*/ + + if (flag) + { + GenARM_TST_Imm(CondAL, RegP, ENCODE_IMM(flag, 0)); + GenARM_BL(skip, PCOFFSET(JumpDirectChecks)); + } + + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(ONE_CYCLE, 0)); + + if (Settings.Shutdown) + { + GenARM_LDR_Imm(CondAL, RegA1, RegCPU, CPU_OFFSET(WaitAddress)); + } + + if (disp > 0) + { + if (disp & 0xFF) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + else if (disp < 0) + { + disp = -disp; + if (disp & 0xFF) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp & 0xFF, 0)); + + if (disp & 0xFF00) + GenARM_SUB_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(disp >> 8 & 0xFF, 8)); + } + + if (Settings.Shutdown) { + GenARM_CMP_Reg(CondAL, RegA1, 0, RegCPUPC); + GenARM_BL(CondEQ, PCOFFSET(S9xCallCPUShutdown)); + } + } + +#endif + EmitChecks(0); + GenARM_BL(CondAL, PCOFFSET(JumpDirect)); + didBreak = true; + break; + } + CASE_OP(NOP): + { + dcycles += ONE_CYCLE; + EmitFlushCyclesPC(&dpc, &dcycles); + EmitChecks(0); + break; + } + } + + if (in->ShouldBreak && !didBreak) + { + GenARM_B(CondAL, PCOFFSET(JumpIndirect)); + } +} + +static void EmitS9xCall(Instruction *in) +{ + /* CPU.PCAtOpcodeStart = CPU.PC; */ + /* CPU.Cycles += CPU.MemSpeed; */ + /* CPU.PC++; */ + + GenARM_STR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PCAtOpcodeStart)); + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(1, 0)); + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(CPU.MemSpeed, 0)); + + GenARM_BL(CondAL, PCOFFSET(S9xRegsToMem)); + + GenARM_BL(CondAL, PCOFFSET((uintptr_t)in->S9xOpcode)); + + if (in->SectionUpdate) + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + GenARM_BL(CondAL, PCOFFSET(S9xMemToRegs)); + + EmitChecks(0); + + if (in->ShouldBreak) + { + GenARM_B(CondAL, PCOFFSET(JumpIndirect)); + } +} + +static void EmitInterpreterLoop(Instruction *in) +{ + /* CPU.PCAtOpcodeStart = CPU.PC; */ + /* 
CPU.Cycles += CPU.MemSpeed; */ + /* CPU.PC++; */ + uint8_t *start = CachePtr; + + GenARM_LDRB_Imm(CondAL, RegA1, RegCPUPC, 0); + GenARM_STR_Imm(CondAL, RegCPUPC, RegCPU, CPU_OFFSET(PCAtOpcodeStart)); + GenARM_LDR_Imm(CondAL, RegA2, RegCPU, ICPU_OFFSET(S9xOpcodes)); + GenARM_ADD_Imm(CondAL, RegCPUPC, RegCPUPC, ENCODE_IMM(1, 0)); + GenARM_ADD_Imm(CondAL, RegCycles, RegCycles, ENCODE_IMM(CPU.MemSpeed, 0)); + GenARM_LDR_Reg(CondAL, RegA2, RegA2, 1, ENCODE_SHIFT(ShiftLSL, 2), RegA1); + + GenARM_BL(CondAL, PCOFFSET(S9xRegsToMem)); + + GenARM_BLX_Reg(CondAL, RegA2); + GenARM_BL(CondAL, PCOFFSET(S9xCallUpdateSection)); + + GenARM_BL(CondAL, PCOFFSET(S9xMemToRegs)); + + EmitChecks(0); + + GenARM_B(CondAL, PCOFFSET(start)); +} + +static void EmitOne(Instruction *in) +{ + +#ifndef NDEBUG + GenARM_MOV_Imm(CondAL, RegA1, ENCODE_IMM(in->Opcode, 0)); + GenARM_MOV32_Imm(CondAL, RegA2, (uintptr_t)in->PC); + GenARM_MOV_Imm(CondAL, RegA3, ENCODE_IMM(in->Emitter == EmitS9xCall, 0)); + GenARM_BL(CondAL, PCOFFSET(S9xCallCheckInstruction)); +#endif + + /* Call Opcode */ + in->Emitter(in); +} + +#define SizeNone 0 +#define SizeRegister8 0 +#define SizeRegister16 0 +#define SizeImmediate8 1 +#define SizeImmediate16 2 +#define SizeImmediate24 3 +#define SizeZeroPage8 1 +#define SizeZeroPage16 1 +#define SizeIndirect8 1 +#define SizeIndirect16 1 +#define SizeIndirectX8 1 +#define SizeIndirectX16 1 +#define SizeIndirectY8 1 +#define SizeIndirectY16 1 +#define SizeIndirectS8 1 +#define SizeIndirectS16 1 +#define SizeIndirectFar8 1 +#define SizeIndirectFar16 1 +#define SizeAbsolute8 2 +#define SizeAbsolute16 2 +#define SizeLong8 3 +#define SizeLong16 3 + +#define OPSIZE(cond, t, f) cond ? Size##t : Size##f +#define OPADDRMODE(cond, t, f) cond ? Addr##t : Addr##f +#define OPFN(cond, t, f) cond ? 
Fn##t : Fn##f + +#define OP(opcode, addrmode, op, arg1, arg2, gen, used, flags) \ + case 0x##opcode: pc += OPSIZE(addrmode); in.Emitter = EmitNativeCall; in.ShouldBreak = (flags) & OFBreak; in.SectionUpdate = (flags) & OFSectionUpdate; in.Op = OPFN(op); in.AddrMode = OPADDRMODE(addrmode); in.Arg1 = Arg##arg1; in.Arg2 = Arg##arg2; in.OutFlags = gen; in.InFlags = used; break; + +#define OPD(opcode, addrmode, op, arg1, arg2, gen, used, flags) \ + case 0x##opcode: pc += OPSIZE(addrmode); in.Emitter = EmitS9xCall; in.S9xOpcode = ICPU.S9xOpcodes[0x##opcode].S9xOpcode; in.SectionUpdate = (flags) & OFSectionUpdate; in.ShouldBreak = (flags) & OFBreak; in.OutFlags = gen; in.InFlags = used; break; + +static size_t ParseInstructions(uint8_t *pc, Instruction *instructions, size_t max) +{ + bool shouldBreak; + int i; + + for (i = 0, shouldBreak = false; i < max && !shouldBreak; i++) + { + uint8_t opcode; + Instruction in = {0}; + + in.PC = pc; + +#include "arm_dynarec/opgen.h" + + in.Opcode = opcode; + + instructions[i] = in; + shouldBreak = in.ShouldBreak; + } + + if (i > 0) + instructions[i - 1].ShouldBreak = true; + + return i; +} + +static void ScanInstructions(Instruction *instructions, size_t length) +{ + uint16_t CurrentFlags = (Negative | Zero | Carry | Overflow); + uint16_t i; + + if (length < 2) + return; + + /* Dead flag elimination */ + for (i = length - 1; i > 0; i--) + { + Instruction *in = &instructions[i - 1]; + Instruction *next = &instructions[i]; + + CurrentFlags = CurrentFlags | next->InFlags; + in->OutFlags = in->OutFlags & CurrentFlags; + CurrentFlags = CurrentFlags & ~in->OutFlags; + } +} + +static void Emit(uint16_t pc, uint8_t **block) +{ + uint8_t *start = CachePtr; + uint8_t *StartPC = pc + CPU.PCBase; + uint32_t address = pc | ICPU.ShiftedPB; + int32_t MemoryBlock = (address >> MEMMAP_SHIFT) & MEMMAP_MASK; + + Instruction instructions[100]; + size_t length; + int i; + + if (Memory.BlockIsRAM[MemoryBlock]) + { + /* Code in RAM, for now just run 
interpreter until interrupt */ + Instruction in = { 0 }; + instructions[0] = in; + EmitInterpreterLoop(&instructions[0]); + } + else + { + length = ParseInstructions(StartPC, instructions, sizeof(instructions) / sizeof(instructions[0])); + ScanInstructions(instructions, length); + + for (i = 0; i < length; i++) + EmitOne(&instructions[i]); + } + + __clear_cache(start, CachePtr); + *block = start; +} + +uint8_t **FetchBlock(uint16_t pc) +{ + uint8_t **block = FindBlock(pc); +#ifndef NDEBUG + Metrics.Finds++; + Metrics.InterpretedBlock = false; + + if (!*block) + Metrics.Emits++; +#endif + if (!*block) + Emit(pc, block); + +#ifndef NDEBUG + trace[traceidx++] = (CacheSection << 16) | pc; + trace[traceidx++] = (uintptr_t)*block; + traceidx %= 128; +#endif + + return block; +} + +void S9xMainLoop_Dyna(void) +{ + UpdateSection(); + +#ifdef LAGFIX + do + { +#endif + uint8_t **block = FetchBlock(CPU.PC - CPU.PCBase); + BlockEnter(*block); + + ICPU.Registers.PC = CPU.PC - CPU.PCBase; +#ifndef USE_BLARGG_APU + IAPU.Registers.PC = IAPU.PC - IAPU.RAM; +#endif + +#ifdef LAGFIX + if(!finishedFrame) + { +#endif + S9xPackStatus(); +#ifndef USE_BLARGG_APU + S9xAPUPackStatus(); +#endif + CPU.Flags &= ~SCAN_KEYS_FLAG; +#ifdef LAGFIX + } + else + { + finishedFrame = false; + break; + } + } while(!finishedFrame); +#endif +} + +int DynaInit(void) +{ + static bool inited = false; + + if (!inited) + { + inited = true; +#ifndef NDEBUG + DynaBreak(); +#endif + + Cache = MapRWX((void *)((uintptr_t)DynaInit), BUFFER_SIZE + BUFFER_EXTRA); + + if (!Cache) + return -1; + } + + return 0; +} + +int DynaReset(void) +{ + if (DynaInit()) + return -1; + + printf("Starting dynarec\n"); + CacheEmpty(); + return 0; +} diff --git a/source/arm_dynarec/dynaexec.h b/source/arm_dynarec/dynaexec.h new file mode 100644 index 0000000..b8a807d --- /dev/null +++ b/source/arm_dynarec/dynaexec.h @@ -0,0 +1,14 @@ +#ifndef DYNAEXEC_H +#define DYNAEXEC_H + +#include <stdint.h> +#include <stddef.h> + +void 
S9xMainLoop_Dyna(); +int DynaInit(void); +int DynaReset(void); +void DynaBreak(void); + +void *MapRWX(void *target, size_t size); + +#endif /* DYNAEXEC_H */ diff --git a/source/arm_dynarec/map_rwx.c b/source/arm_dynarec/map_rwx.c new file mode 100644 index 0000000..d4373f7 --- /dev/null +++ b/source/arm_dynarec/map_rwx.c @@ -0,0 +1,16 @@ +#define _GNU_SOURCE /* MAP_ANONYMOUS */ + +#include <sys/mman.h> +#include <stdlib.h> + +void *MapRWX(void *target, size_t size) { + void *buf = mmap(target, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0); + + if (buf == MAP_FAILED) + { + perror("mmap"); + return NULL; + } + + return buf; +} diff --git a/source/arm_dynarec/opdef.h b/source/arm_dynarec/opdef.h new file mode 100644 index 0000000..80606d8 --- /dev/null +++ b/source/arm_dynarec/opdef.h @@ -0,0 +1,281 @@ +OP (A8, XF(Register), XF(MOV), Y, A, NZ, 0, OFNone); +OP (AA, XF(Register), XF(MOV), X, A, NZ, 0, OFNone); +OP (BA, XF(Register), XF(MOV), X, S, NZ, 0, OFNone); +OP (98, MF(Register), MF(MOV), A, Y, NZ, 0, OFNone); +OP (8A, MF(Register), MF(MOV), A, X, NZ, 0, OFNone); +OP (9A, EF(Register), EF(MOVSP), S, X, 0, 0, OFNone); +OP (9B, XF(Register), XF(MOV), Y, X, NZ, 0, OFNone); +OP (BB, XF(Register), XF(MOV), X, Y, NZ, 0, OFNone); +OP (7B, NF16(Register), NF16(MOV), A, D, NZ, 0, OFNone); +OP (5B, NF16(Register), NF16(MOV), D, A, NZ, 0, OFNone); +OP (3B, NF16(Register), NF16(MOV), A, S, NZ, 0, OFNone); +OP (1B, EF(Register), EF(MOVSP), S, A, 0, 0, OFNone); + +OP (A9, MF(Immediate), MF(LD), A, NULL, NZ, 0, OFNone); +OP (A5, MF(ZeroPage), MF(LD), A, NULL, NZ, 0, OFNone); +OP (B5, MF(ZeroPage), MF(LD), A, X, NZ, 0, OFNone); +OP (A3, MF(ZeroPage), MF(LD), A, S, NZ, 0, OFNone); +OP (AD, MF(Absolute), MF(LD), A, NULL, NZ, 0, OFNone); +OP (BD, MF(Absolute), MF(LD), A, X, NZ, 0, OFNone); +OP (B9, MF(Absolute), MF(LD), A, Y, NZ, 0, OFNone); +OP (AF, MF(Long), MF(LD), A, NULL, NZ, 0, OFNone); +OP (BF, MF(Long), MF(LD), A, X, NZ, 0, OFNone); +OP (B2, 
MF(Indirect), MF(LD), A, NULL, NZ, 0, OFNone); +OP (A1, MF(IndirectX), MF(LD), A, X, NZ, 0, OFNone); +OP (B1, MF(IndirectY), MF(LD), A, Y, NZ, 0, OFNone); +OP (B3, MF(IndirectS), MF(LD), A, Y, NZ, 0, OFNone); +OP (A7, MF(IndirectFar), MF(LD), A, NULL, NZ, 0, OFNone); +OP (B7, MF(IndirectFar), MF(LD), A, Y, NZ, 0, OFNone); +OP (A2, XF(Immediate), XF(LD), X, NULL, NZ, 0, OFNone); +OP (A6, XF(ZeroPage), XF(LD), X, NULL, NZ, 0, OFNone); +OP (B6, XF(ZeroPage), XF(LD), X, Y, NZ, 0, OFNone); +OP (AE, XF(Absolute), XF(LD), X, NULL, NZ, 0, OFNone); +OP (BE, XF(Absolute), XF(LD), X, Y, NZ, 0, OFNone); +OP (A0, XF(Immediate), XF(LD), Y, NULL, NZ, 0, OFNone); +OP (A4, XF(ZeroPage), XF(LD), Y, NULL, NZ, 0, OFNone); +OP (B4, XF(ZeroPage), XF(LD), Y, X, NZ, 0, OFNone); +OP (AC, XF(Absolute), XF(LD), Y, NULL, NZ, 0, OFNone); +OP (BC, XF(Absolute), XF(LD), Y, X, NZ, 0, OFNone); + +OP (64, MF(ZeroPage), MF(ST), Z, NULL, 0, 0, OFNone); +OP (74, MF(ZeroPage), MF(ST), Z, X, 0, 0, OFNone); +OP (9C, MF(Absolute), MF(ST), Z, NULL, 0, 0, OFNone); +OP (9E, MF(Absolute), MF(ST), Z, X, 0, 0, OFNone); +OP (85, MF(ZeroPage), MF(ST), A, NULL, 0, 0, OFNone); +OP (95, MF(ZeroPage), MF(ST), A, X, 0, 0, OFNone); +OP (83, MF(ZeroPage), MF(ST), A, S, 0, 0, OFNone); +OP (8D, MF(Absolute), MF(ST), A, NULL, 0, 0, OFNone); +OP (9D, MF(Absolute), MF(ST), A, X, 0, 0, OFNone); +OP (99, MF(Absolute), MF(ST), A, Y, 0, 0, OFNone); +OP (8F, MF(Long), MF(ST), A, NULL, 0, 0, OFNone); +OP (9F, MF(Long), MF(ST), A, X, 0, 0, OFNone); +OP (81, MF(IndirectX), MF(ST), A, X, 0, 0, OFNone); +OP (91, MF(IndirectY), MF(ST), A, Y, 0, 0, OFNone); +OP (92, MF(Indirect), MF(ST), A, NULL, 0, 0, OFNone); +OP (93, MF(IndirectS), MF(ST), A, Y, 0, 0, OFNone); +OP (87, MF(IndirectFar), MF(ST), A, NULL, 0, 0, OFNone); +OP (97, MF(IndirectFar), MF(ST), A, Y, 0, 0, OFNone); +OP (86, XF(ZeroPage), XF(ST), X, NULL, 0, 0, OFNone); +OP (96, XF(ZeroPage), XF(ST), X, Y, 0, 0, OFNone); +OP (8E, XF(Absolute), XF(ST), X, NULL, 0, 0, OFNone); +OP 
(84, XF(ZeroPage), XF(ST), Y, NULL, 0, 0, OFNone); +OP (94, XF(ZeroPage), XF(ST), Y, X, 0, 0, OFNone); +OP (8C, XF(Absolute), XF(ST), Y, NULL, 0, 0, OFNone); + +OP (48, MF(Register), MF(PUSH), A, NULL, 0, 0, OFNone); +OP (DA, XF(Register), XF(PUSH), X, NULL, 0, 0, OFNone); +OP (5A, XF(Register), XF(PUSH), Y, NULL, 0, 0, OFNone); +OP (08, NF8(Register), NF8(PUSH), P, NULL, 0, NZCV, OFNone); +OP (8B, NF8(Register), NF8(PUSH), DB, NULL, 0, 0, OFNone); +OP (4B, NF8(Register), NF8(PUSH), PB, NULL, 0, 0, OFNone); +OP (0B, NF16(Register), NF16(PUSH), D, NULL, 0, 0, OFNone); +OP (D4, NF16(ZeroPage), NF16(PUSH), NULL, NULL, 0, 0, OFNone); +OP (F4, NF16(Immediate), NF(PEA), NULL, NULL, 0, 0, OFNone); +OP (62, NF16(Immediate), NF(PER), NULL, NULL, 0, 0, OFNone); +OP (68, MF(Register), MF(POP), A, NULL, NZ, 0, OFNone); +OP (FA, XF(Register), XF(POP), X, NULL, NZ, 0, OFNone); +OP (7A, XF(Register), XF(POP), Y, NULL, NZ, 0, OFNone); +OP (2B, NF16(Register), NF16(POP), D, NULL, NZ, 0, OFNone); +OP (AB, NF8(Register), NF8(POP), DB, NULL, NZ, 0, OFNone); +OP (28, NF8(Register), NF8(POP), P, NULL, NZCV, 0, OFBreak | OFSectionUpdate); + +OP (44, NF16(Immediate), NF(MVP), NULL, NULL, 0, NZCV, OFBreak); +OP (54, NF16(Immediate), NF(MVN), NULL, NULL, 0, NZCV, OFBreak); + +OP (09, MF(Immediate), MF(OR), A, NULL, NZ, 0, OFNone); +OP (05, MF(ZeroPage), MF(OR), A, NULL, NZ, 0, OFNone); +OP (15, MF(ZeroPage), MF(OR), A, X, NZ, 0, OFNone); +OP (0D, MF(Absolute), MF(OR), A, NULL, NZ, 0, OFNone); +OP (1D, MF(Absolute), MF(OR), A, X, NZ, 0, OFNone); +OP (19, MF(Absolute), MF(OR), A, Y, NZ, 0, OFNone); +OP (01, MF(IndirectX), MF(OR), A, X, NZ, 0, OFNone); +OP (11, MF(IndirectY), MF(OR), A, Y, NZ, 0, OFNone); +OP (12, MF(Indirect), MF(OR), A, NULL, NZ, 0, OFNone); +OP (03, MF(ZeroPage), MF(OR), A, S, NZ, 0, OFNone); +OP (13, MF(IndirectS), MF(OR), A, Y, NZ, 0, OFNone); +OP (07, MF(IndirectFar), MF(OR), A, NULL, NZ, 0, OFNone); +OP (17, MF(IndirectFar), MF(OR), A, Y, NZ, 0, OFNone); +OP (0F, 
MF(Long), MF(OR), A, NULL, NZ, 0, OFNone); +OP (1F, MF(Long), MF(OR), A, X, NZ, 0, OFNone); + +OP (29, MF(Immediate), MF(AND), A, NULL, NZ, 0, OFNone); +OP (25, MF(ZeroPage), MF(AND), A, NULL, NZ, 0, OFNone); +OP (35, MF(ZeroPage), MF(AND), A, X, NZ, 0, OFNone); +OP (2D, MF(Absolute), MF(AND), A, NULL, NZ, 0, OFNone); +OP (3D, MF(Absolute), MF(AND), A, X, NZ, 0, OFNone); +OP (39, MF(Absolute), MF(AND), A, Y, NZ, 0, OFNone); +OP (21, MF(IndirectX), MF(AND), A, X, NZ, 0, OFNone); +OP (31, MF(IndirectY), MF(AND), A, Y, NZ, 0, OFNone); +OP (32, MF(Indirect), MF(AND), A, NULL, NZ, 0, OFNone); +OP (23, MF(ZeroPage), MF(AND), A, S, NZ, 0, OFNone); +OP (33, MF(IndirectS), MF(AND), A, Y, NZ, 0, OFNone); +OP (27, MF(IndirectFar), MF(AND), A, NULL, NZ, 0, OFNone); +OP (37, MF(IndirectFar), MF(AND), A, Y, NZ, 0, OFNone); +OP (2F, MF(Long), MF(AND), A, NULL, NZ, 0, OFNone); +OP (3F, MF(Long), MF(AND), A, X, NZ, 0, OFNone); + +OP (49, MF(Immediate), MF(EOR), A, NULL, NZ, 0, OFNone); +OP (45, MF(ZeroPage), MF(EOR), A, NULL, NZ, 0, OFNone); +OP (55, MF(ZeroPage), MF(EOR), A, X, NZ, 0, OFNone); +OP (4D, MF(Absolute), MF(EOR), A, NULL, NZ, 0, OFNone); +OP (5D, MF(Absolute), MF(EOR), A, X, NZ, 0, OFNone); +OP (59, MF(Absolute), MF(EOR), A, Y, NZ, 0, OFNone); +OP (41, MF(IndirectX), MF(EOR), A, X, NZ, 0, OFNone); +OP (51, MF(IndirectY), MF(EOR), A, Y, NZ, 0, OFNone); +OP (52, MF(Indirect), MF(EOR), A, NULL, NZ, 0, OFNone); +OP (43, MF(ZeroPage), MF(EOR), A, S, NZ, 0, OFNone); +OP (53, MF(IndirectS), MF(EOR), A, Y, NZ, 0, OFNone); +OP (47, MF(IndirectFar), MF(EOR), A, NULL, NZ, 0, OFNone); +OP (57, MF(IndirectFar), MF(EOR), A, Y, NZ, 0, OFNone); +OP (4F, MF(Long), MF(EOR), A, NULL, NZ, 0, OFNone); +OP (5F, MF(Long), MF(EOR), A, X, NZ, 0, OFNone); + +OP (69, MF(Immediate), MF(ADC), A, NULL, NZCV, C, OFNone); +OP (65, MF(ZeroPage), MF(ADC), A, NULL, NZCV, C, OFNone); +OP (75, MF(ZeroPage), MF(ADC), A, X, NZCV, C, OFNone); +OP (6D, MF(Absolute), MF(ADC), A, NULL, NZCV, C, OFNone); +OP 
(7D, MF(Absolute), MF(ADC), A, X, NZCV, C, OFNone); +OP (79, MF(Absolute), MF(ADC), A, Y, NZCV, C, OFNone); +OP (61, MF(IndirectX), MF(ADC), A, X, NZCV, C, OFNone); +OP (71, MF(IndirectY), MF(ADC), A, Y, NZCV, C, OFNone); +OP (72, MF(Indirect), MF(ADC), A, NULL, NZCV, C, OFNone); +OP (63, MF(ZeroPage), MF(ADC), A, S, NZCV, C, OFNone); +OP (73, MF(IndirectS), MF(ADC), A, Y, NZCV, C, OFNone); +OP (67, MF(IndirectFar), MF(ADC), A, NULL, NZCV, C, OFNone); +OP (77, MF(IndirectFar), MF(ADC), A, Y, NZCV, C, OFNone); +OP (6F, MF(Long), MF(ADC), A, NULL, NZCV, C, OFNone); +OP (7F, MF(Long), MF(ADC), A, X, NZCV, C, OFNone); + +OP (E9, MF(Immediate), MF(SBC), A, NULL, NZCV, C, OFNone); +OP (E5, MF(ZeroPage), MF(SBC), A, NULL, NZCV, C, OFNone); +OP (F5, MF(ZeroPage), MF(SBC), A, X, NZCV, C, OFNone); +OP (ED, MF(Absolute), MF(SBC), A, NULL, NZCV, C, OFNone); +OP (FD, MF(Absolute), MF(SBC), A, X, NZCV, C, OFNone); +OP (F9, MF(Absolute), MF(SBC), A, Y, NZCV, C, OFNone); +OP (E1, MF(IndirectX), MF(SBC), A, X, NZCV, C, OFNone); +OP (F1, MF(IndirectY), MF(SBC), A, Y, NZCV, C, OFNone); +OP (F2, MF(Indirect), MF(SBC), A, NULL, NZCV, C, OFNone); +OP (E3, MF(ZeroPage), MF(SBC), A, S, NZCV, C, OFNone); +OP (F3, MF(IndirectS), MF(SBC), A, Y, NZCV, C, OFNone); +OP (E7, MF(IndirectFar), MF(SBC), A, NULL, NZCV, C, OFNone); +OP (F7, MF(IndirectFar), MF(SBC), A, Y, NZCV, C, OFNone); +OP (EF, MF(Long), MF(SBC), A, NULL, NZCV, C, OFNone); +OP (FF, MF(Long), MF(SBC), A, X, NZCV, C, OFNone); + +OP (C9, MF(Immediate), MF(CMP), A, NULL, NZC, 0, OFNone); +OP (C5, MF(ZeroPage), MF(CMP), A, NULL, NZC, 0, OFNone); +OP (D5, MF(ZeroPage), MF(CMP), A, X, NZC, 0, OFNone); +OP (CD, MF(Absolute), MF(CMP), A, NULL, NZC, 0, OFNone); +OP (DD, MF(Absolute), MF(CMP), A, X, NZC, 0, OFNone); +OP (D9, MF(Absolute), MF(CMP), A, Y, NZC, 0, OFNone); +OP (C1, MF(IndirectX), MF(CMP), A, X, NZC, 0, OFNone); +OP (D1, MF(IndirectY), MF(CMP), A, Y, NZC, 0, OFNone); +OP (D2, MF(Indirect), MF(CMP), A, NULL, NZC, 0, OFNone); +OP 
(C3, MF(ZeroPage), MF(CMP), A, S, NZC, 0, OFNone); +OP (D3, MF(IndirectS), MF(CMP), A, Y, NZC, 0, OFNone); +OP (C7, MF(IndirectFar), MF(CMP), A, NULL, NZC, 0, OFNone); +OP (D7, MF(IndirectFar), MF(CMP), A, Y, NZC, 0, OFNone); +OP (CF, MF(Long), MF(CMP), A, NULL, NZC, 0, OFNone); +OP (DF, MF(Long), MF(CMP), A, X, NZC, 0, OFNone); + +OP (E0, XF(Immediate), XF(CMP), X, NULL, NZC, 0, OFNone); +OP (E4, XF(ZeroPage), XF(CMP), X, NULL, NZC, 0, OFNone); +OP (EC, XF(Absolute), XF(CMP), X, NULL, NZC, 0, OFNone); + +OP (C0, XF(Immediate), XF(CMP), Y, NULL, NZC, 0, OFNone); +OP (C4, XF(ZeroPage), XF(CMP), Y, NULL, NZC, 0, OFNone); +OP (CC, XF(Absolute), XF(CMP), Y, NULL, NZC, 0, OFNone); + +OP (24, MF(ZeroPage), MF(BIT), A, NULL, NZV, 0, OFNone); +OP (2C, MF(Absolute), MF(BIT), A, NULL, NZV, 0, OFNone); +OP (34, MF(ZeroPage), MF(BIT), A, X, NZV, 0, OFNone); +OP (3C, MF(Absolute), MF(BIT), A, X, NZV, 0, OFNone); +OP (89, MF(Immediate), MF(BIT), A, NULL, Z, 0, OFNone); + +OP (E6, MF(ZeroPage), MF(INC), NULL, NULL, NZ, 0, OFNone); +OP (F6, MF(ZeroPage), MF(INC), NULL, X, NZ, 0, OFNone); +OP (EE, MF(Absolute), MF(INC), NULL, NULL, NZ, 0, OFNone); +OP (FE, MF(Absolute), MF(INC), NULL, X, NZ, 0, OFNone); +OP (E8, XF(Register), XF(INC), X, NULL, NZ, 0, OFNone); +OP (C8, XF(Register), XF(INC), Y, NULL, NZ, 0, OFNone); +OP (1A, MF(Register), MF(INC), A, NULL, NZ, 0, OFNone); + +OP (C6, MF(ZeroPage), MF(DEC), NULL, NULL, NZ, 0, OFNone); +OP (D6, MF(ZeroPage), MF(DEC), NULL, X, NZ, 0, OFNone); +OP (CE, MF(Absolute), MF(DEC), NULL, NULL, NZ, 0, OFNone); +OP (DE, MF(Absolute), MF(DEC), NULL, X, NZ, 0, OFNone); +OP (CA, XF(Register), XF(DEC), X, NULL, NZ, 0, OFNone); +OP (88, XF(Register), XF(DEC), Y, NULL, NZ, 0, OFNone); +OP (3A, MF(Register), MF(DEC), A, NULL, NZ, 0, OFNone); + +OP (04, MF(ZeroPage), MF(TSB), A, NULL, Z, 0, OFNone); +OP (0C, MF(Absolute), MF(TSB), A, NULL, Z, 0, OFNone); +OP (14, MF(ZeroPage), MF(TRB), A, NULL, Z, 0, OFNone); +OP (1C, MF(Absolute), MF(TRB), A, NULL, Z, 
0, OFNone); + +OP (0A, MF(Register), MF(ASL), A, NULL, NZC, 0, OFNone); +OP (06, MF(ZeroPage), MF(ASL), NULL, NULL, NZC, 0, OFNone); +OP (16, MF(ZeroPage), MF(ASL), NULL, X, NZC, 0, OFNone); +OP (0E, MF(Absolute), MF(ASL), NULL, NULL, NZC, 0, OFNone); +OP (1E, MF(Absolute), MF(ASL), NULL, X, NZC, 0, OFNone); + +OP (4A, MF(Register), MF(LSR), A, NULL, NZC, 0, OFNone); +OP (46, MF(ZeroPage), MF(LSR), NULL, NULL, NZC, 0, OFNone); +OP (56, MF(ZeroPage), MF(LSR), NULL, X, NZC, 0, OFNone); +OP (4E, MF(Absolute), MF(LSR), NULL, NULL, NZC, 0, OFNone); +OP (5E, MF(Absolute), MF(LSR), NULL, X, NZC, 0, OFNone); + +OP (2A, MF(Register), MF(ROL), A, NULL, NZC, C, OFNone); +OP (26, MF(ZeroPage), MF(ROL), NULL, NULL, NZC, C, OFNone); +OP (36, MF(ZeroPage), MF(ROL), NULL, X, NZC, C, OFNone); +OP (2E, MF(Absolute), MF(ROL), NULL, NULL, NZC, C, OFNone); +OP (3E, MF(Absolute), MF(ROL), NULL, X, NZC, C, OFNone); + +OP (6A, MF(Register), MF(ROR), A, NULL, NZC, C, OFNone); +OP (66, MF(ZeroPage), MF(ROR), NULL, NULL, NZC, C, OFNone); +OP (76, MF(ZeroPage), MF(ROR), NULL, X, NZC, C, OFNone); +OP (6E, MF(Absolute), MF(ROR), NULL, NULL, NZC, C, OFNone); +OP (7E, MF(Absolute), MF(ROR), NULL, X, NZC, C, OFNone); + +OP (80, NF8(Immediate), NF(BRA), NULL, NULL, 0, NZCV, OFBreak); +OP (82, NF16(Immediate), NF(BRL), NULL, NULL, 0, NZCV, OFBreak); +OP (4C, NF16(Immediate), NF(JMP), NULL, NULL, 0, NZCV, OFBreak); +OP (5C, NF(Immediate24), NF(JMP), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate); +OP (6C, NF16(Absolute), NF(JMP), NULL, NULL, 0, NZCV, OFBreak); +OP (7C, NF16(Absolute), NF(JMP), NULL, X, 0, NZCV, OFBreak); +OP (DC, NF16(Absolute), NF(JML), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate); +OP (20, NF16(Immediate), NF(JSR), NULL, NULL, 0, NZCV, OFBreak); +OP (22, NF(Immediate24), NF(JSL), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate); +OP (FC, NF16(IndirectX), NF(JSR), NULL, X, 0, NZCV, OFBreak); +OP (40, NF(None), NF(RTI), NULL, NULL, NZCV, NZCV, OFBreak | OFSectionUpdate); +OP (6B, 
NF(None), NF(RTL), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate); +OP (60, NF(None), NF(RTS), NULL, NULL, 0, NZCV, OFBreak); + +OP (10, NF8(Immediate), NF(BPL), NULL, NULL, 0, NZCV, OFBreak); +OP (30, NF8(Immediate), NF(BMI), NULL, NULL, 0, NZCV, OFBreak); +OP (50, NF8(Immediate), NF(BVC), NULL, NULL, 0, NZCV, OFBreak); +OP (70, NF8(Immediate), NF(BVS), NULL, NULL, 0, NZCV, OFBreak); +OP (90, NF8(Immediate), NF(BCC), NULL, NULL, 0, NZCV, OFBreak); +OP (B0, NF8(Immediate), NF(BCS), NULL, NULL, 0, NZCV, OFBreak); +OP (D0, NF8(Immediate), NF(BZC), NULL, NULL, 0, NZCV, OFBreak); +OP (F0, NF8(Immediate), NF(BZS), NULL, NULL, 0, NZCV, OFBreak); + +OP (00, NF(None), NF(BRK), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate); +OP (02, NF(None), NF(COP), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate); + +OP (18, NF(None), NF(CLC), NULL, NULL, C, 0, OFNone); +OP (58, NF(None), NF(CLI), NULL, NULL, 0, 0, OFNone); +OP (D8, NF(None), NF(CLD), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate); +OP (B8, NF(None), NF(CLV), NULL, NULL, V, 0, OFNone); +OP (38, NF(None), NF(SEC), NULL, NULL, C, 0, OFNone); +OP (78, NF(None), NF(SEI), NULL, NULL, 0, 0, OFNone); +OP (F8, NF(None), NF(SED), NULL, NULL, 0, NZCV, OFBreak | OFSectionUpdate); +OP (C2, NF8(Immediate), NF(REP), NULL, NULL, NZCV, NZCV, OFBreak | OFSectionUpdate); +OP (E2, NF8(Immediate), NF(SEP), NULL, NULL, NZCV, NZCV, OFBreak | OFSectionUpdate); +OP (FB, NF(None), NF(XCE), NULL, NULL, C, NZCV, OFBreak | OFSectionUpdate); + +OP (DB, NF(None), NF(STP), NULL, NULL, 0, NZCV, OFBreak); +OP (EB, NF(None), NF(XBA), NULL, NULL, NZ, 0, OFNone); +OP (CB, NF(None), NF(WAI), NULL, NULL, 0, NZCV, OFBreak); +OP (42, NF8(Immediate), NF(WDM), NULL, NULL, 0, NZCV, OFBreak); /* Speedhacks, might change PC */ +OP (EA, NF(None), NF(NOP), NULL, NULL, 0, 0, OFNone); diff --git a/source/arm_dynarec/opgen.h b/source/arm_dynarec/opgen.h new file mode 100644 index 0000000..aaa5556 --- /dev/null +++ b/source/arm_dynarec/opgen.h @@ -0,0 +1,26 @@ +/* 
Macros turn opcode tables into code */ + +#define F8(F) F##8 +#define F16(F) F##16 +#define EF(F) 0, F16(F), F16(F) +#define NF(F) 0, F, F +#define NF8(F) 0, F8(F), F8(F) +#define NF16(F) 0, F16(F), F16(F) +#define MF(F) (CheckEmulation() || CheckMemory()), F8(F), F16(F) +#define XF(F) (CheckEmulation() || CheckIndex()), F8(F), F16(F) + +#define C (Carry) +#define Z (Zero) +#define V (Overflow) +#define NZ (Negative | Zero) +#define NZC (Negative | Zero | Carry) +#define NZV (Negative | Zero | Overflow) +#define NZCV (Negative | Zero | Carry | Overflow) + +switch(opcode = *pc++) { + +#include "opdef.h" + +default: \ + printf("Invalid opcode : 0x%X\n", opcode); \ +} diff --git a/source/cpuexec.c b/source/cpuexec.c index 2bbf90a..15c9157 100644 --- a/source/cpuexec.c +++ b/source/cpuexec.c @@ -11,6 +11,7 @@ #include "fxemu.h" #include "sa1.h" #include "spc7110.h" +#include "arm_dynarec/dynaexec.h" void S9xMainLoop_SA1_SFX(void); void S9xMainLoop_SA1_NoSFX(void); @@ -28,6 +29,13 @@ void S9xMainLoop_NoSA1_NoSFX(void); */ void S9xMainLoop() { +#ifdef USE_DYNAREC + if (Settings.EnableDynarec) + { + S9xMainLoop_Dyna(); + } + else +#endif if (Settings.SA1) { if (Settings.SuperFX) diff --git a/source/globals.c b/source/globals.c index f7cdb85..3ccbd96 100644 --- a/source/globals.c +++ b/source/globals.c @@ -16,8 +16,10 @@ char String[513]; +#ifndef USE_DYNAREC SICPU ICPU; SCPUState CPU; +#endif #ifndef USE_BLARGG_APU SAPU APU; @@ -35,9 +37,11 @@ SnesModel* Model = &M1SNES; int32_t OpAddress = 0; +#ifndef USE_DYNAREC CMemory Memory; uint8_t OpenBus = 0; +#endif FxInit_s SuperFX; diff --git a/source/ppu.c b/source/ppu.c index 1f5df0b..caf47ed 100644 --- a/source/ppu.c +++ b/source/ppu.c @@ -629,6 +629,11 @@ void S9xSetPPU(uint8_t Byte, uint16_t Address) case 0x217e: case 0x217f: #ifndef USE_BLARGG_APU +#ifdef USE_DYNAREC + if (Settings.EnableDynarec) + while (APU.Cycles <= CPU.Cycles) + APU_EXECUTE1(); +#endif Memory.FillRAM [Address] = Byte; IAPU.RAM [(Address & 3) + 0xf4] = 
Byte; IAPU.APUExecuting = Settings.APUEnabled; @@ -889,6 +894,10 @@ uint8_t S9xGetPPU(uint16_t Address) case 0x217e: case 0x217f: #ifndef USE_BLARGG_APU +#ifdef USE_DYNAREC + if (Settings.EnableDynarec) + APU_EXECUTE(); +#endif IAPU.APUExecuting = Settings.APUEnabled; IAPU.WaitCounter++; diff --git a/source/snes9x.h b/source/snes9x.h index 6dd2e8f..9ce602a 100644 --- a/source/snes9x.h +++ b/source/snes9x.h @@ -214,6 +214,7 @@ typedef struct bool SecondJustifier; int8_t SETA; bool HardDisableAudio; + bool EnableDynarec; } SSettings; extern SSettings Settings; |