@ ----------------------------------------------------------------------
@ source/arm_dynarec/armfn.S
@
@ Hand-written ARM (32-bit, GNU as + C preprocessor) glue routines used
@ by the experimental ARM dynarec.
@
@ Provenance (kept from the patch this text was extracted from):
@   commit  ce34e879e348cecd4e21329be5974cc4162fa6c4
@   author  neonloop
@   date    Wed, 9 Feb 2022 07:15:14 +0000
@   subject Adds experimental ARM dynarec
@           Supports ARMv5 and higher, enable with USE_DYNAREC Makefile
@           variable
@   diff    new file source/arm_dynarec/armfn.S, 501 insertions,
@           index 0000000..c5d1b3f, rendered by cgit v1.2.3
@
@ Conventions visible in this file:
@   - Emulated CPU state is cached in r4-r10 while generated code runs
@     and is spilled to / reloaded from the CPU and ICPU structures by
@     S9xRegsToMem / S9xMemToRegs.
@   - ip (r12) caches CPU.Flags OR-ed with (CPU.NextEvent shifted left
@     by 16); see the LoadChecks macro.
@   - r2 is used as a temporary by most helpers and is generally not
@     preserved.
@   - All structure offsets below are hard-coded and must match the C
@     structure layouts.  NOTE(review): verify against the C headers.
@ ----------------------------------------------------------------------

@ Register aliases for the cached emulated-CPU state.
#define RegA        r4
#define RegX        r5
#define RegY        r6
#define RegP        r7
#define RegCPU      r8
#define RegCPUPC    r9
#define RegCycles   r10
#define RegChecks   ip

#ifndef NDEBUG
@ Byte offsets of the counters inside the external Metrics object.
#define METRICS_GETSETS_OFF         28
#define METRICS_SLOWGETSETS_OFF     32
#endif

@ Byte offsets relative to the CPU symbol (base register RegCPU).
#define CPU_FLAGS_OFF                   0
#define CPU_PC_OFF                      12
#define CPU_PCBASE_OFF                  16
#define CPU_PC_AT_OPCODE_START_OFF      20
#define CPU_WAIT_ADDRESS_OFF            24
#define CPU_CYCLES_OFF                  32
#define CPU_NEXT_EVENT_OFF              36
#define CPU_ICPU_OFF                    0x80
#define CPU_MEMORY_MAP_OFF              0x100

@ ICPU fields, also addressed relative to RegCPU (ICPU sits 0x80 bytes
@ after CPU in the .bss layout at the end of this file).
#define ICPU_S9X_OPCODES_OFF    (CPU_ICPU_OFF + 4)
#define ICPU_REG_PB_OFF         (CPU_ICPU_OFF + 8)
#define ICPU_REG_DB_OFF         (CPU_ICPU_OFF + 9)
#define ICPU_REG_P_OFF          (CPU_ICPU_OFF + 10)
#define ICPU_REG_A_OFF          (CPU_ICPU_OFF + 12)
#define ICPU_REG_D_OFF          (CPU_ICPU_OFF + 14)
#define ICPU_REG_S_OFF          (CPU_ICPU_OFF + 16)
#define ICPU_REG_X_OFF          (CPU_ICPU_OFF + 18)
#define ICPU_REG_Y_OFF          (CPU_ICPU_OFF + 20)
#define ICPU_REG_PC_OFF         (CPU_ICPU_OFF + 22)
#define ICPU_CARRY_OFF          (CPU_ICPU_OFF + 24)
#define ICPU_ZERO_OFF           (CPU_ICPU_OFF + 25)
#define ICPU_NEGATIVE_OFF       (CPU_ICPU_OFF + 26)
#define ICPU_OVERFLOW_OFF       (CPU_ICPU_OFF + 27)

@ Offsets inside the Memory object.  The first group is relative to the
@ read map (RegCPU + CPU_MEMORY_MAP_OFF), the MEMORY_WRITE_MAP_* group is
@ relative to the write map.
#define MEMORY_MAP_OFF                      0x0024
#define MEMORY_MAP_WRITE_MAP_OFF            0x4000
#define MEMORY_MAP_SPEED_OFF                0x8000
#define MEMORY_MAP_BLOCK_IS_RAM_OFF         0x9000
#define MEMORY_WRITE_MAP_SPEED_OFF          0x4000
#define MEMORY_WRITE_MAP_BLOCK_IS_RAM_OFF   0x5000

@ Byte offsets relative to the external SA1 symbol.
#define SA1_OPCODES_OFF                 0
#define SA1_EXECUTING_OFF               24
#define SA1_WAIT_COUNTER_OFF            52
#define SA1_WAIT_BYTE_ADDRESS_1_OFF     56
#define SA1_WAIT_BYTE_ADDRESS_2_OFF     60

@ Bits tested in RegP.
#define FLAG_DECIMAL    0x08
#define FLAG_INDEX      0x10
#define FLAG_MEMORY     0x20
#define FLAG_EMULATION  0x100

@ Declares a global label.
#define FUNC(name) \
    .global name ; \
name:

@ Body for a wrapper that calls func with the cached registers spilled
@ to memory and reloaded afterwards.  ip is restored from the stack on
@ exit, so the RegChecks value computed inside S9xMemToRegs is replaced
@ by the value ip had on entry.
.macro S9xCall func
    push    { ip, lr }
    bl      S9xRegsToMem
    bl      \func
    bl      S9xMemToRegs
    pop     { ip, pc }
.endm

    .text
    .align 2

#ifndef NDEBUG
@ Increments the 32-bit counter at byte offset off inside Metrics.
@ r0 and r1 are saved and restored around the update.
.macro MetricsIncOffset off
    push    { r0, r1 }
    ldr     r0, = Metrics
    ldr     r1, [r0, #\off]
    add     r1, r1, #1
    str     r1, [r0, #\off]
    pop     { r0, r1 }
.endm

@ Empty function that returns immediately (debug builds only).
@ NOTE(review): presumably a breakpoint anchor -- confirm.
FUNC(DynaBreak)
    bx      lr
#endif

@ Entry into generated code.
@ In:  r0 = address to jump to.
@ Saves r4-r11, ip and lr, points RegCPU at CPU, loads the cached
@ registers and branches to r0.  The saved registers are popped again
@ by BlockReturn.
FUNC(BlockEnter)
    push    { r4-r11, ip, lr }
    ldr     RegCPU, = CPU
    bl      S9xMemToRegs                @ keeps r0 intact
    bx      r0

@ Exit from generated code: spills the cached registers and pops the
@ frame pushed by BlockEnter, returning to the caller of BlockEnter.
@ Assumes the stack pointer is where BlockEnter left it.
FUNC(BlockReturn)
    bl      S9xRegsToMem
    pop     { r4-r11, ip, pc }

@ RegChecks = CPU.Flags | (CPU.NextEvent lsl 16).  Clobbers rs.
.macro LoadChecks rs
    ldr     \rs, [RegCPU, #CPU_NEXT_EVENT_OFF]
    ldr     RegChecks, [RegCPU, #CPU_FLAGS_OFF]
    orr     RegChecks, RegChecks, \rs, lsl #16
.endm

@ Writes the cached PC and cycle counter back to the CPU structure.
.macro PushCPUState
    str     RegCPUPC, [RegCPU, #CPU_PC_OFF]
    str     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
.endm

@ Reloads PC, cycle counter and RegChecks from the CPU structure.
@ Clobbers rs.
.macro PopCPUState rs
    ldr     RegCPUPC, [RegCPU, #CPU_PC_OFF]
    ldr     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    LoadChecks \rs
.endm

@ Stores RegP to ICPU.Registers.P and splits its low bits into the
@ separate ICPU flag bytes.  Clobbers r2 and r3.
.macro S9xUnpackStatusFast
    strh    RegP, [RegCPU, #ICPU_REG_P_OFF]
    mov     r3, #1
    and     r2, r3, RegP, lsr #1        @ bit 1 of P
    sub     r2, r2, #1                  @ bit set gives 0, clear gives non-zero
    strb    r2, [RegCPU, #ICPU_ZERO_OFF]
    and     r2, RegP, #0x80             @ bit 7 of P, kept in place
    strb    r2, [RegCPU, #ICPU_NEGATIVE_OFF]
    and     r2, RegP, r3                @ bit 0 of P
    strb    r2, [RegCPU, #ICPU_CARRY_OFF]
    and     r2, r3, RegP, lsr #6        @ bit 6 of P
    strb    r2, [RegCPU, #ICPU_OVERFLOW_OFF]
.endm

@ Inverse of S9xUnpackStatusFast: rebuilds bits 0, 1, 6 and 7 of P from
@ the separate ICPU flag bytes, leaves the result in RegP and stores it
@ back to ICPU.Registers.P.  Clobbers r2, r3 and the condition flags.
@ Loads are interleaved with the flag tests; keep the order.
.macro S9xPackStatusFast
    ldrh    RegP, [RegCPU, #ICPU_REG_P_OFF]
    ldrb    r2, [RegCPU, #ICPU_ZERO_OFF]
    ldrb    r3, [RegCPU, #ICPU_NEGATIVE_OFF]
    bic     RegP, RegP, #0xc3
    cmp     r2, #0
    ldrb    r2, [RegCPU, #ICPU_CARRY_OFF]
    orreq   RegP, RegP, #0x2            @ zero byte == 0 sets bit 1
    tst     r3, #0x80
    ldrb    r3, [RegCPU, #ICPU_OVERFLOW_OFF]
    orrne   RegP, RegP, #0x80
    orr     RegP, RegP, r2
    orr     RegP, RegP, r3, lsl #6
    strh    RegP, [RegCPU, #ICPU_REG_P_OFF]
.endm

    @@ Callers assume r0 is not touched (r2 and r3 are clobbered)
FUNC(S9xCallUnpackStatusFast)
    S9xUnpackStatusFast
    bx      lr

    @@ Spills PC, cycles, A, X, Y and P (with unpacked flags) to memory.
    @@ Preserves r0-r3 and ip, can be used as a wrapper
FUNC(S9xRegsToMem)
    push    { r0, r1, r2, r3, ip, lr }
    str     RegCPUPC, [RegCPU, #CPU_PC_OFF]
    str     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    strh    RegA, [RegCPU, #ICPU_REG_A_OFF]
    strh    RegX, [RegCPU, #ICPU_REG_X_OFF]
    strh    RegY, [RegCPU, #ICPU_REG_Y_OFF]
    S9xUnpackStatusFast
    pop     { r0, r1, r2, r3, ip, pc }


    @@ Reloads PC, cycles, P (packed from the flag bytes), A, X, Y and
    @@ RegChecks from memory.
    @@ Preserves r0, r1 and r3, can be used as a wrapper.  r2 and ip are
    @@ overwritten by LoadChecks after the pop.
FUNC(S9xMemToRegs)
    push    { r0, r1, r2, r3, ip, lr }
    ldr     RegCPUPC, [RegCPU, #CPU_PC_OFF]
    ldr     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    S9xPackStatusFast
    ldrh    RegA, [RegCPU, #ICPU_REG_A_OFF]
    ldrh    RegX, [RegCPU, #ICPU_REG_X_OFF]
    ldrh    RegY, [RegCPU, #ICPU_REG_Y_OFF]
    pop     { r0, r1, r2, r3, ip, lr }
    LoadChecks r2
    bx      lr

@ Recomputes CacheSection from the state stored in memory.
@ Loads RegCPU and RegP itself and restores the previous contents of
@ both registers on exit.  Clobbers r0 and r1 (via S9xCallUpdateSection).
FUNC(UpdateSection)
    push    { RegP, RegCPU, lr }
    ldr     RegCPU, =CPU
    ldrh    RegP, [RegCPU, #ICPU_REG_P_OFF]
    bl      S9xCallUpdateSection
    pop     { RegP, RegCPU, pc }

@ Computes the section key and stores it to CacheSection.
@ In:  RegCPU = CPU base, RegP = status word.
@ Out: r0 = key, also written to CacheSection.  Clobbers r1 and flags.
@ Key layout:
@   bits 4-11  ICPU.Registers.PB
@   0x8000     FLAG_DECIMAL set
@   0x4000     FLAG_EMULATION set (index/memory bits are then left 0)
@   0x2000     FLAG_MEMORY set
@   0x1000     FLAG_INDEX set
FUNC(S9xCallUpdateSection)
    ldr     r1, =CacheSection
    ldrb    r0, [RegCPU, #ICPU_REG_PB_OFF]
    tst     RegP, #FLAG_DECIMAL
    lsl     r0, #4                      @ does not alter the flags from tst
    orrne   r0, r0, #0x8000
    tst     RegP, #FLAG_EMULATION
    orrne   r0, r0, #0x4000
    strne   r0, [r1]                    @ publish the key on the early exit
                                        @ too, so CacheSection is never stale
    bxne    lr
    tst     RegP, #FLAG_INDEX
    orrne   r0, r0, #0x1000
    tst     RegP, #FLAG_MEMORY
    orrne   r0, r0, #0x2000
    str     r0, [r1]
    bx      lr

@ Callable form of the LoadChecks macro.  Clobbers r2.
FUNC(LoadChecks)
    LoadChecks r2
    bx      lr

@ Calls S9xSA1MainLoop while keeping ip (RegChecks) intact.
FUNC(S9xCallSA1MainLoop)
    push    { ip, lr }
    bl      S9xSA1MainLoop
    pop     { ip, pc }

@ Calls S9xSetPCBase while keeping ip (RegChecks) intact.
@ Arguments and result are passed through unchanged.
FUNC(S9xCallSetPCBase)
    push    { ip, lr }
    bl      S9xSetPCBase
    pop     { ip, pc }

@ Generates S9xCallHandleHBlankSFX / S9xCallHandleHBlankNoSFX.
@ Spills PC and cycles, calls the matching HandleHBlank routine, then
@ reloads PC, cycles and RegChecks.  r0 is whatever the callee returned;
@ r2 is clobbered.
.macro S9xCallHandleHBlank type
FUNC(S9xCallHandleHBlank\type)
    @ changes cycles, maybe PC in IRQ
    push    { ip, lr }
    str     RegCPUPC, [RegCPU, #CPU_PC_OFF]
    str     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    bl      HandleHBlank\type
    ldr     RegCPUPC, [RegCPU, #CPU_PC_OFF]
    ldr     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    pop     { ip, lr }
    LoadChecks r2
    bx      lr
.endm

S9xCallHandleHBlank SFX
S9xCallHandleHBlank NoSFX

@ Generates S9xCallHandleFlagsWFI / S9xCallHandleFlagsNoWFI.
@ Same shape as the HBlank wrapper, but P is also unpacked before and
@ repacked after the call.  r0 is whatever the callee returned; r2 and
@ r3 are clobbered.
.macro S9xCallHandleFlags WFI
FUNC(S9xCallHandleFlags\WFI)
    @ needs S, changes flags (unpack status needed), changes PC, changes cycles
    push    { ip, lr }
    str     RegCPUPC, [RegCPU, #CPU_PC_OFF]
    str     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    S9xUnpackStatusFast
    bl      HandleFlags\WFI
    ldr     RegCPUPC, [RegCPU, #CPU_PC_OFF]
    ldr     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    S9xPackStatusFast
    pop     { ip, lr }
    LoadChecks r2
    bx      lr
.endm

S9xCallHandleFlags WFI
S9xCallHandleFlags NoWFI

@ Generates the four S9xCallHandleChecks variants.
@ 1. If RegCycles is at or past the event threshold held in the upper
@    half of RegChecks (unsigned compare), run the HBlank handler.  A
@    non-zero result in r0 leaves generated code through BlockReturn.
@ 2. If RegChecks has any of the bits 0x880 set, run the flags handler.
@    A non-zero result becomes the new RegCPUPC and control continues
@    in JumpIndirect.
@    NOTE(review): 0x880 presumably selects the NMI and IRQ-pending
@    bits of CPU.Flags -- confirm against the C definitions.
@ 3. Otherwise return to the caller with r1 preserved.
.macro S9xCallHandleChecks WFI, SFX
FUNC(S9xCallHandleChecks\WFI\SFX)
    push    { r1, lr }
    mov     r0, #0                      @ stays 0 when no handler runs
    cmp     RegCycles, RegChecks, lsr #16
    blhs    S9xCallHandleHBlank\SFX
    cmp     r0, #0
    popne   { r1, lr }
    bne     BlockReturn

    tst     RegChecks, #0x880
    blne    S9xCallHandleFlags\WFI
    cmp     r0, #0
    movne   RegCPUPC, r0
    popne   { r1, lr }
    bne     JumpIndirect

    pop     { r1, pc }
.endm

S9xCallHandleChecks WFI, SFX
S9xCallHandleChecks WFI, NoSFX
S9xCallHandleChecks NoWFI, SFX
S9xCallHandleChecks NoWFI, NoSFX

#ifndef NDEBUG
@ Debug-only wrapper around CheckInstruction (see the S9xCall macro).
FUNC(S9xCallCheckInstruction)
    S9xCall CheckInstruction
#endif

@ Looks up the block for the current PC and jumps into it.
@ Calls FetchBlock with r0 = RegCPUPC - CPU.PCBase, then branches to
@ the address stored in the first word of the returned object.
@ Does not return to its caller; lr is overwritten by the call.
@ r1 and ip are preserved across the lookup.
FUNC(JumpIndirect)
    push    { r1, ip }
    ldr     r1, [RegCPU, #CPU_PCBASE_OFF]
    sub     r0, RegCPUPC, r1
    bl      FetchBlock
    pop     { r1, ip }
    ldr     r0, [r0]
    bx      r0

@ Calls PatchJumpDirectChecks with r0 = RegCPUPC and r1 = lr - 4, then
@ branches to the address it returns.  r1 and ip are preserved.
FUNC(JumpDirectChecks)
    push    { r1, ip }
    mov     r0, RegCPUPC
    sub     r1, lr, #4                  @ Patch the opcode at the address of this caller
    bl      PatchJumpDirectChecks
    pop     { r1, ip }
    bx      r0

@ Same as JumpDirectChecks but calls PatchJumpDirect.
FUNC(JumpDirect)
    push    { r1, ip }
    mov     r0, RegCPUPC
    sub     r1, lr, #4                  @ Patch the opcode at the address of this caller
    bl      PatchJumpDirect
    pop     { r1, ip }
    bx      r0

@ Generates S9xGetByteFast / S9xGetWordFast.
@ In:   r0 = address, r1 = value recorded as PC-at-opcode-start.
@ Out:  r0 = byte or little-endian word that was read,
@       r1 = the address that was passed in r0 (popped from its slot),
@       r3 preserved, r2 clobbered.
@ Fast path: the map entry for the block is a real pointer (not below
@ 18).  The value is read directly, the block speed is added to
@ RegCycles (twice for a word) and ip is restored.
@ Slow path (label 9): the map entry is a special value, or a word read
@ touches the last byte of a 0x1000 block.  State is spilled, the
@ S9xGetByte / S9xGetWord routine is called, and PC, cycles and
@ RegChecks are reloaded from memory.
.macro GetMemFast Type
FUNC(S9xGet\Type\()Fast)
    push    { r0, r3, ip, lr }
#ifndef NDEBUG
    MetricsIncOffset METRICS_GETSETS_OFF
#endif
    ldr     r2, =0x0fff                 @ MEMMAP_MASK
    .ifc \Type,Word
    and     r3, r0, r2
    cmp     r3, r2
    beq     9f
    .endif

    add     ip, RegCPU, #CPU_MEMORY_MAP_OFF
    and     r3, r2, r0, lsr #12         @ MEMMAP_SHIFT
    ldr     r2, [ip, r3, lsl #2]
    add     lr, ip, #MEMORY_MAP_BLOCK_IS_RAM_OFF
    cmp     r2, #18                     @ MAP_LAST
    blo     9f

    ldrb    lr, [lr, r3]
    add     ip, ip, #MEMORY_MAP_SPEED_OFF
    ldrb    ip, [ip, r3]
    cmp     lr, #1                      @ block flagged as RAM
    streq   r1, [RegCPU, #CPU_WAIT_ADDRESS_OFF]

    mov     r1, r0, lsl #16             @ r1 = address and 0xffff
    mov     r1, r1, lsr #16
    ldrb    r0, [r2, r1]
    .ifc \Type,Word
    add     r1, r1, #1
    ldrb    r1, [r2, r1]
    .endif
    .ifc \Type,Byte
    add     RegCycles, RegCycles, ip
    .else
    add     RegCycles, RegCycles, ip, lsl #1 @ * 2 for GetWord
    .endif
    .ifc \Type,Word
    orr     r0, r0, r1, lsl #8
    .endif
    pop     { r1, r3, ip, pc }
9:
#ifndef NDEBUG
    MetricsIncOffset METRICS_SLOWGETSETS_OFF
#endif
    str     r1, [RegCPU, #CPU_PC_AT_OPCODE_START_OFF]
    PushCPUState
    bl      S9xGet\Type
    pop     { r1, r3, ip, lr }
    PopCPUState r2
    bx      lr
.endm

GetMemFast Byte
GetMemFast Word

@ Generates S9xSetByteFast / S9xSetWordFast in NoSA1 and SA1 flavours.
@ In:   r0 = value, r1 = address.
@ Out:  r1 and r3 preserved, r2 clobbered; r0 is not preserved.
@ Fast path: CPU.WaitAddress is cleared, the write-map entry is a real
@ pointer (not below 18), the block speed is added to RegCycles (twice
@ for a word) and the value is stored directly.  The SA1 flavour first
@ compares the target pointer with the two SA1 wait-byte addresses and,
@ on a match (label 8), clears SA1.WaitCounter and sets SA1.Executing to
@ 1 when the word at SA1_OPCODES_OFF is non-zero, else 0.
@ Slow path (label 9): special map entry, or a word write touching the
@ last byte of a 0x1000 block.  State is spilled, the value is truncated
@ to its width, the S9xSetByte / S9xSetWord routine is called, and PC,
@ cycles and RegChecks are reloaded from memory.
.macro SetMemFast Type, SA1
FUNC(S9xSet\Type\()Fast\SA1)
    push    { r1, r3, ip, lr}
#ifndef NDEBUG
    MetricsIncOffset METRICS_GETSETS_OFF
#endif
    ldr     r2, =0x0fff                 @ MEMMAP_MASK
    mov     ip, #0
    .ifc \Type,Word
    and     r3, r1, r2
    cmp     r3, r2
    beq     9f
    .endif

    str     ip, [RegCPU, #CPU_WAIT_ADDRESS_OFF]

    and     r3, r2, r1, lsr #12         @ MEMMAP_SHIFT
    add     ip, RegCPU, #(CPU_MEMORY_MAP_OFF + MEMORY_MAP_WRITE_MAP_OFF)
    ldr     r2, [ip, r3, lsl #2]
    cmp     r2, #18                     @ MAP_LAST
    blo     9f

    add     ip, ip, #MEMORY_WRITE_MAP_SPEED_OFF
    ldrb    ip, [ip, r3]
    .ifc \Type,Byte
    add     RegCycles, RegCycles, ip
    .else
    add     RegCycles, RegCycles, ip, lsl #1 @ * 2 for SetWord
    .endif


    mov     r1, r1, lsl #16             @ r1 = address and 0xffff
    mov     r1, r1, lsr #16

    @@ Handle SA1 WaitAddress if needed
    .ifc \SA1,SA1
    push    { r1 }
    ldr     ip, =SA1
    add     r1, r1, r2                  @ host pointer being written
    ldr     r3, [ip, #SA1_WAIT_BYTE_ADDRESS_1_OFF]
    cmp     r3, r1
    beq     8f
    ldr     r3, [ip, #SA1_WAIT_BYTE_ADDRESS_2_OFF]
    cmp     r3, r1
    beq     8f
2:
    pop     { r1 }
    .endif

    strb    r0, [r2, r1]
    .ifc \Type,Word
    add     r1, r1, #1
    mov     r0, r0, lsr #8
    strb    r0, [r2, r1]
    .endif
    pop     { r1, r3, ip, pc }

    .ifc \SA1,SA1
8:
    ldr     r1, [ip, #SA1_OPCODES_OFF]
    mov     r3, #0
    str     r3, [ip, #SA1_WAIT_COUNTER_OFF]
    cmp     r1, #0
    movne   r3, #1
    strb    r3, [ip, #SA1_EXECUTING_OFF]
    b       2b
    .endif
9:
#ifndef NDEBUG
    MetricsIncOffset METRICS_SLOWGETSETS_OFF
#endif
    PushCPUState
    .ifc \Type,Word
    mov     r0, r0, lsl #16
    mov     r0, r0, lsr #16
    .else
    and     r0, r0, #0xFF
    .endif
    bl      S9xSet\Type
    pop     { r1, r3, ip, lr }
    PopCPUState r2
    bx      lr
.endm

SetMemFast Byte, NoSA1
SetMemFast Word, NoSA1

SetMemFast Byte, SA1
SetMemFast Word, SA1

@ Selects the opcode table matching the emulation / memory / index bits
@ of RegP and stores its address at ICPU_S9X_OPCODES_OFF.
@ Clobbers r0 and the condition flags.
FUNC(S9xCallFixCycles)
    tst     RegP, #FLAG_EMULATION
    ldrne   r0, =S9xOpcodesE1
    bne     9f

    tst     RegP, #FLAG_MEMORY
    beq     2f
    tst     RegP, #FLAG_INDEX
    ldrne   r0, =S9xOpcodesM1X1
    ldreq   r0, =S9xOpcodesM1X0
    b       9f
2:
    tst     RegP, #FLAG_INDEX
    ldrne   r0, =S9xOpcodesM0X1
    ldreq   r0, =S9xOpcodesM0X0
9:
    str     r0, [RegCPU, #ICPU_S9X_OPCODES_OFF]
    bx      lr

@ Calls DynaCPUShutdown with the cycle counter spilled to memory and
@ reloads it afterwards.  ip is preserved.
FUNC(S9xCallCPUShutdown)
    push    { ip, lr }
    str     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    bl      DynaCPUShutdown
    ldr     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    pop     { ip, pc }

@ Calls DynaWAIShutdown with the cycle counter spilled to memory and
@ reloads it afterwards.  ip is preserved.
FUNC(S9xCallWAIShutdown)
    push    { ip, lr }
    str     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    bl      DynaWAIShutdown
    ldr     RegCycles, [RegCPU, #CPU_CYCLES_OFF]
    pop     { ip, pc }

@ ----------------------------------------------------------------------
@ Storage for the emulator state objects.  They are laid out back to
@ back so that every field used above is reachable from the single base
@ register RegCPU:
@   CPU     at +0x00
@   ICPU    at +0x80   (CPU_ICPU_OFF)
@   OpenBus at +0xC0
@   Memory  at +0xDC, which puts the field at MEMORY_MAP_OFF (0x24)
@           inside it at +0x100 (CPU_MEMORY_MAP_OFF)
@ ----------------------------------------------------------------------
    .bss
    .align 4
    .global CPU
CPU:
    .space 0x80                         @ Actual: 0x50

    .global ICPU
ICPU:
    .space 0x40                         @ Actual: 0x34

    .global OpenBus
OpenBus:
    .space 0x1

    @ padding so Memory.Map is at 0x100
    .space 0x3F - 0x24

    .global Memory
Memory:
    .space 0xb468