From cd20f918cf1f7c7b3f63b61d64891d3b03cbe340 Mon Sep 17 00:00:00 2001
From: neonloop
Date: Fri, 12 Mar 2021 17:23:02 +0000
Subject: Add FAST_ALIGNED_LSB_WORD_ACCESS optimization

From https://github.com/m45t3r/snes9x4d/commit/cf6dedb11f1062588d8c747d13c59e7c1d2fa233

When FAST_ALIGNED_LSB_WORD_ACCESS is defined:

- cpuaddr.h: AbsoluteLong() and AbsoluteLongIndexedX() assemble the
  24-bit operand from one 16-bit load plus one byte load. The 16-bit
  load is taken at CPU.PC when that address is even and at CPU.PC + 1
  when it is odd, so it is always 2-byte aligned. The result matches
  the byte-wise fallback only on a little-endian host.
- ppu.c: the multiply result (0x4216), the divide quotient and
  remainder (0x4214, 0x4216) are stored, and the dividend (0x4204) is
  loaded, with single 16-bit accesses. This relies on Memory.FillRAM
  being at least 2-byte aligned.

Fixes relative to the upstream commit:

- "#elsif" is not a preprocessor directive, so the new cpuaddr.h branch
  could never be selected; use "#elif defined(...)".
- AbsoluteLongIndexedX() used "reg->X.W"; use "Registers.X.W" like the
  other two branches of the same conditional.
- The address parity test casts the pointer to uintptr_t instead of
  int32_t.
---
Notes (not part of the commit message):
- The "index" hashes below are those of the original patch; the
  post-image hash of src/cpuaddr.h no longer matches the edited lines.
- Context-line indentation was reconstructed from a whitespace-collapsed
  copy; apply with whitespace-insensitive matching
  (e.g. git apply --ignore-whitespace) and verify against the tree.

 src/cpuaddr.h | 10 ++++++++++
 src/ppu.c     | 17 ++++++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/cpuaddr.h b/src/cpuaddr.h
index d68a328..a9fc85a 100644
--- a/src/cpuaddr.h
+++ b/src/cpuaddr.h
@@ -145,6 +145,11 @@ static INLINE void AbsoluteLong()
 {
 #ifdef FAST_LSB_WORD_ACCESS
 	OpAddress = (*(uint32*) CPU.PC) & 0xffffff;
+#elif defined(FAST_ALIGNED_LSB_WORD_ACCESS)
+	if (((uintptr_t) CPU.PC & 1) == 0) /* even PC: 16-bit load at PC */
+		OpAddress = (*(uint16_t*) CPU.PC) + (CPU.PC[2] << 16);
+	else /* odd PC: 16-bit load at PC + 1 */
+		OpAddress = *CPU.PC + ((*(uint16_t*) (CPU.PC + 1)) << 8);
 #else
 	OpAddress = *CPU.PC + (*(CPU.PC + 1) << 8) + (*(CPU.PC + 2) << 16);
 #endif
@@ -272,6 +277,11 @@ static INLINE void AbsoluteLongIndexedX()
 {
 #ifdef FAST_LSB_WORD_ACCESS
 	OpAddress = (*(uint32*) CPU.PC + Registers.X.W) & 0xffffff;
+#elif defined(FAST_ALIGNED_LSB_WORD_ACCESS)
+	if (((uintptr_t) CPU.PC & 1) == 0) /* even PC: 16-bit load at PC */
+		OpAddress = ((*(uint16_t*) CPU.PC) + (CPU.PC[2] << 16) + Registers.X.W) & 0xFFFFFF;
+	else /* odd PC: 16-bit load at PC + 1 */
+		OpAddress = (*CPU.PC + ((*(uint16_t*) (CPU.PC + 1)) << 8) + Registers.X.W) & 0xFFFFFF;
 #else
 	OpAddress = (*CPU.PC + (*(CPU.PC + 1) << 8) + (*(CPU.PC + 2) << 16) + Registers.X.W) & 0xffffff;
 #endif
diff --git a/src/ppu.c b/src/ppu.c
index 31d1388..adc7046 100644
--- a/src/ppu.c
+++ b/src/ppu.c
@@ -257,9 +257,13 @@ void S9xSetCPU(uint8 byte, uint16 Address)
 			{
 				// Multiplicand
 				uint32 res = Memory.FillRAM[0x4202] * byte;
-
+#if defined FAST_LSB_WORD_ACCESS || defined FAST_ALIGNED_LSB_WORD_ACCESS
+				/* assume malloc'd memory is 2-byte aligned */
+				*((uint16 *)&Memory.FillRAM[0x4216]) = res;
+#else
 				Memory.FillRAM[0x4216] = (uint8) res;
 				Memory.FillRAM[0x4217] = (uint8)(res >> 8);
+#endif
 				break;
 			}
 		case 0x4204 :
@@ -269,15 +273,26 @@ void S9xSetCPU(uint8 byte, uint16 Address)
 		case 0x4206 :
 			{
 				// Divisor
+#if defined FAST_LSB_WORD_ACCESS || defined FAST_ALIGNED_LSB_WORD_ACCESS
+				/* assume malloc'd memory is 2-byte aligned */
+				uint16 a = *((uint16 *)&Memory.FillRAM[0x4204]);
+#else
 				uint16 a =
 					Memory.FillRAM[0x4204] + (Memory.FillRAM[0x4205] << 8);
+#endif
 				uint16 div = byte ? a / byte : 0xffff;
 				uint16 rem = byte ? a % byte : a;
 
+#if defined FAST_LSB_WORD_ACCESS || defined FAST_ALIGNED_LSB_WORD_ACCESS
+				/* assume malloc'd memory is 2-byte aligned */
+				*((uint16 *)&Memory.FillRAM[0x4214]) = div;
+				*((uint16 *)&Memory.FillRAM[0x4216]) = rem;
+#else
 				Memory.FillRAM[0x4214] = (uint8) div;
 				Memory.FillRAM[0x4215] = div >> 8;
 				Memory.FillRAM[0x4216] = (uint8) rem;
 				Memory.FillRAM[0x4217] = rem >> 8;
+#endif
 				break;
 			}
 		case 0x4207 :
-- 
cgit v1.2.3