From 09334d546982a354ccf73e64966191aac2d22083 Mon Sep 17 00:00:00 2001 From: Jonathan Teh Date: Tue, 22 Feb 2022 23:01:27 +0000 Subject: gfx: Use simpler equality test for IPPU.FrameCount As it's only incremented here and then reset to 0, avoids integer division, which can be expensive on CPUs without this instruction such as pre-ARMv7 classic ARMs. --- source/gfx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/gfx.c b/source/gfx.c index 85b58f3..5eac5b9 100644 --- a/source/gfx.c +++ b/source/gfx.c @@ -423,7 +423,7 @@ void S9xStartScreenRefresh(void) GFX.Delta = (GFX.SubScreen - GFX.Screen) >> 1; } - if (++IPPU.FrameCount % Memory.ROMFramesPerSecond == 0) + if (++IPPU.FrameCount == (uint32_t)Memory.ROMFramesPerSecond) IPPU.FrameCount = 0; } -- cgit v1.2.3 From 1e6f7927557cc0af5aead16b0579a6828d94b857 Mon Sep 17 00:00:00 2001 From: Jonathan Teh Date: Tue, 22 Feb 2022 23:28:44 +0000 Subject: dsp: Fix assignment of Op11Xr Fix regression introduced in snes9x-1.40. --- source/dsp1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dsp1.c b/source/dsp1.c index f8a0713..fc7720e 100644 --- a/source/dsp1.c +++ b/source/dsp1.c @@ -352,7 +352,7 @@ void DSP1SetByte(uint8_t byte, uint16_t address) Op11m = (int16_t)(DSP1.parameters [0] | (DSP1.parameters[1] << 8)); Op11Zr = (int16_t)(DSP1.parameters [2] | (DSP1.parameters[3] << 8)); Op11Yr = (int16_t)(DSP1.parameters [4] | (DSP1.parameters[5] << 8)); - Op11Xr = (int16_t)(DSP1.parameters [7] | (DSP1.parameters[7] << 8)); + Op11Xr = (int16_t)(DSP1.parameters [6] | (DSP1.parameters[7] << 8)); DSPOp11(); break; case 0x25: -- cgit v1.2.3 From f9698dddfebda59eff0331d9392f911de9ed7e4c Mon Sep 17 00:00:00 2001 From: Jonathan Teh Date: Tue, 22 Feb 2022 23:42:11 +0000 Subject: fx: Optimise copying registers on LE platforms Use {READ,WRITE}_WORD so that it copies by word on little-endian platforms. Mark a LUT const for const-correctness. 
--- source/fxemu.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/source/fxemu.c b/source/fxemu.c index 17102c9..6eef2fa 100644 --- a/source/fxemu.c +++ b/source/fxemu.c @@ -2,6 +2,7 @@ #include "fxemu.h" #include "fxinst.h" +#include "memmap.h" #include #include @@ -30,33 +31,27 @@ void fx_updateRamBank(uint8_t Byte) static INLINE void fx_readRegisterSpaceForCheck(void) { - R15 = GSU.pvRegisters[30]; - R15 |= ((uint32_t) GSU.pvRegisters[31]) << 8; + R15 = (uint32_t) READ_WORD(&GSU.pvRegisters[30]); } static void fx_readRegisterSpaceForUse(void) { - static uint32_t avHeight[] = { 128, 160, 192, 256 }; - static uint32_t avMult[] = { 16, 32, 32, 64 }; + static const uint32_t avHeight[] = { 128, 160, 192, 256 }; + static const uint32_t avMult[] = { 16, 32, 32, 64 }; int32_t i; uint8_t* p = GSU.pvRegisters; /* Update R0 - R14 */ - for (i = 0; i < 15; i++) - { - GSU.avReg[i] = *p++; - GSU.avReg[i] += ((uint32_t)(*p++)) << 8; - } + for (i = 0; i < 15; i++, p += 2) + GSU.avReg[i] = (uint32_t) READ_WORD(p); /* Update other registers */ p = GSU.pvRegisters; - GSU.vStatusReg = (uint32_t) GSU.pvRegisters[GSU_SFR]; - GSU.vStatusReg |= ((uint32_t) GSU.pvRegisters[GSU_SFR + 1]) << 8; + GSU.vStatusReg = (uint32_t) READ_WORD(&GSU.pvRegisters[GSU_SFR]); GSU.vPrgBankReg = (uint32_t) GSU.pvRegisters[GSU_PBR]; GSU.vRomBankReg = (uint32_t)p[GSU_ROMBR]; GSU.vRamBankReg = ((uint32_t)p[GSU_RAMBR]) & (FX_RAM_BANKS - 1); - GSU.vCacheBaseReg = (uint32_t)p[GSU_CBR]; - GSU.vCacheBaseReg |= ((uint32_t)p[GSU_CBR + 1]) << 8; + GSU.vCacheBaseReg = (uint32_t) READ_WORD(&p[GSU_CBR]); /* Update status register variables */ GSU.vZero = !(GSU.vStatusReg & FLG_Z); @@ -146,19 +141,15 @@ void fx_computeScreenPointers(void) static INLINE void fx_writeRegisterSpaceAfterCheck(void) { - GSU.pvRegisters[30] = (uint8_t) R15; - GSU.pvRegisters[31] = (uint8_t) (R15 >> 8); + WRITE_WORD(&GSU.pvRegisters[30], R15); } static void 
fx_writeRegisterSpaceAfterUse(void) { int32_t i; uint8_t* p = GSU.pvRegisters; - for (i = 0; i < 15; i++) - { - *p++ = (uint8_t)GSU.avReg[i]; - *p++ = (uint8_t)(GSU.avReg[i] >> 8); - } + for (i = 0; i < 15; i++, p += 2) + WRITE_WORD(p, GSU.avReg[i]); /* Update status register */ if (USEX16(GSU.vZero) == 0) @@ -179,13 +170,11 @@ static void fx_writeRegisterSpaceAfterUse(void) CF(CY); p = GSU.pvRegisters; - p[GSU_SFR] = (uint8_t) GSU.vStatusReg; - p[GSU_SFR + 1] = (uint8_t) (GSU.vStatusReg >> 8); + WRITE_WORD(&p[GSU_SFR], GSU.vStatusReg); p[GSU_PBR] = (uint8_t) GSU.vPrgBankReg; p[GSU_ROMBR] = (uint8_t)GSU.vRomBankReg; p[GSU_RAMBR] = (uint8_t)GSU.vRamBankReg; - p[GSU_CBR] = (uint8_t)GSU.vCacheBaseReg; - p[GSU_CBR + 1] = (uint8_t)(GSU.vCacheBaseReg >> 8); + WRITE_WORD(&p[GSU_CBR], GSU.vCacheBaseReg); } /* Reset the FxChip */ -- cgit v1.2.3 From a8f26919a2ecb204d6d5f120709c52b6cd320835 Mon Sep 17 00:00:00 2001 From: Jonathan Teh Date: Tue, 22 Feb 2022 23:57:22 +0000 Subject: dsp1: Use clz CPU instruction when normalising fixed-point numbers Use the gcc built-in function, making the operation constant-time and branch-free. --- source/dsp1emu.c | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/source/dsp1emu.c b/source/dsp1emu.c index 76df362..d83e078 100644 --- a/source/dsp1emu.c +++ b/source/dsp1emu.c @@ -187,11 +187,19 @@ void DSP1_Inverse(int16_t Coefficient, int16_t Exponent, int16_t* iCoefficient, } /* Step Three: Normalize */ +#ifdef __GNUC__ + { + const int shift = __builtin_clz(Coefficient) - (8 * sizeof(int) - 15); + Coefficient <<= shift; + Exponent -= shift; + } +#else while (Coefficient < 0x4000) { Coefficient <<= 1; Exponent--; } +#endif /* Step Four: Special Case */ if (Coefficient == 0x4000) @@ -336,9 +344,18 @@ int16_t DSP1_Cos(int16_t Angle) void DSP1_Normalize(int16_t m, int16_t* Coefficient, int16_t* Exponent) { - int16_t i = 0x4000; int16_t e = 0; +#ifdef __GNUC__ + int16_t n = m < 0 ? 
~m : m; + + if (n == 0) + e = 15; + else + e = __builtin_clz(n) - (8 * sizeof(int) - 15); +#else + int16_t i = 0x4000; + if (m < 0) { while ((m & i) && i) @@ -355,6 +372,7 @@ void DSP1_Normalize(int16_t m, int16_t* Coefficient, int16_t* Exponent) e++; } } +#endif if (e > 0) *Coefficient = m * DSP1ROM[0x21 + e] << 1; @@ -368,9 +386,18 @@ void DSP1_NormalizeDouble(int32_t Product, int16_t* Coefficient, int16_t* Expone { int16_t n = Product & 0x7fff; int16_t m = Product >> 15; - int16_t i = 0x4000; int16_t e = 0; +#ifdef __GNUC__ + int16_t t = m < 0 ? ~m : m; + + if (t == 0) + e = 15; + else + e = __builtin_clz(t) - (8 * sizeof(int) - 15); +#else + int16_t i = 0x4000; + if (m < 0) { while ((m & i) && i) @@ -387,6 +414,7 @@ void DSP1_NormalizeDouble(int32_t Product, int16_t* Coefficient, int16_t* Expone e++; } } +#endif if (e > 0) { @@ -396,6 +424,14 @@ void DSP1_NormalizeDouble(int32_t Product, int16_t* Coefficient, int16_t* Expone *Coefficient += n * DSP1ROM[0x0040 - e] >> 15; else { +#ifdef __GNUC__ + t = m < 0 ? ~(n | 0x8000) : n; + + if (t == 0) + e += 15; + else + e += __builtin_clz(t) - (8 * sizeof(int) - 15); +#else i = 0x4000; if (m < 0) @@ -414,6 +450,7 @@ void DSP1_NormalizeDouble(int32_t Product, int16_t* Coefficient, int16_t* Expone e++; } } +#endif if (e > 15) *Coefficient = n * DSP1ROM[0x0012 + e] << 1; -- cgit v1.2.3