From f385752705de73b04cbbda735a71f14c19e241a6 Mon Sep 17 00:00:00 2001 From: Nebuleon Fumika Date: Fri, 1 Feb 2013 00:33:30 -0500 Subject: memcpy vs memmove: memmove correctly handles overlapping source and destination memory buffers, but is slower than memcpy in many implementations. When memory buffers don't overlap, memcpy may be more efficient. The DS2 SDK is such an implementation, so change many memmoves into memcpys. --- source/gfx.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'source/gfx.cpp') diff --git a/source/gfx.cpp b/source/gfx.cpp index d2b8b6e..1b429c6 100644 --- a/source/gfx.cpp +++ b/source/gfx.cpp @@ -154,10 +154,12 @@ extern uint8 Mode7Depths [2]; if (IPPU.DoubleHeightPixels && ((PPU.BGMode != 5 && PPU.BGMode != 6) || !IPPU.Interlace)) \ for (uint32 y = GFX.StartY; y <= GFX.EndY; y++) \ { \ - memmove (SCREEN + (y * 2 + 1) * GFX.Pitch2, \ - SCREEN + y * 2 * GFX.Pitch2, \ - GFX.Pitch2); \ + /* memmove converted: Same malloc, non-overlapping addresses [Neb] */ \ + memcpy (SCREEN + (y * 2 + 1) * GFX.Pitch2, \ + SCREEN + y * 2 * GFX.Pitch2, \ + GFX.Pitch2); \ if(DO_DEPTH){ \ + /* memmove required: Same malloc, potentially overlapping addresses [Neb] */ \ memmove (DEPTH + (y * 2 + 1) * (GFX.PPLx2>>1), \ DEPTH + y * GFX.PPL, \ GFX.PPLx2>>1); \ @@ -1207,6 +1209,7 @@ static void DrawOBJS (bool8 OnMain = FALSE, uint8 D = 0) if(j<i && Windows[j].Pos==GFX.pCurrentClip->Left[clip][4]){ Windows[j].Value = TRUE; } else { + // memmove required: Overlapping addresses [Neb] if(j<i) memmove(&Windows[j+1], &Windows[j], sizeof(Windows[0])*(i-j)); Windows[j].Pos = GFX.pCurrentClip->Left[clip][4]; Windows[j].Value = TRUE; @@ -1214,6 +1217,7 @@ static void DrawOBJS (bool8 OnMain = FALSE, uint8 D = 0) } for(j=0; j<i && Windows[j].Pos<GFX.pCurrentClip->Right[clip][4]; j++); if(j>=i || Windows[j].Pos!=GFX.pCurrentClip->Right[clip][4]){ + // memmove required: Overlapping addresses [Neb] if(j<i) memmove(&Windows[j+1], &Windows[j], sizeof(Windows[0])*(i-j)); Windows[j].Pos = GFX.pCurrentClip->Right[clip][4]; Windows[j].Value = FALSE; @@ -3723,10 +3727,13 @@ void S9xUpdateScreen () // part way down the screen. Scale everything. 
for (register int32 y = (int32) GFX.StartY - 1; y >= 0; y--) { - memmove (GFX.Screen + y * 2 * GFX.Pitch2, + // memmove converted: Same malloc, different addresses, and identical addresses at line 0 [Neb] + // DS2 DMA notes: This code path is unused [Neb] + memcpy (GFX.Screen + y * 2 * GFX.Pitch2, GFX.Screen + y * GFX.Pitch2, GFX.Pitch2); - memmove (GFX.Screen + (y * 2 + 1) * GFX.Pitch2, + // memmove converted: Same malloc, different addresses [Neb] + memcpy (GFX.Screen + (y * 2 + 1) * GFX.Pitch2, GFX.Screen + y * GFX.Pitch2, GFX.Pitch2); } -- cgit v1.2.3 From b3a7f8f1fceddcd45ec62bcbf75ba128e4f84f5a Mon Sep 17 00:00:00 2001 From: Nebuleon Fumika Date: Sun, 3 Feb 2013 19:26:34 -0500 Subject: Synchronise the controller status at more points spread throughout a rendered frame: * before rendering a background; * before rendering sprites; * while rendering more than 128 samples of audio at once ("Prefer fluid video"); * after every 16 scanlines of CPU execution instead of every 1; * while waiting for an audio buffer to become available; * while killing time between frames with fast-forward disabled. Controller presses and releases are now combined in a DS button bitfield using a shorter 32-bit algorithm. See entry.cpp:NDSSFCAccumulateJoypad and #define ACCUMULATE_JOYPAD in the source. This is still not suitable for playing platformers frame-perfectly, but it's much better than half a second of latency to press or release a button, and one still needs to press buttons a bit more than just light taps. I'd say 50 milliseconds is the latency now. Platformers requiring more precision can be played with frameskip 0. DMA does not require double-buffered displaying, so synchronise the controller more often by disabling double-buffered displaying again. 
--- source/gfx.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'source/gfx.cpp') diff --git a/source/gfx.cpp b/source/gfx.cpp index 1b429c6..9ea0a97 100644 --- a/source/gfx.cpp +++ b/source/gfx.cpp @@ -1169,6 +1169,14 @@ void S9xSetupOBJ () static void DrawOBJS (bool8 OnMain = FALSE, uint8 D = 0) { +#ifdef ACCUMULATE_JOYPAD +/* + * This call allows NDSSFC to synchronise the DS controller more often. + * If porting a later version of Snes9x into NDSSFC, it is essential to + * preserve it. + */ + NDSSFCAccumulateJoypad (); +#endif #ifdef MK_DEBUG_RTO if(Settings.BGLayering) fprintf(stderr, "Entering DrawOBJS() for %d-%d\n", GFX.StartY, GFX.EndY); #endif @@ -2314,6 +2322,14 @@ static void DrawBackgroundMode5 (uint32 /* BGMODE */, uint32 bg, uint8 Z1, uint8 static void DrawBackground (uint32 BGMode, uint32 bg, uint8 Z1, uint8 Z2) { +#ifdef ACCUMULATE_JOYPAD +/* + * This call allows NDSSFC to synchronise the DS controller more often. + * If porting a later version of Snes9x into NDSSFC, it is essential to + * preserve it. + */ + NDSSFCAccumulateJoypad (); +#endif GFX.PixSize = 1; BG.TileSize = BGSizes [PPU.BG[bg].BGSize]; -- cgit v1.2.3