aboutsummaryrefslogtreecommitdiff
path: root/backends/platform/psp/memory.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'backends/platform/psp/memory.cpp')
-rw-r--r--backends/platform/psp/memory.cpp457
1 files changed, 368 insertions, 89 deletions
diff --git a/backends/platform/psp/memory.cpp b/backends/platform/psp/memory.cpp
index e134a7d0f4..29d0482d9a 100644
--- a/backends/platform/psp/memory.cpp
+++ b/backends/platform/psp/memory.cpp
@@ -35,129 +35,408 @@
#include "backends/platform/psp/trace.h"
-void Copier::copy(byte *dst, const byte *src, uint32 bytes, PSPPixelFormat *format /* = NULL */) {
+//#define TEST_MEMORY_COPY
+
+extern "C" { // C linkage for the memcpy wrapper defined below
+
+#ifdef TEST_MEMORY_COPY /* we won't be able to run in this case b/c of printouts */
+extern void *__real_memcpy(void *dst, void *src, size_t bytes);
+#endif
+
+void *__wrap_memcpy(void *dst, void *src, size_t bytes) { // memcpy stand-in; presumably hooked in via the linker's --wrap=memcpy option -- confirm in the build files
+#ifdef TEST_MEMORY_COPY /* we won't be able to run in this case */
+ return __real_memcpy(dst, src, bytes); // test build: pass straight through to __real_memcpy
+#else
+ PspMemory::fastCopy((byte *)dst, (byte *)src, bytes); // fastCopy is declared outside this hunk
+ return dst; // same return convention as memcpy: the destination pointer
+#endif
+}
+
+}
+
+void PspMemory::copy(byte *dst, const byte *src, uint32 bytes) { // Copy bytes from src to dst: byte-copy until dst is 4-byte aligned, then dispatch on src alignment
 DEBUG_ENTER_FUNC();
- uint32 prefixDst = (((uint32)dst) & 0x3);
- prefixDst = prefixDst ? 4 - prefixDst : 0; // prefix only if we have address % 4 != 0
- uint32 prefixSrc = (((uint32)src) & 0x3);
- prefixSrc = prefixSrc ? 4 - prefixSrc : 0; // prefix only if we have address % 4 != 0
- uint32 *dst32, *src32;
- bool swapRB = format ? format->swapRB : false; // take swap value from pixelformat if it's given
-#ifdef __PSP_DEBUG_PRINT__
+#ifdef TEST_MEMORY_COPY
 uint32 debugBytes = bytes;
 const byte *debugDst = dst, *debugSrc = src;
#endif
- uint32 words, remainingBytes;
- //PSP_DEBUG_PRINT("dst[%p], src[%p], bytes[%d], swap[%s], prefixDst[%u], prefixSrc[%u]\n", dst, src, bytes, swapRB ? "true" : "false", prefixDst, prefixSrc);
+ PSP_DEBUG_PRINT("copy(): dst[%p], src[%p], bytes[%d]\n", dst, src, bytes);
- if (prefixDst || prefixSrc) { // we're not aligned to word boundaries
- if (prefixDst != prefixSrc) { // worst case: we can never be aligned. this mode is highly inefficient. try to get engines not to use this mode too much
- PSP_DEBUG_PRINT("misaligned copy of %u bytes from %p to %p\n", bytes, src, dst);
- if ((prefixDst & 1) || (prefixSrc & 1))
- copy8(dst, src, bytes); // no swap is possible on 8 bit
- else
- copy16((uint16 *)dst, (uint16 *)src, bytes, format);
-
- goto test;
- }
-
- // Do the prefix: the part to get us aligned
- if (prefixDst & 1) { // byte
- copy8(dst, src, prefixDst); // no swap available
- } else { // short
- copy16((uint16 *)dst, (uint16 *)src, prefixDst, format);
- }
- if (bytes > prefixDst) // check that we can afford to subtract from bytes
- bytes -= prefixDst;
- else {
+ // align the destination pointer first
+ uint32 prefixDst = (((uint32)dst) & 0x3);
+
+ if (prefixDst) {
+ prefixDst = 4 - prefixDst; // prefix only if we have address % 4 != 0
+ PSP_DEBUG_PRINT("prefixDst[%d]\n", prefixDst);
+
+ bytes -= prefixDst; // remember we assume bytes >= 4
+
+ if (bytes < MIN_AMOUNT_FOR_COMPLEX_COPY) { // check if it's worthwhile to continue
+ copy8(dst, src, bytes + prefixDst); // dst/src not advanced yet, so add the prefix back and byte-copy the whole request
+#ifdef TEST_MEMORY_COPY
+ testCopy(debugDst, debugSrc, debugBytes);
+#endif
 return;
 }
- dst32 = (uint32 *)(dst + prefixDst);
- src32 = (uint32 *)(src + prefixSrc);
- } else { // We're aligned to word boundaries
- dst32 = (uint32 *)dst;
- src32 = (uint32 *)src;
+
+ while (prefixDst--) { // byte-copy the prefix; afterwards dst is 4-byte aligned
+ *dst++ = *src++;
+ }
 }
-
- words = bytes >> 2;
- remainingBytes = bytes & 0x3;
-
- if (swapRB) { // need to swap
- for (; words > 0; words--) {
- *dst32 = format->swapRedBlue32(*src32);
- dst32++;
- src32++;
- }
- } else { // no swapping
- for (; words > 0; words--) {
- *dst32 = *src32;
- dst32++;
- src32++;
- }
+
+ // check the source pointer alignment now
+ uint32 alignSrc = (((uint32)src) & 0x3); // offset of src inside its 32-bit word (0..3)
+
+ if (alignSrc) { // we'll need to realign our reads
+ copy32Misaligned((uint32 *)dst, src, bytes, alignSrc);
+ } else {
+ copy32Aligned((uint32 *)dst, (uint32 *)src, bytes); // both pointers are word aligned here
 }
- // Do any remaining bytes
- if (remainingBytes) {
- if (remainingBytes & 1) // we have bytes left
- copy8((byte *)dst32, (byte *)src32, remainingBytes);
- else // 16bits left
- copy16((uint16*)dst32, (uint16 *)src32, remainingBytes, format);
- }
+#ifdef TEST_MEMORY_COPY
+ testCopy(debugDst, debugSrc, debugBytes);
+#endif
+}
-test:
- // debug
-#ifdef __PSP_DEBUG_PRINT__
+void PspMemory::testCopy(const byte *debugDst, const byte *debugSrc, uint32 debugBytes) { // Check a finished copy: compare dst with src byte by byte and print every difference
+
 bool mismatch = false;
+ PSP_INFO_PRINT("testing fastCopy...");
 for (uint32 i = 0; i < debugBytes; i++) {
 if (debugDst[i] != debugSrc[i]) {
- if (mismatch == false) {
- PSP_DEBUG_PRINT_SAMELN("mismatch in copy:\n");
- PSP_DEBUG_PRINT("dst[%p], src[%p], bytes[%u], swap[%s], prefixDst[%u], prefixSrc[%u]\n", debugDst, debugSrc, debugBytes, swapRB ? "true" : "false", prefixDst, prefixSrc);
+ if (!mismatch) { // print the header only once, on the first difference found
+ PSP_INFO_PRINT("**** mismatch in copy! ****\n");
+ PSP_INFO_PRINT("dst[%p], src[%p], bytes[%u]\n", debugDst, debugSrc, debugBytes);
 mismatch = true;
 }
- PSP_DEBUG_PRINT_SAMELN("%x!=%x ", debugSrc[i], debugDst[i]);
+ PSP_INFO_PRINT("[%d]%x!=%x ", i, debugSrc[i], debugDst[i]); // byte index, expected (src) value, actual (dst) value
 }
 }
- if (mismatch)
- PSP_DEBUG_PRINT("\n");
+ if (mismatch) {
+ PSP_INFO_PRINT("\n");
+ } else {
+ PSP_INFO_PRINT("ok\n");
+ }
+}
+
+//
+// used to swap red and blue
+void PspMemory::swap(uint16 *dst16, const uint16 *src16, uint32 bytes, PSPPixelFormat &format) { // Copy 16-bit values from src16 to dst16, passing each through format's red/blue swap
+ DEBUG_ENTER_FUNC();
+
+#ifdef TEST_MEMORY_COPY
+ uint32 debugBytes = bytes;
+ const uint16 *debugDst = dst16, *debugSrc = src16;
#endif
+
+ // align the destination pointer first
+ uint32 prefixDst = (((uint32)dst16) & 0x3); // for swap, we can only have 2 or 0 as our prefix
+
+ if (prefixDst) {
+ bytes -= prefixDst; // remember we assume bytes > 4
+ *dst16++ = format.swapRedBlue16(*src16++); // one 16-bit value handled here, so dst16 moves onto a 4-byte boundary
+
+ if (bytes < MIN_AMOUNT_FOR_COMPLEX_COPY) { // check if it's worthwhile to continue
+ swap16(dst16, src16, bytes, format); // swap16 is defined outside this hunk
+
+#ifdef TEST_MEMORY_COPY
+ testSwap(debugDst, debugSrc, debugBytes, format);
+#endif
+ return;
+ }
+ }
+
+ // check the source pointer alignment now
+ uint32 alignSrc = (((uint32)src16) & 0x3);
+
+ if (alignSrc) { // we'll need to realign our reads
+ PSP_DEBUG_PRINT("misaligned copy of %u bytes from %p to %p\n", bytes, src16, dst16);
+ swap32Misaligned((uint32 *)dst16, src16, bytes, format);
+ } else {
+ swap32Aligned((uint32 *)dst16, (const uint32 *)src16, bytes, format); // source and destination both word aligned
+ }
+
+#ifdef TEST_MEMORY_COPY
+ testSwap(debugDst, debugSrc, debugBytes, format);
+#endif
+
+}
+
+void PspMemory::testSwap(const uint16 *debugDst, const uint16 *debugSrc, uint32 debugBytes, PSPPixelFormat &format) { // Check a finished swap: each dst value must equal swapRedBlue16 of the matching src value
+
+ bool mismatch = false;
+ PSP_INFO_PRINT("testing fastSwap...");
+
+ uint32 shorts = debugBytes >> 1; // number of 16-bit values to compare
+
+ for (uint32 i = 0; i < shorts; i++) {
+ if (debugDst[i] != format.swapRedBlue16(debugSrc[i])) {
+ if (!mismatch) { // print the header only once, on the first difference found
+ PSP_INFO_PRINT("**** mismatch in swap! ****\n");
+ PSP_INFO_PRINT("dst[%p], src[%p], bytes[%u]\n", debugDst, debugSrc, debugBytes);
+ mismatch = true;
+ }
+ PSP_INFO_PRINT("[%d]%x!=%x ", i<<1, format.swapRedBlue16(debugSrc[i]), debugDst[i]); // i<<1 is the byte offset of the mismatching value
+ }
+ }
+ if (mismatch) {
+ PSP_INFO_PRINT("\n");
+ } else {
+ PSP_INFO_PRINT("ok\n");
+ }
+}
+
+
+void PspMemory::copy32Aligned(uint32 *dst32, const uint32 *src32, uint32 bytes) { // Copy bytes from src32 to dst32 in 32-bit units (8-word blocks, one 4-word block, single words), then the trailing bytes
+ PSP_DEBUG_PRINT("copy32Aligned(): dst32[%p], src32[%p], bytes[%d]\n", dst32, src32, bytes);
+
+ int words8 = bytes >> 5; // number of whole 32-byte blocks
+
+ // try blocks of 8 words at a time
+ if (words8) {
+ while (words8--) {
+ uint32 a, b, c, d; // four words are loaded before any of them is stored
+ a = src32[0];
+ b = src32[1];
+ c = src32[2];
+ d = src32[3];
+ dst32[0] = a;
+ dst32[1] = b;
+ dst32[2] = c;
+ dst32[3] = d;
+ a = src32[4];
+ b = src32[5];
+ c = src32[6];
+ d = src32[7];
+ dst32[4] = a;
+ dst32[5] = b;
+ dst32[6] = c;
+ dst32[7] = d;
+ dst32 += 8;
+ src32 += 8;
+ }
+ }
+
+ int words4 = (bytes & 0x1F) >> 4; // 1 if a 16-byte block remains after the 32-byte blocks, else 0
+
+ // try blocks of 4 words at a time
+ if (words4) {
+ uint32 a, b, c, d;
+ a = src32[0];
+ b = src32[1];
+ c = src32[2];
+ d = src32[3];
+ dst32[0] = a;
+ dst32[1] = b;
+ dst32[2] = c;
+ dst32[3] = d;
+ dst32 += 4;
+ src32 += 4;
+ }
+
+ int bytesLeft = (bytes & 0xF); // only look at bytes left after we did the above
+ int wordsLeft = bytesLeft >> 2;
+
+ // now just do single words
+ while (wordsLeft) {
+ *dst32++ = *src32++;
+ wordsLeft--;
+ }
- return; // So we have something to jump to with the label
+ bytesLeft = bytes & 0x3; // get remaining bytes
+
+ PSP_DEBUG_PRINT("bytesLeft[%d]\n", bytesLeft);
+
+ byte *dst = (byte *)dst32;
+ byte *src = (byte *)src32;
+
+ while (bytesLeft--) { // at most 3 trailing bytes, copied one at a time
+ *dst++ = *src++;
+ }
}
-inline void Copier::copy8(byte *dst, const byte *src, uint32 bytes) {
- for (; bytes > 0; bytes--) {
- *dst = *src;
- dst++;
- src++;
+void PspMemory::swap32Aligned(uint32 *dst32, const uint32 *src32, uint32 bytes, PSPPixelFormat &format) { // Copy bytes from src32 to dst32, applying format's red/blue swap to every 32-bit word
+ DEBUG_ENTER_FUNC();
+ int words4 = bytes >> 4; // number of whole 16-byte blocks
+
+ // try blocks of 4 words at a time
+ while (words4--) {
+ uint32 a, b, c, d; // four swapped words are computed before any of them is stored
+ a = format.swapRedBlue32(src32[0]);
+ b = format.swapRedBlue32(src32[1]);
+ c = format.swapRedBlue32(src32[2]);
+ d = format.swapRedBlue32(src32[3]);
+ dst32[0] = a;
+ dst32[1] = b;
+ dst32[2] = c;
+ dst32[3] = d;
+ dst32 += 4;
+ src32 += 4;
+ }
+
+ uint32 bytesLeft = bytes & 0xF; // bytes remaining after the 16-byte blocks
+ uint32 words = bytesLeft >> 2;
+
+ // now just do words
+ while (words--) {
+ *dst32++ = format.swapRedBlue32(*src32++);
+ }
+
+ bytesLeft = bytes & 0x3;
+
+ if (bytesLeft) { // for swap, can only be 1 short left
+ *((uint16 *)dst32) = format.swapRedBlue16(*((uint16 *)src32));
 }
}
-inline void Copier::copy16(uint16 *dst, const uint16 *src, uint32 bytes, PSPPixelFormat *format /* = NULL */) {
- uint32 shorts = bytes >> 1;
- uint32 remainingBytes = bytes & 1;
- bool swapRB = format ? format->swapRB : false;
- if (swapRB) {
- for (; shorts > 0 ; shorts--) {
- *dst = format->swapRedBlue16(*src);
- dst++;
- src++;
+// More challenging -- need to shift
+// Assume dst is aligned
+void PspMemory::copy32Misaligned(uint32 *dst32, const byte *src, uint32 bytes, uint32 alignSrc) { // Copy bytes to dst32 from a src that is alignSrc bytes into its word: aligned 32-bit reads are shifted and merged into each output word
+ PSP_DEBUG_PRINT("copy32Misaligned: dst32[%p], src[%p], bytes[%d], alignSrc[%d]\n", dst32, src, bytes, alignSrc);
+
+ uint32 *src32 = (uint32 *)(((uint32)src) & 0xFFFFFFFC); // remove misalignment
+ uint32 shiftValue, lastShiftValue; // set below so that shiftValue == 8 * alignSrc and lastShiftValue == 32 - shiftValue
+
+ switch (alignSrc) {
+ case 1:
+ shiftValue = 8;
+ lastShiftValue = 24;
+ break;
+ case 2:
+ shiftValue = 16;
+ lastShiftValue = 16;
+ break;
+ default: /* 3 */
+ shiftValue = 24;
+ lastShiftValue = 8;
+ break;
+ }
+
+ uint32 dstWord, srcWord;
+
+ // Try to do groups of 4 words
+ uint32 words4 = bytes >> 4;
+
+ srcWord = *src32; // preload 1st word so we read ahead
+
+ for (; words4; words4--) {
+ dstWord = srcWord >> shiftValue; // low bits of the output come from the word already loaded
+ srcWord = src32[1];
+ dstWord |= srcWord << lastShiftValue; // high bits of the output come from the next source word
+ dst32[0] = dstWord;
+ dstWord = srcWord >> shiftValue;
+ srcWord = src32[2];
+ dstWord |= srcWord << lastShiftValue;
+ dst32[1] = dstWord;
+ dstWord = srcWord >> shiftValue;
+ srcWord = src32[3];
+ dstWord |= srcWord << lastShiftValue;
+ dst32[2] = dstWord;
+ dstWord = srcWord >> shiftValue;
+ srcWord = src32[4]; // one word beyond this group; it is carried into the next iteration or the loops below
+ dstWord |= srcWord << lastShiftValue;
+ dst32[3] = dstWord;
+ src32 += 4;
+ dst32 += 4;
+ }
+
+ uint32 words = (bytes & 0xF) >> 2; // now get remaining words
+
+ // we read one word ahead of what we write
+ // setup the first read
+
+ for (; words ;words--) {
+ dstWord = srcWord >> shiftValue;
+ srcWord = src32[1]; // we still go one ahead
+ src32++;
+ dstWord |= srcWord << lastShiftValue;
+ *dst32++ = dstWord;
+ }
+
+ uint32 bytesLeft = bytes & 3; // and remaining bytes
+
+ if (bytesLeft) {
+ byte *dst8 = (byte *)dst32;
+ byte *src8 = ((byte *)src32) + ((uint32)src & 0x3); // get exact location we should be at
+
+ for(; bytesLeft; bytesLeft--) {
+ *dst8++ = *src8++;
 }
- } else {
- for (; shorts > 0 ; shorts--) {
- *dst = *src;
- dst++;
- src++;
+ }
+}
+
+// More challenging -- need to shift
+// We assume dst is aligned
+void PspMemory::swap32Misaligned(uint32 *dst32, const uint16 *src16, uint32 bytes, PSPPixelFormat &format) { // Red/blue-swap bytes from src16 into dst32 using aligned 32-bit reads whose halves are merged before swapping
+ DEBUG_ENTER_FUNC();
+
+ const uint32 shiftValue = 16; // fixed 16-bit shift: assumes src16 sits 2 bytes past a word boundary -- confirm against callers
+ uint32 *src32 = (uint32 *)(((uint32)src16) & 0xFFFFFFFC); // remove misalignment
+
+ // Try to do groups of 4 words
+ uint32 words4 = bytes >> 4;
+ uint32 srcWord = src32[0]; // preload
+
+ while (words4--) {
+ uint32 dstWord = srcWord >> shiftValue; // low half of the output comes from the word already loaded
+ srcWord = src32[1];
+ dstWord |= srcWord << shiftValue; // high half of the output comes from the next source word
+ dst32[0] = format.swapRedBlue32(dstWord);
+ dstWord = srcWord >> shiftValue;
+ srcWord = src32[2];
+ dstWord |= srcWord << shiftValue;
+ dst32[1] = format.swapRedBlue32(dstWord);
+ dstWord = srcWord >> shiftValue;
+ srcWord = src32[3];
+ dstWord |= srcWord << shiftValue;
+ dst32[2] = format.swapRedBlue32(dstWord);
+ dstWord = srcWord >> shiftValue;
+ srcWord = src32[4]; // one word beyond this group; it is carried into the next iteration or the code below
+ dstWord |= srcWord << shiftValue;
+ dst32[3] = format.swapRedBlue32(dstWord);
+ src32 += 4;
+ dst32 += 4;
+ }
+
+ uint32 words = (bytes & 0xF) >> 2;
+
+ // we read one word ahead of what we write
+ // setup the first read
+ if (words) {
+ //srcWord = *src32++; // don't need this. already loaded
+ src32++; // we already have the value loaded in
+
+ while (words--) {
+ uint32 dstWord = srcWord >> shiftValue;
+ srcWord = *src32++;
+ dstWord |= srcWord << shiftValue;
+ *dst32++ = format.swapRedBlue32(dstWord);
 }
 }
- if (remainingBytes)
- *(byte *)dst = *(byte *)src;
+
+ uint32 bytesLeft = bytes & 3;
+
+ if (bytesLeft) { // for swap, can only be 1 short left
+ *((uint16 *)dst32) = format.swapRedBlue16((uint16)(srcWord >> shiftValue)); // the last value is the upper half of the word already loaded
+ }
}
+inline void PspMemory::copy16(uint16 *dst16, const uint16 *src16, uint32 bytes) { // Copy bytes from src16 to dst16 in 16-bit units, plus one trailing byte when bytes is odd
+ PSP_DEBUG_PRINT("copy16(): dst16[%p], src16[%p], bytes[%d]\n", dst16, src16, bytes);
+
+ uint32 shorts = bytes >> 1; // number of whole 16-bit units
+ uint32 remainingBytes = bytes & 1; // 1 when a single odd byte is left over
+
+ for (; shorts > 0 ; shorts--) {
+ *dst16++ = *src16++;
+ }
+ if (remainingBytes)
+ *(byte *)dst16 = *(byte *)src16;
+}
// Class VramAllocator -----------------------------------