aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: notaz, 2011-11-14 23:58:15 +0200
committer: notaz, 2011-11-14 23:58:15 +0200
commit: 30f6e5edd98efed9c1624205e5c40c383e0db966 (patch)
tree: 08d803a5e539d0acfd54e0b7115fe8e47e47f79d
parent: 587a5b1caa1ceb8ea085d4dec0d2b4fbf69226c1 (diff)
download: pcsx_rearmed-30f6e5edd98efed9c1624205e5c40c383e0db966.tar.gz
pcsx_rearmed-30f6e5edd98efed9c1624205e5c40c383e0db966.tar.bz2
pcsx_rearmed-30f6e5edd98efed9c1624205e5c40c383e0db966.zip
frontend: use pld in blitters
Preloading a couple of cache lines ahead seems to give the best results: close to a 30% speedup.
-rw-r--r-- frontend/cspace_neon.s | 6
1 file changed, 6 insertions, 0 deletions
diff --git a/frontend/cspace_neon.s b/frontend/cspace_neon.s
index d131923..abc0381 100644
--- a/frontend/cspace_neon.s
+++ b/frontend/cspace_neon.s
@@ -13,11 +13,13 @@
.global bgr555_to_rgb565
bgr555_to_rgb565:
+ pld [r1]
mov r3, #0x07c0
vdup.16 q15, r3
subs r2, r2, #64
blt btr16_end64
0:
+ pld [r1, #64*2]
vldmia r1!, {q0-q3}
vshl.u16 q4, q0, #11
vshl.u16 q5, q1, #11
@@ -74,12 +76,14 @@ btr16_end16:
.global bgr888_to_rgb888
bgr888_to_rgb888:
+ pld [r1]
@ r2 /= 48
mov r2, r2, lsr #4
movw r3, #0x5556
movt r3, #0x5555
umull r12,r2, r3, r2
0:
+ pld [r1, #48*3]
vld3.8 {d0-d2}, [r1, :64]!
vld3.8 {d3-d5}, [r1, :64]!
vswp d0, d2
@@ -94,6 +98,7 @@ bgr888_to_rgb888:
.global bgr888_to_rgb565
bgr888_to_rgb565:
+ pld [r1]
@ r2 /= 48
mov r2, r2, lsr #4
movw r3, #0x5556
@@ -103,6 +108,7 @@ bgr888_to_rgb565:
mov r3, #0x07e0
vdup.16 q15, r3
0:
+ pld [r1, #48*3]
vld3.8 {d1-d3}, [r1, :64]!
vld3.8 {d5-d7}, [r1, :64]!