diff options
author | notaz | 2011-11-14 23:58:15 +0200 |
---|---|---|
committer | notaz | 2011-11-14 23:58:15 +0200 |
commit | 30f6e5edd98efed9c1624205e5c40c383e0db966 (patch) | |
tree | 08d803a5e539d0acfd54e0b7115fe8e47e47f79d | |
parent | 587a5b1caa1ceb8ea085d4dec0d2b4fbf69226c1 (diff) | |
download | pcsx_rearmed-30f6e5edd98efed9c1624205e5c40c383e0db966.tar.gz pcsx_rearmed-30f6e5edd98efed9c1624205e5c40c383e0db966.tar.bz2 pcsx_rearmed-30f6e5edd98efed9c1624205e5c40c383e0db966.zip |
frontend: use pld in blitters
preloading a couple of cachelines ahead seems to give best results,
close to 30% speedup.
-rw-r--r-- | frontend/cspace_neon.s | 6 |
1 file changed, 6 insertions, 0 deletions
```diff
diff --git a/frontend/cspace_neon.s b/frontend/cspace_neon.s
index d131923..abc0381 100644
--- a/frontend/cspace_neon.s
+++ b/frontend/cspace_neon.s
@@ -13,11 +13,13 @@
 
 .global bgr555_to_rgb565
 bgr555_to_rgb565:
+    pld [r1]
     mov r3, #0x07c0
     vdup.16 q15, r3
     subs r2, r2, #64
     blt btr16_end64
 0:
+    pld [r1, #64*2]
     vldmia r1!, {q0-q3}
     vshl.u16 q4, q0, #11
     vshl.u16 q5, q1, #11
@@ -74,12 +76,14 @@ btr16_end16:
 
 .global bgr888_to_rgb888
 bgr888_to_rgb888:
+    pld [r1]
     @ r2 /= 48
     mov r2, r2, lsr #4
     movw r3, #0x5556
     movt r3, #0x5555
     umull r12,r2, r3, r2
 0:
+    pld [r1, #48*3]
     vld3.8 {d0-d2}, [r1, :64]!
     vld3.8 {d3-d5}, [r1, :64]!
     vswp d0, d2
@@ -94,6 +98,7 @@ bgr888_to_rgb888:
 
 .global bgr888_to_rgb565
 bgr888_to_rgb565:
+    pld [r1]
     @ r2 /= 48
     mov r2, r2, lsr #4
     movw r3, #0x5556
@@ -103,6 +108,7 @@ bgr888_to_rgb565:
     mov r3, #0x07e0
     vdup.16 q15, r3
 0:
+    pld [r1, #48*3]
     vld3.8 {d1-d3}, [r1, :64]!
     vld3.8 {d5-d7}, [r1, :64]!
 
```