From 92eab56ae97cb8cd2a198f960b61cc8f1086eaf8 Mon Sep 17 00:00:00 2001 From: Justin Weiss Date: Sun, 1 Mar 2020 15:40:25 -0800 Subject: Preserve uSrc MSB across lighting and blending This saves a few cycles because gcc stores / loads srcMSB when using ARM-optimized lighting. --- plugins/gpu_unai/gpu_inner_blend_arm.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'plugins/gpu_unai/gpu_inner_blend_arm.h') diff --git a/plugins/gpu_unai/gpu_inner_blend_arm.h b/plugins/gpu_unai/gpu_inner_blend_arm.h index 4db105a..5ddbdbb 100644 --- a/plugins/gpu_unai/gpu_inner_blend_arm.h +++ b/plugins/gpu_unai/gpu_inner_blend_arm.h @@ -15,7 +15,7 @@ // Where '0' is zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// template -GPU_INLINE u16 gpuBlendingARM(u16 uSrc, u16 uDst) +GPU_INLINE uint_fast16_t gpuBlendingARM(uint_fast16_t uSrc, uint_fast16_t uDst) { // These use Blargg's bitwise modulo-clamping: // http://blargg.8bitalley.com/info/rgb_mixing.html @@ -23,7 +23,7 @@ GPU_INLINE u16 gpuBlendingARM(u16 uSrc, u16 uDst) // http://blargg.8bitalley.com/info/rgb_clamped_sub.html - u16 mix; + uint_fast16_t mix; asm ("bic %[uDst], %[uDst], #0x8000" : [uDst] "+r" (uDst)); @@ -89,6 +89,12 @@ GPU_INLINE u16 gpuBlendingARM(u16 uSrc, u16 uDst) : [diff] "=&r" (diff), [mix] "=&r" (mix) : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x8420)); } + + // There is no case where we reach this function with + // SKIP_USRC_MSB_MASK false and the MSB of uSrc unset. + if (!SKIP_USRC_MSB_MASK) { + asm ("orr %[mix], %[mix], #0x8000" : [mix] "+r" (mix)); + } return mix; } -- cgit v1.2.3