From 788f5e89c29daab31f1099f081ca92d72e507bf1 Mon Sep 17 00:00:00 2001 From: Justin Weiss Date: Thu, 27 Feb 2020 23:42:43 -0800 Subject: WIP: Add ARM-assembly versions of lighting and blending --- plugins/gpu_unai/gpu_inner_blend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'plugins/gpu_unai/gpu_inner_blend.h') diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h index 93c268b..a469541 100644 --- a/plugins/gpu_unai/gpu_inner_blend.h +++ b/plugins/gpu_unai/gpu_inner_blend.h @@ -37,7 +37,7 @@ // Where '0' is zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// template <int BLENDMODE> -GPU_INLINE u16 gpuBlending(u16 uSrc, u16 uDst) +GPU_INLINE u16 gpuBlendingGeneric(u16 uSrc, u16 uDst) { // These use Blargg's bitwise modulo-clamping: // http://blargg.8bitalley.com/info/rgb_mixing.html -- cgit v1.2.3 From 92eab56ae97cb8cd2a198f960b61cc8f1086eaf8 Mon Sep 17 00:00:00 2001 From: Justin Weiss Date: Sun, 1 Mar 2020 15:40:25 -0800 Subject: Preserve uSrc MSB across lighting and blending This saves a few cycles because gcc stores / loads srcMSB when using ARM-optimized lighting. 
--- plugins/gpu_unai/gpu_inner_blend.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'plugins/gpu_unai/gpu_inner_blend.h') diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h index a469541..febc7ed 100644 --- a/plugins/gpu_unai/gpu_inner_blend.h +++ b/plugins/gpu_unai/gpu_inner_blend.h @@ -37,14 +37,14 @@ // Where '0' is zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// template <int BLENDMODE> -GPU_INLINE u16 gpuBlendingGeneric(u16 uSrc, u16 uDst) +GPU_INLINE uint_fast16_t gpuBlendingGeneric(uint_fast16_t uSrc, uint_fast16_t uDst) { // These use Blargg's bitwise modulo-clamping: // http://blargg.8bitalley.com/info/rgb_mixing.html // http://blargg.8bitalley.com/info/rgb_clamped_add.html // http://blargg.8bitalley.com/info/rgb_clamped_sub.html - u16 mix; + uint_fast16_t mix; // 0.5 x Back + 0.5 x Forward if (BLENDMODE==0) { @@ -113,7 +113,7 @@ GPU_INLINE u16 gpuBlendingGeneric(u16 uSrc, u16 uDst) // ^ bit 31 // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// -GPU_INLINE u32 gpuGetRGB24(u16 uSrc) +GPU_INLINE u32 gpuGetRGB24(uint_fast16_t uSrc) { return ((uSrc & 0x7C00)<<14) | ((uSrc & 0x03E0)<< 9) @@ -137,7 +137,7 @@ GPU_INLINE u32 gpuGetRGB24(u16 uSrc) // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care //////////////////////////////////////////////////////////////////////////////// template <int BLENDMODE> -GPU_INLINE u32 gpuBlending24(u32 uSrc24, u16 uDst) +GPU_INLINE u32 gpuBlending24(u32 uSrc24, uint_fast16_t uDst) { // These use techniques adapted from Blargg's techniques mentioned in // in gpuBlending() comments above. Not as much bitwise trickery is -- cgit v1.2.3