From 788f5e89c29daab31f1099f081ca92d72e507bf1 Mon Sep 17 00:00:00 2001
From: Justin Weiss
Date: Thu, 27 Feb 2020 23:42:43 -0800
Subject: WIP: Add ARM-assembly versions of lighting and blending

---
 plugins/gpu_unai/gpu_inner_blend.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'plugins/gpu_unai/gpu_inner_blend.h')
diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h
index 93c268b..a469541 100644
--- a/plugins/gpu_unai/gpu_inner_blend.h
+++ b/plugins/gpu_unai/gpu_inner_blend.h
@@ -37,7 +37,7 @@
 // Where '0' is zero-padding, and '-' is don't care
 ////////////////////////////////////////////////////////////////////////////////
 template <int BLENDMODE, bool SKIP_USRC_MSB_MASK>
-GPU_INLINE u16 gpuBlending(u16 uSrc, u16 uDst)
+GPU_INLINE u16 gpuBlendingGeneric(u16 uSrc, u16 uDst)
 {
 	// These use Blargg's bitwise modulo-clamping:
 	//  http://blargg.8bitalley.com/info/rgb_mixing.html
-- 
cgit v1.2.3


From 92eab56ae97cb8cd2a198f960b61cc8f1086eaf8 Mon Sep 17 00:00:00 2001
From: Justin Weiss
Date: Sun, 1 Mar 2020 15:40:25 -0800
Subject: Preserve uSrc MSB across lighting and blending

This saves a few cycles because gcc otherwise stores / loads srcMSB
when using ARM-optimized lighting.
---
 plugins/gpu_unai/gpu_inner_blend.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'plugins/gpu_unai/gpu_inner_blend.h')

diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h
index a469541..febc7ed 100644
--- a/plugins/gpu_unai/gpu_inner_blend.h
+++ b/plugins/gpu_unai/gpu_inner_blend.h
@@ -37,14 +37,14 @@
 // Where '0' is zero-padding, and '-' is don't care
 ////////////////////////////////////////////////////////////////////////////////
 template <int BLENDMODE, bool SKIP_USRC_MSB_MASK>
-GPU_INLINE u16 gpuBlendingGeneric(u16 uSrc, u16 uDst)
+GPU_INLINE uint_fast16_t gpuBlendingGeneric(uint_fast16_t uSrc, uint_fast16_t uDst)
 {
 	// These use Blargg's bitwise modulo-clamping:
 	//  http://blargg.8bitalley.com/info/rgb_mixing.html
 	//  http://blargg.8bitalley.com/info/rgb_clamped_add.html
 	//  http://blargg.8bitalley.com/info/rgb_clamped_sub.html
 
-	u16 mix;
+	uint_fast16_t mix;
 
 	// 0.5 x Back + 0.5 x Forward
 	if (BLENDMODE==0) {
@@ -113,7 +113,7 @@ GPU_INLINE u16 gpuBlendingGeneric(u16 uSrc, u16 uDst)
 //                     ^ bit 31
 // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
 ////////////////////////////////////////////////////////////////////////////////
-GPU_INLINE u32 gpuGetRGB24(u16 uSrc)
+GPU_INLINE u32 gpuGetRGB24(uint_fast16_t uSrc)
 {
 	return ((uSrc & 0x7C00)<<14)
 	     | ((uSrc & 0x03E0)<< 9)
@@ -137,7 +137,7 @@ GPU_INLINE u32 gpuGetRGB24(u16 uSrc)
 // Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care
 ////////////////////////////////////////////////////////////////////////////////
 template <int BLENDMODE>
-GPU_INLINE u32 gpuBlending24(u32 uSrc24, u16 uDst)
+GPU_INLINE u32 gpuBlending24(u32 uSrc24, uint_fast16_t uDst)
 {
 	// These use techniques adapted from Blargg's techniques mentioned in
 	//  in gpuBlending() comments above. Not as much bitwise trickery is
-- 
cgit v1.2.3