aboutsummaryrefslogtreecommitdiff
path: root/plugins/gpu_unai/gpu_fixedpoint.h
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/gpu_unai/gpu_fixedpoint.h')
-rw-r--r--plugins/gpu_unai/gpu_fixedpoint.h107
1 files changed, 55 insertions, 52 deletions
diff --git a/plugins/gpu_unai/gpu_fixedpoint.h b/plugins/gpu_unai/gpu_fixedpoint.h
index e72fda1..5df42cf 100644
--- a/plugins/gpu_unai/gpu_fixedpoint.h
+++ b/plugins/gpu_unai/gpu_fixedpoint.h
@@ -21,60 +21,73 @@
#ifndef FIXED_H
#define FIXED_H
-#include "arm_features.h"
-
typedef s32 fixed;
-#ifdef GPU_TABLE_10_BITS
-#define TABLE_BITS 10
-#else
-#define TABLE_BITS 16
-#endif
-
-#define FIXED_BITS 16
+//senquack - The gpu_drhell poly routines I adapted use 22.10 fixed point,
+// while original Unai used 16.16: (see README_senquack.txt)
+//#define FIXED_BITS 16
+#define FIXED_BITS 10
#define fixed_ZERO ((fixed)0)
#define fixed_ONE ((fixed)1<<FIXED_BITS)
#define fixed_TWO ((fixed)2<<FIXED_BITS)
#define fixed_HALF ((fixed)((1<<FIXED_BITS)>>1))
-// big precision inverse table.
-s32 s_invTable[(1<<TABLE_BITS)];
+#define fixed_LOMASK ((fixed)((1<<FIXED_BITS)-1))
+#define fixed_HIMASK ((fixed)(~fixed_LOMASK))
+
+// int<->fixed conversions:
+#define i2x(x) ((x)<<FIXED_BITS)
+#define x2i(x) ((x)>>FIXED_BITS)
+
+INLINE fixed FixedCeil(const fixed x)
+{
+ return (x + (fixed_ONE - 1)) & fixed_HIMASK;
+}
-INLINE fixed i2x(const int _x) { return ((_x)<<FIXED_BITS); }
-INLINE fixed x2i(const fixed _x) { return ((_x)>>FIXED_BITS); }
+INLINE s32 FixedCeilToInt(const fixed x)
+{
+ return (x + (fixed_ONE - 1)) >> FIXED_BITS;
+}
-/*
-INLINE u32 Log2(u32 _a)
+//senquack - float<->fixed conversions:
+#define f2x(x) ((s32)((x) * (float)(1<<FIXED_BITS)))
+#define x2f(x) ((float)(x) / (float)(1<<FIXED_BITS))
+
+//senquack - floating point reciprocal:
+//NOTE: These assume x is always != 0 !!!
+#ifdef GPU_UNAI_USE_FLOATMATH
+#if defined(_MIPS_ARCH_MIPS32R2) || (__mips == 64)
+INLINE float FloatInv(const float x)
+{
+ float res;
+ asm("recip.s %0,%1" : "=f" (res) : "f" (x));
+ return res;
+}
+#else
+INLINE float FloatInv(const float x)
{
- u32 c = 0; // result of log2(v) will go here
- if (_a & 0xFFFF0000) { _a >>= 16; c |= 16; }
- if (_a & 0xFF00) { _a >>= 8; c |= 8; }
- if (_a & 0xF0) { _a >>= 4; c |= 4; }
- if (_a & 0xC) { _a >>= 2; c |= 2; }
- if (_a & 0x2) { _a >>= 1; c |= 1; }
- return c;
+ return (1.0f / x);
}
-*/
+#endif
+#endif
-#ifdef HAVE_ARMV5
+///////////////////////////////////////////////////////////////////////////
+// --- BEGIN INVERSE APPROXIMATION SECTION ---
+///////////////////////////////////////////////////////////////////////////
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+
+// big precision inverse table.
+#define TABLE_BITS 16
+s32 s_invTable[(1<<TABLE_BITS)];
+
+//senquack - MIPS32 happens to have same instruction/format:
+#if defined(__arm__) || (__mips == 32)
INLINE u32 Log2(u32 x) { u32 res; asm("clz %0,%1" : "=r" (res) : "r" (x)); return 32-res; }
#else
INLINE u32 Log2(u32 x) { u32 i = 0; for ( ; x > 0; ++i, x >>= 1); return i - 1; }
#endif
-#ifdef GPU_TABLE_10_BITS
-INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
-{
- u32 uD = (_b<0) ? -_b : _b ;
- u32 uLog = Log2(uD);
- uLog = uLog>(TABLE_BITS-1) ? uLog-(TABLE_BITS-1) : 0;
- u32 uDen = uD>>uLog;
- iFactor_ = s_invTable[uDen];
- iFactor_ = (_b<0) ? -iFactor_ :iFactor_;
- iShift_ = 15+uLog;
-}
-#else
INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
{
u32 uD = (_b<0) ? -_b : _b;
@@ -82,10 +95,12 @@ INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
{
u32 uLog = Log2(uD);
uLog = uLog>(TABLE_BITS-1) ? uLog-(TABLE_BITS-1) : 0;
- u32 uDen = (uD>>uLog)-1;
+ u32 uDen = (uD>>uLog);
iFactor_ = s_invTable[uDen];
iFactor_ = (_b<0) ? -iFactor_ :iFactor_;
- iShift_ = 15+uLog;
+ //senquack - Adapted to 22.10 fixed point (originally 16.16):
+ //iShift_ = 15+uLog;
+ iShift_ = 21+uLog;
}
else
{
@@ -93,7 +108,6 @@ INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
iShift_ = 0;
}
}
-#endif
INLINE fixed xInvMulx (const fixed _a, const s32 _iFact, const s32 _iShift)
{
@@ -112,20 +126,9 @@ INLINE fixed xLoDivx (const fixed _a, const fixed _b)
xInv(_b, iFact, iShift);
return xInvMulx(_a, iFact, iShift);
}
-
+#endif // GPU_UNAI_USE_INT_DIV_MULTINV
///////////////////////////////////////////////////////////////////////////
-template<typename T>
-INLINE T Min2 (const T _a, const T _b) { return (_a<_b)?_a:_b; }
-
-template<typename T>
-INLINE T Min3 (const T _a, const T _b, const T _c) { return Min2(Min2(_a,_b),_c); }
-
+// --- END INVERSE APPROXIMATION SECTION ---
///////////////////////////////////////////////////////////////////////////
-template<typename T>
-INLINE T Max2 (const T _a, const T _b) { return (_a>_b)?_a:_b; }
-template<typename T>
-INLINE T Max3 (const T _a, const T _b, const T _c) { return Max2(Max2(_a,_b),_c); }
-
-///////////////////////////////////////////////////////////////////////////
#endif //FIXED_H