diff options
Diffstat (limited to 'graphics')
-rw-r--r-- | graphics/scaler/hq2x.cpp | 55 | ||||
-rw-r--r-- | graphics/scaler/hq2x.h | 96 | ||||
-rw-r--r-- | graphics/scaler/hq3x.cpp | 54 | ||||
-rw-r--r-- | graphics/scaler/hq3x.h | 96 |
4 files changed, 0 insertions, 301 deletions
diff --git a/graphics/scaler/hq2x.cpp b/graphics/scaler/hq2x.cpp index 567a2fcf53..727ae207d5 100644 --- a/graphics/scaler/hq2x.cpp +++ b/graphics/scaler/hq2x.cpp @@ -42,35 +42,6 @@ void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, #else -#ifdef HAS_ALTIVEC - -#ifdef __amigaos4__ -#include <proto/exec.h> -#include <altivec.h> -static bool isAltiVecAvailable() { - uint32 vecUnit; - IExec->GetCPUInfo(GCIT_VectorUnit, &vecUnit, TAG_DONE); - if (vecUnit == VECTORTYPE_NONE) - return false; - else - return true; -} -#else - -#include <sys/sysctl.h> - -static bool isAltiVecAvailable() { - int selectors[2] = { CTL_HW, HW_VECTORUNIT }; - int hasVectorUnit = 0; - size_t length = sizeof(hasVectorUnit); - int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); - if ( 0 == error ) - return hasVectorUnit != 0; - return false; -} -#endif -#endif - #define PIXEL00_0 *(q) = w5; #define PIXEL00_10 *(q) = interpolate16_2<bitFormat,3,1>(w5, w1); #define PIXEL00_11 *(q) = interpolate16_2<bitFormat,3,1>(w5, w4); @@ -139,33 +110,7 @@ void HQ2x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit #undef bitFormat -#ifdef HAS_ALTIVEC - #define USE_ALTIVEC 1 - - #define bitFormat 565 - void HQ2x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - #include "graphics/scaler/hq2x.h" - } - #undef bitFormat - - #define bitFormat 555 - void HQ2x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - #include "graphics/scaler/hq2x.h" - } - #undef bitFormat -#endif - void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { -#ifdef HAS_ALTIVEC - if (isAltiVecAvailable()) { - if (gBitFormat == 565) - HQ2x_565_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height); - else - HQ2x_555_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height); - return; - } -#endif - if (gBitFormat == 565) HQ2x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height); else diff --git a/graphics/scaler/hq2x.h b/graphics/scaler/hq2x.h index a59f108048..5a541b660d 100644 --- a/graphics/scaler/hq2x.h +++ b/graphics/scaler/hq2x.h @@ -46,24 +46,6 @@ // | w7 | w8 | w9 | // +----+----+----+ -#ifdef USE_ALTIVEC - // The YUV threshold. - static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706); - - // Bit pattern mask. - static const vector signed int vPatternMask1 = (vector signed int)(0x01, 0x02, 0x04, 0x08); - static const vector signed int vPatternMask2 = (vector signed int)(0x10, 0x20, 0x40, 0x80); - - // Permutation masks for the incremental vector loading (see below for more information). - static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7, 8,9,10,11, 20,21,22,23, 16,17,18,19); - static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27, 8,9,10,11, 12,13,14,15, 28,29,30,31); - - // The YUV vectors. - vector signed char vecYUV5555; - vector signed char vecYUV1234; - vector signed char vecYUV6789; -#endif - while (height--) { w1 = *(p - 1 - nextlineSrc); w4 = *(p - 1); @@ -73,15 +55,6 @@ w5 = *(p); w8 = *(p + nextlineSrc); -#ifdef USE_ALTIVEC - // Load inital values of vecYUV1234 / vecYUV6789 - const int arr1234[4] = {0, YUV(1), YUV(2), 0}; - const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)}; - - vecYUV1234 = *(const vector signed char *)arr1234; - vecYUV6789 = *(const vector signed char *)arr6789; -#endif - int tmpWidth = width; while (tmpWidth--) { p++; @@ -91,74 +64,6 @@ w9 = *(p + nextlineSrc); int pattern = 0; - -#ifdef USE_ALTIVEC - /* - Consider this peephole into the image buffer: - +----+----+----+----+ - | | | | | - | w00| w01| w02| w03| - +----+----+----+----+ - | | | | | - | w10| w11| w12| w13| - +----+----+----+----+ - | | | | | - | w20| w21| w22| w23| - +----+----+----+----+ - - In the previous loop iteration, w11 was the center point, and our - vectors contain the following data from the previous iteration: - vecYUV5555 = { w11, w11, w11, w11 } - vecYUV1234 = { w00, w01, w02, w10 } - vecYUV6789 = { w12, w20, w21, w22 } - - Now we have the new center point w12, and we would like to have - the following values in our vectors: - vecYUV5555 = { w12, w12, w12, w12 } - vecYUV1234 = { w01, w02, w03, w11 } - vecYUV6789 = { w13, w21, w22, w23 } - - To this end we load a single new vector: - vTmp = { w11, w03, w13, w23 } - - We then can compute all the new vector values using permutations only: - vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] } - vecYUV1234 = { vecYUV1234[1], vecYUV1234[2], vTmp[1], vTmp[0] } - vecYUV6789 = { vTmp[2], vecYUV6789[2], vecYUV6789[3], vTmp[3] } - - Beautiful, isn't it? :-) - */ - - // Load the new values into a temporary vector (see above for an explanation) - const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)}; - vector signed char vTmp = *(const vector signed char *)tmpArr; - - // Next update the data vectors - vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0); - vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234); - vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789); - - // Compute the absolute difference between the center point's YUV and the outer points - const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234)); - const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789)); - - // Compare the difference to the threshold (byte-wise) - const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold); - const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold); - - // Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0. - // Then and in the patter masks. The idea is that for 0 components, we get 0, - // while for the other components we get exactly the mask value. - const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1); - const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2); - - // Now sum up the components of all vectors. Since our pattern mask values - // are all "orthogonal", this is effectively the same as ORing them all - // together. In the end, the rightmost word of vSum contains the 'pattern' - vector signed int vSum = vec_sums(vPattern1, (vector signed int)0); - vSum = vec_sums(vPattern2, vSum); - pattern = ((int *)&vSum)[3]; -#else const int yuv5 = YUV(5); if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001; if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002; @@ -168,7 +73,6 @@ if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020; if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040; if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080; -#endif switch (pattern) { case 0: diff --git a/graphics/scaler/hq3x.cpp b/graphics/scaler/hq3x.cpp index 64bd17834d..2d0f50bd76 100644 --- a/graphics/scaler/hq3x.cpp +++ b/graphics/scaler/hq3x.cpp @@ -43,34 +43,6 @@ void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, #else -#ifdef HAS_ALTIVEC - -#ifdef __amigaos4__ -#include <proto/exec.h> -static bool isAltiVecAvailable() { - uint32 vecUnit; - IExec->GetCPUInfo(GCIT_VectorUnit, &vecUnit, TAG_DONE); - if (vecUnit == VECTORTYPE_NONE) - return false; - else - return true; -} -#else - -#include <sys/sysctl.h> - -static bool isAltiVecAvailable() { - int selectors[2] = { CTL_HW, HW_VECTORUNIT }; - int hasVectorUnit = 0; - size_t length = sizeof(hasVectorUnit); - int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); - if ( 0 == error ) - return hasVectorUnit != 0; - return false; -} -#endif -#endif - #define PIXEL00_1M *(q) = interpolate16_2<bitFormat,3,1>(w5, w1); #define PIXEL00_1U *(q) = interpolate16_2<bitFormat,3,1>(w5, w2); #define PIXEL00_1L *(q) = interpolate16_2<bitFormat,3,1>(w5, w4); @@ -141,33 +113,7 @@ void HQ3x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit #undef bitFormat -#ifdef HAS_ALTIVEC - #define USE_ALTIVEC 1 - - #define bitFormat 565 - void HQ3x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - #include "graphics/scaler/hq3x.h" - } - #undef bitFormat - - #define bitFormat 555 - void HQ3x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - #include "graphics/scaler/hq3x.h" - } - #undef bitFormat -#endif - void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { -#ifdef HAS_ALTIVEC - if (isAltiVecAvailable()) { - if (gBitFormat == 565) - HQ3x_565_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height); - else - HQ3x_555_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height); - return; - } -#endif - if (gBitFormat == 565) HQ3x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height); else diff --git a/graphics/scaler/hq3x.h b/graphics/scaler/hq3x.h index 7fda8d5105..f7c9fda469 100644 --- a/graphics/scaler/hq3x.h +++ b/graphics/scaler/hq3x.h @@ -47,24 +47,6 @@ // | w7 | w8 | w9 | // +----+----+----+ -#ifdef USE_ALTIVEC - // The YUV threshold. - static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706); - - // Bit pattern mask. - static const vector signed int vPatternMask1 = (vector signed int)(0x01,0x02,0x04,0x08); - static const vector signed int vPatternMask2 = (vector signed int)(0x10,0x20,0x40,0x80); - - // Permutation masks for the incremental vector loading (see below for more information). - static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7, 8,9,10,11, 20,21,22,23, 16,17,18,19); - static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27, 8,9,10,11, 12,13,14,15, 28,29,30,31); - - // The YUV vectors. - vector signed char vecYUV5555; - vector signed char vecYUV1234; - vector signed char vecYUV6789; -#endif - while (height--) { w1 = *(p - 1 - nextlineSrc); w4 = *(p - 1); @@ -74,15 +56,6 @@ w5 = *(p); w8 = *(p + nextlineSrc); -#ifdef USE_ALTIVEC - // Load inital values of vecYUV1234 / vecYUV6789 - const int arr1234[4] = {0, YUV(1), YUV(2), 0}; - const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)}; - - vecYUV1234 = *(const vector signed char *)arr1234; - vecYUV6789 = *(const vector signed char *)arr6789; -#endif - int tmpWidth = width; while (tmpWidth--) { p++; @@ -92,74 +65,6 @@ w9 = *(p + nextlineSrc); int pattern = 0; - -#ifdef USE_ALTIVEC - /* - Consider this peephole into the image buffer: - +----+----+----+----+ - | | | | | - | w00| w01| w02| w03| - +----+----+----+----+ - | | | | | - | w10| w11| w12| w13| - +----+----+----+----+ - | | | | | - | w20| w21| w22| w23| - +----+----+----+----+ - - In the previous loop iteration, w11 was the center point, and our - vectors contain the following data from the previous iteration: - vecYUV5555 = { w11, w11, w11, w11 } - vecYUV1234 = { w00, w01, w02, w10 } - vecYUV6789 = { w12, w20, w21, w22 } - - Now we have the new center point w12, and we would like to have - the following values in our vectors: - vecYUV5555 = { w12, w12, w12, w12 } - vecYUV1234 = { w01, w02, w03, w11 } - vecYUV6789 = { w13, w21, w22, w23 } - - To this end we load a single new vector: - vTmp = { w11, w03, w13, w23 } - - We then can compute all the new vector values using permutations only: - vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] } - vecYUV1234 = { vecYUV1234[1], vecYUV1234[2], vTmp[1], vTmp[0] } - vecYUV6789 = { vTmp[2], vecYUV6789[2], vecYUV6789[3], vTmp[3] } - - Beautiful, isn't it? :-) - */ - - // Load the new values into a temporary vector (see above for an explanation) - const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)}; - vector signed char vTmp = *(const vector signed char *)tmpArr; - - // Next update the data vectors - vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0); - vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234); - vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789); - - // Compute the absolute difference between the center point's YUV and the outer points - const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234)); - const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789)); - - // Compare the difference to the threshold (byte-wise) - const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold); - const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold); - - // Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0. - // Then and in the patter masks. The idea is that for 0 components, we get 0, - // while for the other components we get exactly the mask value. - const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1); - const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2); - - // Now sum up the components of all vectors. Since our pattern mask values - // are all "orthogonal", this is effectively the same as ORing them all - // together. In the end, the rightmost word of vSum contains the 'pattern' - vector signed int vSum = vec_sums(vPattern1, (vector signed int)0); - vSum = vec_sums(vPattern2, vSum); - pattern = ((int *)&vSum)[3]; -#else const int yuv5 = YUV(5); if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001; if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002; @@ -169,7 +74,6 @@ if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020; if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040; if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080; -#endif switch (pattern) { case 0: |