Removed my Altivec version of the hqx blitters, since (a) it didn't work with newer GCC versions, (b) it improved speed on some systems but was actually slower on others, and (c) even on my old 400 MHz G4, hqx is fast enough without it

svn-id: r21961
author: Max Horn 2006-04-17 08:38:26 +0000
committer: Max Horn 2006-04-17 08:38:26 +0000
commit: de250812e5e2a15fc3a1dddc9faae09f1c2df6d6 (patch)
tree: a612e6479a9a3e78e43b68f88d23cbf9f725dbdb
parent: 092893a16d021a2391a3adfbb07db5f8bf9ee073 (diff)
download: scummvm-rg350-de250812e5e2a15fc3a1dddc9faae09f1c2df6d6.tar.gz
scummvm-rg350-de250812e5e2a15fc3a1dddc9faae09f1c2df6d6.tar.bz2
scummvm-rg350-de250812e5e2a15fc3a1dddc9faae09f1c2df6d6.zip
6 files changed, 1 insertions, 329 deletions
diff --git a/Makefile b/Makefile
index b33b9b0685..6bd6d7552b 100644
--- a/Makefile
+++ b/Makefile
@@ -91,8 +91,7 @@ bundle: scummvm-static
 OSXOPT=/sw
 # Special target to create a static linked binary for Mac OS X.
 # We use -force_cpusubtype_ALL to ensure the binary runs on every
-# PowerPC machine, even though we use Altivec code (we dynamically
-# check whether Altivec is available before trying to run it).
+# PowerPC machine.
 scummvm-static: $(OBJS)
 	$(CXX) $(LDFLAGS) -force_cpusubtype_ALL -o scummvm-static $(OBJS) \
 		`sdl-config --static-libs` \
diff --git a/configure b/configure
index cdb1548ff0..93c47ed234 100755
--- a/configure
+++ b/configure
@@ -62,7 +62,6 @@ _build_cine=no
 _need_memalign=no
 _build_plugins=no
 _nasm=auto
-_altivec=auto
 _build_hq_scalers=yes
 _build_scalers=yes
 # more defaults
@@ -400,7 +399,6 @@ for ac_option in $@; do
       --enable-plugins)         _build_plugins=yes ;;
       --enable-mt32emu)         _mt32emu=yes    ;;
       --disable-mt32emu)        _mt32emu=no     ;;
-      --disable-altivec)        _altivec=no     ;;
       --with-fluidsynth-prefix=*)
         arg=`echo $ac_option | cut -d '=' -f 2`
         FLUIDSYNTH_CFLAGS="-I$arg/include"
@@ -1233,30 +1231,6 @@ test -z "$_bindir" && _bindir="$_prefix/bin"
 test -z "$_mandir" && _mandir="$_prefix/man"
 
 #
-# Check for Altivec, if on MacOS X
-#
-case $_host_os in
-	darwin*)
-		echocheck "Altivec"
-		if test "$_altivec" = auto ; then
-			_altivec=no
-			cat > $TMPC << EOF
-int main(void) { return 0; }
-EOF
-			cc_check $LDFLAGS $CXXFLAGS -maltivec && _altivec=yes
-		fi
-		if test "$_altivec" = yes ; then
-				DEFINES="$DEFINES -DHAS_ALTIVEC"
-				CXXFLAGS="$CXXFLAGS -faltivec"
-				CFLAGS="$CFLAGS -faltivec"
-		fi  
-		echo "$_altivec"
-		rm -f $TMPC $TMPO$EXEEXT
-	;;
-esac
-
-
-#
 # Check which engines ("frontends") are to be built
 #
 echo
diff --git a/graphics/scaler/hq2x.cpp b/graphics/scaler/hq2x.cpp
index 567a2fcf53..727ae207d5 100644
--- a/graphics/scaler/hq2x.cpp
+++ b/graphics/scaler/hq2x.cpp
@@ -42,35 +42,6 @@ void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
 
 #else
 
-#ifdef HAS_ALTIVEC
-
-#ifdef __amigaos4__
-#include <proto/exec.h>
-#include <altivec.h>
-static bool isAltiVecAvailable() {
-	uint32 vecUnit;
-	IExec->GetCPUInfo(GCIT_VectorUnit, &vecUnit, TAG_DONE);
-	if (vecUnit == VECTORTYPE_NONE)
-		return false;
-	else
-		return true;
-}
-#else
-
-#include <sys/sysctl.h>
-
-static bool isAltiVecAvailable()  {
-	int selectors[2] = { CTL_HW, HW_VECTORUNIT };
-	int hasVectorUnit = 0;
-	size_t length = sizeof(hasVectorUnit);
-	int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
-	if ( 0 == error )
-		return hasVectorUnit != 0;
-	return false;
-}
-#endif
-#endif
-
 #define PIXEL00_0	*(q) = w5;
 #define PIXEL00_10	*(q) = interpolate16_2<bitFormat,3,1>(w5, w1);
 #define PIXEL00_11	*(q) = interpolate16_2<bitFormat,3,1>(w5, w4);
@@ -139,33 +110,7 @@ void HQ2x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
 #undef bitFormat
 
 
-#ifdef HAS_ALTIVEC
-	#define USE_ALTIVEC	1
-
-	#define bitFormat 565
-	void HQ2x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-		#include "graphics/scaler/hq2x.h"
-	}
-	#undef bitFormat
-
-	#define bitFormat 555
-	void HQ2x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-		#include "graphics/scaler/hq2x.h"
-	}
-	#undef bitFormat
-#endif
-
 void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-#ifdef HAS_ALTIVEC
-	if (isAltiVecAvailable()) {
-		if (gBitFormat == 565)
-			HQ2x_565_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-		else
-			HQ2x_555_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-		return;
-	}
-#endif
-
 	if (gBitFormat == 565)
 		HQ2x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
 	else
diff --git a/graphics/scaler/hq2x.h b/graphics/scaler/hq2x.h
index a59f108048..5a541b660d 100644
--- a/graphics/scaler/hq2x.h
+++ b/graphics/scaler/hq2x.h
@@ -46,24 +46,6 @@
 	//	 | w7 | w8 | w9 |
 	//	 +----+----+----+
 
-#ifdef USE_ALTIVEC
-	// The YUV threshold.
-	static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706);
-
-	// Bit pattern mask.
-	static const vector signed int vPatternMask1 = (vector signed int)(0x01, 0x02, 0x04, 0x08);
-	static const vector signed int vPatternMask2 = (vector signed int)(0x10, 0x20, 0x40, 0x80);
-
-	// Permutation masks for the incremental vector loading (see below for more information).
-	static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7,  8,9,10,11,  20,21,22,23,  16,17,18,19);
-	static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27,  8,9,10,11,  12,13,14,15,  28,29,30,31);
-
-	// The YUV vectors.
-	vector signed char vecYUV5555;
-	vector signed char vecYUV1234;
-	vector signed char vecYUV6789;
-#endif
-
 	while (height--) {
 		w1 = *(p - 1 - nextlineSrc);
 		w4 = *(p - 1);
@@ -73,15 +55,6 @@
 		w5 = *(p);
 		w8 = *(p + nextlineSrc);
 
-#ifdef USE_ALTIVEC
-		// Load inital values of vecYUV1234 / vecYUV6789
-		const int arr1234[4] = {0, YUV(1), YUV(2), 0};
-		const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)};
-
-		vecYUV1234 = *(const vector signed char *)arr1234;
-		vecYUV6789 = *(const vector signed char *)arr6789;
-#endif
-
 		int tmpWidth = width;
 		while (tmpWidth--) {
 			p++;
@@ -91,74 +64,6 @@
 			w9 = *(p + nextlineSrc);
 
 			int pattern = 0;
-
-#ifdef USE_ALTIVEC
-			/*
-			Consider this peephole into the image buffer:
-			+----+----+----+----+
-			|    |    |    |    |
-			| w00| w01| w02| w03|
-			+----+----+----+----+
-			|    |    |    |    |
-			| w10| w11| w12| w13|
-			+----+----+----+----+
-			|    |    |    |    |
-			| w20| w21| w22| w23|
-			+----+----+----+----+
-
-			In the previous loop iteration, w11 was the center point, and our
-			vectors contain the following data from the previous iteration:
-			vecYUV5555 = { w11, w11, w11, w11 }
-			vecYUV1234 = { w00, w01, w02, w10 }
-			vecYUV6789 = { w12, w20, w21, w22 }
-
-			Now we have the new center point w12, and we would like to have
-			the following values in our vectors:
-			vecYUV5555 = { w12, w12, w12, w12 }
-			vecYUV1234 = { w01, w02, w03, w11 }
-			vecYUV6789 = { w13, w21, w22, w23 }
-
-			To this end we load a single new vector:
-			vTmp = { w11, w03, w13, w23 }
-
-			We then can compute all the new vector values using permutations only:
-			vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] }
-			vecYUV1234 = { vecYUV1234[1], vecYUV1234[2],  vTmp[1],  vTmp[0] }
-			vecYUV6789 = {  vTmp[2], vecYUV6789[2], vecYUV6789[3],  vTmp[3] }
-
-			Beautiful, isn't it? :-)
-			*/
-
-			// Load the new values into a temporary vector (see above for an explanation)
-			const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)};
-			vector signed char vTmp = *(const vector signed char *)tmpArr;
-
-			// Next update the data vectors
-			vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0);
-			vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234);
-			vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789);
-
-			// Compute the absolute difference between the center point's YUV and the outer points
-			const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234));
-			const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789));
-
-			// Compare the difference to the threshold (byte-wise)
-			const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold);
-			const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold);
-
-			// Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0.
-			// Then and in the patter masks. The idea is that for 0 components, we get 0,
-			// while for the other components we get exactly the mask value.
-			const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1);
-			const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2);
-
-			// Now sum up the components of all vectors. Since our pattern mask values
-			// are all "orthogonal", this is effectively the same as ORing them all
-			// together. In the end, the rightmost word of vSum contains the 'pattern'
-			vector signed int vSum = vec_sums(vPattern1, (vector signed int)0);
-			vSum = vec_sums(vPattern2, vSum);
-			pattern = ((int *)&vSum)[3];
-#else
 			const int yuv5 = YUV(5);
 			if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001;
 			if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002;
@@ -168,7 +73,6 @@
 			if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020;
 			if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040;
 			if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080;
-#endif
 
 			switch (pattern) {
 			case 0:
diff --git a/graphics/scaler/hq3x.cpp b/graphics/scaler/hq3x.cpp
index 64bd17834d..2d0f50bd76 100644
--- a/graphics/scaler/hq3x.cpp
+++ b/graphics/scaler/hq3x.cpp
@@ -43,34 +43,6 @@ void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
 
 #else
 
-#ifdef HAS_ALTIVEC
-
-#ifdef __amigaos4__
-#include <proto/exec.h>
-static bool isAltiVecAvailable() {
-	uint32 vecUnit;
-	IExec->GetCPUInfo(GCIT_VectorUnit, &vecUnit, TAG_DONE);
-	if (vecUnit == VECTORTYPE_NONE)
-		return false;
-	else
-		return true;
-}
-#else
-
-#include <sys/sysctl.h>
-
-static bool isAltiVecAvailable()  {
-	int selectors[2] = { CTL_HW, HW_VECTORUNIT };
-	int hasVectorUnit = 0;
-	size_t length = sizeof(hasVectorUnit);
-	int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
-	if ( 0 == error )
-		return hasVectorUnit != 0;
-	return false;
-}
-#endif
-#endif
-
 #define PIXEL00_1M  *(q) = interpolate16_2<bitFormat,3,1>(w5, w1);
 #define PIXEL00_1U  *(q) = interpolate16_2<bitFormat,3,1>(w5, w2);
 #define PIXEL00_1L  *(q) = interpolate16_2<bitFormat,3,1>(w5, w4);
@@ -141,33 +113,7 @@ void HQ3x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
 #undef bitFormat
 
 
-#ifdef HAS_ALTIVEC
-	#define USE_ALTIVEC	1
-
-	#define bitFormat 565
-	void HQ3x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-		#include "graphics/scaler/hq3x.h"
-	}
-	#undef bitFormat
-
-	#define bitFormat 555
-	void HQ3x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-		#include "graphics/scaler/hq3x.h"
-	}
-	#undef bitFormat
-#endif
-
 void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-#ifdef HAS_ALTIVEC
-	if (isAltiVecAvailable()) {
-		if (gBitFormat == 565)
-			HQ3x_565_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-		else
-			HQ3x_555_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-		return;
-	}
-#endif
-
 	if (gBitFormat == 565)
 		HQ3x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
 	else
diff --git a/graphics/scaler/hq3x.h b/graphics/scaler/hq3x.h
index 7fda8d5105..f7c9fda469 100644
--- a/graphics/scaler/hq3x.h
+++ b/graphics/scaler/hq3x.h
@@ -47,24 +47,6 @@
 	//	 | w7 | w8 | w9 |
 	//	 +----+----+----+
 
-#ifdef USE_ALTIVEC
-	// The YUV threshold.
-	static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706);
-
-	// Bit pattern mask.
-	static const vector signed int vPatternMask1 = (vector signed int)(0x01,0x02,0x04,0x08);
-	static const vector signed int vPatternMask2 = (vector signed int)(0x10,0x20,0x40,0x80);
-
-	// Permutation masks for the incremental vector loading (see below for more information).
-	static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7,  8,9,10,11,  20,21,22,23,  16,17,18,19);
-	static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27,  8,9,10,11,  12,13,14,15,  28,29,30,31);
-
-	// The YUV vectors.
-	vector signed char vecYUV5555;
-	vector signed char vecYUV1234;
-	vector signed char vecYUV6789;
-#endif
-
 	while (height--) {
 		w1 = *(p - 1 - nextlineSrc);
 		w4 = *(p - 1);
@@ -74,15 +56,6 @@
 		w5 = *(p);
 		w8 = *(p + nextlineSrc);
 
-#ifdef USE_ALTIVEC
-		// Load inital values of vecYUV1234 / vecYUV6789
-		const int arr1234[4] = {0, YUV(1), YUV(2), 0};
-		const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)};
-
-		vecYUV1234 = *(const vector signed char *)arr1234;
-		vecYUV6789 = *(const vector signed char *)arr6789;
-#endif
-
 		int tmpWidth = width;
 		while (tmpWidth--) {
 			p++;
@@ -92,74 +65,6 @@
 			w9 = *(p + nextlineSrc);
 
 			int pattern = 0;
-
-#ifdef USE_ALTIVEC
-			/*
-			Consider this peephole into the image buffer:
-			+----+----+----+----+
-			|    |    |    |    |
-			| w00| w01| w02| w03|
-			+----+----+----+----+
-			|    |    |    |    |
-			| w10| w11| w12| w13|
-			+----+----+----+----+
-			|    |    |    |    |
-			| w20| w21| w22| w23|
-			+----+----+----+----+
-
-			In the previous loop iteration, w11 was the center point, and our
-			vectors contain the following data from the previous iteration:
-			vecYUV5555 = { w11, w11, w11, w11 }
-			vecYUV1234 = { w00, w01, w02, w10 }
-			vecYUV6789 = { w12, w20, w21, w22 }
-
-			Now we have the new center point w12, and we would like to have
-			the following values in our vectors:
-			vecYUV5555 = { w12, w12, w12, w12 }
-			vecYUV1234 = { w01, w02, w03, w11 }
-			vecYUV6789 = { w13, w21, w22, w23 }
-
-			To this end we load a single new vector:
-			vTmp = { w11, w03, w13, w23 }
-
-			We then can compute all the new vector values using permutations only:
-			vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] }
-			vecYUV1234 = { vecYUV1234[1], vecYUV1234[2],  vTmp[1],  vTmp[0] }
-			vecYUV6789 = {  vTmp[2], vecYUV6789[2], vecYUV6789[3],  vTmp[3] }
-
-			Beautiful, isn't it? :-)
-			*/
-
-			// Load the new values into a temporary vector (see above for an explanation)
-			const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)};
-			vector signed char vTmp = *(const vector signed char *)tmpArr;
-
-			// Next update the data vectors
-			vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0);
-			vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234);
-			vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789);
-
-			// Compute the absolute difference between the center point's YUV and the outer points
-			const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234));
-			const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789));
-
-			// Compare the difference to the threshold (byte-wise)
-			const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold);
-			const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold);
-
-			// Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0.
-			// Then and in the patter masks. The idea is that for 0 components, we get 0,
-			// while for the other components we get exactly the mask value.
-			const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1);
-			const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2);
-
-			// Now sum up the components of all vectors. Since our pattern mask values
-			// are all "orthogonal", this is effectively the same as ORing them all
-			// together. In the end, the rightmost word of vSum contains the 'pattern'
-			vector signed int vSum = vec_sums(vPattern1, (vector signed int)0);
-			vSum = vec_sums(vPattern2, vSum);
-			pattern = ((int *)&vSum)[3];
-#else
 			const int yuv5 = YUV(5);
 			if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001;
 			if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002;
@@ -169,7 +74,6 @@
 			if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020;
 			if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040;
 			if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080;
-#endif
 
 			switch (pattern) {
 			case 0:
author	Max Horn	2006-04-17 08:38:26 +0000
committer	Max Horn	2006-04-17 08:38:26 +0000
commit	de250812e5e2a15fc3a1dddc9faae09f1c2df6d6 (patch)
tree	a612e6479a9a3e78e43b68f88d23cbf9f725dbdb
parent	092893a16d021a2391a3adfbb07db5f8bf9ee073 (diff)
download	scummvm-rg350-de250812e5e2a15fc3a1dddc9faae09f1c2df6d6.tar.gz scummvm-rg350-de250812e5e2a15fc3a1dddc9faae09f1c2df6d6.tar.bz2 scummvm-rg350-de250812e5e2a15fc3a1dddc9faae09f1c2df6d6.zip