diff options
| -rw-r--r-- | common/scaler.cpp | 4 | ||||
| -rw-r--r-- | common/scaler/scale2x.cpp | 339 | 
2 files changed, 2 insertions, 341 deletions
| diff --git a/common/scaler.cpp b/common/scaler.cpp index 0ef78768fa..8b53560afc 100644 --- a/common/scaler.cpp +++ b/common/scaler.cpp @@ -173,7 +173,7 @@ void Normal3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit   */  void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,  							 int width, int height) { -	scale(2, dstPtr, dstPitch, srcPtr, srcPitch, 2, width, height); +	scale(2, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height);  }  /** @@ -182,7 +182,7 @@ void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPi   */  void AdvMame3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,  							 int width, int height) { -	scale(3, dstPtr, dstPitch, srcPtr, srcPitch, 2, width, height); +	scale(3, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height);  }  template<int bitFormat> diff --git a/common/scaler/scale2x.cpp b/common/scaler/scale2x.cpp index 7fac295cb5..5ba92c143f 100644 --- a/common/scaler/scale2x.cpp +++ b/common/scaler/scale2x.cpp @@ -204,68 +204,7 @@ static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8*  	assert(count >= 16);  	assert(count % 8 == 0); -	/* always do the first and last run */ -	count -= 2*8; -  	__asm__ __volatile__( -/* first run */ -		/* set the current, current_pre, current_next registers */ -		"movq 0(%1), %%mm0\n" -		"movq 0(%1), %%mm7\n" -		"movq 8(%1), %%mm1\n" -		"psllq $56, %%mm0\n" -		"psllq $56, %%mm1\n" -		"psrlq $56, %%mm0\n" -		"movq %%mm7, %%mm2\n" -		"movq %%mm7, %%mm3\n" -		"psllq $8, %%mm2\n" -		"psrlq $8, %%mm3\n" -		"por %%mm2, %%mm0\n" -		"por %%mm3, %%mm1\n" - -		/* current_upper */ -		"movq (%0), %%mm6\n" - -		/* compute the upper-left pixel for dst on %%mm2 */ -		/* compute the upper-right pixel for dst on %%mm4 */ -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"movq %%mm0, %%mm3\n" -		"movq %%mm1, %%mm5\n" -		"pcmpeqb %%mm6, %%mm2\n" -		"pcmpeqb %%mm6, %%mm4\n" -		"pcmpeqb (%2), %%mm3\n" -		"pcmpeqb (%2), %%mm5\n" -		"pandn %%mm2, %%mm3\n" -		"pandn %%mm4, %%mm5\n" -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"pcmpeqb %%mm1, %%mm2\n" -		"pcmpeqb %%mm0, %%mm4\n" -		"pandn %%mm3, %%mm2\n" -		"pandn %%mm5, %%mm4\n" -		"movq %%mm2, %%mm3\n" -		"movq %%mm4, %%mm5\n" -		"pand %%mm6, %%mm2\n" -		"pand %%mm6, %%mm4\n" -		"pandn %%mm7, %%mm3\n" -		"pandn %%mm7, %%mm5\n" -		"por %%mm3, %%mm2\n" -		"por %%mm5, %%mm4\n" - -		/* set *dst */ -		"movq %%mm2, %%mm3\n" -		"punpcklbw %%mm4, %%mm2\n" -		"punpckhbw %%mm4, %%mm3\n" -		"movq %%mm2, (%3)\n" -		"movq %%mm3, 8(%3)\n" - -		/* next */ -		"addl $8, %0\n" -		"addl $8, %1\n" -		"addl $8, %2\n" -		"addl $16, %3\n" -  /* central runs */  		"shrl $3, %4\n"  		"jz 1f\n" @@ -332,58 +271,6 @@ static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8*  		"jnz 0b\n"  		"1:\n" -/* final run */ -		/* set the current, current_pre, current_next registers */ -		"movq (%1), %%mm1\n" -		"movq (%1), %%mm7\n" -		"movq -8(%1), %%mm0\n" -		"psrlq $56, %%mm1\n" -		"psrlq $56, %%mm0\n" -		"psllq $56, %%mm1\n" -		"movq %%mm7, %%mm2\n" -		"movq %%mm7, %%mm3\n" -		"psllq $8, %%mm2\n" -		"psrlq $8, %%mm3\n" -		"por %%mm2, %%mm0\n" -		"por %%mm3, %%mm1\n" - -		/* current_upper */ -		"movq (%0), %%mm6\n" - -		/* compute the upper-left pixel for dst on %%mm2 */ -		/* compute the upper-right pixel for dst on %%mm4 */ -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"movq %%mm0, %%mm3\n" -		"movq %%mm1, %%mm5\n" -		"pcmpeqb %%mm6, %%mm2\n" -		"pcmpeqb %%mm6, %%mm4\n" -		"pcmpeqb (%2), %%mm3\n" -		"pcmpeqb (%2), %%mm5\n" -		"pandn %%mm2, %%mm3\n" -		"pandn %%mm4, %%mm5\n" -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"pcmpeqb %%mm1, %%mm2\n" -		"pcmpeqb %%mm0, %%mm4\n" -		"pandn %%mm3, %%mm2\n" -		"pandn %%mm5, %%mm4\n" -		"movq %%mm2, %%mm3\n" -		"movq %%mm4, %%mm5\n" -		"pand %%mm6, %%mm2\n" -		"pand %%mm6, %%mm4\n" -		"pandn %%mm7, %%mm3\n" -		"pandn %%mm7, %%mm5\n" -		"por %%mm3, %%mm2\n" -		"por %%mm5, %%mm4\n" - -		/* set *dst */ -		"movq %%mm2, %%mm3\n" -		"punpcklbw %%mm4, %%mm2\n" -		"punpckhbw %%mm4, %%mm3\n" -		"movq %%mm2, (%3)\n" -		"movq %%mm3, 8(%3)\n" -  		: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)  		:  		: "cc" @@ -395,68 +282,7 @@ static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint  	assert(count >= 8);  	assert(count % 4 == 0); -	/* always do the first and last run */ -	count -= 2*4; -  	__asm__ __volatile__( -/* first run */ -		/* set the current, current_pre, current_next registers */ -		"movq 0(%1), %%mm0\n" -		"movq 0(%1), %%mm7\n" -		"movq 8(%1), %%mm1\n" -		"psllq $48, %%mm0\n" -		"psllq $48, %%mm1\n" -		"psrlq $48, %%mm0\n" -		"movq %%mm7, %%mm2\n" -		"movq %%mm7, %%mm3\n" -		"psllq $16, %%mm2\n" -		"psrlq $16, %%mm3\n" -		"por %%mm2, %%mm0\n" -		"por %%mm3, %%mm1\n" - -		/* current_upper */ -		"movq (%0), %%mm6\n" - -		/* compute the upper-left pixel for dst on %%mm2 */ -		/* compute the upper-right pixel for dst on %%mm4 */ -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"movq %%mm0, %%mm3\n" -		"movq %%mm1, %%mm5\n" -		"pcmpeqw %%mm6, %%mm2\n" -		"pcmpeqw %%mm6, %%mm4\n" -		"pcmpeqw (%2), %%mm3\n" -		"pcmpeqw (%2), %%mm5\n" -		"pandn %%mm2, %%mm3\n" -		"pandn %%mm4, %%mm5\n" -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"pcmpeqw %%mm1, %%mm2\n" -		"pcmpeqw %%mm0, %%mm4\n" -		"pandn %%mm3, %%mm2\n" -		"pandn %%mm5, %%mm4\n" -		"movq %%mm2, %%mm3\n" -		"movq %%mm4, %%mm5\n" -		"pand %%mm6, %%mm2\n" -		"pand %%mm6, %%mm4\n" -		"pandn %%mm7, %%mm3\n" -		"pandn %%mm7, %%mm5\n" -		"por %%mm3, %%mm2\n" -		"por %%mm5, %%mm4\n" - -		/* set *dst */ -		"movq %%mm2, %%mm3\n" -		"punpcklwd %%mm4, %%mm2\n" -		"punpckhwd %%mm4, %%mm3\n" -		"movq %%mm2, (%3)\n" -		"movq %%mm3, 8(%3)\n" - -		/* next */ -		"addl $8, %0\n" -		"addl $8, %1\n" -		"addl $8, %2\n" -		"addl $16, %3\n" -  /* central runs */  		"shrl $2, %4\n"  		"jz 1f\n" @@ -523,58 +349,6 @@ static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint  		"jnz 0b\n"  		"1:\n" -/* final run */ -		/* set the current, current_pre, current_next registers */ -		"movq (%1), %%mm1\n" -		"movq (%1), %%mm7\n" -		"movq -8(%1), %%mm0\n" -		"psrlq $48, %%mm1\n" -		"psrlq $48, %%mm0\n" -		"psllq $48, %%mm1\n" -		"movq %%mm7, %%mm2\n" -		"movq %%mm7, %%mm3\n" -		"psllq $16, %%mm2\n" -		"psrlq $16, %%mm3\n" -		"por %%mm2, %%mm0\n" -		"por %%mm3, %%mm1\n" - -		/* current_upper */ -		"movq (%0), %%mm6\n" - -		/* compute the upper-left pixel for dst on %%mm2 */ -		/* compute the upper-right pixel for dst on %%mm4 */ -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"movq %%mm0, %%mm3\n" -		"movq %%mm1, %%mm5\n" -		"pcmpeqw %%mm6, %%mm2\n" -		"pcmpeqw %%mm6, %%mm4\n" -		"pcmpeqw (%2), %%mm3\n" -		"pcmpeqw (%2), %%mm5\n" -		"pandn %%mm2, %%mm3\n" -		"pandn %%mm4, %%mm5\n" -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"pcmpeqw %%mm1, %%mm2\n" -		"pcmpeqw %%mm0, %%mm4\n" -		"pandn %%mm3, %%mm2\n" -		"pandn %%mm5, %%mm4\n" -		"movq %%mm2, %%mm3\n" -		"movq %%mm4, %%mm5\n" -		"pand %%mm6, %%mm2\n" -		"pand %%mm6, %%mm4\n" -		"pandn %%mm7, %%mm3\n" -		"pandn %%mm7, %%mm5\n" -		"por %%mm3, %%mm2\n" -		"por %%mm5, %%mm4\n" - -		/* set *dst */ -		"movq %%mm2, %%mm3\n" -		"punpcklwd %%mm4, %%mm2\n" -		"punpckhwd %%mm4, %%mm3\n" -		"movq %%mm2, (%3)\n" -		"movq %%mm3, 8(%3)\n" -  		: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)  		:  		: "cc" @@ -586,68 +360,7 @@ static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint  	assert(count >= 4);  	assert(count % 2 == 0); -	/* always do the first and last run */ -	count -= 2*2; -  	__asm__ __volatile__( -/* first run */ -		/* set the current, current_pre, current_next registers */ -		"movq 0(%1), %%mm0\n" -		"movq 0(%1), %%mm7\n" -		"movq 8(%1), %%mm1\n" -		"psllq $32, %%mm0\n" -		"psllq $32, %%mm1\n" -		"psrlq $32, %%mm0\n" -		"movq %%mm7, %%mm2\n" -		"movq %%mm7, %%mm3\n" -		"psllq $32, %%mm2\n" -		"psrlq $32, %%mm3\n" -		"por %%mm2, %%mm0\n" -		"por %%mm3, %%mm1\n" - -		/* current_upper */ -		"movq (%0), %%mm6\n" - -		/* compute the upper-left pixel for dst on %%mm2 */ -		/* compute the upper-right pixel for dst on %%mm4 */ -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"movq %%mm0, %%mm3\n" -		"movq %%mm1, %%mm5\n" -		"pcmpeqd %%mm6, %%mm2\n" -		"pcmpeqd %%mm6, %%mm4\n" -		"pcmpeqd (%2), %%mm3\n" -		"pcmpeqd (%2), %%mm5\n" -		"pandn %%mm2, %%mm3\n" -		"pandn %%mm4, %%mm5\n" -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"pcmpeqd %%mm1, %%mm2\n" -		"pcmpeqd %%mm0, %%mm4\n" -		"pandn %%mm3, %%mm2\n" -		"pandn %%mm5, %%mm4\n" -		"movq %%mm2, %%mm3\n" -		"movq %%mm4, %%mm5\n" -		"pand %%mm6, %%mm2\n" -		"pand %%mm6, %%mm4\n" -		"pandn %%mm7, %%mm3\n" -		"pandn %%mm7, %%mm5\n" -		"por %%mm3, %%mm2\n" -		"por %%mm5, %%mm4\n" - -		/* set *dst */ -		"movq %%mm2, %%mm3\n" -		"punpckldq %%mm4, %%mm2\n" -		"punpckhdq %%mm4, %%mm3\n" -		"movq %%mm2, (%3)\n" -		"movq %%mm3, 8(%3)\n" - -		/* next */ -		"addl $8, %0\n" -		"addl $8, %1\n" -		"addl $8, %2\n" -		"addl $16, %3\n" -  /* central runs */  		"shrl $1, %4\n"  		"jz 1f\n" @@ -714,58 +427,6 @@ static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint  		"jnz 0b\n"  		"1:\n" -/* final run */ -		/* set the current, current_pre, current_next registers */ -		"movq (%1), %%mm1\n" -		"movq (%1), %%mm7\n" -		"movq -8(%1), %%mm0\n" -		"psrlq $32, %%mm1\n" -		"psrlq $32, %%mm0\n" -		"psllq $32, %%mm1\n" -		"movq %%mm7, %%mm2\n" -		"movq %%mm7, %%mm3\n" -		"psllq $32, %%mm2\n" -		"psrlq $32, %%mm3\n" -		"por %%mm2, %%mm0\n" -		"por %%mm3, %%mm1\n" - -		/* current_upper */ -		"movq (%0), %%mm6\n" - -		/* compute the upper-left pixel for dst on %%mm2 */ -		/* compute the upper-right pixel for dst on %%mm4 */ -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"movq %%mm0, %%mm3\n" -		"movq %%mm1, %%mm5\n" -		"pcmpeqd %%mm6, %%mm2\n" -		"pcmpeqd %%mm6, %%mm4\n" -		"pcmpeqd (%2), %%mm3\n" -		"pcmpeqd (%2), %%mm5\n" -		"pandn %%mm2, %%mm3\n" -		"pandn %%mm4, %%mm5\n" -		"movq %%mm0, %%mm2\n" -		"movq %%mm1, %%mm4\n" -		"pcmpeqd %%mm1, %%mm2\n" -		"pcmpeqd %%mm0, %%mm4\n" -		"pandn %%mm3, %%mm2\n" -		"pandn %%mm5, %%mm4\n" -		"movq %%mm2, %%mm3\n" -		"movq %%mm4, %%mm5\n" -		"pand %%mm6, %%mm2\n" -		"pand %%mm6, %%mm4\n" -		"pandn %%mm7, %%mm3\n" -		"pandn %%mm7, %%mm5\n" -		"por %%mm3, %%mm2\n" -		"por %%mm5, %%mm4\n" - -		/* set *dst */ -		"movq %%mm2, %%mm3\n" -		"punpckldq %%mm4, %%mm2\n" -		"punpckhdq %%mm4, %%mm3\n" -		"movq %%mm2, (%3)\n" -		"movq %%mm3, 8(%3)\n" -  		: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)  		:  		: "cc" | 
