diff options
-rw-r--r-- | common/scaler.cpp | 4 | ||||
-rw-r--r-- | common/scaler/scale2x.cpp | 339 |
2 files changed, 2 insertions, 341 deletions
diff --git a/common/scaler.cpp b/common/scaler.cpp index 0ef78768fa..8b53560afc 100644 --- a/common/scaler.cpp +++ b/common/scaler.cpp @@ -173,7 +173,7 @@ void Normal3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit */ void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - scale(2, dstPtr, dstPitch, srcPtr, srcPitch, 2, width, height); + scale(2, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height); } /** @@ -182,7 +182,7 @@ void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPi */ void AdvMame3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { - scale(3, dstPtr, dstPitch, srcPtr, srcPitch, 2, width, height); + scale(3, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height); } template<int bitFormat> diff --git a/common/scaler/scale2x.cpp b/common/scaler/scale2x.cpp index 7fac295cb5..5ba92c143f 100644 --- a/common/scaler/scale2x.cpp +++ b/common/scaler/scale2x.cpp @@ -204,68 +204,7 @@ static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8* assert(count >= 16); assert(count % 8 == 0); - /* always do the first and last run */ - count -= 2*8; - __asm__ __volatile__( -/* first run */ - /* set the current, current_pre, current_next registers */ - "movq 0(%1), %%mm0\n" - "movq 0(%1), %%mm7\n" - "movq 8(%1), %%mm1\n" - "psllq $56, %%mm0\n" - "psllq $56, %%mm1\n" - "psrlq $56, %%mm0\n" - "movq %%mm7, %%mm2\n" - "movq %%mm7, %%mm3\n" - "psllq $8, %%mm2\n" - "psrlq $8, %%mm3\n" - "por %%mm2, %%mm0\n" - "por %%mm3, %%mm1\n" - - /* current_upper */ - "movq (%0), %%mm6\n" - - /* compute the upper-left pixel for dst on %%mm2 */ - /* compute the upper-right pixel for dst on %%mm4 */ - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "movq %%mm0, %%mm3\n" - "movq %%mm1, %%mm5\n" - "pcmpeqb %%mm6, %%mm2\n" - "pcmpeqb %%mm6, %%mm4\n" - "pcmpeqb (%2), %%mm3\n" - "pcmpeqb (%2), %%mm5\n" - "pandn %%mm2, %%mm3\n" - "pandn %%mm4, %%mm5\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "pcmpeqb %%mm1, %%mm2\n" - "pcmpeqb %%mm0, %%mm4\n" - "pandn %%mm3, %%mm2\n" - "pandn %%mm5, %%mm4\n" - "movq %%mm2, %%mm3\n" - "movq %%mm4, %%mm5\n" - "pand %%mm6, %%mm2\n" - "pand %%mm6, %%mm4\n" - "pandn %%mm7, %%mm3\n" - "pandn %%mm7, %%mm5\n" - "por %%mm3, %%mm2\n" - "por %%mm5, %%mm4\n" - - /* set *dst */ - "movq %%mm2, %%mm3\n" - "punpcklbw %%mm4, %%mm2\n" - "punpckhbw %%mm4, %%mm3\n" - "movq %%mm2, (%3)\n" - "movq %%mm3, 8(%3)\n" - - /* next */ - "addl $8, %0\n" - "addl $8, %1\n" - "addl $8, %2\n" - "addl $16, %3\n" - /* central runs */ "shrl $3, %4\n" "jz 1f\n" @@ -332,58 +271,6 @@ static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8* "jnz 0b\n" "1:\n" -/* final run */ - /* set the current, current_pre, current_next registers */ - "movq (%1), %%mm1\n" - "movq (%1), %%mm7\n" - "movq -8(%1), %%mm0\n" - "psrlq $56, %%mm1\n" - "psrlq $56, %%mm0\n" - "psllq $56, %%mm1\n" - "movq %%mm7, %%mm2\n" - "movq %%mm7, %%mm3\n" - "psllq $8, %%mm2\n" - "psrlq $8, %%mm3\n" - "por %%mm2, %%mm0\n" - "por %%mm3, %%mm1\n" - - /* current_upper */ - "movq (%0), %%mm6\n" - - /* compute the upper-left pixel for dst on %%mm2 */ - /* compute the upper-right pixel for dst on %%mm4 */ - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "movq %%mm0, %%mm3\n" - "movq %%mm1, %%mm5\n" - "pcmpeqb %%mm6, %%mm2\n" - "pcmpeqb %%mm6, %%mm4\n" - "pcmpeqb (%2), %%mm3\n" - "pcmpeqb (%2), %%mm5\n" - "pandn %%mm2, %%mm3\n" - "pandn %%mm4, %%mm5\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "pcmpeqb %%mm1, %%mm2\n" - "pcmpeqb %%mm0, %%mm4\n" - "pandn %%mm3, %%mm2\n" - "pandn %%mm5, %%mm4\n" - "movq %%mm2, %%mm3\n" - "movq %%mm4, %%mm5\n" - "pand %%mm6, %%mm2\n" - "pand %%mm6, %%mm4\n" - "pandn %%mm7, %%mm3\n" - "pandn %%mm7, %%mm5\n" - "por %%mm3, %%mm2\n" - "por %%mm5, %%mm4\n" - - /* set *dst */ - "movq %%mm2, %%mm3\n" - "punpcklbw %%mm4, %%mm2\n" - "punpckhbw %%mm4, %%mm3\n" - "movq %%mm2, (%3)\n" - "movq %%mm3, 8(%3)\n" - : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count) : : "cc" @@ -395,68 +282,7 @@ static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint assert(count >= 8); assert(count % 4 == 0); - /* always do the first and last run */ - count -= 2*4; - __asm__ __volatile__( -/* first run */ - /* set the current, current_pre, current_next registers */ - "movq 0(%1), %%mm0\n" - "movq 0(%1), %%mm7\n" - "movq 8(%1), %%mm1\n" - "psllq $48, %%mm0\n" - "psllq $48, %%mm1\n" - "psrlq $48, %%mm0\n" - "movq %%mm7, %%mm2\n" - "movq %%mm7, %%mm3\n" - "psllq $16, %%mm2\n" - "psrlq $16, %%mm3\n" - "por %%mm2, %%mm0\n" - "por %%mm3, %%mm1\n" - - /* current_upper */ - "movq (%0), %%mm6\n" - - /* compute the upper-left pixel for dst on %%mm2 */ - /* compute the upper-right pixel for dst on %%mm4 */ - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "movq %%mm0, %%mm3\n" - "movq %%mm1, %%mm5\n" - "pcmpeqw %%mm6, %%mm2\n" - "pcmpeqw %%mm6, %%mm4\n" - "pcmpeqw (%2), %%mm3\n" - "pcmpeqw (%2), %%mm5\n" - "pandn %%mm2, %%mm3\n" - "pandn %%mm4, %%mm5\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "pcmpeqw %%mm1, %%mm2\n" - "pcmpeqw %%mm0, %%mm4\n" - "pandn %%mm3, %%mm2\n" - "pandn %%mm5, %%mm4\n" - "movq %%mm2, %%mm3\n" - "movq %%mm4, %%mm5\n" - "pand %%mm6, %%mm2\n" - "pand %%mm6, %%mm4\n" - "pandn %%mm7, %%mm3\n" - "pandn %%mm7, %%mm5\n" - "por %%mm3, %%mm2\n" - "por %%mm5, %%mm4\n" - - /* set *dst */ - "movq %%mm2, %%mm3\n" - "punpcklwd %%mm4, %%mm2\n" - "punpckhwd %%mm4, %%mm3\n" - "movq %%mm2, (%3)\n" - "movq %%mm3, 8(%3)\n" - - /* next */ - "addl $8, %0\n" - "addl $8, %1\n" - "addl $8, %2\n" - "addl $16, %3\n" - /* central runs */ "shrl $2, %4\n" "jz 1f\n" @@ -523,58 +349,6 @@ static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint "jnz 0b\n" "1:\n" -/* final run */ - /* set the current, current_pre, current_next registers */ - "movq (%1), %%mm1\n" - "movq (%1), %%mm7\n" - "movq -8(%1), %%mm0\n" - "psrlq $48, %%mm1\n" - "psrlq $48, %%mm0\n" - "psllq $48, %%mm1\n" - "movq %%mm7, %%mm2\n" - "movq %%mm7, %%mm3\n" - "psllq $16, %%mm2\n" - "psrlq $16, %%mm3\n" - "por %%mm2, %%mm0\n" - "por %%mm3, %%mm1\n" - - /* current_upper */ - "movq (%0), %%mm6\n" - - /* compute the upper-left pixel for dst on %%mm2 */ - /* compute the upper-right pixel for dst on %%mm4 */ - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "movq %%mm0, %%mm3\n" - "movq %%mm1, %%mm5\n" - "pcmpeqw %%mm6, %%mm2\n" - "pcmpeqw %%mm6, %%mm4\n" - "pcmpeqw (%2), %%mm3\n" - "pcmpeqw (%2), %%mm5\n" - "pandn %%mm2, %%mm3\n" - "pandn %%mm4, %%mm5\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "pcmpeqw %%mm1, %%mm2\n" - "pcmpeqw %%mm0, %%mm4\n" - "pandn %%mm3, %%mm2\n" - "pandn %%mm5, %%mm4\n" - "movq %%mm2, %%mm3\n" - "movq %%mm4, %%mm5\n" - "pand %%mm6, %%mm2\n" - "pand %%mm6, %%mm4\n" - "pandn %%mm7, %%mm3\n" - "pandn %%mm7, %%mm5\n" - "por %%mm3, %%mm2\n" - "por %%mm5, %%mm4\n" - - /* set *dst */ - "movq %%mm2, %%mm3\n" - "punpcklwd %%mm4, %%mm2\n" - "punpckhwd %%mm4, %%mm3\n" - "movq %%mm2, (%3)\n" - "movq %%mm3, 8(%3)\n" - : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count) : : "cc" @@ -586,68 +360,7 @@ static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint assert(count >= 4); assert(count % 2 == 0); - /* always do the first and last run */ - count -= 2*2; - __asm__ __volatile__( -/* first run */ - /* set the current, current_pre, current_next registers */ - "movq 0(%1), %%mm0\n" - "movq 0(%1), %%mm7\n" - "movq 8(%1), %%mm1\n" - "psllq $32, %%mm0\n" - "psllq $32, %%mm1\n" - "psrlq $32, %%mm0\n" - "movq %%mm7, %%mm2\n" - "movq %%mm7, %%mm3\n" - "psllq $32, %%mm2\n" - "psrlq $32, %%mm3\n" - "por %%mm2, %%mm0\n" - "por %%mm3, %%mm1\n" - - /* current_upper */ - "movq (%0), %%mm6\n" - - /* compute the upper-left pixel for dst on %%mm2 */ - /* compute the upper-right pixel for dst on %%mm4 */ - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "movq %%mm0, %%mm3\n" - "movq %%mm1, %%mm5\n" - "pcmpeqd %%mm6, %%mm2\n" - "pcmpeqd %%mm6, %%mm4\n" - "pcmpeqd (%2), %%mm3\n" - "pcmpeqd (%2), %%mm5\n" - "pandn %%mm2, %%mm3\n" - "pandn %%mm4, %%mm5\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "pcmpeqd %%mm1, %%mm2\n" - "pcmpeqd %%mm0, %%mm4\n" - "pandn %%mm3, %%mm2\n" - "pandn %%mm5, %%mm4\n" - "movq %%mm2, %%mm3\n" - "movq %%mm4, %%mm5\n" - "pand %%mm6, %%mm2\n" - "pand %%mm6, %%mm4\n" - "pandn %%mm7, %%mm3\n" - "pandn %%mm7, %%mm5\n" - "por %%mm3, %%mm2\n" - "por %%mm5, %%mm4\n" - - /* set *dst */ - "movq %%mm2, %%mm3\n" - "punpckldq %%mm4, %%mm2\n" - "punpckhdq %%mm4, %%mm3\n" - "movq %%mm2, (%3)\n" - "movq %%mm3, 8(%3)\n" - - /* next */ - "addl $8, %0\n" - "addl $8, %1\n" - "addl $8, %2\n" - "addl $16, %3\n" - /* central runs */ "shrl $1, %4\n" "jz 1f\n" @@ -714,58 +427,6 @@ static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint "jnz 0b\n" "1:\n" -/* final run */ - /* set the current, current_pre, current_next registers */ - "movq (%1), %%mm1\n" - "movq (%1), %%mm7\n" - "movq -8(%1), %%mm0\n" - "psrlq $32, %%mm1\n" - "psrlq $32, %%mm0\n" - "psllq $32, %%mm1\n" - "movq %%mm7, %%mm2\n" - "movq %%mm7, %%mm3\n" - "psllq $32, %%mm2\n" - "psrlq $32, %%mm3\n" - "por %%mm2, %%mm0\n" - "por %%mm3, %%mm1\n" - - /* current_upper */ - "movq (%0), %%mm6\n" - - /* compute the upper-left pixel for dst on %%mm2 */ - /* compute the upper-right pixel for dst on %%mm4 */ - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "movq %%mm0, %%mm3\n" - "movq %%mm1, %%mm5\n" - "pcmpeqd %%mm6, %%mm2\n" - "pcmpeqd %%mm6, %%mm4\n" - "pcmpeqd (%2), %%mm3\n" - "pcmpeqd (%2), %%mm5\n" - "pandn %%mm2, %%mm3\n" - "pandn %%mm4, %%mm5\n" - "movq %%mm0, %%mm2\n" - "movq %%mm1, %%mm4\n" - "pcmpeqd %%mm1, %%mm2\n" - "pcmpeqd %%mm0, %%mm4\n" - "pandn %%mm3, %%mm2\n" - "pandn %%mm5, %%mm4\n" - "movq %%mm2, %%mm3\n" - "movq %%mm4, %%mm5\n" - "pand %%mm6, %%mm2\n" - "pand %%mm6, %%mm4\n" - "pandn %%mm7, %%mm3\n" - "pandn %%mm7, %%mm5\n" - "por %%mm3, %%mm2\n" - "por %%mm5, %%mm4\n" - - /* set *dst */ - "movq %%mm2, %%mm3\n" - "punpckldq %%mm4, %%mm2\n" - "punpckhdq %%mm4, %%mm3\n" - "movq %%mm2, (%3)\n" - "movq %%mm3, 8(%3)\n" - : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count) : : "cc" |