about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- common/scaler.cpp 4
-rw-r--r-- common/scaler/scale2x.cpp 339
2 files changed, 2 insertions, 341 deletions
diff --git a/common/scaler.cpp b/common/scaler.cpp
index 0ef78768fa..8b53560afc 100644
--- a/common/scaler.cpp
+++ b/common/scaler.cpp
@@ -173,7 +173,7 @@ void Normal3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
*/
void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
int width, int height) {
- scale(2, dstPtr, dstPitch, srcPtr, srcPitch, 2, width, height);
+ scale(2, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height);
}
/**
@@ -182,7 +182,7 @@ void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPi
*/
void AdvMame3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
int width, int height) {
- scale(3, dstPtr, dstPitch, srcPtr, srcPitch, 2, width, height);
+ scale(3, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height);
}
template<int bitFormat>
diff --git a/common/scaler/scale2x.cpp b/common/scaler/scale2x.cpp
index 7fac295cb5..5ba92c143f 100644
--- a/common/scaler/scale2x.cpp
+++ b/common/scaler/scale2x.cpp
@@ -204,68 +204,7 @@ static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8*
assert(count >= 16);
assert(count % 8 == 0);
- /* always do the first and last run */
- count -= 2*8;
-
__asm__ __volatile__(
-/* first run */
- /* set the current, current_pre, current_next registers */
- "movq 0(%1), %%mm0\n"
- "movq 0(%1), %%mm7\n"
- "movq 8(%1), %%mm1\n"
- "psllq $56, %%mm0\n"
- "psllq $56, %%mm1\n"
- "psrlq $56, %%mm0\n"
- "movq %%mm7, %%mm2\n"
- "movq %%mm7, %%mm3\n"
- "psllq $8, %%mm2\n"
- "psrlq $8, %%mm3\n"
- "por %%mm2, %%mm0\n"
- "por %%mm3, %%mm1\n"
-
- /* current_upper */
- "movq (%0), %%mm6\n"
-
- /* compute the upper-left pixel for dst on %%mm2 */
- /* compute the upper-right pixel for dst on %%mm4 */
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "movq %%mm0, %%mm3\n"
- "movq %%mm1, %%mm5\n"
- "pcmpeqb %%mm6, %%mm2\n"
- "pcmpeqb %%mm6, %%mm4\n"
- "pcmpeqb (%2), %%mm3\n"
- "pcmpeqb (%2), %%mm5\n"
- "pandn %%mm2, %%mm3\n"
- "pandn %%mm4, %%mm5\n"
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "pcmpeqb %%mm1, %%mm2\n"
- "pcmpeqb %%mm0, %%mm4\n"
- "pandn %%mm3, %%mm2\n"
- "pandn %%mm5, %%mm4\n"
- "movq %%mm2, %%mm3\n"
- "movq %%mm4, %%mm5\n"
- "pand %%mm6, %%mm2\n"
- "pand %%mm6, %%mm4\n"
- "pandn %%mm7, %%mm3\n"
- "pandn %%mm7, %%mm5\n"
- "por %%mm3, %%mm2\n"
- "por %%mm5, %%mm4\n"
-
- /* set *dst */
- "movq %%mm2, %%mm3\n"
- "punpcklbw %%mm4, %%mm2\n"
- "punpckhbw %%mm4, %%mm3\n"
- "movq %%mm2, (%3)\n"
- "movq %%mm3, 8(%3)\n"
-
- /* next */
- "addl $8, %0\n"
- "addl $8, %1\n"
- "addl $8, %2\n"
- "addl $16, %3\n"
-
/* central runs */
"shrl $3, %4\n"
"jz 1f\n"
@@ -332,58 +271,6 @@ static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8*
"jnz 0b\n"
"1:\n"
-/* final run */
- /* set the current, current_pre, current_next registers */
- "movq (%1), %%mm1\n"
- "movq (%1), %%mm7\n"
- "movq -8(%1), %%mm0\n"
- "psrlq $56, %%mm1\n"
- "psrlq $56, %%mm0\n"
- "psllq $56, %%mm1\n"
- "movq %%mm7, %%mm2\n"
- "movq %%mm7, %%mm3\n"
- "psllq $8, %%mm2\n"
- "psrlq $8, %%mm3\n"
- "por %%mm2, %%mm0\n"
- "por %%mm3, %%mm1\n"
-
- /* current_upper */
- "movq (%0), %%mm6\n"
-
- /* compute the upper-left pixel for dst on %%mm2 */
- /* compute the upper-right pixel for dst on %%mm4 */
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "movq %%mm0, %%mm3\n"
- "movq %%mm1, %%mm5\n"
- "pcmpeqb %%mm6, %%mm2\n"
- "pcmpeqb %%mm6, %%mm4\n"
- "pcmpeqb (%2), %%mm3\n"
- "pcmpeqb (%2), %%mm5\n"
- "pandn %%mm2, %%mm3\n"
- "pandn %%mm4, %%mm5\n"
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "pcmpeqb %%mm1, %%mm2\n"
- "pcmpeqb %%mm0, %%mm4\n"
- "pandn %%mm3, %%mm2\n"
- "pandn %%mm5, %%mm4\n"
- "movq %%mm2, %%mm3\n"
- "movq %%mm4, %%mm5\n"
- "pand %%mm6, %%mm2\n"
- "pand %%mm6, %%mm4\n"
- "pandn %%mm7, %%mm3\n"
- "pandn %%mm7, %%mm5\n"
- "por %%mm3, %%mm2\n"
- "por %%mm5, %%mm4\n"
-
- /* set *dst */
- "movq %%mm2, %%mm3\n"
- "punpcklbw %%mm4, %%mm2\n"
- "punpckhbw %%mm4, %%mm3\n"
- "movq %%mm2, (%3)\n"
- "movq %%mm3, 8(%3)\n"
-
: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
:
: "cc"
@@ -395,68 +282,7 @@ static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint
assert(count >= 8);
assert(count % 4 == 0);
- /* always do the first and last run */
- count -= 2*4;
-
__asm__ __volatile__(
-/* first run */
- /* set the current, current_pre, current_next registers */
- "movq 0(%1), %%mm0\n"
- "movq 0(%1), %%mm7\n"
- "movq 8(%1), %%mm1\n"
- "psllq $48, %%mm0\n"
- "psllq $48, %%mm1\n"
- "psrlq $48, %%mm0\n"
- "movq %%mm7, %%mm2\n"
- "movq %%mm7, %%mm3\n"
- "psllq $16, %%mm2\n"
- "psrlq $16, %%mm3\n"
- "por %%mm2, %%mm0\n"
- "por %%mm3, %%mm1\n"
-
- /* current_upper */
- "movq (%0), %%mm6\n"
-
- /* compute the upper-left pixel for dst on %%mm2 */
- /* compute the upper-right pixel for dst on %%mm4 */
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "movq %%mm0, %%mm3\n"
- "movq %%mm1, %%mm5\n"
- "pcmpeqw %%mm6, %%mm2\n"
- "pcmpeqw %%mm6, %%mm4\n"
- "pcmpeqw (%2), %%mm3\n"
- "pcmpeqw (%2), %%mm5\n"
- "pandn %%mm2, %%mm3\n"
- "pandn %%mm4, %%mm5\n"
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "pcmpeqw %%mm1, %%mm2\n"
- "pcmpeqw %%mm0, %%mm4\n"
- "pandn %%mm3, %%mm2\n"
- "pandn %%mm5, %%mm4\n"
- "movq %%mm2, %%mm3\n"
- "movq %%mm4, %%mm5\n"
- "pand %%mm6, %%mm2\n"
- "pand %%mm6, %%mm4\n"
- "pandn %%mm7, %%mm3\n"
- "pandn %%mm7, %%mm5\n"
- "por %%mm3, %%mm2\n"
- "por %%mm5, %%mm4\n"
-
- /* set *dst */
- "movq %%mm2, %%mm3\n"
- "punpcklwd %%mm4, %%mm2\n"
- "punpckhwd %%mm4, %%mm3\n"
- "movq %%mm2, (%3)\n"
- "movq %%mm3, 8(%3)\n"
-
- /* next */
- "addl $8, %0\n"
- "addl $8, %1\n"
- "addl $8, %2\n"
- "addl $16, %3\n"
-
/* central runs */
"shrl $2, %4\n"
"jz 1f\n"
@@ -523,58 +349,6 @@ static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint
"jnz 0b\n"
"1:\n"
-/* final run */
- /* set the current, current_pre, current_next registers */
- "movq (%1), %%mm1\n"
- "movq (%1), %%mm7\n"
- "movq -8(%1), %%mm0\n"
- "psrlq $48, %%mm1\n"
- "psrlq $48, %%mm0\n"
- "psllq $48, %%mm1\n"
- "movq %%mm7, %%mm2\n"
- "movq %%mm7, %%mm3\n"
- "psllq $16, %%mm2\n"
- "psrlq $16, %%mm3\n"
- "por %%mm2, %%mm0\n"
- "por %%mm3, %%mm1\n"
-
- /* current_upper */
- "movq (%0), %%mm6\n"
-
- /* compute the upper-left pixel for dst on %%mm2 */
- /* compute the upper-right pixel for dst on %%mm4 */
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "movq %%mm0, %%mm3\n"
- "movq %%mm1, %%mm5\n"
- "pcmpeqw %%mm6, %%mm2\n"
- "pcmpeqw %%mm6, %%mm4\n"
- "pcmpeqw (%2), %%mm3\n"
- "pcmpeqw (%2), %%mm5\n"
- "pandn %%mm2, %%mm3\n"
- "pandn %%mm4, %%mm5\n"
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "pcmpeqw %%mm1, %%mm2\n"
- "pcmpeqw %%mm0, %%mm4\n"
- "pandn %%mm3, %%mm2\n"
- "pandn %%mm5, %%mm4\n"
- "movq %%mm2, %%mm3\n"
- "movq %%mm4, %%mm5\n"
- "pand %%mm6, %%mm2\n"
- "pand %%mm6, %%mm4\n"
- "pandn %%mm7, %%mm3\n"
- "pandn %%mm7, %%mm5\n"
- "por %%mm3, %%mm2\n"
- "por %%mm5, %%mm4\n"
-
- /* set *dst */
- "movq %%mm2, %%mm3\n"
- "punpcklwd %%mm4, %%mm2\n"
- "punpckhwd %%mm4, %%mm3\n"
- "movq %%mm2, (%3)\n"
- "movq %%mm3, 8(%3)\n"
-
: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
:
: "cc"
@@ -586,68 +360,7 @@ static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint
assert(count >= 4);
assert(count % 2 == 0);
- /* always do the first and last run */
- count -= 2*2;
-
__asm__ __volatile__(
-/* first run */
- /* set the current, current_pre, current_next registers */
- "movq 0(%1), %%mm0\n"
- "movq 0(%1), %%mm7\n"
- "movq 8(%1), %%mm1\n"
- "psllq $32, %%mm0\n"
- "psllq $32, %%mm1\n"
- "psrlq $32, %%mm0\n"
- "movq %%mm7, %%mm2\n"
- "movq %%mm7, %%mm3\n"
- "psllq $32, %%mm2\n"
- "psrlq $32, %%mm3\n"
- "por %%mm2, %%mm0\n"
- "por %%mm3, %%mm1\n"
-
- /* current_upper */
- "movq (%0), %%mm6\n"
-
- /* compute the upper-left pixel for dst on %%mm2 */
- /* compute the upper-right pixel for dst on %%mm4 */
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "movq %%mm0, %%mm3\n"
- "movq %%mm1, %%mm5\n"
- "pcmpeqd %%mm6, %%mm2\n"
- "pcmpeqd %%mm6, %%mm4\n"
- "pcmpeqd (%2), %%mm3\n"
- "pcmpeqd (%2), %%mm5\n"
- "pandn %%mm2, %%mm3\n"
- "pandn %%mm4, %%mm5\n"
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "pcmpeqd %%mm1, %%mm2\n"
- "pcmpeqd %%mm0, %%mm4\n"
- "pandn %%mm3, %%mm2\n"
- "pandn %%mm5, %%mm4\n"
- "movq %%mm2, %%mm3\n"
- "movq %%mm4, %%mm5\n"
- "pand %%mm6, %%mm2\n"
- "pand %%mm6, %%mm4\n"
- "pandn %%mm7, %%mm3\n"
- "pandn %%mm7, %%mm5\n"
- "por %%mm3, %%mm2\n"
- "por %%mm5, %%mm4\n"
-
- /* set *dst */
- "movq %%mm2, %%mm3\n"
- "punpckldq %%mm4, %%mm2\n"
- "punpckhdq %%mm4, %%mm3\n"
- "movq %%mm2, (%3)\n"
- "movq %%mm3, 8(%3)\n"
-
- /* next */
- "addl $8, %0\n"
- "addl $8, %1\n"
- "addl $8, %2\n"
- "addl $16, %3\n"
-
/* central runs */
"shrl $1, %4\n"
"jz 1f\n"
@@ -714,58 +427,6 @@ static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint
"jnz 0b\n"
"1:\n"
-/* final run */
- /* set the current, current_pre, current_next registers */
- "movq (%1), %%mm1\n"
- "movq (%1), %%mm7\n"
- "movq -8(%1), %%mm0\n"
- "psrlq $32, %%mm1\n"
- "psrlq $32, %%mm0\n"
- "psllq $32, %%mm1\n"
- "movq %%mm7, %%mm2\n"
- "movq %%mm7, %%mm3\n"
- "psllq $32, %%mm2\n"
- "psrlq $32, %%mm3\n"
- "por %%mm2, %%mm0\n"
- "por %%mm3, %%mm1\n"
-
- /* current_upper */
- "movq (%0), %%mm6\n"
-
- /* compute the upper-left pixel for dst on %%mm2 */
- /* compute the upper-right pixel for dst on %%mm4 */
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "movq %%mm0, %%mm3\n"
- "movq %%mm1, %%mm5\n"
- "pcmpeqd %%mm6, %%mm2\n"
- "pcmpeqd %%mm6, %%mm4\n"
- "pcmpeqd (%2), %%mm3\n"
- "pcmpeqd (%2), %%mm5\n"
- "pandn %%mm2, %%mm3\n"
- "pandn %%mm4, %%mm5\n"
- "movq %%mm0, %%mm2\n"
- "movq %%mm1, %%mm4\n"
- "pcmpeqd %%mm1, %%mm2\n"
- "pcmpeqd %%mm0, %%mm4\n"
- "pandn %%mm3, %%mm2\n"
- "pandn %%mm5, %%mm4\n"
- "movq %%mm2, %%mm3\n"
- "movq %%mm4, %%mm5\n"
- "pand %%mm6, %%mm2\n"
- "pand %%mm6, %%mm4\n"
- "pandn %%mm7, %%mm3\n"
- "pandn %%mm7, %%mm5\n"
- "por %%mm3, %%mm2\n"
- "por %%mm5, %%mm4\n"
-
- /* set *dst */
- "movq %%mm2, %%mm3\n"
- "punpckldq %%mm4, %%mm2\n"
- "punpckhdq %%mm4, %%mm3\n"
- "movq %%mm2, (%3)\n"
- "movq %%mm3, 8(%3)\n"
-
: "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
:
: "cc"