aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--common/module.mk5
-rw-r--r--common/scaler.cpp94
-rw-r--r--common/scaler/scale2x.cpp927
-rw-r--r--common/scaler/scale2x.h52
-rw-r--r--common/scaler/scale3x.cpp386
-rw-r--r--common/scaler/scale3x.h33
-rw-r--r--common/scaler/scalebit.cpp391
-rw-r--r--common/scaler/scalebit.h43
8 files changed, 1839 insertions, 92 deletions
diff --git a/common/module.mk b/common/module.mk
index 6d9d8a8079..60f4d22bf6 100644
--- a/common/module.mk
+++ b/common/module.mk
@@ -14,7 +14,10 @@ MODULE_OBJS := \
common/scaler/2xsai.o \
common/scaler/aspect.o \
common/scaler/hq2x.o \
- common/scaler/hq3x.o
+ common/scaler/hq3x.o \
+ common/scaler/scale2x.o \
+ common/scaler/scale3x.o \
+ common/scaler/scalebit.o
ifdef HAVE_NASM
MODULE_OBJS += \
diff --git a/common/scaler.cpp b/common/scaler.cpp
index 7557402bfa..0ef78768fa 100644
--- a/common/scaler.cpp
+++ b/common/scaler.cpp
@@ -21,6 +21,7 @@
*/
#include "common/scaler/intern.h"
+#include "common/scaler/scalebit.h"
#include "common/util.h"
@@ -172,46 +173,7 @@ void Normal3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
*/
void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
int width, int height) {
- const uint32 nextlineSrc = srcPitch / sizeof(uint16);
- const uint16 *p = (const uint16 *)srcPtr;
-
- const uint32 nextlineDst = dstPitch / sizeof(uint16);
- uint16 *q = (uint16 *)dstPtr;
-
- uint16 A, B, C;
- uint16 D, E, F;
- uint16 G, H, I;
-
- while (height--) {
- B = *(p - 1 - nextlineSrc);
- E = *(p - 1);
- H = *(p - 1 + nextlineSrc);
- C = *(p - nextlineSrc);
- F = *(p);
- I = *(p + nextlineSrc);
- int tmpWidth = width;
- while (tmpWidth--) {
- p++;
- A = B; B = C; C = *(p - nextlineSrc);
- D = E; E = F; F = *(p);
- G = H; H = I; I = *(p + nextlineSrc);
-
- if (B != H && D != F) {
- *(q + 0) = D == B ? D : E;
- *(q + 1) = B == F ? F : E;
- *(q + nextlineDst + 0) = D == H ? D : E;
- *(q + nextlineDst + 1) = H == F ? F : E;
- } else {
- *(q + 0) = E;
- *(q + 1) = E;
- *(q + nextlineDst + 0) = E;
- *(q + nextlineDst + 1) = E;
- }
- q += 2;
- }
- p += nextlineSrc - width;
- q += (nextlineDst - width) << 1;
- }
+ scale(2, dstPtr, dstPitch, srcPtr, srcPitch, 2, width, height);
}
/**
@@ -220,57 +182,7 @@ void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPi
*/
void AdvMame3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
int width, int height) {
- const uint32 nextlineSrc = srcPitch / sizeof(uint16);
- const uint16 *p = (const uint16 *)srcPtr;
-
- const uint32 nextlineDst = dstPitch / sizeof(uint16);
- uint16 *q = (uint16 *)dstPtr;
-
- uint16 A, B, C;
- uint16 D, E, F;
- uint16 G, H, I;
-
- while (height--) {
- B = *(p - 1 - nextlineSrc);
- E = *(p - 1);
- H = *(p - 1 + nextlineSrc);
- C = *(p - nextlineSrc);
- F = *(p);
- I = *(p + nextlineSrc);
- int tmpWidth = width;
- while (tmpWidth--) {
- p++;
- A = B; B = C; C = *(p - nextlineSrc);
- D = E; E = F; F = *(p);
- G = H; H = I; I = *(p + nextlineSrc);
-
- if (B != H && D != F) {
- *(q + 0) = D == B ? D : E;
- *(q + 1) = (D == B && E != C) || (B == F && E != A) ? B : E;
- *(q + 2) = B == F ? F : E;
- *(q + nextlineDst + 0) = (D == B && E != G) || (D == B && E != A) ? D : E;
- *(q + nextlineDst + 1) = E;
- *(q + nextlineDst + 2) = (B == F && E != I) || (H == F && E != C) ? F : E;
- *(q + 2 * nextlineDst + 0) = D == H ? D : E;
- *(q + 2 * nextlineDst + 1) = (D == H && E != I) || (H == F && E != G) ? H : E;
- *(q + 2 * nextlineDst + 2) = H == F ? F : E;
- } else {
- *(q + 0) = E;
- *(q + 1) = E;
- *(q + 2) = E;
- *(q + nextlineDst + 0) = E;
- *(q + nextlineDst + 1) = E;
- *(q + nextlineDst + 2) = E;
- *(q + 2 * nextlineDst + 0) = E;
- *(q + 2 * nextlineDst + 1) = E;
- *(q + 2 * nextlineDst + 2) = E;
- }
-
- q += 3;
- }
- p += nextlineSrc - width;
- q += (nextlineDst - width) * 3;
- }
+ scale(3, dstPtr, dstPitch, srcPtr, srcPitch, 2, width, height);
}
template<int bitFormat>
diff --git a/common/scaler/scale2x.cpp b/common/scaler/scale2x.cpp
new file mode 100644
index 0000000000..3684c94348
--- /dev/null
+++ b/common/scaler/scale2x.cpp
@@ -0,0 +1,927 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains a C and MMX implementation of the Scale2x effect.
+ *
+ * You can find an high level description of the effect at :
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "scale2x.h"
+
+#include <assert.h>
+
+/***************************************************************************/
+/* Scale2x C implementation */
+
+static inline void scale2x_8_def_single(scale2x_uint8* __restrict__ dst, const scale2x_uint8* __restrict__ src0, const scale2x_uint8* __restrict__ src1, const scale2x_uint8* __restrict__ src2, unsigned count)
+{
+ assert(count >= 2);
+
+ /* first pixel */
+ dst[0] = src1[0];
+ if (src1[1] == src0[0] && src2[0] != src0[0])
+ dst[1] = src0[0];
+ else
+ dst[1] = src1[0];
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 2;
+
+ /* central pixels */
+ count -= 2;
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+ dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0];
+ dst[1] = src1[1] == src0[0] ? src0[0] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 2;
+ --count;
+ }
+
+ /* last pixel */
+ if (src1[-1] == src0[0] && src2[0] != src0[0])
+ dst[0] = src0[0];
+ else
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+}
+
+static inline void scale2x_16_def_single(scale2x_uint16* __restrict__ dst, const scale2x_uint16* __restrict__ src0, const scale2x_uint16* __restrict__ src1, const scale2x_uint16* __restrict__ src2, unsigned count)
+{
+ assert(count >= 2);
+
+ /* first pixel */
+ dst[0] = src1[0];
+ if (src1[1] == src0[0] && src2[0] != src0[0])
+ dst[1] = src0[0];
+ else
+ dst[1] = src1[0];
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 2;
+
+ /* central pixels */
+ count -= 2;
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+ dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0];
+ dst[1] = src1[1] == src0[0] ? src0[0] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 2;
+ --count;
+ }
+
+ /* last pixel */
+ if (src1[-1] == src0[0] && src2[0] != src0[0])
+ dst[0] = src0[0];
+ else
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+}
+
+static inline void scale2x_32_def_single(scale2x_uint32* __restrict__ dst, const scale2x_uint32* __restrict__ src0, const scale2x_uint32* __restrict__ src1, const scale2x_uint32* __restrict__ src2, unsigned count)
+{
+ assert(count >= 2);
+
+ /* first pixel */
+ dst[0] = src1[0];
+ if (src1[1] == src0[0] && src2[0] != src0[0])
+ dst[1] = src0[0];
+ else
+ dst[1] = src1[0];
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 2;
+
+ /* central pixels */
+ count -= 2;
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+ dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0];
+ dst[1] = src1[1] == src0[0] ? src0[0] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 2;
+ --count;
+ }
+
+ /* last pixel */
+ if (src1[-1] == src0[0] && src2[0] != src0[0])
+ dst[0] = src0[0];
+ else
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 8 bits.
+ * The function is implemented in C.
+ * The pixels over the left and right borders are assumed of the same color of
+ * the pixels on the border.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+ assert(count >= 2);
+
+ scale2x_8_def_single(dst0, src0, src1, src2, count);
+ scale2x_8_def_single(dst1, src2, src1, src0, count);
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 16 bits.
+ * This function operates like scale2x_8_def() but for 16 bits pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+ assert(count >= 2);
+
+ scale2x_16_def_single(dst0, src0, src1, src2, count);
+ scale2x_16_def_single(dst1, src2, src1, src0, count);
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 32 bits.
+ * This function operates like scale2x_8_def() but for 32 bits pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+ assert(count >= 2);
+
+ scale2x_32_def_single(dst0, src0, src1, src2, count);
+ scale2x_32_def_single(dst1, src2, src1, src0, count);
+}
+
+/***************************************************************************/
+/* Scale2x MMX implementation */
+
+#if defined(__GNUC__) && defined(__i386__)
+
+/*
+ * Apply the Scale2x effect at a single row.
+ * This function must be called only by the other scale2x functions.
+ *
+ * Considering the pixel map :
+ *
+ * ABC (src0)
+ * DEF (src1)
+ * GHI (src2)
+ *
+ * this functions compute 2 new pixels in substitution of the source pixel E
+ * like this map :
+ *
+ * ab (dst)
+ *
+ * with these variables :
+ *
+ * &current -> E
+ * &current_left -> D
+ * &current_right -> F
+ * &current_upper -> B
+ * &current_lower -> H
+ *
+ * %0 -> current_upper
+ * %1 -> current
+ * %2 -> current_lower
+ * %3 -> dst
+ * %4 -> counter
+ *
+ * %mm0 -> *current_left
+ * %mm1 -> *current_next
+ * %mm2 -> tmp0
+ * %mm3 -> tmp1
+ * %mm4 -> tmp2
+ * %mm5 -> tmp3
+ * %mm6 -> *current_upper
+ * %mm7 -> *current
+ */
+static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+ assert(count >= 16);
+ assert(count % 8 == 0);
+
+ /* always do the first and last run */
+ count -= 2*8;
+
+ __asm__ __volatile__(
+/* first run */
+ /* set the current, current_pre, current_next registers */
+ "movq 0(%1), %%mm0\n"
+ "movq 0(%1), %%mm7\n"
+ "movq 8(%1), %%mm1\n"
+ "psllq $56, %%mm0\n"
+ "psllq $56, %%mm1\n"
+ "psrlq $56, %%mm0\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $8, %%mm2\n"
+ "psrlq $8, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqb %%mm6, %%mm2\n"
+ "pcmpeqb %%mm6, %%mm4\n"
+ "pcmpeqb (%2), %%mm3\n"
+ "pcmpeqb (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqb %%mm1, %%mm2\n"
+ "pcmpeqb %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpcklbw %%mm4, %%mm2\n"
+ "punpckhbw %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ /* next */
+ "addl $8, %0\n"
+ "addl $8, %1\n"
+ "addl $8, %2\n"
+ "addl $16, %3\n"
+
+/* central runs */
+ "shrl $3, %4\n"
+ "jz 1f\n"
+
+ "0:\n"
+
+ /* set the current, current_pre, current_next registers */
+ "movq -8(%1), %%mm0\n"
+ "movq (%1), %%mm7\n"
+ "movq 8(%1), %%mm1\n"
+ "psrlq $56, %%mm0\n"
+ "psllq $56, %%mm1\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $8, %%mm2\n"
+ "psrlq $8, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqb %%mm6, %%mm2\n"
+ "pcmpeqb %%mm6, %%mm4\n"
+ "pcmpeqb (%2), %%mm3\n"
+ "pcmpeqb (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqb %%mm1, %%mm2\n"
+ "pcmpeqb %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpcklbw %%mm4, %%mm2\n"
+ "punpckhbw %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ /* next */
+ "addl $8, %0\n"
+ "addl $8, %1\n"
+ "addl $8, %2\n"
+ "addl $16, %3\n"
+
+ "decl %4\n"
+ "jnz 0b\n"
+ "1:\n"
+
+/* final run */
+ /* set the current, current_pre, current_next registers */
+ "movq (%1), %%mm1\n"
+ "movq (%1), %%mm7\n"
+ "movq -8(%1), %%mm0\n"
+ "psrlq $56, %%mm1\n"
+ "psrlq $56, %%mm0\n"
+ "psllq $56, %%mm1\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $8, %%mm2\n"
+ "psrlq $8, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqb %%mm6, %%mm2\n"
+ "pcmpeqb %%mm6, %%mm4\n"
+ "pcmpeqb (%2), %%mm3\n"
+ "pcmpeqb (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqb %%mm1, %%mm2\n"
+ "pcmpeqb %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpcklbw %%mm4, %%mm2\n"
+ "punpckhbw %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
+ :
+ : "cc"
+ );
+}
+
+static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+ assert(count >= 8);
+ assert(count % 4 == 0);
+
+ /* always do the first and last run */
+ count -= 2*4;
+
+ __asm__ __volatile__(
+/* first run */
+ /* set the current, current_pre, current_next registers */
+ "movq 0(%1), %%mm0\n"
+ "movq 0(%1), %%mm7\n"
+ "movq 8(%1), %%mm1\n"
+ "psllq $48, %%mm0\n"
+ "psllq $48, %%mm1\n"
+ "psrlq $48, %%mm0\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $16, %%mm2\n"
+ "psrlq $16, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqw %%mm6, %%mm2\n"
+ "pcmpeqw %%mm6, %%mm4\n"
+ "pcmpeqw (%2), %%mm3\n"
+ "pcmpeqw (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqw %%mm1, %%mm2\n"
+ "pcmpeqw %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpcklwd %%mm4, %%mm2\n"
+ "punpckhwd %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ /* next */
+ "addl $8, %0\n"
+ "addl $8, %1\n"
+ "addl $8, %2\n"
+ "addl $16, %3\n"
+
+/* central runs */
+ "shrl $2, %4\n"
+ "jz 1f\n"
+
+ "0:\n"
+
+ /* set the current, current_pre, current_next registers */
+ "movq -8(%1), %%mm0\n"
+ "movq (%1), %%mm7\n"
+ "movq 8(%1), %%mm1\n"
+ "psrlq $48, %%mm0\n"
+ "psllq $48, %%mm1\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $16, %%mm2\n"
+ "psrlq $16, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqw %%mm6, %%mm2\n"
+ "pcmpeqw %%mm6, %%mm4\n"
+ "pcmpeqw (%2), %%mm3\n"
+ "pcmpeqw (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqw %%mm1, %%mm2\n"
+ "pcmpeqw %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpcklwd %%mm4, %%mm2\n"
+ "punpckhwd %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ /* next */
+ "addl $8, %0\n"
+ "addl $8, %1\n"
+ "addl $8, %2\n"
+ "addl $16, %3\n"
+
+ "decl %4\n"
+ "jnz 0b\n"
+ "1:\n"
+
+/* final run */
+ /* set the current, current_pre, current_next registers */
+ "movq (%1), %%mm1\n"
+ "movq (%1), %%mm7\n"
+ "movq -8(%1), %%mm0\n"
+ "psrlq $48, %%mm1\n"
+ "psrlq $48, %%mm0\n"
+ "psllq $48, %%mm1\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $16, %%mm2\n"
+ "psrlq $16, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqw %%mm6, %%mm2\n"
+ "pcmpeqw %%mm6, %%mm4\n"
+ "pcmpeqw (%2), %%mm3\n"
+ "pcmpeqw (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqw %%mm1, %%mm2\n"
+ "pcmpeqw %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpcklwd %%mm4, %%mm2\n"
+ "punpckhwd %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
+ :
+ : "cc"
+ );
+}
+
+static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+ assert(count >= 4);
+ assert(count % 2 == 0);
+
+ /* always do the first and last run */
+ count -= 2*2;
+
+ __asm__ __volatile__(
+/* first run */
+ /* set the current, current_pre, current_next registers */
+ "movq 0(%1), %%mm0\n"
+ "movq 0(%1), %%mm7\n"
+ "movq 8(%1), %%mm1\n"
+ "psllq $32, %%mm0\n"
+ "psllq $32, %%mm1\n"
+ "psrlq $32, %%mm0\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $32, %%mm2\n"
+ "psrlq $32, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqd %%mm6, %%mm2\n"
+ "pcmpeqd %%mm6, %%mm4\n"
+ "pcmpeqd (%2), %%mm3\n"
+ "pcmpeqd (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqd %%mm1, %%mm2\n"
+ "pcmpeqd %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpckldq %%mm4, %%mm2\n"
+ "punpckhdq %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ /* next */
+ "addl $8, %0\n"
+ "addl $8, %1\n"
+ "addl $8, %2\n"
+ "addl $16, %3\n"
+
+/* central runs */
+ "shrl $1, %4\n"
+ "jz 1f\n"
+
+ "0:\n"
+
+ /* set the current, current_pre, current_next registers */
+ "movq -8(%1), %%mm0\n"
+ "movq (%1), %%mm7\n"
+ "movq 8(%1), %%mm1\n"
+ "psrlq $32, %%mm0\n"
+ "psllq $32, %%mm1\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $32, %%mm2\n"
+ "psrlq $32, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqd %%mm6, %%mm2\n"
+ "pcmpeqd %%mm6, %%mm4\n"
+ "pcmpeqd (%2), %%mm3\n"
+ "pcmpeqd (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqd %%mm1, %%mm2\n"
+ "pcmpeqd %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpckldq %%mm4, %%mm2\n"
+ "punpckhdq %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ /* next */
+ "addl $8, %0\n"
+ "addl $8, %1\n"
+ "addl $8, %2\n"
+ "addl $16, %3\n"
+
+ "decl %4\n"
+ "jnz 0b\n"
+ "1:\n"
+
+/* final run */
+ /* set the current, current_pre, current_next registers */
+ "movq (%1), %%mm1\n"
+ "movq (%1), %%mm7\n"
+ "movq -8(%1), %%mm0\n"
+ "psrlq $32, %%mm1\n"
+ "psrlq $32, %%mm0\n"
+ "psllq $32, %%mm1\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $32, %%mm2\n"
+ "psrlq $32, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqd %%mm6, %%mm2\n"
+ "pcmpeqd %%mm6, %%mm4\n"
+ "pcmpeqd (%2), %%mm3\n"
+ "pcmpeqd (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqd %%mm1, %%mm2\n"
+ "pcmpeqd %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpckldq %%mm4, %%mm2\n"
+ "punpckhdq %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
+ :
+ : "cc"
+ );
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 8 bits.
+ * This is a very fast MMX implementation.
+ * The implementation uses a combination of cmp/and/not operations to
+ * completly remove the need of conditional jumps. This trick give the
+ * major speed improvement.
+ * Also, using the 8 bytes MMX registers more than one pixel are computed
+ * at the same time.
+ * Before calling this function you must ensure that the currenct CPU supports
+ * the MMX instruction set. After calling it you must be sure to call the EMMS
+ * instruction before any floating-point operation.
+ * The pixels over the left and right borders are assumed of the same color of
+ * the pixels on the border.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows. It must
+ * be at least 16 and a multiple of 8.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_8_mmx(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+ if (count % 8 != 0 || count < 16) {
+ scale2x_8_def(dst0, dst1, src0, src1, src2, count);
+ } else {
+ assert(count >= 16);
+ assert(count % 8 == 0);
+
+ scale2x_8_mmx_single(dst0, src0, src1, src2, count);
+ scale2x_8_mmx_single(dst1, src2, src1, src0, count);
+ }
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 16 bits.
+ * This function operates like scale2x_8_mmx() but for 16 bits pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows. It must
+ * be at least 8 and a multiple of 4.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_16_mmx(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+ if (count % 4 != 0 || count < 8) {
+ scale2x_16_def(dst0, dst1, src0, src1, src2, count);
+ } else {
+ assert(count >= 8);
+ assert(count % 4 == 0);
+
+ scale2x_16_mmx_single(dst0, src0, src1, src2, count);
+ scale2x_16_mmx_single(dst1, src2, src1, src0, count);
+ }
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 32 bits.
+ * This function operates like scale2x_8_mmx() but for 32 bits pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows. It must
+ * be at least 4 and a multiple of 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_32_mmx(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+ if (count % 2 != 0 || count < 4) {
+ scale2x_32_def(dst0, dst1, src0, src1, src2, count);
+ } else {
+ assert(count >= 4);
+ assert(count % 2 == 0);
+
+ scale2x_32_mmx_single(dst0, src0, src1, src2, count);
+ scale2x_32_mmx_single(dst1, src2, src1, src0, count);
+ }
+}
+
+#endif
+
diff --git a/common/scaler/scale2x.h b/common/scaler/scale2x.h
new file mode 100644
index 0000000000..54bfac5b8f
--- /dev/null
+++ b/common/scaler/scale2x.h
@@ -0,0 +1,52 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __SCALE2X_H
+#define __SCALE2X_H
+
+typedef unsigned char scale2x_uint8;
+typedef unsigned short scale2x_uint16;
+typedef unsigned scale2x_uint32;
+
+void scale2x_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count);
+void scale2x_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
+void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count);
+
+#if defined(__GNUC__) && defined(__i386__)
+
+void scale2x_8_mmx(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count);
+void scale2x_16_mmx(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
+void scale2x_32_mmx(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count);
+
+/**
+ * End the use of the MMX instructions.
+ * This function must be called before using any floating-point operations.
+ */
+static inline void scale2x_mmx_emms(void)
+{
+ __asm__ __volatile__ (
+ "emms"
+ );
+}
+
+#endif
+
+#endif
+
diff --git a/common/scaler/scale3x.cpp b/common/scaler/scale3x.cpp
new file mode 100644
index 0000000000..16e484d02b
--- /dev/null
+++ b/common/scaler/scale3x.cpp
@@ -0,0 +1,386 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains a C and MMX implementation of the Scale2x effect.
+ *
+ * You can find an high level description of the effect at :
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "scale3x.h"
+
+#include <assert.h>
+
+/***************************************************************************/
+/* Scale3x C implementation */
+
+static inline void scale3x_8_def_border(scale3x_uint8* __restrict__ dst, const scale3x_uint8* __restrict__ src0, const scale3x_uint8* __restrict__ src1, const scale3x_uint8* __restrict__ src2, unsigned count)
+{
+ assert(count >= 2);
+
+ /* first pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ if (src1[1] == src0[0] && src2[0] != src0[0])
+ dst[2] = src0[0];
+ else
+ dst[2] = src1[0];
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+
+ /* central pixels */
+ count -= 2;
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+ dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0];
+ dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0];
+ dst[2] = src1[1] == src0[0] ? src1[1] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+ --count;
+ }
+
+ /* last pixel */
+ if (src1[-1] == src0[0] && src2[0] != src0[0])
+ dst[0] = src0[0];
+ else
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+}
+
+static inline void scale3x_8_def_center(scale3x_uint8* __restrict__ dst, const scale3x_uint8* __restrict__ src0, const scale3x_uint8* __restrict__ src1, const scale3x_uint8* __restrict__ src2, unsigned count)
+{
+ assert(count >= 2);
+
+ /* first pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ if (src0[0] != src2[0]) {
+ dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+ } else {
+ dst[2] = src1[0];
+ }
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+
+ /* central pixels */
+ count -= 2;
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+ dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+ dst[1] = src1[0];
+ dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+ --count;
+ }
+
+ /* last pixel */
+ if (src0[0] != src2[0]) {
+ dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ }
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+}
+
+static inline void scale3x_16_def_border(scale3x_uint16* __restrict__ dst, const scale3x_uint16* __restrict__ src0, const scale3x_uint16* __restrict__ src1, const scale3x_uint16* __restrict__ src2, unsigned count)
+{
+ assert(count >= 2);
+
+ /* first pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ if (src1[1] == src0[0] && src2[0] != src0[0])
+ dst[2] = src0[0];
+ else
+ dst[2] = src1[0];
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+
+ /* central pixels */
+ count -= 2;
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+ dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0];
+ dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0];
+ dst[2] = src1[1] == src0[0] ? src1[1] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+ --count;
+ }
+
+ /* last pixel */
+ if (src1[-1] == src0[0] && src2[0] != src0[0])
+ dst[0] = src0[0];
+ else
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+}
+
+static inline void scale3x_16_def_center(scale3x_uint16* __restrict__ dst, const scale3x_uint16* __restrict__ src0, const scale3x_uint16* __restrict__ src1, const scale3x_uint16* __restrict__ src2, unsigned count)
+{
+ assert(count >= 2);
+
+ /* first pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ if (src0[0] != src2[0]) {
+ dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+ } else {
+ dst[2] = src1[0];
+ }
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+
+ /* central pixels */
+ count -= 2;
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+ dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+ dst[1] = src1[0];
+ dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+ --count;
+ }
+
+ /* last pixel */
+ if (src0[0] != src2[0]) {
+ dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ }
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+}
+
+static inline void scale3x_32_def_border(scale3x_uint32* __restrict__ dst, const scale3x_uint32* __restrict__ src0, const scale3x_uint32* __restrict__ src1, const scale3x_uint32* __restrict__ src2, unsigned count)
+{
+ assert(count >= 2);
+
+ /* first pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ if (src1[1] == src0[0] && src2[0] != src0[0])
+ dst[2] = src0[0];
+ else
+ dst[2] = src1[0];
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+
+ /* central pixels */
+ count -= 2;
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+ dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0];
+ dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0];
+ dst[2] = src1[1] == src0[0] ? src1[1] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+ --count;
+ }
+
+ /* last pixel */
+ if (src1[-1] == src0[0] && src2[0] != src0[0])
+ dst[0] = src0[0];
+ else
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+}
+
+static inline void scale3x_32_def_center(scale3x_uint32* __restrict__ dst, const scale3x_uint32* __restrict__ src0, const scale3x_uint32* __restrict__ src1, const scale3x_uint32* __restrict__ src2, unsigned count)
+{
+ assert(count >= 2);
+
+ /* first pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ if (src0[0] != src2[0]) {
+ dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+ } else {
+ dst[2] = src1[0];
+ }
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+
+ /* central pixels */
+ count -= 2;
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) {
+ dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+ dst[1] = src1[0];
+ dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3;
+ --count;
+ }
+
+ /* last pixel */
+ if (src0[0] != src2[0]) {
+ dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0];
+ } else {
+ dst[0] = src1[0];
+ }
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+}
+
+/**
+ * Scale by a factor of 3 a row of pixels of 8 bits.
+ * The function is implemented in C.
+ * The pixels over the left and right borders are assumed of the same color of
+ * the pixels on the border.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, triple length in pixels.
+ * \param dst1 Second destination row, triple length in pixels.
+ * \param dst2 Third destination row, triple length in pixels.
+ */
+void scale3x_8_def(scale3x_uint8* dst0, scale3x_uint8* dst1, scale3x_uint8* dst2, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count)
+{
+ assert(count >= 2);
+
+ scale3x_8_def_border(dst0, src0, src1, src2, count);
+ scale3x_8_def_center(dst1, src0, src1, src2, count);
+ scale3x_8_def_border(dst2, src2, src1, src0, count);
+}
+
+/**
+ * Scale by a factor of 3 a row of pixels of 16 bits.
+ * This function operates like scale3x_8_def() but for 16 bits pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, triple length in pixels.
+ * \param dst1 Second destination row, triple length in pixels.
+ * \param dst2 Third destination row, triple length in pixels.
+ */
+void scale3x_16_def(scale3x_uint16* dst0, scale3x_uint16* dst1, scale3x_uint16* dst2, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count)
+{
+ assert(count >= 2);
+
+ scale3x_16_def_border(dst0, src0, src1, src2, count);
+ scale3x_16_def_center(dst1, src0, src1, src2, count);
+ scale3x_16_def_border(dst2, src2, src1, src0, count);
+}
+
+/**
+ * Scale by a factor of 3 a row of pixels of 32 bits.
+ * This function operates like scale3x_8_def() but for 32 bits pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, triple length in pixels.
+ * \param dst1 Second destination row, triple length in pixels.
+ * \param dst2 Third destination row, triple length in pixels.
+ */
+void scale3x_32_def(scale3x_uint32* dst0, scale3x_uint32* dst1, scale3x_uint32* dst2, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count)
+{
+ assert(count >= 2);
+
+ scale3x_32_def_border(dst0, src0, src1, src2, count);
+ scale3x_32_def_center(dst1, src0, src1, src2, count);
+ scale3x_32_def_border(dst2, src2, src1, src0, count);
+}
+
diff --git a/common/scaler/scale3x.h b/common/scaler/scale3x.h
new file mode 100644
index 0000000000..14655d00fa
--- /dev/null
+++ b/common/scaler/scale3x.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __SCALE3X_H
+#define __SCALE3X_H
+
+typedef unsigned char scale3x_uint8;
+typedef unsigned short scale3x_uint16;
+typedef unsigned scale3x_uint32;
+
+void scale3x_8_def(scale3x_uint8* dst0, scale3x_uint8* dst1, scale3x_uint8* dst2, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count);
+void scale3x_16_def(scale3x_uint16* dst0, scale3x_uint16* dst1, scale3x_uint16* dst2, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count);
+void scale3x_32_def(scale3x_uint32* dst0, scale3x_uint32* dst1, scale3x_uint32* dst2, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count);
+
+#endif
+
diff --git a/common/scaler/scalebit.cpp b/common/scaler/scalebit.cpp
new file mode 100644
index 0000000000..d540ca7317
--- /dev/null
+++ b/common/scaler/scalebit.cpp
@@ -0,0 +1,391 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2003 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains an example implementation of the Scale effect
+ * applyed to a generic bitmap.
+ *
+ * You can find an high level description of the effect at :
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#define HAVE_ALLOCA 1
+#define HAVE_ALLOCA_H 1
+
+#include "scale2x.h"
+#include "scale3x.h"
+
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
+
+#include <assert.h>
+#include <stdlib.h>
+
+#define DST(bits, num) (scale2x_uint ## bits *)dst ## num
+#define SRC(bits, num) (const scale2x_uint ## bits *)src ## num
+
+/**
+ * Apply the Scale2x effect on a group of rows. Used internally.
+ */
+static inline void stage_scale2x(void* dst0, void* dst1, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+ switch (pixel) {
+#if defined(__GNUC__) && defined(__i386__)
+ case 1 : scale2x_8_mmx(DST(8,0), DST(8,1), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
+ case 2 : scale2x_16_mmx(DST(8,0), DST(16,1), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
+ case 4 : scale2x_32_mmx(DST(32,0), DST(32,1), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
+#else
+ case 1 : scale2x_8_def(DST(8,0), DST(8,1), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
+ case 2 : scale2x_16_def(DST(16,0), DST(16,1), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
+ case 4 : scale2x_32_def(DST(32,0), DST(32,1), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
+#endif
+ }
+}
+
+/**
+ * Apply the Scale3x effect on a group of rows. Used internally.
+ */
+static inline void stage_scale3x(void* dst0, void* dst1, void* dst2, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+ switch (pixel) {
+ case 1 : scale3x_8_def(DST(8,0), DST(8,1), DST(8,2), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
+ case 2 : scale3x_16_def(DST(16,0), DST(16,1), DST(16,2), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
+ case 4 : scale3x_32_def(DST(32,0), DST(32,1), DST(32,2), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
+ }
+}
+
+/**
+ * Apply the Scale4x effect on a group of rows. Used internally.
+ */
+static inline void stage_scale4x(void* dst0, void* dst1, void* dst2, void* dst3, const void* src0, const void* src1, const void* src2, const void* src3, unsigned pixel, unsigned pixel_per_row)
+{
+ stage_scale2x(dst0, dst1, src0, src1, src2, pixel, 2 * pixel_per_row);
+ stage_scale2x(dst2, dst3, src1, src2, src3, pixel, 2 * pixel_per_row);
+}
+
+#define SCDST(i) (dst+(i)*dst_slice)
+#define SCSRC(i) (src+(i)*src_slice)
+#define SCMID(i) (mid[(i)])
+
+/**
+ * Apply the Scale2x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 2x2 times the size of the source bitmap.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+static void scale2x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ unsigned char* dst = (unsigned char*)void_dst;
+ const unsigned char* src = (const unsigned char*)void_src;
+ unsigned count;
+
+ assert(height >= 2);
+
+ count = height;
+
+ stage_scale2x(SCDST(0), SCDST(1), SCSRC(0), SCSRC(0), SCSRC(1), pixel, width);
+
+ dst = SCDST(2);
+
+ count -= 2;
+ while (count) {
+ stage_scale2x(SCDST(0), SCDST(1), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+
+ dst = SCDST(2);
+ src = SCSRC(1);
+
+ --count;
+ }
+
+ stage_scale2x(SCDST(0), SCDST(1), SCSRC(1-1), SCSRC(2-1), SCSRC(2-1), pixel, width);
+
+#if defined(__GNUC__) && defined(__i386__)
+ scale2x_mmx_emms();
+#endif
+}
+
+/**
+ * Apply the Scale32x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 3x3 times the size of the source bitmap.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+static void scale3x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ unsigned char* dst = (unsigned char*)void_dst;
+ const unsigned char* src = (const unsigned char*)void_src;
+ unsigned count;
+
+ assert(height >= 2);
+
+ count = height;
+
+ stage_scale3x(SCDST(0), SCDST(1), SCDST(2), SCSRC(0), SCSRC(0), SCSRC(1), pixel, width);
+
+ dst = SCDST(3);
+
+ count -= 2;
+ while (count) {
+ stage_scale3x(SCDST(0), SCDST(1), SCDST(2), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+
+ dst = SCDST(3);
+ src = SCSRC(1);
+
+ --count;
+ }
+
+ stage_scale3x(SCDST(0), SCDST(1), SCDST(2), SCSRC(1-1), SCSRC(2-1), SCSRC(2-1), pixel, width);
+}
+
+/**
+ * Apply the Scale4x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 4x4 times the size of the source bitmap.
+ * \note This function requires also a small buffer bitmap used internally to store
+ * intermediate results. This bitmap must have at least an horizontal size in bytes of 2*width*pixel,
+ * and a vertical size of 6 rows. The memory of this buffer must not be allocated
+ * in video memory because it's also read and not only written. Generally
+ * a heap (malloc) or a stack (alloca) buffer is the best choices.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_mid Pointer at the first pixel of the buffer bitmap.
+ * \param mid_slice Size in bytes of a buffer bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+static void scale4x_buf(void* void_dst, unsigned dst_slice, void* void_mid, unsigned mid_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ unsigned char* dst = (unsigned char*)void_dst;
+ const unsigned char* src = (const unsigned char*)void_src;
+ unsigned count;
+ unsigned char* mid[6];
+
+ assert(height >= 4);
+
+ count = height;
+
+ /* set the 6 buffer pointers */
+ mid[0] = (unsigned char*)void_mid;
+ mid[1] = mid[0] + mid_slice;
+ mid[2] = mid[1] + mid_slice;
+ mid[3] = mid[2] + mid_slice;
+ mid[4] = mid[3] + mid_slice;
+ mid[5] = mid[4] + mid_slice;
+
+ stage_scale2x(SCMID(-2+6), SCMID(-1+6), SCSRC(0), SCSRC(0), SCSRC(1), pixel, width);
+ stage_scale2x(SCMID(0), SCMID(1), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+ stage_scale2x(SCMID(2), SCMID(3), SCSRC(1), SCSRC(2), SCSRC(3), pixel, width);
+ stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(-2+6), SCMID(-2+6), SCMID(-1+6), SCMID(0), pixel, width);
+
+ dst = SCDST(4);
+
+ stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(-1+6), SCMID(0), SCMID(1), SCMID(2), pixel, width);
+
+ dst = SCDST(4);
+
+ count -= 4;
+ while (count) {
+ unsigned char* tmp;
+
+ stage_scale2x(SCMID(4), SCMID(5), SCSRC(2), SCSRC(3), SCSRC(4), pixel, width);
+ stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(1), SCMID(2), SCMID(3), SCMID(4), pixel, width);
+
+ dst = SCDST(4);
+ src = SCSRC(1);
+
+ tmp = SCMID(0); /* shift by 2 position */
+ SCMID(0) = SCMID(2);
+ SCMID(2) = SCMID(4);
+ SCMID(4) = tmp;
+ tmp = SCMID(1);
+ SCMID(1) = SCMID(3);
+ SCMID(3) = SCMID(5);
+ SCMID(5) = tmp;
+
+ --count;
+ }
+
+ stage_scale2x(SCMID(4), SCMID(5), SCSRC(2), SCSRC(3), SCSRC(3), pixel, width);
+ stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(1), SCMID(2), SCMID(3), SCMID(4), pixel, width);
+
+ dst = SCDST(4);
+
+ stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(3), SCMID(4), SCMID(5), SCMID(5), pixel, width);
+
+#if defined(__GNUC__) && defined(__i386__)
+ scale2x_mmx_emms();
+#endif
+}
+
+/**
+ * Apply the Scale4x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 4x4 times the size of the source bitmap.
+ * \note This function operates like ::scale4x_buf() but the intermediate buffer is
+ * automatically allocated in the stack.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+static void scale4x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ unsigned mid_slice;
+ void* mid;
+
+ mid_slice = 2 * pixel * width; /* required space for 1 row buffer */
+
+ mid_slice = (mid_slice + 0x7) & ~0x7; /* align to 8 bytes */
+
+#if HAVE_ALLOCA
+ mid = alloca(6 * mid_slice); /* allocate space for 6 row buffers */
+
+ assert(mid != 0); /* alloca should never fails */
+#else
+ mid = malloc(6 * mid_slice); /* allocate space for 6 row buffers */
+
+ if (!mid)
+ return;
+#endif
+
+ scale4x_buf(void_dst, dst_slice, mid, mid_slice, void_src, src_slice, pixel, width, height);
+
+#if !HAVE_ALLOCA
+ free(mid);
+#endif
+}
+
+/**
+ * Check if the scale implementation is applicable at the given arguments.
+ * \param scale Scale factor. 2, 3 or 4.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ * \return
+ * - -1 on precondition violated.
+ * - 0 on success.
+ */
+int scale_precondition(unsigned scale, unsigned pixel, unsigned width, unsigned height)
+{
+ if (scale != 2 && scale != 3 && scale != 4)
+ return -1;
+
+ if (pixel != 1 && pixel != 2 && pixel != 4)
+ return -1;
+
+ switch (scale) {
+ case 2 :
+ case 3 :
+ if (height < 2)
+ return -1;
+ break;
+ case 4 :
+ if (height < 4)
+ return -1;
+ break;
+ }
+
+#if defined(__GNUC__) && defined(__i386__)
+ switch (scale) {
+ case 2 :
+ case 4 :
+ if (width < (16 / pixel))
+ return -1;
+ if (width % (8 / pixel) != 0)
+ return -1;
+ break;
+ case 3 :
+ if (width < 2)
+ return -1;
+ break;
+ }
+#else
+ if (width < 2)
+ return -1;
+#endif
+
+ return 0;
+}
+
+/**
+ * Apply the Scale effect on a bitmap.
+ * This function is simply a common interface for ::scale2x(), ::scale3x() and ::scale4x().
+ * \param scale Scale factor. 2, 3 or 4.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+void scale(unsigned scale, void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ switch (scale) {
+ case 2 :
+ scale2x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+ break;
+ case 3 :
+ scale3x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+ break;
+ case 4 :
+ scale4x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+ break;
+ }
+}
+
diff --git a/common/scaler/scalebit.h b/common/scaler/scalebit.h
new file mode 100644
index 0000000000..dd46883f97
--- /dev/null
+++ b/common/scaler/scalebit.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2003 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains an example implementation of the Scale effect
+ * applyed to a generic bitmap.
+ *
+ * You can find an high level description of the effect at :
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#ifndef __SCALEBIT_H
+#define __SCALEBIT_H
+
+int scale_precondition(unsigned scale, unsigned pixel, unsigned width, unsigned height);
+void scale(unsigned scale, void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height);
+
+#endif
+