aboutsummaryrefslogtreecommitdiff
path: root/graphics
diff options
context:
space:
mode:
authorMax Horn2006-02-20 20:29:02 +0000
committerMax Horn2006-02-20 20:29:02 +0000
commit7335d348ba43c1deaf5e97f222959de3d3e30cbc (patch)
treed4d7876916bf3639b198a8d22620fc71e19e1758 /graphics
parent6160a17b3005353781143cd7cc475b112ad68f03 (diff)
downloadscummvm-rg350-7335d348ba43c1deaf5e97f222959de3d3e30cbc.tar.gz
scummvm-rg350-7335d348ba43c1deaf5e97f222959de3d3e30cbc.tar.bz2
scummvm-rg350-7335d348ba43c1deaf5e97f222959de3d3e30cbc.zip
Moved common/scaler* to graphics/
svn-id: r20797
Diffstat (limited to 'graphics')
-rw-r--r--graphics/module.mk36
-rw-r--r--graphics/scaler.cpp322
-rw-r--r--graphics/scaler.h84
-rw-r--r--graphics/scaler/2xsai.cpp404
-rw-r--r--graphics/scaler/aspect.cpp196
-rw-r--r--graphics/scaler/hq2x.cpp175
-rw-r--r--graphics/scaler/hq2x.h1977
-rw-r--r--graphics/scaler/hq2x_i386.asm1842
-rw-r--r--graphics/scaler/hq3x.cpp176
-rw-r--r--graphics/scaler/hq3x.h2951
-rw-r--r--graphics/scaler/hq3x_i386.asm2434
-rw-r--r--graphics/scaler/intern.h171
-rw-r--r--graphics/scaler/scale2x.cpp521
-rw-r--r--graphics/scaler/scale2x.h56
-rw-r--r--graphics/scaler/scale3x.cpp238
-rw-r--r--graphics/scaler/scale3x.h37
-rw-r--r--graphics/scaler/scalebit.cpp357
-rw-r--r--graphics/scaler/scalebit.h43
-rw-r--r--graphics/scaler/thumbnail.cpp194
19 files changed, 12209 insertions, 5 deletions
diff --git a/graphics/module.mk b/graphics/module.mk
index 8c5697d5a1..7ded354cdb 100644
--- a/graphics/module.mk
+++ b/graphics/module.mk
@@ -6,16 +6,42 @@ MODULE_OBJS := \
font.o \
fontman.o \
ilbm.o \
- newfont.o \
+ imagedec.o \
+ imageman.o \
newfont_big.o \
+ newfont.o \
primitives.o \
+ scaler.o \
+ scaler/thumbnail.o \
scummfont.o \
- surface.o \
- imageman.o \
- imagedec.o
+ surface.o
+
+ifndef DISABLE_SCALERS
+MODULE_OBJS += \
+ scaler/2xsai.o \
+ scaler/aspect.o \
+ scaler/scale2x.o \
+ scaler/scale3x.o \
+ scaler/scalebit.o
+
+ifndef DISABLE_HQ_SCALERS
+MODULE_OBJS += \
+ scaler/hq2x.o \
+ scaler/hq3x.o
+
+ifdef HAVE_NASM
+MODULE_OBJS += \
+ scaler/hq2x_i386.o \
+ scaler/hq3x_i386.o
+endif
+
+endif
+
+endif
MODULE_DIRS += \
- graphics
+ graphics \
+ graphics/scaler
# Include common rules
include $(srcdir)/common.rules
diff --git a/graphics/scaler.cpp b/graphics/scaler.cpp
new file mode 100644
index 0000000000..5364e39a2f
--- /dev/null
+++ b/graphics/scaler.cpp
@@ -0,0 +1,322 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+#include "graphics/scaler/intern.h"
+#include "graphics/scaler/scalebit.h"
+#include "common/util.h"
+
+
+int gBitFormat = 565;
+
+#ifndef DISABLE_HQ_SCALERS
+// RGB-to-YUV lookup table
+extern "C" {
+
+#ifdef USE_NASM
+// NOTE: if your compiler uses different mangled names, add another
+// condition here
+
+#ifndef _WIN32
+#define RGBtoYUV _RGBtoYUV
+#define LUT16to32 _LUT16to32
+#endif
+
+#endif
+
+// FIXME/TODO: The following two tables suck up 512 KB.
+// They should at least be allocated on the heap, to reduce the size of the
+// binary.
+//
+// Note: a memory lookup table is *not* necessarily faster than computing
+// these things on the fly, because of its size. Both tables together, plus
+// the code, plus the input/output GFX data, won't fit in the cache on many
+// systems, so main memory has to be accessed, which is about the worst thing
+// that can happen to code which tries to be fast...
+//
+// So we should think about ways to get these smaller / removed. The LUT16to32
+// is only used by the HQX asm right now; maybe somebody can modify the code
+// there to work w/o it (and do some benchmarking, too?). To do that, just
+// do the conversion on the fly, or even do w/o it (as the C++ code manages to),
+// by making different versions of the code based on gBitFormat (or by writing
+// bit masks into registers which are computed based on gBitFormat).
+//
+// RGBtoYUV is also used by the C(++) version of the HQX code. Maybe we can
+// use the same technique which is employed by our MPEG code to reduce the
+// size of the lookup tables at the cost of some additional computations? That
+// might actually result in a speedup, too, if done right (and the code
+// might actually be suitable for AltiVec/MMX/SSE speedup).
+//
+// Of course, the above is largely a conjecture, and the actual speed
+// differences are likely to vary a lot between different architectures and
+// CPUs.
+uint RGBtoYUVstorage[65536];
+uint *RGBtoYUV = RGBtoYUVstorage;
+uint LUT16to32[65536];
+}
+#endif
+
+static const uint16 dotmatrix_565[16] = {
+ 0x01E0, 0x0007, 0x3800, 0x0000,
+ 0x39E7, 0x0000, 0x39E7, 0x0000,
+ 0x3800, 0x0000, 0x01E0, 0x0007,
+ 0x39E7, 0x0000, 0x39E7, 0x0000
+};
+static const uint16 dotmatrix_555[16] = {
+ 0x00E0, 0x0007, 0x1C00, 0x0000,
+ 0x1CE7, 0x0000, 0x1CE7, 0x0000,
+ 0x1C00, 0x0000, 0x00E0, 0x0007,
+ 0x1CE7, 0x0000, 0x1CE7, 0x0000
+};
+static const uint16 *dotmatrix;
+
+static void InitLUT(uint32 BitFormat);
+
+/**
+ * Select the pixel format used by the scalers and rebuild the lookup tables.
+ *
+ * Picks the dot-matrix pattern matching the format, stores the format in
+ * gBitFormat and calls InitLUT().
+ *
+ * @param BitFormat  565 or 555; any other value is reported through error().
+ */
+void InitScalers(uint32 BitFormat) {
+	switch (BitFormat) {
+	case 565:
+		dotmatrix = dotmatrix_565;
+		break;
+	case 555:
+		dotmatrix = dotmatrix_555;
+		break;
+	default:
+		// BitFormat is unsigned and not guaranteed to have the width of
+		// 'int', so print it with %u and an explicit cast.
+		error("Unknown bit format %u", (unsigned int)BitFormat);
+		break;
+	}
+
+	gBitFormat = BitFormat;
+	InitLUT(BitFormat);
+}
+
+/**
+ * Fill the lookup tables used by the HQ scalers.
+ *
+ * - LUT16to32 maps every 16-bit pixel value to a 0x00RRGGBB value.
+ * - RGBtoYUV maps every 16-bit pixel value to a packed (Y << 16) | (u << 8) | v.
+ *
+ * Does nothing when DISABLE_HQ_SCALERS is defined.
+ *
+ * @param BitFormat  565 selects the 5-6-5 layout; every other value is
+ *                   treated as 5-5-5.
+ */
+void InitLUT(uint32 BitFormat) {
+#ifndef DISABLE_HQ_SCALERS
+	int r, g, b;
+	int Y, u, v;
+	int gInc, gShift;
+	const bool is565 = (BitFormat == 565);
+
+	// The 16 -> 32 bit expansion has to follow the active pixel layout, just
+	// like the RGBtoYUV table below does; using the 565 masks for 555 data
+	// would put the colour components in the wrong place.
+	for (int i = 0; i < 65536; i++) {
+		if (is565)
+			LUT16to32[i] = ((i & 0xF800) << 8) + ((i & 0x07E0) << 5) + ((i & 0x001F) << 3);
+		else
+			LUT16to32[i] = ((i & 0x7C00) << 9) + ((i & 0x03E0) << 6) + ((i & 0x001F) << 3);
+	}
+
+	if (is565) {
+		gInc = 256 >> 6;	// green has 6 bits: 64 steps
+		gShift = 6 - 3;
+	} else {
+		gInc = 256 >> 5;	// green has 5 bits: 32 steps
+		gShift = 5 - 3;
+	}
+
+	// r, g and b iterate over the 8-bit values that are representable in the
+	// 16-bit format; the index expression packs them back into a 16-bit pixel.
+	for (r = 0; r < 256; r += 8) {
+		for (g = 0; g < 256; g += gInc) {
+			for (b = 0; b < 256; b += 8) {
+				Y = (r + g + b) >> 2;
+				u = 128 + ((r - b) >> 2);
+				v = 128 + ((-r + 2 * g - b) >> 3);
+				RGBtoYUV[ (r << (5 + gShift)) + (g << gShift) + (b >> 3) ] = (Y << 16) + (u << 8) + v;
+			}
+		}
+	}
+#else
+	(void)BitFormat;	// nothing to set up without the HQ scalers
+#endif
+}
+
+/**
+ * Trivial 'scaler' - it performs no scaling at all and simply copies the
+ * source to the destination, one row at a time (2 bytes per pixel).
+ */
+void Normal1x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+							int width, int height) {
+	for (; height != 0; --height) {
+		memcpy(dstPtr, srcPtr, 2 * width);
+		dstPtr += dstPitch;
+		srcPtr += srcPitch;
+	}
+}
+
+#ifndef DISABLE_SCALERS
+/**
+ * Trivial nearest-neighbour 2x scaler: every 16-bit source pixel becomes a
+ * 2x2 block in the destination. Each pair of equal pixels is written as one
+ * 32-bit store, which is why dstPtr must be 4-byte aligned.
+ */
+void Normal2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+							int width, int height) {
+	assert(((long)dstPtr & 3) == 0);
+	for (; height != 0; --height) {
+		const uint16 *srcRow = (const uint16 *)srcPtr;
+		uint32 *upperRow = (uint32 *)dstPtr;
+		uint32 *lowerRow = (uint32 *)(dstPtr + dstPitch);
+
+		for (int x = 0; x < width; ++x) {
+			// Duplicate the pixel into both halves of a 32-bit word.
+			uint32 doubled = srcRow[x];
+			doubled |= doubled << 16;
+
+			upperRow[x] = doubled;
+			lowerRow[x] = doubled;
+		}
+		srcPtr += srcPitch;
+		dstPtr += dstPitch << 1;
+	}
+}
+
+/**
+ * Trivial nearest-neighbour 3x scaler: every 16-bit source pixel becomes a
+ * 3x3 block in the destination.
+ */
+void Normal3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+							int width, int height) {
+	assert(((long)dstPtr & 1) == 0);
+	for (; height != 0; --height) {
+		const uint16 *srcRow = (const uint16 *)srcPtr;
+		uint8 *out = dstPtr;
+
+		for (int x = 0; x < width; ++x, out += 6) {
+			const uint16 color = srcRow[x];
+			uint16 *row0 = (uint16 *)out;
+			uint16 *row1 = (uint16 *)(out + dstPitch);
+			uint16 *row2 = (uint16 *)(out + dstPitch * 2);
+
+			row0[0] = color;
+			row0[1] = color;
+			row0[2] = color;
+			row1[0] = color;
+			row1[1] = color;
+			row1[2] = color;
+			row2[0] = color;
+			row2[1] = color;
+			row2[2] = color;
+		}
+		srcPtr += srcPitch;
+		dstPtr += dstPitch * 3;
+	}
+}
+
+#define INTERPOLATE INTERPOLATE<bitFormat>
+#define Q_INTERPOLATE Q_INTERPOLATE<bitFormat>
+
+/**
+ * 1.5x scaler: every 2x2 block of source pixels becomes a 3x3 block in the
+ * destination. The corners are the original pixels; the in-between pixels
+ * are blends produced by INTERPOLATE / Q_INTERPOLATE.
+ *
+ * Source pixels are consumed two columns and two rows at a time, so column
+ * x + 1 and the following source row are always read.
+ */
+template<int bitFormat>
+void Normal1o5xTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+							int width, int height) {
+	assert(((long)dstPtr & 1) == 0);
+	for (; height > 0; height -= 2) {
+		const uint16 *upperSrc = (const uint16 *)srcPtr;
+		const uint16 *lowerSrc = (const uint16 *)(srcPtr + srcPitch);
+		uint8 *out = dstPtr;
+
+		for (int x = 0; x < width; x += 2, out += 6) {
+			uint16 topLeft = upperSrc[x];
+			uint16 topRight = upperSrc[x + 1];
+			uint16 bottomLeft = lowerSrc[x];
+			uint16 bottomRight = lowerSrc[x + 1];
+
+			uint16 *row0 = (uint16 *)out;
+			uint16 *row1 = (uint16 *)(out + dstPitch);
+			uint16 *row2 = (uint16 *)(out + dstPitch * 2);
+
+			row0[0] = topLeft;
+			row0[1] = INTERPOLATE(topLeft, topRight);
+			row0[2] = topRight;
+			row1[0] = INTERPOLATE(topLeft, bottomLeft);
+			row1[1] = Q_INTERPOLATE(topLeft, topRight, bottomLeft, bottomRight);
+			row1[2] = INTERPOLATE(topRight, bottomRight);
+			row2[0] = bottomLeft;
+			row2[1] = INTERPOLATE(bottomLeft, bottomRight);
+			row2[2] = bottomRight;
+		}
+		srcPtr += srcPitch * 2;
+		dstPtr += dstPitch * 3;
+	}
+}
+MAKE_WRAPPER(Normal1o5x)
+
+/**
+ * The Scale2x filter, also known as AdvMame2x.
+ * See also http://scale2x.sourceforge.net
+ *
+ * Thin wrapper that forwards to scale() with a factor of 2.
+ * NOTE(review): scale() is handed a pointer one row above srcPtr and the
+ * constant 2 (presumably the number of bytes per pixel) - confirm against
+ * the declaration in scalebit.h.
+ */
+void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+ int width, int height) {
+ scale(2, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height);
+}
+
+/**
+ * The Scale3x filter, also known as AdvMame3x.
+ * See also http://scale2x.sourceforge.net
+ *
+ * Thin wrapper that forwards to scale() with a factor of 3.
+ * NOTE(review): scale() is handed a pointer one row above srcPtr and the
+ * constant 2 (presumably the number of bytes per pixel) - confirm against
+ * the declaration in scalebit.h.
+ */
+void AdvMame3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+ int width, int height) {
+ scale(3, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height);
+}
+
+/**
+ * TV-style 2x scaler: each source pixel is doubled horizontally; the upper
+ * output row keeps the original colour while the lower output row gets the
+ * colour with every component scaled by 7/8.
+ */
+template<int bitFormat>
+void TV2xTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+					int width, int height) {
+	const uint32 srcStride = srcPitch / sizeof(uint16);
+	const uint32 dstStride = dstPitch / sizeof(uint16);
+	const uint16 *src = (const uint16 *)srcPtr;
+	uint16 *dst = (uint16 *)dstPtr;
+
+	for (; height != 0; --height) {
+		uint16 *brightRow = dst;
+		uint16 *dimRow = dst + dstStride;
+
+		for (int x = 0; x < width; ++x) {
+			uint16 pixel = src[x];
+
+			// Darken red/blue and green separately so that no component
+			// bleeds into its neighbour.
+			uint32 dimmed = (((pixel & redblueMask) * 7) >> 3) & redblueMask;
+			dimmed |= (((pixel & greenMask) * 7) >> 3) & greenMask;
+
+			brightRow[2 * x] = pixel;
+			brightRow[2 * x + 1] = pixel;
+			dimRow[2 * x] = (uint16)dimmed;
+			dimRow[2 * x + 1] = (uint16)dimmed;
+		}
+		src += srcStride;
+		dst += dstStride << 1;
+	}
+}
+MAKE_WRAPPER(TV2x)
+
+/**
+ * Darken colour c using the 4x4 dot-matrix pattern entry selected by the
+ * low two bits of row j and column i.
+ */
+static inline uint16 DOT_16(uint16 c, int j, int i) {
+	const uint16 mask = dotmatrix[((j & 3) << 2) + (i & 3)];
+	return c - ((c >> 2) & mask);
+}
+
+// FIXME: This scaler doesn't quite work. Either it needs to know where on the
+// screen it's drawing, or the dirty rects will have to be adjusted so that
+// accesses to the dotmatrix array are made in a consistent way. (Doing that in
+// a way that also works together with aspect-ratio correction is left as an
+// exercise for the reader.)
+
+/**
+ * 2x scaler with a dot-matrix look: every source pixel becomes a 2x2 block
+ * whose four pixels are darkened individually by DOT_16(), based on their
+ * position inside the scaled rectangle.
+ */
+void DotMatrix(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
+					int width, int height) {
+	const uint32 srcStride = srcPitch / sizeof(uint16);
+	const uint32 dstStride = dstPitch / sizeof(uint16);
+	const uint16 *src = (const uint16 *)srcPtr;
+	uint16 *dst = (uint16 *)dstPtr;
+
+	for (int y = 0; y < height; ++y) {
+		const int outY = 2 * y;
+		uint16 *upperRow = dst;
+		uint16 *lowerRow = dst + dstStride;
+
+		for (int x = 0; x < width; ++x) {
+			const int outX = 2 * x;
+			uint16 c = src[x];
+
+			upperRow[outX] = DOT_16(c, outY, outX);
+			upperRow[outX + 1] = DOT_16(c, outY, outX + 1);
+			lowerRow[outX] = DOT_16(c, outY + 1, outX);
+			lowerRow[outX + 1] = DOT_16(c, outY + 1, outX + 1);
+		}
+		src += srcStride;
+		dst += dstStride << 1;
+	}
+}
+
+#endif
diff --git a/graphics/scaler.h b/graphics/scaler.h
new file mode 100644
index 0000000000..9f743216ac
--- /dev/null
+++ b/graphics/scaler.h
@@ -0,0 +1,84 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2002-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ */
+
+#ifndef COMMON_SCALER_H
+#define COMMON_SCALER_H
+
+#include "common/stdafx.h"
+#include "common/scummsys.h"
+#include "graphics/surface.h"
+
+extern void InitScalers(uint32 BitFormat);
+
+typedef void ScalerProc(const uint8 *srcPtr, uint32 srcPitch,
+ uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+#define DECLARE_SCALER(x) \
+ extern void x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, \
+ uint32 dstPitch, int width, int height)
+
+DECLARE_SCALER(_2xSaI);
+DECLARE_SCALER(Super2xSaI);
+DECLARE_SCALER(SuperEagle);
+DECLARE_SCALER(AdvMame2x);
+DECLARE_SCALER(AdvMame3x);
+DECLARE_SCALER(Normal1x);
+DECLARE_SCALER(Normal2x);
+DECLARE_SCALER(Normal3x);
+DECLARE_SCALER(Normal1o5x);
+DECLARE_SCALER(TV2x);
+DECLARE_SCALER(DotMatrix);
+
+#ifndef DISABLE_HQ_SCALERS
+DECLARE_SCALER(HQ2x);
+DECLARE_SCALER(HQ3x);
+#endif
+
+/** Map a row of the unstretched image to its row after the 1.2x vertical stretch. */
+FORCEINLINE int real2Aspect(int y) {
+	const int extraRows = (y + 1) / 5;
+	return y + extraRows;
+}
+
+/** Map a row of the stretched image back to a row of the unstretched image. */
+FORCEINLINE int aspect2Real(int y) {
+	const int scaled = y * 5 + 4;
+	return scaled / 6;
+}
+
+extern void makeRectStretchable(int &x, int &y, int &w, int &h);
+
+extern int stretch200To240(uint8 *buf, uint32 pitch, int width, int height, int srcX, int srcY, int origSrcY);
+
+// creates a 160x100 thumbnail for 320x200 games
+// and 160x120 thumbnail for 320x240 and 640x480 games
+// only 565 mode
+enum {
+ kThumbnailWidth = 160,
+ kThumbnailHeight1 = 100,
+ kThumbnailHeight2 = 120
+};
+extern void createThumbnail(const uint8* src, uint32 srcPitch, uint8* dstPtr, uint32 dstPitch, int width, int height);
+
+/**
+ * creates a thumbnail from the current screen (without overlay)
+ * @param surf a surface (will always have 16 bpp after this for now)
+ * @return false if an error occurred
+ */
+extern bool createThumbnailFromScreen(Graphics::Surface* surf);
+
+#endif
diff --git a/graphics/scaler/2xsai.cpp b/graphics/scaler/2xsai.cpp
new file mode 100644
index 0000000000..c8e360083f
--- /dev/null
+++ b/graphics/scaler/2xsai.cpp
@@ -0,0 +1,404 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+#include "common/scaler/intern.h"
+
+
+
+/**
+ * Vote helper for the SaI family of scalers.
+ *
+ * Counts how many of C and D equal A, and how many of them equal B without
+ * also equalling A.
+ *
+ * @return +1 if both C and D match B only, -1 if both match A, 0 otherwise.
+ */
+static inline int GetResult(uint32 A, uint32 B, uint32 C, uint32 D) {
+	int matchesA = 0;
+	int matchesBOnly = 0;
+
+	if (A == C)
+		++matchesA;
+	else if (B == C)
+		++matchesBOnly;
+
+	if (A == D)
+		++matchesA;
+	else if (B == D)
+		++matchesBOnly;
+
+	return (matchesBOnly == 2 ? 1 : 0) - (matchesA == 2 ? 1 : 0);
+}
+
+#define INTERPOLATE INTERPOLATE<bitFormat>
+#define Q_INTERPOLATE Q_INTERPOLATE<bitFormat>
+
+/**
+ * Super2xSaI scaler: writes a 2x2 block of 16-bit pixels for every 16-bit
+ * source pixel.
+ *
+ * For each source pixel a 4x4 neighbourhood is read (one row/column before
+ * the pixel, two rows/columns after it), so pixels outside the
+ * width x height rectangle are read as well.
+ * NOTE(review): the caller presumably guarantees that this border is
+ * readable - confirm.
+ *
+ * @param srcPtr   first source pixel to scale
+ * @param srcPitch distance in bytes between two source rows
+ * @param dstPtr   first destination pixel
+ * @param dstPitch distance in bytes between two destination rows
+ * @param width    number of source pixels per row
+ * @param height   number of source rows
+ */
+template<int bitFormat>
+void Super2xSaITemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+ const uint16 *bP;
+ uint16 *dP;
+ const uint32 nextlineSrc = srcPitch >> 1;
+
+ while (height--) {
+ bP = (const uint16 *)srcPtr;
+ dP = (uint16 *)dstPtr;
+
+ for (int i = 0; i < width; ++i) {
+ uint32 color4, color5, color6;
+ uint32 color1, color2, color3;
+ uint32 colorA0, colorA1, colorA2, colorA3;
+ uint32 colorB0, colorB1, colorB2, colorB3;
+ uint32 colorS1, colorS2;
+ uint32 product1a, product1b, product2a, product2b;
+
+//--------------------------------------- B1 B2
+// 4 5 6 S2
+// 1 2 3 S1
+// A1 A2
+
+ // Row above the current pixel (color5 is the current pixel).
+ colorB0 = *(bP - nextlineSrc - 1);
+ colorB1 = *(bP - nextlineSrc);
+ colorB2 = *(bP - nextlineSrc + 1);
+ colorB3 = *(bP - nextlineSrc + 2);
+
+ // Current row.
+ color4 = *(bP - 1);
+ color5 = *(bP);
+ color6 = *(bP + 1);
+ colorS2 = *(bP + 2);
+
+ // Next row.
+ color1 = *(bP + nextlineSrc - 1);
+ color2 = *(bP + nextlineSrc);
+ color3 = *(bP + nextlineSrc + 1);
+ colorS1 = *(bP + nextlineSrc + 2);
+
+ // Row after next.
+ colorA0 = *(bP + 2 * nextlineSrc - 1);
+ colorA1 = *(bP + 2 * nextlineSrc);
+ colorA2 = *(bP + 2 * nextlineSrc + 1);
+ colorA3 = *(bP + 2 * nextlineSrc + 2);
+
+//--------------------------------------
+ // Right-hand output pixels (product1b = upper, product2b = lower).
+ if (color2 == color6 && color5 != color3) {
+ product2b = product1b = color2;
+ } else if (color5 == color3 && color2 != color6) {
+ product2b = product1b = color5;
+ } else if (color5 == color3 && color2 == color6) {
+ // Both diagonals match: let the surrounding pixels vote.
+ register int r = 0;
+
+ r += GetResult(color6, color5, color1, colorA1);
+ r += GetResult(color6, color5, color4, colorB1);
+ r += GetResult(color6, color5, colorA2, colorS1);
+ r += GetResult(color6, color5, colorB2, colorS2);
+
+ if (r > 0)
+ product2b = product1b = color6;
+ else if (r < 0)
+ product2b = product1b = color5;
+ else {
+ product2b = product1b = INTERPOLATE(color5, color6);
+ }
+ } else {
+ if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
+ product2b = Q_INTERPOLATE(color3, color3, color3, color2);
+ else if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
+ product2b = Q_INTERPOLATE(color2, color2, color2, color3);
+ else
+ product2b = INTERPOLATE(color2, color3);
+
+ if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
+ product1b = Q_INTERPOLATE(color6, color6, color6, color5);
+ else if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
+ product1b = Q_INTERPOLATE(color6, color5, color5, color5);
+ else
+ product1b = INTERPOLATE(color5, color6);
+ }
+
+ // Lower-left output pixel.
+ if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
+ product2a = INTERPOLATE(color2, color5);
+ else if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
+ product2a = INTERPOLATE(color2, color5);
+ else
+ product2a = color2;
+
+ // Upper-left output pixel.
+ if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
+ product1a = INTERPOLATE(color2, color5);
+ else if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
+ product1a = INTERPOLATE(color2, color5);
+ else
+ product1a = color5;
+
+ // dstPitch is in bytes, dP points to 16-bit pixels: dstPitch/2 is one row.
+ *(dP + 0) = (uint16) product1a;
+ *(dP + 1) = (uint16) product1b;
+ *(dP + dstPitch/2 + 0) = (uint16) product2a;
+ *(dP + dstPitch/2 + 1) = (uint16) product2b;
+
+ bP += 1;
+ dP += 2;
+ }
+
+ srcPtr += srcPitch;
+ dstPtr += dstPitch * 2;
+ }
+}
+
+MAKE_WRAPPER(Super2xSaI)
+
+/**
+ * SuperEagle scaler: writes a 2x2 block of 16-bit pixels for every 16-bit
+ * source pixel.
+ *
+ * For each source pixel the code reads the row above, the current row, the
+ * next row and the row after next, from one column before the pixel up to
+ * two columns after it, so pixels outside the width x height rectangle are
+ * read as well.
+ * NOTE(review): the caller presumably guarantees that this border is
+ * readable - confirm.
+ *
+ * @param srcPtr   first source pixel to scale
+ * @param srcPitch distance in bytes between two source rows
+ * @param dstPtr   first destination pixel
+ * @param dstPitch distance in bytes between two destination rows
+ * @param width    number of source pixels per row
+ * @param height   number of source rows
+ */
+template<int bitFormat>
+void SuperEagleTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+ const uint16 *bP;
+ uint16 *dP;
+ const uint32 nextlineSrc = srcPitch >> 1;
+
+ while (height--) {
+ bP = (const uint16 *)srcPtr;
+ dP = (uint16 *)dstPtr;
+ for (int i = 0; i < width; ++i) {
+ uint32 color4, color5, color6;
+ uint32 color1, color2, color3;
+ uint32 colorA1, colorA2, colorB1, colorB2, colorS1, colorS2;
+ uint32 product1a, product1b, product2a, product2b;
+
+ // Row above the current pixel (color5 is the current pixel).
+ colorB1 = *(bP - nextlineSrc);
+ colorB2 = *(bP - nextlineSrc + 1);
+
+ // Current row.
+ color4 = *(bP - 1);
+ color5 = *(bP);
+ color6 = *(bP + 1);
+ colorS2 = *(bP + 2);
+
+ // Next row.
+ color1 = *(bP + nextlineSrc - 1);
+ color2 = *(bP + nextlineSrc);
+ color3 = *(bP + nextlineSrc + 1);
+ colorS1 = *(bP + nextlineSrc + 2);
+
+ // Row after next.
+ colorA1 = *(bP + 2 * nextlineSrc);
+ colorA2 = *(bP + 2 * nextlineSrc + 1);
+
+ // --------------------------------------
+ // The four cases are selected by whether the two diagonals of the
+ // 2x2 block (color5/color3 and color2/color6) consist of equal pixels.
+ if (color5 != color3) {
+ if (color2 == color6) {
+ product1b = product2a = color2;
+ if ((color1 == color2) || (color6 == colorB2)) {
+ product1a = INTERPOLATE(color2, color5);
+ product1a = INTERPOLATE(color2, product1a);
+ } else {
+ product1a = INTERPOLATE(color5, color6);
+ }
+
+ if ((color6 == colorS2) || (color2 == colorA1)) {
+ product2b = INTERPOLATE(color2, color3);
+ product2b = INTERPOLATE(color2, product2b);
+ } else {
+ product2b = INTERPOLATE(color2, color3);
+ }
+ } else {
+ product2b = product1a = INTERPOLATE(color2, color6);
+ product2b = Q_INTERPOLATE(color3, color3, color3, product2b);
+ product1a = Q_INTERPOLATE(color5, color5, color5, product1a);
+
+ product2a = product1b = INTERPOLATE(color5, color3);
+ product2a = Q_INTERPOLATE(color2, color2, color2, product2a);
+ product1b = Q_INTERPOLATE(color6, color6, color6, product1b);
+ }
+ } else {
+ if (color2 != color6) {
+ product2b = product1a = color5;
+
+ if ((colorB1 == color5) || (color3 == colorS1)) {
+ product1b = INTERPOLATE(color5, color6);
+ product1b = INTERPOLATE(color5, product1b);
+ } else {
+ product1b = INTERPOLATE(color5, color6);
+ }
+
+ if ((color3 == colorA2) || (color4 == color5)) {
+ product2a = INTERPOLATE(color5, color2);
+ product2a = INTERPOLATE(color5, product2a);
+ } else {
+ product2a = INTERPOLATE(color2, color3);
+ }
+ } else {
+ // Both diagonals match: let the surrounding pixels vote.
+ register int r = 0;
+
+ r += GetResult(color6, color5, color1, colorA1);
+ r += GetResult(color6, color5, color4, colorB1);
+ r += GetResult(color6, color5, colorA2, colorS1);
+ r += GetResult(color6, color5, colorB2, colorS2);
+
+ if (r > 0) {
+ product1b = product2a = color2;
+ product1a = product2b = INTERPOLATE(color5, color6);
+ } else if (r < 0) {
+ product2b = product1a = color5;
+ product1b = product2a = INTERPOLATE(color5, color6);
+ } else {
+ product2b = product1a = color5;
+ product1b = product2a = color2;
+ }
+ }
+ }
+
+ // dstPitch is in bytes, dP points to 16-bit pixels: dstPitch/2 is one row.
+ *(dP + 0) = (uint16) product1a;
+ *(dP + 1) = (uint16) product1b;
+ *(dP + dstPitch/2 + 0) = (uint16) product2a;
+ *(dP + dstPitch/2 + 1) = (uint16) product2b;
+
+ bP += 1;
+ dP += 2;
+ }
+
+ srcPtr += srcPitch;
+ dstPtr += dstPitch * 2;
+ }
+}
+
+MAKE_WRAPPER(SuperEagle)
+
+/**
+ * 2xSaI scaler: writes a 2x2 block of 16-bit pixels for every 16-bit source
+ * pixel. The upper-left output pixel is always the unmodified source pixel;
+ * the other three are chosen or blended from the surrounding pixels.
+ *
+ * For each source pixel a 4x4 neighbourhood is read (one row/column before
+ * the pixel, two rows/columns after it), so pixels outside the
+ * width x height rectangle are read as well.
+ * NOTE(review): the caller presumably guarantees that this border is
+ * readable - confirm.
+ *
+ * @param srcPtr   first source pixel to scale
+ * @param srcPitch distance in bytes between two source rows
+ * @param dstPtr   first destination pixel
+ * @param dstPitch distance in bytes between two destination rows
+ * @param width    number of source pixels per row
+ * @param height   number of source rows
+ */
+template<int bitFormat>
+void _2xSaITemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+ const uint16 *bP;
+ uint16 *dP;
+ const uint32 nextlineSrc = srcPitch >> 1;
+
+ while (height--) {
+ bP = (const uint16 *)srcPtr;
+ dP = (uint16 *)dstPtr;
+
+ for (int i = 0; i < width; ++i) {
+
+ register uint32 colorA, colorB;
+ uint32 colorC, colorD,
+ colorE, colorF, colorG, colorH, colorI, colorJ, colorK, colorL, colorM, colorN, colorO, colorP;
+ uint32 product, product1, product2;
+
+//---------------------------------------
+// Map of the pixels: I|E F|J
+// G|A B|K
+// H|C D|L
+// M|N O|P
+ colorI = *(bP - nextlineSrc - 1);
+ colorE = *(bP - nextlineSrc);
+ colorF = *(bP - nextlineSrc + 1);
+ colorJ = *(bP - nextlineSrc + 2);
+
+ colorG = *(bP - 1);
+ colorA = *(bP);
+ colorB = *(bP + 1);
+ colorK = *(bP + 2);
+
+ colorH = *(bP + nextlineSrc - 1);
+ colorC = *(bP + nextlineSrc);
+ colorD = *(bP + nextlineSrc + 1);
+ colorL = *(bP + nextlineSrc + 2);
+
+ colorM = *(bP + 2 * nextlineSrc - 1);
+ colorN = *(bP + 2 * nextlineSrc);
+ colorO = *(bP + 2 * nextlineSrc + 1);
+ colorP = *(bP + 2 * nextlineSrc + 2);
+
+ // product = upper-right, product1 = lower-left, product2 = lower-right
+ // output pixel (see the stores at the end of the loop body).
+ if ((colorA == colorD) && (colorB != colorC)) {
+ if (((colorA == colorE) && (colorB == colorL)) ||
+ ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ))) {
+ product = colorA;
+ } else {
+ product = INTERPOLATE(colorA, colorB);
+ }
+
+ if (((colorA == colorG) && (colorC == colorO)) ||
+ ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM))) {
+ product1 = colorA;
+ } else {
+ product1 = INTERPOLATE(colorA, colorC);
+ }
+ product2 = colorA;
+ } else if ((colorB == colorC) && (colorA != colorD)) {
+ if (((colorB == colorF) && (colorA == colorH)) ||
+ ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI))) {
+ product = colorB;
+ } else {
+ product = INTERPOLATE(colorA, colorB);
+ }
+
+ if (((colorC == colorH) && (colorA == colorF)) ||
+ ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI))) {
+ product1 = colorC;
+ } else {
+ product1 = INTERPOLATE(colorA, colorC);
+ }
+ product2 = colorB;
+ } else if ((colorA == colorD) && (colorB == colorC)) {
+ if (colorA == colorB) {
+ // All four pixels of the 2x2 block are equal.
+ product = colorA;
+ product1 = colorA;
+ product2 = colorA;
+ } else {
+ // Both diagonals match: let the surrounding pixels vote.
+ register int r = 0;
+
+ product1 = INTERPOLATE(colorA, colorC);
+ product = INTERPOLATE(colorA, colorB);
+
+ r += GetResult(colorA, colorB, colorG, colorE);
+ r -= GetResult(colorB, colorA, colorK, colorF);
+ r -= GetResult(colorB, colorA, colorH, colorN);
+ r += GetResult(colorA, colorB, colorL, colorO);
+
+ if (r > 0)
+ product2 = colorA;
+ else if (r < 0)
+ product2 = colorB;
+ else {
+ product2 = Q_INTERPOLATE(colorA, colorB, colorC, colorD);
+ }
+ }
+ } else {
+ product2 = Q_INTERPOLATE(colorA, colorB, colorC, colorD);
+
+ if ((colorA == colorC) && (colorA == colorF)
+ && (colorB != colorE) && (colorB == colorJ)) {
+ product = colorA;
+ } else if ((colorB == colorE) && (colorB == colorD)
+ && (colorA != colorF) && (colorA == colorI)) {
+ product = colorB;
+ } else {
+ product = INTERPOLATE(colorA, colorB);
+ }
+
+ if ((colorA == colorB) && (colorA == colorH)
+ && (colorG != colorC) && (colorC == colorM)) {
+ product1 = colorA;
+ } else if ((colorC == colorG) && (colorC == colorD)
+ && (colorA != colorH) && (colorA == colorI)) {
+ product1 = colorC;
+ } else {
+ product1 = INTERPOLATE(colorA, colorC);
+ }
+ }
+
+ // dstPitch is in bytes, dP points to 16-bit pixels: dstPitch/2 is one row.
+ *(dP + 0) = (uint16) colorA;
+ *(dP + 1) = (uint16) product;
+ *(dP + dstPitch/2 + 0) = (uint16) product1;
+ *(dP + dstPitch/2 + 1) = (uint16) product2;
+
+ bP += 1;
+ dP += 2;
+ }
+
+ srcPtr += srcPitch;
+ dstPtr += dstPitch * 2;
+ }
+}
+
+MAKE_WRAPPER(_2xSaI)
diff --git a/graphics/scaler/aspect.cpp b/graphics/scaler/aspect.cpp
new file mode 100644
index 0000000000..6e55236850
--- /dev/null
+++ b/graphics/scaler/aspect.cpp
@@ -0,0 +1,196 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+#include "common/scaler/intern.h"
+#include "common/scaler.h"
+
+
+#define kVeryFastAndUglyAspectMode 0 // No interpolation at all, but super-fast
+#define kFastAndNiceAspectMode 1 // Quite good quality with good speed
+#define kSlowAndPerfectAspectMode 2 // Accurate but slow code
+
+#define ASPECT_MODE kFastAndNiceAspectMode
+
+
+#if ASPECT_MODE == kSlowAndPerfectAspectMode
+
+/**
+ * Blend two 16-bit pixels with weights scale/5 (for A) and (5 - scale)/5
+ * (for B). The three components are blended separately and masked before
+ * they are recombined.
+ */
+template<int bitFormat, int scale>
+static inline uint16 interpolate5(uint16 A, uint16 B) {
+	const uint16 hi = (uint16)(((A & redblueMask & 0xFF00) * scale + (B & redblueMask & 0xFF00) * (5 - scale)) / 5);
+	const uint16 mid = (uint16)(((A & greenMask) * scale + (B & greenMask) * (5 - scale)) / 5);
+	const uint16 lo = (uint16)(((A & redblueMask & 0x00FF) * scale + (B & redblueMask & 0x00FF) * (5 - scale)) / 5);
+
+	return (uint16)((hi & redblueMask & 0xFF00) | (mid & greenMask) | (lo & redblueMask & 0x00FF));
+}
+
+
+/**
+ * Blend two source lines pixel by pixel into dst using interpolate5().
+ * Accurate but slightly slower than the fast variant.
+ */
+template<int bitFormat, int scale>
+static inline void interpolate5Line(uint16 *dst, const uint16 *srcA, const uint16 *srcB, int width) {
+	for (; width != 0; --width, ++dst, ++srcA, ++srcB) {
+		*dst = interpolate5<bitFormat, scale>(*srcA, *srcB);
+	}
+}
+#endif
+
+#if ASPECT_MODE == kFastAndNiceAspectMode
+
+/**
+ * Average A and B (weights 1:1). Operates on a 32-bit word, which the caller
+ * uses to process two 16-bit pixels at once.
+ */
+template<int bitFormat>
+static inline uint32 INTERPOLATE_1_1(uint32 A, uint32 B) {
+	const uint32 halfA = (A & highBits) >> 1;
+	const uint32 halfB = (B & highBits) >> 1;
+	// Restore the low bit that is lost when both halves had it set.
+	const uint32 carry = A & B & lowBits;
+	return (halfA + halfB + carry);
+}
+
+/**
+ * Blend A and B with weights 1:3. Operates on a 32-bit word, which the
+ * caller uses to process two 16-bit pixels at once.
+ */
+template<int bitFormat>
+static inline uint32 INTERPOLATE_1_3(uint32 A, uint32 B) {
+	// Upper bits of every component: divide first, then weight.
+	uint32 upper = ((A & qhighBits) >> 2) + ((B & qhighBits) >> 2) * 3;
+	// Lower two bits of every component: weight first, then divide.
+	uint32 lower = ((A & qlowBits) + (B & qlowBits) * 3) >> 2;
+
+	lower &= qlowBits;
+	return upper + lower;
+}
+
+/**
+ * Blend two source lines into dst, two 16-bit pixels per 32-bit access.
+ * scale == 1 blends with weights 1:3 (srcA:srcB), every other scale value
+ * blends 1:1.
+ */
+template<int bitFormat, int scale>
+static inline void interpolate5Line(uint16 *dst, const uint16 *srcA, const uint16 *srcB, int width) {
+	// Two pixels are blitted at a time for efficiency, so it is important
+	// that makeRectStretchable() guarantees an even width and a rect that
+	// starts on a well-aligned address. (Even where unaligned memory access
+	// is allowed there may be a speed penalty for it.)
+
+	// The asserts below are disabled for maximal speed; they are kept for
+	// anybody who wants to verify that the alignment (to an address
+	// divisible by 4) really works.
+	//assert(((int)dst & 3) == 0);
+	//assert(((int)srcA & 3) == 0);
+	//assert(((int)srcB & 3) == 0);
+	//assert((width & 1) == 0);
+
+	const uint32 *pairA = (const uint32 *)srcA;
+	const uint32 *pairB = (const uint32 *)srcB;
+	uint32 *out = (uint32 *)dst;
+	int pairs = width / 2;
+
+	if (scale == 1) {
+		for (; pairs != 0; --pairs) {
+			*out++ = INTERPOLATE_1_3<bitFormat>(*pairA++, *pairB++);
+		}
+	} else {
+		for (; pairs != 0; --pairs) {
+			*out++ = INTERPOLATE_1_1<bitFormat>(*pairA++, *pairB++);
+		}
+	}
+}
+#endif
+
+/**
+ * Enlarge a rectangle (in place) so that it can be handed to
+ * stretch200To240(). Does nothing in kVeryFastAndUglyAspectMode.
+ *
+ * @param x  left edge; may be decreased by one to become even
+ * @param y  top edge; may be moved up
+ * @param w  width; grows to compensate for a moved x and to become even
+ * @param h  height; grows by the amount y was moved up
+ */
+void makeRectStretchable(int &x, int &y, int &w, int &h) {
+#if ASPECT_MODE != kVeryFastAndUglyAspectMode
+ int m = real2Aspect(y) % 6;
+
+ // Ensure that the rect will start on a line that won't have its
+ // colours changed by the stretching function.
+ if (m != 0 && m != 5) {
+ y -= m;
+ h += m;
+ }
+
+ #if ASPECT_MODE == kFastAndNiceAspectMode
+ // Force x to be even, to ensure aligned memory access (this assumes
+ // that each line starts at an even memory location, but that should
+ // be the case on every target anyway).
+ if (x & 1) {
+ x--;
+ w++;
+ }
+
+ // Finally force the width to be even, since we blit 2 pixels at a time.
+ // While this means we may sometimes blit one column more than necessary,
+ // this should actually be faster than having to check for an odd width
+ // in the blitting loop.
+ if (w & 1)
+ w++;
+ #endif
+#endif
+}
+
+/**
+ * Stretch a 16bpp image vertically by factor 1.2. Used to correct the
+ * aspect-ratio in games using 320x200 pixel graphics with non-square
+ * pixels. Applying this method effectively turns that into 320x240, which
+ * provides the correct aspect-ratio on modern displays.
+ *
+ * The image would normally have occupied y coordinates origSrcY through
+ * origSrcY + height - 1.
+ *
+ * However, we have already placed it at srcY - the aspect-corrected y
+ * coordinate - to allow in-place stretching.
+ *
+ * Therefore, the source image now occupies Y coordinates srcY through
+ * srcY + height - 1, and it should be stretched to Y coordinates srcY
+ * through real2Aspect(origSrcY + height - 1).
+ *
+ * The lines are processed from the bottom up.
+ *
+ * @return the number of lines of the stretched area
+ *         (1 + real2Aspect(origSrcY + height - 1) - srcY)
+ */
+template<int bitFormat>
+int stretch200To240(uint8 *buf, uint32 pitch, int width, int height, int srcX, int srcY, int origSrcY) {
+ int maxDstY = real2Aspect(origSrcY + height - 1);
+ int y;
+ // Offset by (srcY - origSrcY) rows so that adding aspect2Real(y) rows
+ // below addresses the source line at its current position.
+ const uint8 *startSrcPtr = buf + srcX * 2 + (srcY - origSrcY) * pitch;
+ uint8 *dstPtr = buf + srcX * 2 + maxDstY * pitch;
+
+ for (y = maxDstY; y >= srcY; y--) {
+ const uint8 *srcPtr = startSrcPtr + aspect2Real(y) * pitch;
+
+#if ASPECT_MODE == kVeryFastAndUglyAspectMode
+ // Once source and destination coincide, the remaining lines are already
+ // in place.
+ if (srcPtr == dstPtr)
+ break;
+ memcpy(dstPtr, srcPtr, width * 2);
+#else
+ // Bilinear filter
+ switch (y % 6) {
+ case 0:
+ case 5:
+ // These lines are copied unchanged.
+ if (srcPtr != dstPtr)
+ memcpy(dstPtr, srcPtr, width * 2);
+ break;
+ case 1:
+ interpolate5Line<bitFormat, 1>((uint16 *)dstPtr, (const uint16 *)(srcPtr - pitch), (const uint16 *)srcPtr, width);
+ break;
+ case 2:
+ interpolate5Line<bitFormat, 2>((uint16 *)dstPtr, (const uint16 *)(srcPtr - pitch), (const uint16 *)srcPtr, width);
+ break;
+ case 3:
+ interpolate5Line<bitFormat, 2>((uint16 *)dstPtr, (const uint16 *)srcPtr, (const uint16 *)(srcPtr - pitch), width);
+ break;
+ case 4:
+ interpolate5Line<bitFormat, 1>((uint16 *)dstPtr, (const uint16 *)srcPtr, (const uint16 *)(srcPtr - pitch), width);
+ break;
+ }
+#endif
+ dstPtr -= pitch;
+ }
+
+ return 1 + maxDstY - srcY;
+}
+
+/**
+ * Run-time dispatcher: forwards to the stretch200To240 template instance
+ * matching gBitFormat (565, otherwise 555).
+ */
+int stretch200To240(uint8 *buf, uint32 pitch, int width, int height, int srcX, int srcY, int origSrcY) {
+	return (gBitFormat == 565)
+		? stretch200To240<565>(buf, pitch, width, height, srcX, srcY, origSrcY)
+		: stretch200To240<555>(buf, pitch, width, height, srcX, srcY, origSrcY);
+}
+
diff --git a/graphics/scaler/hq2x.cpp b/graphics/scaler/hq2x.cpp
new file mode 100644
index 0000000000..cd15e7f97b
--- /dev/null
+++ b/graphics/scaler/hq2x.cpp
@@ -0,0 +1,175 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+#include "common/scaler/intern.h"
+
+#ifdef USE_NASM
+// Assembly version of HQ2x
+
+// Declaration of the externally implemented (C linkage) HQ2x routine.
+extern "C" {
+
+// On every target except Win32 the routine is referred to by the
+// underscore-prefixed name _hq2x_16.
+// NOTE(review): presumably this matches the symbol name exported by the
+// assembly file on platforms that do not add the underscore themselves - confirm.
+#ifndef _WIN32
+#define hq2x_16 _hq2x_16
+#endif
+
+// Arguments, in the order used by the call below:
+// source, destination, width, height, source pitch, destination pitch.
+void hq2x_16(const byte *, byte *, uint32, uint32, uint32, uint32);
+
+}
+
+// HQ2x entry point for builds with USE_NASM: forwards straight to hq2x_16.
+// Note that the argument order differs from this function's parameter order.
+void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+ hq2x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch);
+}
+
+#else
+
+#ifdef HAS_ALTIVEC
+
+#ifdef __amigaos4__
+#include <proto/exec.h>
+#include <altivec.h>
+static bool isAltiVecAvailable() {
+	// Query the vector unit type through IExec->GetCPUInfo(); any answer
+	// other than VECTORTYPE_NONE is reported as "available".
+	uint32 vectorUnitType;
+	IExec->GetCPUInfo(GCIT_VectorUnit, &vectorUnitType, TAG_DONE);
+	return vectorUnitType != VECTORTYPE_NONE;
+}
+#else
+
+#include <sys/sysctl.h>
+
+static bool isAltiVecAvailable() {
+	// Read the hw / vector-unit value via sysctl().
+	int mib[2] = { CTL_HW, HW_VECTORUNIT };
+	int vectorUnit = 0;
+	size_t vectorUnitSize = sizeof(vectorUnit);
+
+	// A failed query is treated as "no vector unit".
+	if (sysctl(mib, 2, &vectorUnit, &vectorUnitSize, NULL, 0) != 0)
+		return false;
+
+	return vectorUnit != 0;
+}
+#endif
+#endif
+
+/*
+ * Output-pixel helpers used by the pattern switch in hq2x.h.
+ *
+ * Each macro PIXELrc_n stores one pixel of the 2x2 output block located at q:
+ *   PIXEL00 -> *(q)                PIXEL01 -> *(q+1)
+ *   PIXEL10 -> *(q+nextlineDst)    PIXEL11 -> *(q+1+nextlineDst)
+ *
+ * The numeric suffix selects the value that is stored:
+ *   _0           w5 unchanged
+ *   _10 _11 _12  interpolate16_2<bitFormat,3,1>(w5, one neighbour)
+ *   _20 _21 _22  interpolate16_3<bitFormat,2,1,1>(w5, two neighbours)
+ *   _60 _61      interpolate16_3<bitFormat,5,2,1>(w5, two neighbours)
+ *   _70          interpolate16_3<bitFormat,6,1,1>(w5, two neighbours)
+ *   _90          interpolate16_3<bitFormat,2,3,3>(w5, two neighbours)
+ *   _100         interpolate16_3<bitFormat,14,1,1>(w5, two neighbours)
+ * NOTE(review): the integer template arguments are presumably the blend
+ * weights of the respective inputs - confirm against interpolate16_2/_3.
+ *
+ * The macros expand to complete statements (the trailing ';' is part of the
+ * expansion) and rely on q, nextlineDst, w1..w9 and bitFormat being defined
+ * at the point of use.
+ */
+
+// Top-left output pixel.
+#define PIXEL00_0 *(q) = w5;
+#define PIXEL00_10 *(q) = interpolate16_2<bitFormat,3,1>(w5, w1);
+#define PIXEL00_11 *(q) = interpolate16_2<bitFormat,3,1>(w5, w4);
+#define PIXEL00_12 *(q) = interpolate16_2<bitFormat,3,1>(w5, w2);
+#define PIXEL00_20 *(q) = interpolate16_3<bitFormat,2,1,1>(w5, w4, w2);
+#define PIXEL00_21 *(q) = interpolate16_3<bitFormat,2,1,1>(w5, w1, w2);
+#define PIXEL00_22 *(q) = interpolate16_3<bitFormat,2,1,1>(w5, w1, w4);
+#define PIXEL00_60 *(q) = interpolate16_3<bitFormat,5,2,1>(w5, w2, w4);
+#define PIXEL00_61 *(q) = interpolate16_3<bitFormat,5,2,1>(w5, w4, w2);
+#define PIXEL00_70 *(q) = interpolate16_3<bitFormat,6,1,1>(w5, w4, w2);
+#define PIXEL00_90 *(q) = interpolate16_3<bitFormat,2,3,3>(w5, w4, w2);
+#define PIXEL00_100 *(q) = interpolate16_3<bitFormat,14,1,1>(w5, w4, w2);
+
+// Top-right output pixel.
+#define PIXEL01_0 *(q+1) = w5;
+#define PIXEL01_10 *(q+1) = interpolate16_2<bitFormat,3,1>(w5, w3);
+#define PIXEL01_11 *(q+1) = interpolate16_2<bitFormat,3,1>(w5, w2);
+#define PIXEL01_12 *(q+1) = interpolate16_2<bitFormat,3,1>(w5, w6);
+#define PIXEL01_20 *(q+1) = interpolate16_3<bitFormat,2,1,1>(w5, w2, w6);
+#define PIXEL01_21 *(q+1) = interpolate16_3<bitFormat,2,1,1>(w5, w3, w6);
+#define PIXEL01_22 *(q+1) = interpolate16_3<bitFormat,2,1,1>(w5, w3, w2);
+#define PIXEL01_60 *(q+1) = interpolate16_3<bitFormat,5,2,1>(w5, w6, w2);
+#define PIXEL01_61 *(q+1) = interpolate16_3<bitFormat,5,2,1>(w5, w2, w6);
+#define PIXEL01_70 *(q+1) = interpolate16_3<bitFormat,6,1,1>(w5, w2, w6);
+#define PIXEL01_90 *(q+1) = interpolate16_3<bitFormat,2,3,3>(w5, w2, w6);
+#define PIXEL01_100 *(q+1) = interpolate16_3<bitFormat,14,1,1>(w5, w2, w6);
+
+// Bottom-left output pixel.
+#define PIXEL10_0 *(q+nextlineDst) = w5;
+#define PIXEL10_10 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w7);
+#define PIXEL10_11 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w8);
+#define PIXEL10_12 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w4);
+#define PIXEL10_20 *(q+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w8, w4);
+#define PIXEL10_21 *(q+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w7, w4);
+#define PIXEL10_22 *(q+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w7, w8);
+#define PIXEL10_60 *(q+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w5, w4, w8);
+#define PIXEL10_61 *(q+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w5, w8, w4);
+#define PIXEL10_70 *(q+nextlineDst) = interpolate16_3<bitFormat,6,1,1>(w5, w8, w4);
+#define PIXEL10_90 *(q+nextlineDst) = interpolate16_3<bitFormat,2,3,3>(w5, w8, w4);
+#define PIXEL10_100 *(q+nextlineDst) = interpolate16_3<bitFormat,14,1,1>(w5, w8, w4);
+
+// Bottom-right output pixel.
+#define PIXEL11_0 *(q+1+nextlineDst) = w5;
+#define PIXEL11_10 *(q+1+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w9);
+#define PIXEL11_11 *(q+1+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w6);
+#define PIXEL11_12 *(q+1+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w8);
+#define PIXEL11_20 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w6, w8);
+#define PIXEL11_21 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w9, w8);
+#define PIXEL11_22 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,1,1>(w5, w9, w6);
+#define PIXEL11_60 *(q+1+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w5, w8, w6);
+#define PIXEL11_61 *(q+1+nextlineDst) = interpolate16_3<bitFormat,5,2,1>(w5, w6, w8);
+#define PIXEL11_70 *(q+1+nextlineDst) = interpolate16_3<bitFormat,6,1,1>(w5, w6, w8);
+#define PIXEL11_90 *(q+1+nextlineDst) = interpolate16_3<bitFormat,2,3,3>(w5, w6, w8);
+#define PIXEL11_100 *(q+1+nextlineDst) = interpolate16_3<bitFormat,14,1,1>(w5, w6, w8);
+
+// YUV(n) looks up the RGBtoYUV table entry indexed by the pixel value w<n>.
+#define YUV(x) RGBtoYUV[w ## x]
+
+
+// HQ2x for the 565 pixel format. The function body is the shared
+// implementation in hq2x.h, textually included with bitFormat defined as 565
+// so that the PIXELrc_n macros instantiate the 565 interpolators.
+// The header is included from graphics/scaler/, which is where it lives now.
+#define bitFormat 565
+void HQ2x_565(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	#include "graphics/scaler/hq2x.h"
+}
+#undef bitFormat
+
+// HQ2x for the 555 pixel format. The function body is the shared
+// implementation in hq2x.h, textually included with bitFormat defined as 555
+// so that the PIXELrc_n macros instantiate the 555 interpolators.
+// The header is included from graphics/scaler/, which is where it lives now.
+#define bitFormat 555
+void HQ2x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	#include "graphics/scaler/hq2x.h"
+}
+#undef bitFormat
+
+
+#ifdef HAS_ALTIVEC
+	// From here on, every inclusion of hq2x.h selects its USE_ALTIVEC code
+	// paths. The two functions below are the AltiVec counterparts of
+	// HQ2x_565 / HQ2x_555 and are built from the same header, which is
+	// included from graphics/scaler/, where it lives now.
+	#define USE_ALTIVEC 1
+
+	#define bitFormat 565
+	void HQ2x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+		#include "graphics/scaler/hq2x.h"
+	}
+	#undef bitFormat
+
+	#define bitFormat 555
+	void HQ2x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+		#include "graphics/scaler/hq2x.h"
+	}
+	#undef bitFormat
+#endif
+
+void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	// Every value of gBitFormat other than 565 is handled as 555.
+	const bool is565 = (gBitFormat == 565);
+
+#ifdef HAS_ALTIVEC
+	// Use the AltiVec builds whenever the runtime check reports a vector unit.
+	if (isAltiVecAvailable()) {
+		if (is565)
+			HQ2x_565_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+		else
+			HQ2x_555_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+		return;
+	}
+#endif
+
+	// Scalar implementation.
+	if (is565)
+		HQ2x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+	else
+		HQ2x_555(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+}
+
+#endif //Assembly version
diff --git a/graphics/scaler/hq2x.h b/graphics/scaler/hq2x.h
new file mode 100644
index 0000000000..a59f108048
--- /dev/null
+++ b/graphics/scaler/hq2x.h
@@ -0,0 +1,1977 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+/*
+ * The HQ2x high quality 2x graphics filter.
+ * Original author Maxim Stepin (see http://www.hiend3d.com/hq2x.html).
+ * Adapted for ScummVM to 16 bit output and optimized by Max Horn.
+ */
+
+ register int w1, w2, w3, w4, w5, w6, w7, w8, w9;
+
+ const uint32 nextlineSrc = srcPitch / sizeof(uint16);
+ const uint16 *p = (const uint16 *)srcPtr;
+
+ const uint32 nextlineDst = dstPitch / sizeof(uint16);
+ uint16 *q = (uint16 *)dstPtr;
+
+ // +----+----+----+
+ // | | | |
+ // | w1 | w2 | w3 |
+ // +----+----+----+
+ // | | | |
+ // | w4 | w5 | w6 |
+ // +----+----+----+
+ // | | | |
+ // | w7 | w8 | w9 |
+ // +----+----+----+
+
+#ifdef USE_ALTIVEC
+ // The YUV threshold.
+ static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706);
+
+ // Bit pattern mask.
+ static const vector signed int vPatternMask1 = (vector signed int)(0x01, 0x02, 0x04, 0x08);
+ static const vector signed int vPatternMask2 = (vector signed int)(0x10, 0x20, 0x40, 0x80);
+
+ // Permutation masks for the incremental vector loading (see below for more information).
+ static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7, 8,9,10,11, 20,21,22,23, 16,17,18,19);
+ static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27, 8,9,10,11, 12,13,14,15, 28,29,30,31);
+
+ // The YUV vectors.
+ vector signed char vecYUV5555;
+ vector signed char vecYUV1234;
+ vector signed char vecYUV6789;
+#endif
+
+ while (height--) {
+ w1 = *(p - 1 - nextlineSrc);
+ w4 = *(p - 1);
+ w7 = *(p - 1 + nextlineSrc);
+
+ w2 = *(p - nextlineSrc);
+ w5 = *(p);
+ w8 = *(p + nextlineSrc);
+
+#ifdef USE_ALTIVEC
+ // Load initial values of vecYUV1234 / vecYUV6789
+ const int arr1234[4] = {0, YUV(1), YUV(2), 0};
+ const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)};
+
+ vecYUV1234 = *(const vector signed char *)arr1234;
+ vecYUV6789 = *(const vector signed char *)arr6789;
+#endif
+
+ int tmpWidth = width;
+ while (tmpWidth--) {
+ p++;
+
+ w3 = *(p - nextlineSrc);
+ w6 = *(p);
+ w9 = *(p + nextlineSrc);
+
+ int pattern = 0;
+
+#ifdef USE_ALTIVEC
+ /*
+ Consider this peephole into the image buffer:
+ +----+----+----+----+
+ | | | | |
+ | w00| w01| w02| w03|
+ +----+----+----+----+
+ | | | | |
+ | w10| w11| w12| w13|
+ +----+----+----+----+
+ | | | | |
+ | w20| w21| w22| w23|
+ +----+----+----+----+
+
+ In the previous loop iteration, w11 was the center point, and our
+ vectors contain the following data from the previous iteration:
+ vecYUV5555 = { w11, w11, w11, w11 }
+ vecYUV1234 = { w00, w01, w02, w10 }
+ vecYUV6789 = { w12, w20, w21, w22 }
+
+ Now we have the new center point w12, and we would like to have
+ the following values in our vectors:
+ vecYUV5555 = { w12, w12, w12, w12 }
+ vecYUV1234 = { w01, w02, w03, w11 }
+ vecYUV6789 = { w13, w21, w22, w23 }
+
+ To this end we load a single new vector:
+ vTmp = { w11, w03, w13, w23 }
+
+ We then can compute all the new vector values using permutations only:
+ vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] }
+ vecYUV1234 = { vecYUV1234[1], vecYUV1234[2], vTmp[1], vTmp[0] }
+ vecYUV6789 = { vTmp[2], vecYUV6789[2], vecYUV6789[3], vTmp[3] }
+
+ Beautiful, isn't it? :-)
+ */
+
+ // Load the new values into a temporary vector (see above for an explanation)
+ const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)};
+ vector signed char vTmp = *(const vector signed char *)tmpArr;
+
+ // Next update the data vectors
+ vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0);
+ vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234);
+ vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789);
+
+ // Compute the absolute difference between the center point's YUV and the outer points
+ const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234));
+ const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789));
+
+ // Compare the difference to the threshold (byte-wise)
+ const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold);
+ const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold);
+
+ // Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0.
+ // Then AND in the pattern masks. The idea is that for 0 components, we get 0,
+ // while for the other components we get exactly the mask value.
+ const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1);
+ const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2);
+
+ // Now sum up the components of all vectors. Since our pattern mask values
+ // are all "orthogonal", this is effectively the same as ORing them all
+ // together. In the end, the rightmost word of vSum contains the 'pattern'
+ vector signed int vSum = vec_sums(vPattern1, (vector signed int)0);
+ vSum = vec_sums(vPattern2, vSum);
+ pattern = ((int *)&vSum)[3];
+#else
+ const int yuv5 = YUV(5);
+ if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001;
+ if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002;
+ if (w5 != w3 && diffYUV(yuv5, YUV(3))) pattern |= 0x0004;
+ if (w5 != w4 && diffYUV(yuv5, YUV(4))) pattern |= 0x0008;
+ if (w5 != w6 && diffYUV(yuv5, YUV(6))) pattern |= 0x0010;
+ if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020;
+ if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040;
+ if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080;
+#endif
+
+ switch (pattern) {
+ case 0:
+ case 1:
+ case 4:
+ case 32:
+ case 128:
+ case 5:
+ case 132:
+ case 160:
+ case 33:
+ case 129:
+ case 36:
+ case 133:
+ case 164:
+ case 161:
+ case 37:
+ case 165:
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_20
+ PIXEL11_20
+ break;
+ case 2:
+ case 34:
+ case 130:
+ case 162:
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_20
+ PIXEL11_20
+ break;
+ case 16:
+ case 17:
+ case 48:
+ case 49:
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_20
+ PIXEL11_21
+ break;
+ case 64:
+ case 65:
+ case 68:
+ case 69:
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_21
+ PIXEL11_22
+ break;
+ case 8:
+ case 12:
+ case 136:
+ case 140:
+ PIXEL00_21
+ PIXEL01_20
+ PIXEL10_22
+ PIXEL11_20
+ break;
+ case 3:
+ case 35:
+ case 131:
+ case 163:
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_20
+ PIXEL11_20
+ break;
+ case 6:
+ case 38:
+ case 134:
+ case 166:
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_20
+ PIXEL11_20
+ break;
+ case 20:
+ case 21:
+ case 52:
+ case 53:
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_20
+ PIXEL11_21
+ break;
+ case 144:
+ case 145:
+ case 176:
+ case 177:
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_20
+ PIXEL11_12
+ break;
+ case 192:
+ case 193:
+ case 196:
+ case 197:
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_21
+ PIXEL11_11
+ break;
+ case 96:
+ case 97:
+ case 100:
+ case 101:
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_12
+ PIXEL11_22
+ break;
+ case 40:
+ case 44:
+ case 168:
+ case 172:
+ PIXEL00_21
+ PIXEL01_20
+ PIXEL10_11
+ PIXEL11_20
+ break;
+ case 9:
+ case 13:
+ case 137:
+ case 141:
+ PIXEL00_12
+ PIXEL01_20
+ PIXEL10_22
+ PIXEL11_20
+ break;
+ case 18:
+ case 50:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_20
+ PIXEL11_21
+ break;
+ case 80:
+ case 81:
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_21
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 72:
+ case 76:
+ PIXEL00_21
+ PIXEL01_20
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_22
+ break;
+ case 10:
+ case 138:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_21
+ PIXEL10_22
+ PIXEL11_20
+ break;
+ case 66:
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_22
+ break;
+ case 24:
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_21
+ break;
+ case 7:
+ case 39:
+ case 135:
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_20
+ PIXEL11_20
+ break;
+ case 148:
+ case 149:
+ case 180:
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_20
+ PIXEL11_12
+ break;
+ case 224:
+ case 228:
+ case 225:
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_12
+ PIXEL11_11
+ break;
+ case 41:
+ case 169:
+ case 45:
+ PIXEL00_12
+ PIXEL01_20
+ PIXEL10_11
+ PIXEL11_20
+ break;
+ case 22:
+ case 54:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_20
+ PIXEL11_21
+ break;
+ case 208:
+ case 209:
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_21
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 104:
+ case 108:
+ PIXEL00_21
+ PIXEL01_20
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_22
+ break;
+ case 11:
+ case 139:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_21
+ PIXEL10_22
+ PIXEL11_20
+ break;
+ case 19:
+ case 51:
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL00_11
+ PIXEL01_10
+ } else {
+ PIXEL00_60
+ PIXEL01_90
+ }
+ PIXEL10_20
+ PIXEL11_21
+ break;
+ case 146:
+ case 178:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ PIXEL11_12
+ } else {
+ PIXEL01_90
+ PIXEL11_61
+ }
+ PIXEL10_20
+ break;
+ case 84:
+ case 85:
+ PIXEL00_20
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL01_11
+ PIXEL11_10
+ } else {
+ PIXEL01_60
+ PIXEL11_90
+ }
+ PIXEL10_21
+ break;
+ case 112:
+ case 113:
+ PIXEL00_20
+ PIXEL01_22
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL10_12
+ PIXEL11_10
+ } else {
+ PIXEL10_61
+ PIXEL11_90
+ }
+ break;
+ case 200:
+ case 204:
+ PIXEL00_21
+ PIXEL01_20
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ PIXEL11_11
+ } else {
+ PIXEL10_90
+ PIXEL11_60
+ }
+ break;
+ case 73:
+ case 77:
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL00_12
+ PIXEL10_10
+ } else {
+ PIXEL00_61
+ PIXEL10_90
+ }
+ PIXEL01_20
+ PIXEL11_22
+ break;
+ case 42:
+ case 170:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ PIXEL10_11
+ } else {
+ PIXEL00_90
+ PIXEL10_60
+ }
+ PIXEL01_21
+ PIXEL11_20
+ break;
+ case 14:
+ case 142:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ PIXEL01_12
+ } else {
+ PIXEL00_90
+ PIXEL01_61
+ }
+ PIXEL10_22
+ PIXEL11_20
+ break;
+ case 67:
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_22
+ break;
+ case 70:
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_22
+ break;
+ case 28:
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_21
+ break;
+ case 152:
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_12
+ break;
+ case 194:
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_11
+ break;
+ case 98:
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_22
+ break;
+ case 56:
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_21
+ break;
+ case 25:
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_21
+ break;
+ case 26:
+ case 31:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_22
+ PIXEL11_21
+ break;
+ case 82:
+ case 214:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_21
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 88:
+ case 248:
+ PIXEL00_21
+ PIXEL01_22
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 74:
+ case 107:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_21
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_22
+ break;
+ case 27:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_10
+ PIXEL10_22
+ PIXEL11_21
+ break;
+ case 86:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_21
+ PIXEL11_10
+ break;
+ case 216:
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_10
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 106:
+ PIXEL00_10
+ PIXEL01_21
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_22
+ break;
+ case 30:
+ PIXEL00_10
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_22
+ PIXEL11_21
+ break;
+ case 210:
+ PIXEL00_22
+ PIXEL01_10
+ PIXEL10_21
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 120:
+ PIXEL00_21
+ PIXEL01_22
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_10
+ break;
+ case 75:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_21
+ PIXEL10_10
+ PIXEL11_22
+ break;
+ case 29:
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_21
+ break;
+ case 198:
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_11
+ break;
+ case 184:
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_12
+ break;
+ case 99:
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_22
+ break;
+ case 57:
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_21
+ break;
+ case 71:
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_22
+ break;
+ case 156:
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_12
+ break;
+ case 226:
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_11
+ break;
+ case 60:
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_21
+ break;
+ case 195:
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_11
+ break;
+ case 102:
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_22
+ break;
+ case 153:
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_12
+ break;
+ case 58:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ PIXEL10_11
+ PIXEL11_21
+ break;
+ case 83:
+ PIXEL00_11
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ PIXEL10_21
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 92:
+ PIXEL00_21
+ PIXEL01_11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 202:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ PIXEL01_21
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ PIXEL11_11
+ break;
+ case 78:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ PIXEL01_12
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ PIXEL11_22
+ break;
+ case 154:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ PIXEL10_22
+ PIXEL11_12
+ break;
+ case 114:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ PIXEL10_12
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 89:
+ PIXEL00_12
+ PIXEL01_22
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 90:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 55:
+ case 23:
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL00_11
+ PIXEL01_0
+ } else {
+ PIXEL00_60
+ PIXEL01_90
+ }
+ PIXEL10_20
+ PIXEL11_21
+ break;
+ case 182:
+ case 150:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ PIXEL11_12
+ } else {
+ PIXEL01_90
+ PIXEL11_61
+ }
+ PIXEL10_20
+ break;
+ case 213:
+ case 212:
+ PIXEL00_20
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL01_11
+ PIXEL11_0
+ } else {
+ PIXEL01_60
+ PIXEL11_90
+ }
+ PIXEL10_21
+ break;
+ case 241:
+ case 240:
+ PIXEL00_20
+ PIXEL01_22
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL10_12
+ PIXEL11_0
+ } else {
+ PIXEL10_61
+ PIXEL11_90
+ }
+ break;
+ case 236:
+ case 232:
+ PIXEL00_21
+ PIXEL01_20
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ PIXEL11_11
+ } else {
+ PIXEL10_90
+ PIXEL11_60
+ }
+ break;
+ case 109:
+ case 105:
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL00_12
+ PIXEL10_0
+ } else {
+ PIXEL00_61
+ PIXEL10_90
+ }
+ PIXEL01_20
+ PIXEL11_22
+ break;
+ case 171:
+ case 43:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ PIXEL10_11
+ } else {
+ PIXEL00_90
+ PIXEL10_60
+ }
+ PIXEL01_21
+ PIXEL11_20
+ break;
+ case 143:
+ case 15:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ PIXEL01_12
+ } else {
+ PIXEL00_90
+ PIXEL01_61
+ }
+ PIXEL10_22
+ PIXEL11_20
+ break;
+ case 124:
+ PIXEL00_21
+ PIXEL01_11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_10
+ break;
+ case 203:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_21
+ PIXEL10_10
+ PIXEL11_11
+ break;
+ case 62:
+ PIXEL00_10
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_11
+ PIXEL11_21
+ break;
+ case 211:
+ PIXEL00_11
+ PIXEL01_10
+ PIXEL10_21
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 118:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_12
+ PIXEL11_10
+ break;
+ case 217:
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_10
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 110:
+ PIXEL00_10
+ PIXEL01_12
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_22
+ break;
+ case 155:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_10
+ PIXEL10_22
+ PIXEL11_12
+ break;
+ case 188:
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_12
+ break;
+ case 185:
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_12
+ break;
+ case 61:
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_21
+ break;
+ case 157:
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_12
+ break;
+ case 103:
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_22
+ break;
+ case 227:
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_11
+ break;
+ case 230:
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_11
+ break;
+ case 199:
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_11
+ break;
+ case 220:
+ PIXEL00_21
+ PIXEL01_11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 158:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_22
+ PIXEL11_12
+ break;
+ case 234:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ PIXEL01_21
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_11
+ break;
+ case 242:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ PIXEL10_12
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 59:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ PIXEL10_11
+ PIXEL11_21
+ break;
+ case 121:
+ PIXEL00_12
+ PIXEL01_22
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 87:
+ PIXEL00_11
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_21
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 79:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_12
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ PIXEL11_22
+ break;
+ case 122:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 94:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 218:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 91:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 229:
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_12
+ PIXEL11_11
+ break;
+ case 167:
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_20
+ PIXEL11_20
+ break;
+ case 173:
+ PIXEL00_12
+ PIXEL01_20
+ PIXEL10_11
+ PIXEL11_20
+ break;
+ case 181:
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_20
+ PIXEL11_12
+ break;
+ case 186:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ PIXEL10_11
+ PIXEL11_12
+ break;
+ case 115:
+ PIXEL00_11
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ PIXEL10_12
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 93:
+ PIXEL00_12
+ PIXEL01_11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 206:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ PIXEL01_12
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ PIXEL11_11
+ break;
+ case 205:
+ case 201:
+ PIXEL00_12
+ PIXEL01_20
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_10
+ } else {
+ PIXEL10_70
+ }
+ PIXEL11_11
+ break;
+ case 174:
+ case 46:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_10
+ } else {
+ PIXEL00_70
+ }
+ PIXEL01_12
+ PIXEL10_11
+ PIXEL11_20
+ break;
+ case 179:
+ case 147:
+ PIXEL00_11
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_10
+ } else {
+ PIXEL01_70
+ }
+ PIXEL10_20
+ PIXEL11_12
+ break;
+ case 117:
+ case 116:
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_12
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_10
+ } else {
+ PIXEL11_70
+ }
+ break;
+ case 189:
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_12
+ break;
+ case 231:
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_11
+ break;
+ case 126:
+ PIXEL00_10
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_10
+ break;
+ case 219:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_10
+ PIXEL10_10
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 125:
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL00_12
+ PIXEL10_0
+ } else {
+ PIXEL00_61
+ PIXEL10_90
+ }
+ PIXEL01_11
+ PIXEL11_10
+ break;
+ case 221:
+ PIXEL00_12
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL01_11
+ PIXEL11_0
+ } else {
+ PIXEL01_60
+ PIXEL11_90
+ }
+ PIXEL10_10
+ break;
+ case 207:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ PIXEL01_12
+ } else {
+ PIXEL00_90
+ PIXEL01_61
+ }
+ PIXEL10_10
+ PIXEL11_11
+ break;
+ case 238:
+ PIXEL00_10
+ PIXEL01_12
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ PIXEL11_11
+ } else {
+ PIXEL10_90
+ PIXEL11_60
+ }
+ break;
+ case 190:
+ PIXEL00_10
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ PIXEL11_12
+ } else {
+ PIXEL01_90
+ PIXEL11_61
+ }
+ PIXEL10_11
+ break;
+ case 187:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ PIXEL10_11
+ } else {
+ PIXEL00_90
+ PIXEL10_60
+ }
+ PIXEL01_10
+ PIXEL11_12
+ break;
+ case 243:
+ PIXEL00_11
+ PIXEL01_10
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL10_12
+ PIXEL11_0
+ } else {
+ PIXEL10_61
+ PIXEL11_90
+ }
+ break;
+ case 119:
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL00_11
+ PIXEL01_0
+ } else {
+ PIXEL00_60
+ PIXEL01_90
+ }
+ PIXEL10_12
+ PIXEL11_10
+ break;
+ case 237:
+ case 233:
+ PIXEL00_12
+ PIXEL01_20
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_100
+ }
+ PIXEL11_11
+ break;
+ case 175:
+ case 47:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_100
+ }
+ PIXEL01_12
+ PIXEL10_11
+ PIXEL11_20
+ break;
+ case 183:
+ case 151:
+ PIXEL00_11
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_100
+ }
+ PIXEL10_20
+ PIXEL11_12
+ break;
+ case 245:
+ case 244:
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_12
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_100
+ }
+ break;
+ case 250:
+ PIXEL00_10
+ PIXEL01_10
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 123:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_10
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_10
+ break;
+ case 95:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_10
+ PIXEL11_10
+ break;
+ case 222:
+ PIXEL00_10
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_10
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 252:
+ PIXEL00_21
+ PIXEL01_11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_100
+ }
+ break;
+ case 249:
+ PIXEL00_12
+ PIXEL01_22
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_100
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 235:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_21
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_100
+ }
+ PIXEL11_11
+ break;
+ case 111:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_100
+ }
+ PIXEL01_12
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_22
+ break;
+ case 63:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_100
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_11
+ PIXEL11_21
+ break;
+ case 159:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_100
+ }
+ PIXEL10_22
+ PIXEL11_12
+ break;
+ case 215:
+ PIXEL00_11
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_100
+ }
+ PIXEL10_21
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 246:
+ PIXEL00_22
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ PIXEL10_12
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_100
+ }
+ break;
+ case 254:
+ PIXEL00_10
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_100
+ }
+ break;
+ case 253:
+ PIXEL00_12
+ PIXEL01_11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_100
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_100
+ }
+ break;
+ case 251:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ PIXEL01_10
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_100
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 239:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_100
+ }
+ PIXEL01_12
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_100
+ }
+ PIXEL11_11
+ break;
+ case 127:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_100
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_20
+ }
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_20
+ }
+ PIXEL11_10
+ break;
+ case 191:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_100
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_100
+ }
+ PIXEL10_11
+ PIXEL11_12
+ break;
+ case 223:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_20
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_100
+ }
+ PIXEL10_10
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_20
+ }
+ break;
+ case 247:
+ PIXEL00_11
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_100
+ }
+ PIXEL10_12
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_100
+ }
+ break;
+ case 255:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_0
+ } else {
+ PIXEL00_100
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_0
+ } else {
+ PIXEL01_100
+ }
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_0
+ } else {
+ PIXEL10_100
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL11_0
+ } else {
+ PIXEL11_100
+ }
+ break;
+ }
+
+ w1 = w2;
+ w4 = w5;
+ w7 = w8;
+
+ w2 = w3;
+ w5 = w6;
+ w8 = w9;
+
+ q += 2;
+ }
+ p += nextlineSrc - width;
+ q += (nextlineDst - width) * 2;
+ }
diff --git a/graphics/scaler/hq2x_i386.asm b/graphics/scaler/hq2x_i386.asm
new file mode 100644
index 0000000000..ef49b590f5
--- /dev/null
+++ b/graphics/scaler/hq2x_i386.asm
@@ -0,0 +1,1842 @@
+;hq2x filter
+;16bpp output
+;----------------------------------------------------------
+;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
+;
+;This program is free software; you can redistribute it and/or
+;modify it under the terms of the GNU General Public License
+;as published by the Free Software Foundation; either
+;version 2 of the License, or (at your option) any later
+;version.
+;
+;This program is distributed in the hope that it will be useful,
+;but WITHOUT ANY WARRANTY; without even the implied warranty of
+;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;GNU General Public License for more details.
+;
+;You should have received a copy of the GNU General Public License
+;along with this program; if not, write to the Free Software
+;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+GLOBAL _hq2x_16
+
+EXTERN _LUT16to32
+EXTERN _RGBtoYUV
+
+SECTION .bss ; uninitialised scratch storage used by _hq2x_16 and the macros
+linesleft resd 1 ; initialised from the Yres argument in _hq2x_16
+xcounter resd 1 ; reloaded from the Xres argument at .loopy
+cross resd 1 ; built in .flags: bits 1/2/4/8 are set when w5 is not bit-identical to w2/w4/w6/w8
+nextline resd 1 ; srcPitch, used as the byte offset to the source row below
+prevline resd 1 ; -srcPitch, used as the byte offset to the source row above
+w1 resd 1 ; w1..w9: zero-extended 16-bit pixels of the 3x3 source window
+w2 resd 1 ; w1 w2 w3 are read from the row above (esi+prevline-2)
+w3 resd 1
+w4 resd 1 ; w4 w5 w6 are read from the current row (esi-2)
+w5 resd 1 ; centre pixel; .flags copies it into eax
+w6 resd 1
+w7 resd 1 ; w7 w8 w9 are read from the row below (esi+nextline-2)
+w8 resd 1
+w9 resd 1
+
+SECTION .data ; constants used by the macros and by _hq2x_16
+
+reg_blank dd 0,0 ; 64 zero bits: second operand for punpcklbw/packuswb in Interp6/7/9/10
+const3 dd 0x00030003,0x00000003 ; three low word lanes = 3 (pmullw factor in Interp9)
+const5 dd 0x00050005,0x00000005 ; three low word lanes = 5 (pmullw factor in Interp6)
+const6 dd 0x00060006,0x00000006 ; three low word lanes = 6 (pmullw factor in Interp7)
+const14 dd 0x000E000E,0x0000000E ; three low word lanes = 14 (pmullw factor in Interp10)
+threshold dd 0x00300706,0x00000000 ; per-byte limits (0x30, 0x07, 0x06) subtracted with saturation from the byte differences of two _RGBtoYUV entries
+zerolowbits dd 0xF7DEF7DE ; clears bits 0, 5 and 11 of each 16-bit half (the low bit of each field in a 5-6-5 layout - presumably RGB565) before add-and-halve blending
+moduloSrc dd 0 ; set in _hq2x_16 to srcPitch - 2*Xres
+moduloDst dd 0 ; set in _hq2x_16 to (dstPitch - 2*Xres) * 2
+
+SECTION .text
+
+%macro TestDiff 2 ; TestDiff a,b: leaves ecx = 0 when pixels [a] and [b] are alike, non-zero when they differ; clobbers ecx, edx, mm1, mm2, mm5
+ xor ecx,ecx ; default result: alike
+ mov edx,[%1]
+ cmp edx,[%2]
+ je %%fin ; bit-identical pixels need no table lookup
+ mov ecx,_RGBtoYUV
+ movd mm1,[ecx+edx*4] ; 4-byte table entry indexed by the pixel value of [a]
+ movq mm5,mm1 ; keep a copy of a's entry for the reverse subtraction
+ mov edx,[%2]
+ movd mm2,[ecx+edx*4] ; table entry for [b]
+ psubusb mm1,mm2 ; saturating a-b per byte
+ psubusb mm2,mm5 ; saturating b-a per byte
+ por mm1,mm2 ; one of the two is zero per byte, so the OR is the absolute difference
+ psubusb mm1,[threshold] ; bytes within their limit saturate to 0
+ movd ecx,mm1 ; non-zero iff at least one byte exceeded its limit
+%%fin:
+%endmacro
+
+%macro DiffOrNot 4 ; DiffOrNot a,b,X,Y: expand X when TestDiff a,b reports a difference, otherwise expand Y
+ TestDiff %1,%2 ; result in ecx (0 = alike)
+ test ecx,ecx
+ jz %%same
+ %3 ; "different" branch
+ jmp %%fin
+%%same:
+ %4 ; "alike" branch
+%%fin
+%endmacro
+
+%macro DiffOrNot 6 ; six-parameter form: %3,%4 are expanded when [%1] and [%2] differ, %5,%6 when they are alike
+ TestDiff %1,%2 ; result in ecx (0 = alike)
+ test ecx,ecx
+ jz %%same
+ %3 ; "different" branch, first statement
+ %4 ; "different" branch, second statement
+ jmp %%fin
+%%same:
+ %5 ; "alike" branch, first statement
+ %6 ; "alike" branch, second statement
+%%fin
+%endmacro
+
+%macro DiffOrNot 8 ; eight-parameter form: %3..%5 are expanded when [%1] and [%2] differ, %6..%8 when they are alike
+ TestDiff %1,%2 ; result in ecx (0 = alike)
+ test ecx,ecx
+ jz %%same
+ %3 ; "different" branch
+ %4
+ %5
+ jmp %%fin
+%%same:
+ %6 ; "alike" branch
+ %7
+ %8
+%%fin
+%endmacro
+
+%macro DiffOrNot 10 ; ten-parameter form: %3..%6 are expanded when [%1] and [%2] differ, %7..%10 when they are alike
+ TestDiff %1,%2 ; result in ecx (0 = alike)
+ test ecx,ecx
+ jz %%same
+ %3 ; "different" branch
+ %4
+ %5
+ %6
+ jmp %%fin
+%%same:
+ %7 ; "alike" branch
+ %8
+ %9
+ %10
+%%fin
+%endmacro
+
+%macro Interp1 3 ; Interp1 dst,c1,c2: store roughly (3*c1 + c2)/4 as a 16-bit pixel at dst; clobbers ecx, edx
+ mov edx,%2
+ mov ecx,%3
+ cmp edx,ecx
+ je %%fin ; identical colours: store c1 unchanged
+ and edx,[zerolowbits] ; drop the masked low bits so the halving below cannot leak between fields
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of c1 and c2
+ add ecx,0x0821 ; add 1 at bits 0, 5 and 11 (the bits zerolowbits clears)
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = average of c1 and the first average
+%%fin
+ mov %1,dx ; only the low 16 bits are written
+%endmacro
+
+%macro Interp2 4 ; Interp2 dst,c1,c2,c3: store roughly (2*c1 + c2 + c3)/4 as a 16-bit pixel at dst; clobbers ecx, edx
+ mov edx,%3
+ mov ecx,%4
+ cmp edx,ecx
+ je %%fin1 ; c2 == c3: their average is c3 itself, already in ecx
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of c2 and c3
+ add ecx,0x0821 ; add 1 at bits 0, 5 and 11 (masked off again below if the second average is taken)
+%%fin1
+ mov edx,%2
+ cmp edx,ecx
+ je %%fin2 ; c1 equals the intermediate value: store c1 unchanged
+ and ecx,[zerolowbits]
+ and edx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = average of c1 and the c2/c3 average
+%%fin2
+ mov %1,dx ; only the low 16 bits are written
+%endmacro
+
+%macro Interp5 3 ; Interp5 dst,c1,c2: store the average of c1 and c2 as a 16-bit pixel at dst; clobbers ecx, edx
+ mov edx,%2
+ mov ecx,%3
+ cmp edx,ecx
+ je %%fin ; identical colours: store c1 unchanged
+ and edx,[zerolowbits] ; drop the masked low bits so the halving cannot leak between fields
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+%%fin
+ mov %1,dx ; only the low 16 bits are written
+%endmacro
+
+%macro Interp6 3 ; Interp6 dst,c2,c3: blend 5*eax + 2*c2 + 1*c3 (eax is an implicit input; .flags loads it with w5) and store 16 bits at dst; clobbers ecx, edx, mm1-mm3
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4] ; 4-byte table entry for the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4] ; table entry for c2
+ mov edx, %3
+ movd mm3, [ecx+edx*4] ; table entry for c3
+ punpcklbw mm1, [reg_blank] ; widen each byte to a 16-bit lane
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ pmullw mm1, [const5] ; 5 * centre
+ psllw mm2, 1 ; 2 * c2
+ paddw mm1, mm3
+ paddw mm1, mm2 ; weighted sum, weights total 8
+ psrlw mm1, 5 ; /32: /8 for the weights plus a further /4, leaving 6 significant bits per lane
+ packuswb mm1, [reg_blank] ; back to one byte per lane
+ movd edx, mm1
+ shl dl, 2 ; the next four shifts squeeze the three byte lanes
+ shr edx, 1 ; into 5-, 6- and 5-bit fields of dx
+ shl dx, 3 ; (presumably RGB565, given a 0x00RRGGBB table layout - confirm)
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+%macro Interp7 3 ; Interp7 dst,c2,c3: blend 6*eax + c2 + c3 (eax is an implicit input; .flags loads it with w5) and store 16 bits at dst; clobbers ecx, edx, mm1-mm3
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4] ; 4-byte table entry for the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4] ; table entry for c2
+ mov edx, %3
+ movd mm3, [ecx+edx*4] ; table entry for c3
+ punpcklbw mm1, [reg_blank] ; widen each byte to a 16-bit lane
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ pmullw mm1, [const6] ; 6 * centre
+ paddw mm2, mm3 ; c2 + c3
+ paddw mm1, mm2 ; weighted sum, weights total 8
+ psrlw mm1, 5 ; /32: /8 for the weights plus a further /4, leaving 6 significant bits per lane
+ packuswb mm1, [reg_blank] ; back to one byte per lane
+ movd edx, mm1
+ shl dl, 2 ; the next four shifts squeeze the three byte lanes
+ shr edx, 1 ; into 5-, 6- and 5-bit fields of dx
+ shl dx, 3 ; (presumably RGB565, given a 0x00RRGGBB table layout - confirm)
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+%macro Interp9 3 ; Interp9 dst,c2,c3: blend 2*eax + 3*c2 + 3*c3 (eax is an implicit input; .flags loads it with w5) and store 16 bits at dst; clobbers ecx, edx, mm1-mm3
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4] ; 4-byte table entry for the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4] ; table entry for c2
+ mov edx, %3
+ movd mm3, [ecx+edx*4] ; table entry for c3
+ punpcklbw mm1, [reg_blank] ; widen each byte to a 16-bit lane
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ psllw mm1, 1 ; 2 * centre
+ paddw mm2, mm3 ; c2 + c3
+ pmullw mm2, [const3] ; 3 * (c2 + c3)
+ paddw mm1, mm2 ; weighted sum, weights total 8
+ psrlw mm1, 5 ; /32: /8 for the weights plus a further /4, leaving 6 significant bits per lane
+ packuswb mm1, [reg_blank] ; back to one byte per lane
+ movd edx, mm1
+ shl dl, 2 ; the next four shifts squeeze the three byte lanes
+ shr edx, 1 ; into 5-, 6- and 5-bit fields of dx
+ shl dx, 3 ; (presumably RGB565, given a 0x00RRGGBB table layout - confirm)
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+%macro Interp10 3 ; Interp10 dst,c2,c3: blend 14*eax + c2 + c3 (eax is an implicit input; .flags loads it with w5) and store 16 bits at dst; clobbers ecx, edx, mm1-mm3
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4] ; 4-byte table entry for the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4] ; table entry for c2
+ mov edx, %3
+ movd mm3, [ecx+edx*4] ; table entry for c3
+ punpcklbw mm1, [reg_blank] ; widen each byte to a 16-bit lane
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ pmullw mm1, [const14] ; 14 * centre
+ paddw mm2, mm3 ; c2 + c3
+ paddw mm1, mm2 ; weighted sum, weights total 16
+ psrlw mm1, 6 ; /64: /16 for the weights plus a further /4, leaving 6 significant bits per lane
+ packuswb mm1, [reg_blank] ; back to one byte per lane
+ movd edx, mm1
+ shl dl, 2 ; the next four shifts squeeze the three byte lanes
+ shr edx, 1 ; into 5-, 6- and 5-bit fields of dx
+ shl dx, 3 ; (presumably RGB565, given a 0x00RRGGBB table layout - confirm)
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+%macro PIXEL00_0 0 ; PIXEL00_*: write the output pixel at [edi]; eax is the centre pixel w5 (loaded in .flags). _0 stores it unblended
+ mov [edi],ax
+%endmacro
+
+%macro PIXEL00_10 0 ; about 3:1 blend of eax with w1
+ Interp1 [edi],eax,[w1]
+%endmacro
+
+%macro PIXEL00_11 0 ; about 3:1 blend of eax with w4
+ Interp1 [edi],eax,[w4]
+%endmacro
+
+%macro PIXEL00_12 0 ; about 3:1 blend of eax with w2
+ Interp1 [edi],eax,[w2]
+%endmacro
+
+%macro PIXEL00_20 0 ; about 2:1:1 blend of eax, w4, w2
+ Interp2 [edi],eax,[w4],[w2]
+%endmacro
+
+%macro PIXEL00_21 0 ; about 2:1:1 blend of eax, w1, w2
+ Interp2 [edi],eax,[w1],[w2]
+%endmacro
+
+%macro PIXEL00_22 0 ; about 2:1:1 blend of eax, w1, w4
+ Interp2 [edi],eax,[w1],[w4]
+%endmacro
+
+%macro PIXEL00_60 0 ; 5:2:1 blend of eax, w2, w4
+ Interp6 [edi],[w2],[w4]
+%endmacro
+
+%macro PIXEL00_61 0 ; 5:2:1 blend of eax, w4, w2
+ Interp6 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_70 0 ; 6:1:1 blend of eax, w4, w2
+ Interp7 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_90 0 ; 2:3:3 blend of eax, w4, w2
+ Interp9 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_100 0 ; 14:1:1 blend of eax, w4, w2
+ Interp10 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL01_0 0 ; PIXEL01_*: write the output pixel at [edi+2]; eax is the centre pixel w5 (loaded in .flags). _0 stores it unblended
+ mov [edi+2],ax
+%endmacro
+
+%macro PIXEL01_10 0 ; about 3:1 blend of eax with w3
+ Interp1 [edi+2],eax,[w3]
+%endmacro
+
+%macro PIXEL01_11 0 ; about 3:1 blend of eax with w2
+ Interp1 [edi+2],eax,[w2]
+%endmacro
+
+%macro PIXEL01_12 0 ; about 3:1 blend of eax with w6
+ Interp1 [edi+2],eax,[w6]
+%endmacro
+
+%macro PIXEL01_20 0 ; about 2:1:1 blend of eax, w2, w6
+ Interp2 [edi+2],eax,[w2],[w6]
+%endmacro
+
+%macro PIXEL01_21 0 ; about 2:1:1 blend of eax, w3, w6
+ Interp2 [edi+2],eax,[w3],[w6]
+%endmacro
+
+%macro PIXEL01_22 0 ; about 2:1:1 blend of eax, w3, w2
+ Interp2 [edi+2],eax,[w3],[w2]
+%endmacro
+
+%macro PIXEL01_60 0 ; 5:2:1 blend of eax, w6, w2
+ Interp6 [edi+2],[w6],[w2]
+%endmacro
+
+%macro PIXEL01_61 0 ; 5:2:1 blend of eax, w2, w6
+ Interp6 [edi+2],[w2],[w6]
+%endmacro
+
+%macro PIXEL01_70 0 ; 6:1:1 blend of eax, w2, w6
+ Interp7 [edi+2],[w2],[w6]
+%endmacro
+
+%macro PIXEL01_90 0 ; 2:3:3 blend of eax, w2, w6
+ Interp9 [edi+2],[w2],[w6]
+%endmacro
+
+%macro PIXEL01_100 0 ; 14:1:1 blend of eax, w2, w6
+ Interp10 [edi+2],[w2],[w6]
+%endmacro
+
+%macro PIXEL10_0 0 ; PIXEL10_*: write the output pixel at [edi+ebx]; ebx is loaded with dstPitch before the FuncTable jump, eax is the centre pixel w5. _0 stores it unblended
+ mov [edi+ebx],ax
+%endmacro
+
+%macro PIXEL10_10 0 ; about 3:1 blend of eax with w7
+ Interp1 [edi+ebx],eax,[w7]
+%endmacro
+
+%macro PIXEL10_11 0 ; about 3:1 blend of eax with w8
+ Interp1 [edi+ebx],eax,[w8]
+%endmacro
+
+%macro PIXEL10_12 0 ; about 3:1 blend of eax with w4
+ Interp1 [edi+ebx],eax,[w4]
+%endmacro
+
+%macro PIXEL10_20 0 ; about 2:1:1 blend of eax, w8, w4
+ Interp2 [edi+ebx],eax,[w8],[w4]
+%endmacro
+
+%macro PIXEL10_21 0 ; about 2:1:1 blend of eax, w7, w4
+ Interp2 [edi+ebx],eax,[w7],[w4]
+%endmacro
+
+%macro PIXEL10_22 0 ; about 2:1:1 blend of eax, w7, w8
+ Interp2 [edi+ebx],eax,[w7],[w8]
+%endmacro
+
+%macro PIXEL10_60 0 ; 5:2:1 blend of eax, w4, w8
+ Interp6 [edi+ebx],[w4],[w8]
+%endmacro
+
+%macro PIXEL10_61 0 ; 5:2:1 blend of eax, w8, w4
+ Interp6 [edi+ebx],[w8],[w4]
+%endmacro
+
+%macro PIXEL10_70 0 ; 6:1:1 blend of eax, w8, w4
+ Interp7 [edi+ebx],[w8],[w4]
+%endmacro
+
+%macro PIXEL10_90 0 ; 2:3:3 blend of eax, w8, w4
+ Interp9 [edi+ebx],[w8],[w4]
+%endmacro
+
+%macro PIXEL10_100 0 ; 14:1:1 blend of eax, w8, w4
+ Interp10 [edi+ebx],[w8],[w4]
+%endmacro
+
+%macro PIXEL11_0 0 ; PIXEL11_*: write the output pixel at [edi+ebx+2]; ebx is loaded with dstPitch before the FuncTable jump, eax is the centre pixel w5. _0 stores it unblended
+ mov [edi+ebx+2],ax
+%endmacro
+
+%macro PIXEL11_10 0 ; about 3:1 blend of eax with w9
+ Interp1 [edi+ebx+2],eax,[w9]
+%endmacro
+
+%macro PIXEL11_11 0 ; about 3:1 blend of eax with w6
+ Interp1 [edi+ebx+2],eax,[w6]
+%endmacro
+
+%macro PIXEL11_12 0 ; about 3:1 blend of eax with w8
+ Interp1 [edi+ebx+2],eax,[w8]
+%endmacro
+
+%macro PIXEL11_20 0 ; about 2:1:1 blend of eax, w6, w8
+ Interp2 [edi+ebx+2],eax,[w6],[w8]
+%endmacro
+
+%macro PIXEL11_21 0 ; about 2:1:1 blend of eax, w9, w8
+ Interp2 [edi+ebx+2],eax,[w9],[w8]
+%endmacro
+
+%macro PIXEL11_22 0 ; about 2:1:1 blend of eax, w9, w6
+ Interp2 [edi+ebx+2],eax,[w9],[w6]
+%endmacro
+
+%macro PIXEL11_60 0 ; 5:2:1 blend of eax, w8, w6
+ Interp6 [edi+ebx+2],[w8],[w6]
+%endmacro
+
+%macro PIXEL11_61 0 ; 5:2:1 blend of eax, w6, w8
+ Interp6 [edi+ebx+2],[w6],[w8]
+%endmacro
+
+%macro PIXEL11_70 0 ; 6:1:1 blend of eax, w6, w8
+ Interp7 [edi+ebx+2],[w6],[w8]
+%endmacro
+
+%macro PIXEL11_90 0 ; 2:3:3 blend of eax, w6, w8
+ Interp9 [edi+ebx+2],[w6],[w8]
+%endmacro
+
+%macro PIXEL11_100 0 ; 14:1:1 blend of eax, w6, w8
+ Interp10 [edi+ebx+2],[w6],[w8]
+%endmacro
+
+inbuffer equ 8 ; offsets from ebp (after push ebp / mov ebp,esp) of the _hq2x_16 stack arguments; this one is loaded into esi
+outbuffer equ 12 ; loaded into edi
+Xres equ 16 ; copied into xcounter at .loopy; also doubled when computing moduloSrc/moduloDst
+Yres equ 20 ; copied into linesleft
+srcPitch equ 24 ; stored as nextline and, negated, as prevline
+dstPitch equ 28 ; reloaded into ebx before each FuncTable/FuncTable2 jump
+
+_hq2x_16:
+ push ebp
+ mov ebp,esp
+ pushad
+
+ mov esi,[ebp+inbuffer]
+ mov edi,[ebp+outbuffer]
+ mov edx,[ebp+Yres]
+ mov [linesleft],edx
+ mov ecx,[ebp+Xres]
+ shl ecx,1
+ mov ebx,[ebp+dstPitch]
+ mov dword[moduloDst],ebx
+ sub dword[moduloDst],ecx
+ shl dword[moduloDst],1
+ mov ebx,[ebp+srcPitch]
+ mov dword[nextline],ebx
+ mov dword[moduloSrc],ebx
+ sub dword[moduloSrc],ecx
+ neg ebx
+ mov dword[prevline],ebx
+.loopy
+ mov ecx,[ebp+Xres]
+ mov dword[xcounter],ecx
+.loopx
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx-2]
+ movq mm6,[esi-2]
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx-2]
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx
+ shr eax,16
+ mov [w2],eax
+ psrlq mm5,32
+ movd eax,mm5
+ movzx edx,ax
+ mov [w3],edx
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx
+ shr eax,16
+ mov [w5],eax
+ psrlq mm6,32
+ movd eax,mm6
+ movzx edx,ax
+ mov [w6],edx
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx
+ shr eax,16
+ mov [w8],eax
+ psrlq mm7,32
+ movd eax,mm7
+ movzx edx,ax
+ mov [w9],edx
+.flags
+ mov ebx,_RGBtoYUV
+ mov eax,[w5]
+ xor ecx,ecx
+ movd mm5,[ebx+eax*4]
+ mov dword[cross],0
+
+ mov edx,[w2]
+ cmp eax,edx
+ je .noflag2
+ or dword[cross],1
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag2
+ or ecx,2
+.noflag2
+ mov edx,[w4]
+ cmp eax,edx
+ je .noflag4
+ or dword[cross],2
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag4
+ or ecx,8
+.noflag4
+ mov edx,[w6]
+ cmp eax,edx
+ je .noflag6
+ or dword[cross],4
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag6
+ or ecx,16
+.noflag6
+ mov edx,[w8]
+ cmp eax,edx
+ je .noflag8
+ or dword[cross],8
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag8
+ or ecx,64
+.noflag8
+ test ecx,ecx
+ jnz .testflag1
+ mov ecx,[cross]
+ mov ebx,[ebp+dstPitch]
+ jmp [FuncTable2+ecx*4]
+.testflag1
+ mov edx,[w1]
+ cmp eax,edx
+ je .noflag1
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag1
+ or ecx,1
+.noflag1
+ mov edx,[w3]
+ cmp eax,edx
+ je .noflag3
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag3
+ or ecx,4
+.noflag3
+ mov edx,[w7]
+ cmp eax,edx
+ je .noflag7
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag7
+ or ecx,32
+.noflag7
+ mov edx,[w9]
+ cmp eax,edx
+ je .noflag9
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag9
+ or ecx,128
+.noflag9
+ mov ebx,[ebp+dstPitch]
+ jmp [FuncTable+ecx*4]
+
+..@flag0
+..@flag1
+..@flag4
+..@flag32
+..@flag128
+..@flag5
+..@flag132
+..@flag160
+..@flag33
+..@flag129
+..@flag36
+..@flag133
+..@flag164
+..@flag161
+..@flag37
+..@flag165
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag2
+..@flag34
+..@flag130
+..@flag162
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag16
+..@flag17
+..@flag48
+..@flag49
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag64
+..@flag65
+..@flag68
+..@flag69
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag8
+..@flag12
+..@flag136
+..@flag140
+ PIXEL00_21
+ PIXEL01_20
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag3
+..@flag35
+..@flag131
+..@flag163
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag6
+..@flag38
+..@flag134
+..@flag166
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag20
+..@flag21
+..@flag52
+..@flag53
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag144
+..@flag145
+..@flag176
+..@flag177
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag192
+..@flag193
+..@flag196
+..@flag197
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag96
+..@flag97
+..@flag100
+..@flag101
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag40
+..@flag44
+..@flag168
+..@flag172
+ PIXEL00_21
+ PIXEL01_20
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag9
+..@flag13
+..@flag137
+..@flag141
+ PIXEL00_12
+ PIXEL01_20
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag18
+..@flag50
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_20
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag80
+..@flag81
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_20
+ jmp .loopx_end
+..@flag72
+..@flag76
+ PIXEL00_21
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag10
+..@flag138
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_20
+ PIXEL01_21
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag66
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag24
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag7
+..@flag39
+..@flag135
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag148
+..@flag149
+..@flag180
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag224
+..@flag228
+..@flag225
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag41
+..@flag169
+..@flag45
+ PIXEL00_12
+ PIXEL01_20
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag22
+..@flag54
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag208
+..@flag209
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag104
+..@flag108
+ PIXEL00_21
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag11
+..@flag139
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag19
+..@flag51
+ DiffOrNot w2,w6,PIXEL00_11,PIXEL01_10,PIXEL00_60,PIXEL01_90
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag146
+..@flag178
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL11_12,PIXEL01_90,PIXEL11_61
+ PIXEL10_20
+ jmp .loopx_end
+..@flag84
+..@flag85
+ PIXEL00_20
+ DiffOrNot w6,w8,PIXEL01_11,PIXEL11_10,PIXEL01_60,PIXEL11_90
+ PIXEL10_21
+ jmp .loopx_end
+..@flag112
+..@flag113
+ PIXEL00_20
+ PIXEL01_22
+ DiffOrNot w6,w8,PIXEL10_12,PIXEL11_10,PIXEL10_61,PIXEL11_90
+ jmp .loopx_end
+..@flag200
+..@flag204
+ PIXEL00_21
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL11_11,PIXEL10_90,PIXEL11_60
+ jmp .loopx_end
+..@flag73
+..@flag77
+ DiffOrNot w8,w4,PIXEL00_12,PIXEL10_10,PIXEL00_61,PIXEL10_90
+ PIXEL01_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag42
+..@flag170
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL10_11,PIXEL00_90,PIXEL10_60
+ PIXEL01_21
+ PIXEL11_20
+ jmp .loopx_end
+..@flag14
+..@flag142
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL01_12,PIXEL00_90,PIXEL01_61
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag67
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag70
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag28
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag152
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag194
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag98
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag56
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag25
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag26
+..@flag31
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag82
+..@flag214
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag88
+..@flag248
+ PIXEL00_21
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag74
+..@flag107
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag27
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag86
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_21
+ PIXEL11_10
+ jmp .loopx_end
+..@flag216
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag106
+ PIXEL00_10
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag30
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag210
+ PIXEL00_22
+ PIXEL01_10
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag120
+ PIXEL00_21
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag75
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ PIXEL10_10
+ PIXEL11_22
+ jmp .loopx_end
+..@flag29
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag198
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag184
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag99
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag57
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag71
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag156
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag226
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag60
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag195
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag102
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag153
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag58
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag83
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag92
+ PIXEL00_21
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag202
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_11
+ jmp .loopx_end
+..@flag78
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_22
+ jmp .loopx_end
+..@flag154
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag114
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag89
+ PIXEL00_12
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag90
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag55
+..@flag23
+ DiffOrNot w2,w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag182
+..@flag150
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61
+ PIXEL10_20
+ jmp .loopx_end
+..@flag213
+..@flag212
+ PIXEL00_20
+ DiffOrNot w6,w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90
+ PIXEL10_21
+ jmp .loopx_end
+..@flag241
+..@flag240
+ PIXEL00_20
+ PIXEL01_22
+ DiffOrNot w6,w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90
+ jmp .loopx_end
+..@flag236
+..@flag232
+ PIXEL00_21
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60
+ jmp .loopx_end
+..@flag109
+..@flag105
+ DiffOrNot w8,w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90
+ PIXEL01_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag171
+..@flag43
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60
+ PIXEL01_21
+ PIXEL11_20
+ jmp .loopx_end
+..@flag143
+..@flag15
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag124
+ PIXEL00_21
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag203
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ PIXEL10_10
+ PIXEL11_11
+ jmp .loopx_end
+..@flag62
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag211
+ PIXEL00_11
+ PIXEL01_10
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag118
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_12
+ PIXEL11_10
+ jmp .loopx_end
+..@flag217
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag110
+ PIXEL00_10
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag155
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag188
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag185
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag61
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag157
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag103
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag227
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag230
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag199
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag220
+ PIXEL00_21
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag158
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag234
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_11
+ jmp .loopx_end
+..@flag242
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag59
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag121
+ PIXEL00_12
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag87
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag79
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_22
+ jmp .loopx_end
+..@flag122
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag94
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag218
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag91
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag229
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag167
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag173
+ PIXEL00_12
+ PIXEL01_20
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag181
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag186
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag115
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag93
+ PIXEL00_12
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag206
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_11
+ jmp .loopx_end
+..@flag205
+..@flag201
+ PIXEL00_12
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_11
+ jmp .loopx_end
+..@flag174
+..@flag46
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_12
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag179
+..@flag147
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag117
+..@flag116
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag189
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag231
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag126
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag219
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag125
+ DiffOrNot w8,w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90
+ PIXEL01_11
+ PIXEL11_10
+ jmp .loopx_end
+..@flag221
+ PIXEL00_12
+ DiffOrNot w6,w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90
+ PIXEL10_10
+ jmp .loopx_end
+..@flag207
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61
+ PIXEL10_10
+ PIXEL11_11
+ jmp .loopx_end
+..@flag238
+ PIXEL00_10
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60
+ jmp .loopx_end
+..@flag190
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61
+ PIXEL10_11
+ jmp .loopx_end
+..@flag187
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60
+ PIXEL01_10
+ PIXEL11_12
+ jmp .loopx_end
+..@flag243
+ PIXEL00_11
+ PIXEL01_10
+ DiffOrNot w6,w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90
+ jmp .loopx_end
+..@flag119
+ DiffOrNot w2,w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90
+ PIXEL10_12
+ PIXEL11_10
+ jmp .loopx_end
+..@flag237
+..@flag233
+ PIXEL00_12
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ PIXEL11_11
+ jmp .loopx_end
+..@flag175
+..@flag47
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ PIXEL01_12
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag183
+..@flag151
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag245
+..@flag244
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag250
+ PIXEL00_10
+ PIXEL01_10
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag123
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag95
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_10
+ PIXEL11_10
+ jmp .loopx_end
+..@flag222
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag252
+ PIXEL00_21
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag249
+ PIXEL00_12
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag235
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ PIXEL11_11
+ jmp .loopx_end
+..@flag111
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag63
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag159
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag215
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag246
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag254
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag253
+ PIXEL00_12
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag251
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag239
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ PIXEL11_11
+ jmp .loopx_end
+..@flag127
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag191
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag223
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag247
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag255
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+
+
+..@cross0
+ mov edx,eax
+ shl eax,16
+ or eax,edx
+ mov [edi],eax
+ mov [edi+ebx],eax
+ jmp .loopx_end
+..@cross1
+ mov edx,eax
+ shl eax,16
+ or eax,edx
+ mov ecx,[w2]
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1
+ add ecx,0x0821
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+ mov ecx,edx
+ shl edx,16
+ or edx,ecx
+ mov [edi],edx
+ mov [edi+ebx],eax
+ jmp .loopx_end
+..@cross2
+ shl eax,16
+ mov ecx,[w4]
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1
+ add ecx,0x0821
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+ or eax,edx
+ mov [edi],eax
+ mov [edi+ebx],eax
+ jmp .loopx_end
+..@cross4
+ mov ecx,[w6]
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1
+ add ecx,0x0821
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+ shl edx,16
+ or eax,edx
+ mov [edi],eax
+ mov [edi+ebx],eax
+ jmp .loopx_end
+..@cross8
+ mov edx,eax
+ shl eax,16
+ or eax,edx
+ mov ecx,[w8]
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1
+ add ecx,0x0821
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+ mov ecx,edx
+ shl edx,16
+ or edx,ecx
+ mov [edi],eax
+ mov [edi+ebx],edx
+ jmp .loopx_end
+
+.loopx_end
+ add esi,2
+ add edi,4
+ dec dword[xcounter]
+ jz .nexty
+ jmp .loopx
+.nexty
+ add esi,dword[moduloSrc]
+ add edi,dword[moduloDst]
+ dec dword[linesleft]
+ jz .fin
+ mov ebx,[ebp+srcPitch]
+ mov dword[nextline],ebx
+ neg ebx
+ mov dword[prevline],ebx
+ jmp .loopy
+.fin
+ emms
+ popad
+ mov esp,ebp
+ pop ebp
+ ret
+
+SECTION .data
+; FuncTable: 256 code addresses (one dword each), listed in numeric order so
+; that entry N, at byte offset N*4, is the address of the ..@flagN handler.
+; NOTE(review): presumably indexed by the 8-bit neighbour-difference pattern;
+; the indirect jump that uses this table is outside this excerpt -- confirm.
+FuncTable
+ dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7
+ dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15
+ dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23
+ dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31
+ dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39
+ dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47
+ dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55
+ dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63
+ dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71
+ dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79
+ dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87
+ dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95
+ dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103
+ dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111
+ dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119
+ dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127
+ dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135
+ dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143
+ dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151
+ dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159
+ dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167
+ dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175
+ dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183
+ dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191
+ dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199
+ dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207
+ dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215
+ dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223
+ dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231
+ dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239
+ dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247
+ dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255
+
+; FuncTable2: 16 code addresses (one dword each). Entries 0, 1, 2, 4 and 8
+; point at the ..@cross0 / ..@cross1 / ..@cross2 / ..@cross4 / ..@cross8
+; handlers; every other entry points at ..@flag0.
+; NOTE(review): presumably indexed by a 4-bit mask of differing direct
+; neighbours; the code that indexes this table is outside this excerpt.
+;
+; Every dd line ends on its last operand, without a dangling comma. A trailing
+; comma leaves an empty final operand, and assembler releases do not all treat
+; that the same way; an extra emitted item would shift every later entry.
+FuncTable2
+ dd ..@cross0, ..@cross1, ..@cross2, ..@flag0
+ dd ..@cross4, ..@flag0, ..@flag0, ..@flag0
+ dd ..@cross8, ..@flag0, ..@flag0, ..@flag0
+ dd ..@flag0, ..@flag0, ..@flag0, ..@flag0
+
diff --git a/graphics/scaler/hq3x.cpp b/graphics/scaler/hq3x.cpp
new file mode 100644
index 0000000000..497fe69be1
--- /dev/null
+++ b/graphics/scaler/hq3x.cpp
@@ -0,0 +1,176 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+#include "graphics/scaler/intern.h"
+
+#ifdef USE_NASM
+// Assembly version of HQ3x
+
+// Prototype of the external hq3x_16 routine, declared with C linkage so the
+// C++ compiler does not mangle its symbol name.
+extern "C" {
+
+// On every target except Win32 the C-level name hq3x_16 is remapped to
+// _hq3x_16.
+// NOTE(review): presumably because the assembly file exports the symbol with
+// a leading underscore, which only Win32 toolchains add on their own --
+// confirm against hq3x_i386.asm.
+#ifndef _WIN32
+#define hq3x_16 _hq3x_16
+#endif
+
+// Argument order, as used by the HQ3x() wrapper below:
+// source, destination, width, height, source pitch, destination pitch.
+void hq3x_16(const byte *, byte *, uint32, uint32, uint32, uint32);
+
+}
+
+/**
+ * HQ3x entry point used when USE_NASM is defined.
+ *
+ * Forwards straight to hq3x_16. Note the argument reordering: hq3x_16 takes
+ * both pointers first, then width and height, then both pitches. Unlike the
+ * C++ implementation further down, this path does not look at gBitFormat.
+ */
+void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+ hq3x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch);
+}
+
+#else
+
+#ifdef HAS_ALTIVEC
+
+#ifdef __amigaos4__
+#include <proto/exec.h>
+/**
+ * AmigaOS 4 variant: asks IExec->GetCPUInfo for the GCIT_VectorUnit tag.
+ *
+ * @return true unless the reported vector unit type is VECTORTYPE_NONE.
+ */
+static bool isAltiVecAvailable() {
+	uint32 vectorUnitType;
+	IExec->GetCPUInfo(GCIT_VectorUnit, &vectorUnitType, TAG_DONE);
+	return vectorUnitType != VECTORTYPE_NONE;
+}
+#else
+
+#include <sys/sysctl.h>
+
+/**
+ * Generic variant: reads the { CTL_HW, HW_VECTORUNIT } sysctl value.
+ *
+ * @return true if sysctl() succeeds and reports a non-zero value;
+ *         false if the value is zero or the call fails.
+ */
+static bool isAltiVecAvailable() {
+	int mib[2] = { CTL_HW, HW_VECTORUNIT };
+	int vectorUnit = 0;
+	size_t valueSize = sizeof(vectorUnit);
+
+	// A failed query is treated the same as "no vector unit".
+	if (sysctl(mib, 2, &vectorUnit, &valueSize, NULL, 0) != 0)
+		return false;
+
+	return vectorUnit != 0;
+}
+#endif
+#endif
+
+// Output-pixel macros used by the body in hq3x.h.
+//
+// PIXEL<r><c>_<op> stores one pixel of the 3x3 output block: row r, column c,
+// addressed relative to q (column offset +c, row offset 0 / nextlineDst /
+// nextlineDst2). The macros rely on the locals q, w1..w9, nextlineDst and
+// nextlineDst2 declared in hq3x.h, and on bitFormat being #defined by the
+// including function. Each macro already ends in ';' because the switch in
+// hq3x.h lists them as bare statements with no separator.
+//
+// _C (and PIXEL11) store the centre pixel w5 unchanged; all other suffixes
+// combine w5 and/or its neighbours via interpolate16_2 / interpolate16_3.
+// NOTE(review): the integer template arguments look like per-input blend
+// weights (e.g. <3,1>, <7,1>, <2,1,1>, <2,7,7>, <1,1>) -- confirm against the
+// interpolate16_* definitions in intern.h.
+
+// Row 0, column 0 (top-left): neighbours used are w1, w2, w4.
+#define PIXEL00_1M *(q) = interpolate16_2<bitFormat,3,1>(w5, w1);
+#define PIXEL00_1U *(q) = interpolate16_2<bitFormat,3,1>(w5, w2);
+#define PIXEL00_1L *(q) = interpolate16_2<bitFormat,3,1>(w5, w4);
+#define PIXEL00_2 *(q) = interpolate16_3<bitFormat,2,1,1>(w5, w4, w2);
+#define PIXEL00_4 *(q) = interpolate16_3<bitFormat,2,7,7>(w5, w4, w2);
+#define PIXEL00_5 *(q) = interpolate16_2<bitFormat,1,1>(w4, w2);
+#define PIXEL00_C *(q) = w5;
+
+// Row 0, column 1 (top-centre): neighbour used is w2.
+#define PIXEL01_1 *(q+1) = interpolate16_2<bitFormat,3,1>(w5, w2);
+#define PIXEL01_3 *(q+1) = interpolate16_2<bitFormat,7,1>(w5, w2);
+#define PIXEL01_6 *(q+1) = interpolate16_2<bitFormat,3,1>(w2, w5);
+#define PIXEL01_C *(q+1) = w5;
+
+// Row 0, column 2 (top-right): neighbours used are w2, w3, w6.
+#define PIXEL02_1M *(q+2) = interpolate16_2<bitFormat,3,1>(w5, w3);
+#define PIXEL02_1U *(q+2) = interpolate16_2<bitFormat,3,1>(w5, w2);
+#define PIXEL02_1R *(q+2) = interpolate16_2<bitFormat,3,1>(w5, w6);
+#define PIXEL02_2 *(q+2) = interpolate16_3<bitFormat,2,1,1>(w5, w2, w6);
+#define PIXEL02_4 *(q+2) = interpolate16_3<bitFormat,2,7,7>(w5, w2, w6);
+#define PIXEL02_5 *(q+2) = interpolate16_2<bitFormat,1,1>(w2, w6);
+#define PIXEL02_C *(q+2) = w5;
+
+// Row 1, column 0 (middle-left): neighbour used is w4.
+#define PIXEL10_1 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w4);
+#define PIXEL10_3 *(q+nextlineDst) = interpolate16_2<bitFormat,7,1>(w5, w4);
+#define PIXEL10_6 *(q+nextlineDst) = interpolate16_2<bitFormat,3,1>(w4, w5);
+#define PIXEL10_C *(q+nextlineDst) = w5;
+
+// Row 1, column 1 (centre): always the unmodified source pixel.
+#define PIXEL11 *(q+1+nextlineDst) = w5;
+
+// Row 1, column 2 (middle-right): neighbour used is w6.
+#define PIXEL12_1 *(q+2+nextlineDst) = interpolate16_2<bitFormat,3,1>(w5, w6);
+#define PIXEL12_3 *(q+2+nextlineDst) = interpolate16_2<bitFormat,7,1>(w5, w6);
+#define PIXEL12_6 *(q+2+nextlineDst) = interpolate16_2<bitFormat,3,1>(w6, w5);
+#define PIXEL12_C *(q+2+nextlineDst) = w5;
+
+// Row 2, column 0 (bottom-left): neighbours used are w4, w7, w8.
+#define PIXEL20_1M *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w7);
+#define PIXEL20_1D *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w8);
+#define PIXEL20_1L *(q+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w4);
+#define PIXEL20_2 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w5, w8, w4);
+#define PIXEL20_4 *(q+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w5, w8, w4);
+#define PIXEL20_5 *(q+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w8, w4);
+#define PIXEL20_C *(q+nextlineDst2) = w5;
+
+// Row 2, column 1 (bottom-centre): neighbour used is w8.
+#define PIXEL21_1 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w8);
+#define PIXEL21_3 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,7,1>(w5, w8);
+#define PIXEL21_6 *(q+1+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w8, w5);
+#define PIXEL21_C *(q+1+nextlineDst2) = w5;
+
+// Row 2, column 2 (bottom-right): neighbours used are w6, w8, w9.
+#define PIXEL22_1M *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w9);
+#define PIXEL22_1D *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w8);
+#define PIXEL22_1R *(q+2+nextlineDst2) = interpolate16_2<bitFormat,3,1>(w5, w6);
+#define PIXEL22_2 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,1,1>(w5, w6, w8);
+#define PIXEL22_4 *(q+2+nextlineDst2) = interpolate16_3<bitFormat,2,7,7>(w5, w6, w8);
+#define PIXEL22_5 *(q+2+nextlineDst2) = interpolate16_2<bitFormat,1,1>(w6, w8);
+#define PIXEL22_C *(q+2+nextlineDst2) = w5;
+
+// YUV(n): RGBtoYUV table entry for the neighbourhood pixel w<n>.
+#define YUV(x) RGBtoYUV[w ## x]
+
+
+// HQ3x scaler instantiated with bitFormat == 565.
+//
+// hq3x.h is not a regular header: it contains the statements of the scaler
+// and is textually included as this function's body, picking up the
+// parameters (srcPtr, srcPitch, dstPtr, dstPitch, width, height) and the
+// bitFormat / PIXEL* / YUV macros defined above. bitFormat is #undef'd again
+// right after so the next instantiation can redefine it.
+//
+// The body is included from graphics/scaler/, the directory this file and
+// hq3x.h live in, rather than from the former common/scaler/ location.
+#define bitFormat 565
+void HQ3x_565(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	#include "graphics/scaler/hq3x.h"
+}
+#undef bitFormat
+
+// HQ3x scaler instantiated with bitFormat == 555.
+//
+// Same construction as the 565 variant: the statements in hq3x.h are
+// textually included as the function body and see this function's parameters
+// plus the bitFormat / PIXEL* / YUV macros.
+//
+// The body is included from graphics/scaler/, the directory this file and
+// hq3x.h live in, rather than from the former common/scaler/ location.
+#define bitFormat 555
+void HQ3x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	#include "graphics/scaler/hq3x.h"
+}
+#undef bitFormat
+
+
+#ifdef HAS_ALTIVEC
+	// Second pair of instantiations of the hq3x.h body, this time with
+	// USE_ALTIVEC defined so that the #ifdef USE_ALTIVEC sections of hq3x.h
+	// (vector-based pattern computation) are compiled instead of the scalar
+	// ones. USE_ALTIVEC is not #undef'd afterwards; nothing later in this
+	// file includes hq3x.h again.
+	//
+	// The body is included from graphics/scaler/, the directory this file
+	// and hq3x.h live in, rather than from the former common/scaler/ location.
+	#define USE_ALTIVEC 1
+
+	#define bitFormat 565
+	void HQ3x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+		#include "graphics/scaler/hq3x.h"
+	}
+	#undef bitFormat
+
+	#define bitFormat 555
+	void HQ3x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+		#include "graphics/scaler/hq3x.h"
+	}
+	#undef bitFormat
+#endif
+
+/**
+ * HQ3x entry point used when USE_NASM is not defined.
+ *
+ * Chooses one of the instantiations above and calls it with the arguments
+ * unchanged:
+ *  - gBitFormat == 565 selects the 565 variant, any other value the 555 one;
+ *  - when HAS_ALTIVEC is compiled in and isAltiVecAvailable() returns true,
+ *    the corresponding *_Altivec variant is used instead of the scalar one.
+ */
+void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	typedef void (*ScalerProc)(const uint8 *, uint32, uint8 *, uint32, int, int);
+
+	const bool is565 = (gBitFormat == 565);
+
+	// Scalar implementation is the default choice.
+	ScalerProc scaler = is565 ? HQ3x_565 : HQ3x_555;
+
+#ifdef HAS_ALTIVEC
+	// Availability is queried on every call, exactly once per call.
+	if (isAltiVecAvailable())
+		scaler = is565 ? HQ3x_565_Altivec : HQ3x_555_Altivec;
+#endif
+
+	scaler(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+}
+
+#endif
diff --git a/graphics/scaler/hq3x.h b/graphics/scaler/hq3x.h
new file mode 100644
index 0000000000..7fda8d5105
--- /dev/null
+++ b/graphics/scaler/hq3x.h
@@ -0,0 +1,2951 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+/*
+ * The HQ3x high quality 3x graphics filter.
+ * Original author Maxim Stepin (see http://www.hiend3d.com/hq3x.html).
+ * Adapted for ScummVM to 16 bit output and optimized by Max Horn.
+ */
+
+ register int w1, w2, w3, w4, w5, w6, w7, w8, w9;
+
+ const uint32 nextlineSrc = srcPitch / sizeof(uint16);
+ const uint16 *p = (const uint16 *)srcPtr;
+
+ const uint32 nextlineDst = dstPitch / sizeof(uint16);
+ const uint32 nextlineDst2 = 2 * nextlineDst;
+ uint16 *q = (uint16 *)dstPtr;
+
+ // +----+----+----+
+ // | | | |
+ // | w1 | w2 | w3 |
+ // +----+----+----+
+ // | | | |
+ // | w4 | w5 | w6 |
+ // +----+----+----+
+ // | | | |
+ // | w7 | w8 | w9 |
+ // +----+----+----+
+
+#ifdef USE_ALTIVEC
+ // The YUV threshold.
+ static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706);
+
+ // Bit pattern mask.
+ static const vector signed int vPatternMask1 = (vector signed int)(0x01,0x02,0x04,0x08);
+ static const vector signed int vPatternMask2 = (vector signed int)(0x10,0x20,0x40,0x80);
+
+ // Permutation masks for the incremental vector loading (see below for more information).
+ static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7, 8,9,10,11, 20,21,22,23, 16,17,18,19);
+ static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27, 8,9,10,11, 12,13,14,15, 28,29,30,31);
+
+ // The YUV vectors.
+ vector signed char vecYUV5555;
+ vector signed char vecYUV1234;
+ vector signed char vecYUV6789;
+#endif
+
+ while (height--) {
+ w1 = *(p - 1 - nextlineSrc);
+ w4 = *(p - 1);
+ w7 = *(p - 1 + nextlineSrc);
+
+ w2 = *(p - nextlineSrc);
+ w5 = *(p);
+ w8 = *(p + nextlineSrc);
+
+#ifdef USE_ALTIVEC
+ // Load initial values of vecYUV1234 / vecYUV6789
+ const int arr1234[4] = {0, YUV(1), YUV(2), 0};
+ const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)};
+
+ vecYUV1234 = *(const vector signed char *)arr1234;
+ vecYUV6789 = *(const vector signed char *)arr6789;
+#endif
+
+ int tmpWidth = width;
+ while (tmpWidth--) {
+ p++;
+
+ w3 = *(p - nextlineSrc);
+ w6 = *(p);
+ w9 = *(p + nextlineSrc);
+
+ int pattern = 0;
+
+#ifdef USE_ALTIVEC
+ /*
+ Consider this peephole into the image buffer:
+ +----+----+----+----+
+ | | | | |
+ | w00| w01| w02| w03|
+ +----+----+----+----+
+ | | | | |
+ | w10| w11| w12| w13|
+ +----+----+----+----+
+ | | | | |
+ | w20| w21| w22| w23|
+ +----+----+----+----+
+
+ In the previous loop iteration, w11 was the center point, and our
+ vectors contain the following data from the previous iteration:
+ vecYUV5555 = { w11, w11, w11, w11 }
+ vecYUV1234 = { w00, w01, w02, w10 }
+ vecYUV6789 = { w12, w20, w21, w22 }
+
+ Now we have the new center point w12, and we would like to have
+ the following values in our vectors:
+ vecYUV5555 = { w12, w12, w12, w12 }
+ vecYUV1234 = { w01, w02, w03, w11 }
+ vecYUV6789 = { w13, w21, w22, w23 }
+
+ To this end we load a single new vector:
+ vTmp = { w11, w03, w13, w23 }
+
+ We then can compute all the new vector values using permutations only:
+ vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] }
+ vecYUV1234 = { vecYUV1234[1], vecYUV1234[2], vTmp[1], vTmp[0] }
+ vecYUV6789 = { vTmp[2], vecYUV6789[2], vecYUV6789[3], vTmp[3] }
+
+ Beautiful, isn't it? :-)
+ */
+
+ // Load the new values into a temporary vector (see above for an explanation)
+ const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)};
+ vector signed char vTmp = *(const vector signed char *)tmpArr;
+
+ // Next update the data vectors
+ vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0);
+ vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234);
+ vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789);
+
+ // Compute the absolute difference between the center point's YUV and the outer points
+ const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234));
+ const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789));
+
+ // Compare the difference to the threshold (byte-wise)
+ const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold);
+ const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold);
+
+ // Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0.
+ // Then AND in the pattern masks. The idea is that for 0 components, we get 0,
+ // while for the other components we get exactly the mask value.
+ const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1);
+ const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2);
+
+ // Now sum up the components of all vectors. Since our pattern mask values
+ // are all "orthogonal", this is effectively the same as ORing them all
+ // together. In the end, the rightmost word of vSum contains the 'pattern'
+ vector signed int vSum = vec_sums(vPattern1, (vector signed int)0);
+ vSum = vec_sums(vPattern2, vSum);
+ pattern = ((int *)&vSum)[3];
+#else
+ const int yuv5 = YUV(5);
+ if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001;
+ if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002;
+ if (w5 != w3 && diffYUV(yuv5, YUV(3))) pattern |= 0x0004;
+ if (w5 != w4 && diffYUV(yuv5, YUV(4))) pattern |= 0x0008;
+ if (w5 != w6 && diffYUV(yuv5, YUV(6))) pattern |= 0x0010;
+ if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020;
+ if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040;
+ if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080;
+#endif
+
+ switch (pattern) {
+ case 0:
+ case 1:
+ case 4:
+ case 32:
+ case 128:
+ case 5:
+ case 132:
+ case 160:
+ case 33:
+ case 129:
+ case 36:
+ case 133:
+ case 164:
+ case 161:
+ case 37:
+ case 165:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 2:
+ case 34:
+ case 130:
+ case 162:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 16:
+ case 17:
+ case 48:
+ case 49:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 64:
+ case 65:
+ case 68:
+ case 69:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 8:
+ case 12:
+ case 136:
+ case 140:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 3:
+ case 35:
+ case 131:
+ case 163:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 6:
+ case 38:
+ case 134:
+ case 166:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 20:
+ case 21:
+ case 52:
+ case 53:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 144:
+ case 145:
+ case 176:
+ case 177:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 192:
+ case 193:
+ case 196:
+ case 197:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 96:
+ case 97:
+ case 100:
+ case 101:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 40:
+ case 44:
+ case 168:
+ case 172:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 9:
+ case 13:
+ case 137:
+ case 141:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 18:
+ case 50:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 80:
+ case 81:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_1M
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 72:
+ case 76:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_1M
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 10:
+ case 138:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 66:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 24:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 7:
+ case 39:
+ case 135:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 148:
+ case 149:
+ case 180:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 224:
+ case 228:
+ case 225:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 41:
+ case 169:
+ case 45:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 22:
+ case 54:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 208:
+ case 209:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 104:
+ case 108:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 11:
+ case 139:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 19:
+ case 51:
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL12_C
+ } else {
+ PIXEL00_2
+ PIXEL01_6
+ PIXEL02_5
+ PIXEL12_1
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 146:
+ case 178:
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL12_C
+ PIXEL22_1D
+ } else {
+ PIXEL01_1
+ PIXEL02_5
+ PIXEL12_6
+ PIXEL22_2
+ }
+ PIXEL00_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ break;
+ case 84:
+ case 85:
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL02_1U
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_1M
+ } else {
+ PIXEL02_2
+ PIXEL12_6
+ PIXEL21_1
+ PIXEL22_5
+ }
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ break;
+ case 112:
+ case 113:
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ } else {
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_6
+ PIXEL22_5
+ }
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ break;
+ case 200:
+ case 204:
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ } else {
+ PIXEL10_1
+ PIXEL20_5
+ PIXEL21_6
+ PIXEL22_2
+ }
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ break;
+ case 73:
+ case 77:
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL00_1U
+ PIXEL10_C
+ PIXEL20_1M
+ PIXEL21_C
+ } else {
+ PIXEL00_2
+ PIXEL10_6
+ PIXEL20_5
+ PIXEL21_1
+ }
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ PIXEL22_1M
+ break;
+ case 42:
+ case 170:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL10_C
+ PIXEL20_1D
+ } else {
+ PIXEL00_5
+ PIXEL01_1
+ PIXEL10_6
+ PIXEL20_2
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 14:
+ case 142:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ } else {
+ PIXEL00_5
+ PIXEL01_6
+ PIXEL02_2
+ PIXEL10_1
+ }
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 67:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 70:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 28:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 152:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 194:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 98:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 56:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 25:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 26:
+ case 31:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL10_3
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 82:
+ case 214:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 88:
+ case 248:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL22_4
+ }
+ break;
+ case 74:
+ case 107:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ }
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 27:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 86:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 216:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 106:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 30:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 210:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 120:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 75:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 29:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 198:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 184:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 99:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 57:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 71:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 156:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 226:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 60:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 195:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 102:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 153:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 58:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 83:
+ PIXEL00_1L
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 92:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 202:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 78:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 154:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 114:
+ PIXEL00_1M
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 89:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 90:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 55:
+ case 23:
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL00_2
+ PIXEL01_6
+ PIXEL02_5
+ PIXEL12_1
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 182:
+ case 150:
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ PIXEL22_1D
+ } else {
+ PIXEL01_1
+ PIXEL02_5
+ PIXEL12_6
+ PIXEL22_2
+ }
+ PIXEL00_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ break;
+ case 213:
+ case 212:
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL02_1U
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL02_2
+ PIXEL12_6
+ PIXEL21_1
+ PIXEL22_5
+ }
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ break;
+ case 241:
+ case 240:
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_6
+ PIXEL22_5
+ }
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ break;
+ case 236:
+ case 232:
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ PIXEL22_1R
+ } else {
+ PIXEL10_1
+ PIXEL20_5
+ PIXEL21_6
+ PIXEL22_2
+ }
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ break;
+ case 109:
+ case 105:
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL00_1U
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL00_2
+ PIXEL10_6
+ PIXEL20_5
+ PIXEL21_1
+ }
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ PIXEL22_1M
+ break;
+ case 171:
+ case 43:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ PIXEL20_1D
+ } else {
+ PIXEL00_5
+ PIXEL01_1
+ PIXEL10_6
+ PIXEL20_2
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 143:
+ case 15:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ } else {
+ PIXEL00_5
+ PIXEL01_6
+ PIXEL02_2
+ PIXEL10_1
+ }
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 124:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 203:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 62:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 211:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 118:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 217:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 110:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 155:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 188:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 185:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 61:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 157:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 103:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 227:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 230:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 199:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 220:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 158:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 234:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1R
+ break;
+ case 242:
+ PIXEL00_1M
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1L
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 59:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 121:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 87:
+ PIXEL00_1L
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 79:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ PIXEL02_1R
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 122:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 94:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_C
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 218:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_C
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 91:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 229:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 167:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 173:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 181:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 186:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 115:
+ PIXEL00_1L
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 93:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 206:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 205:
+ case 201:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_1M
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 174:
+ case 46:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_1M
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 179:
+ case 147:
+ PIXEL00_1L
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_1M
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 117:
+ case 116:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_1M
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 189:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 231:
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 126:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 219:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ PIXEL10_3
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 125:
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL00_1U
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL00_2
+ PIXEL10_6
+ PIXEL20_5
+ PIXEL21_1
+ }
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL11
+ PIXEL12_C
+ PIXEL22_1M
+ break;
+ case 221:
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL02_1U
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL02_2
+ PIXEL12_6
+ PIXEL21_1
+ PIXEL22_5
+ }
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ break;
+ case 207:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ } else {
+ PIXEL00_5
+ PIXEL01_6
+ PIXEL02_2
+ PIXEL10_1
+ }
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 238:
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ PIXEL22_1R
+ } else {
+ PIXEL10_1
+ PIXEL20_5
+ PIXEL21_6
+ PIXEL22_2
+ }
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL11
+ PIXEL12_1
+ break;
+ case 190:
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ PIXEL22_1D
+ } else {
+ PIXEL01_1
+ PIXEL02_5
+ PIXEL12_6
+ PIXEL22_2
+ }
+ PIXEL00_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1D
+ PIXEL21_1
+ break;
+ case 187:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ PIXEL20_1D
+ } else {
+ PIXEL00_5
+ PIXEL01_1
+ PIXEL10_6
+ PIXEL20_2
+ }
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 243:
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_6
+ PIXEL22_5
+ }
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ break;
+ case 119:
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL00_2
+ PIXEL01_6
+ PIXEL02_5
+ PIXEL12_1
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 237:
+ case 233:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 175:
+ case 47:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ break;
+ case 183:
+ case 151:
+ PIXEL00_1L
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 245:
+ case 244:
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_C
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 250:
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL22_4
+ }
+ break;
+ case 123:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ }
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 95:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL10_3
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ break;
+ case 222:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 252:
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_C
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 249:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL22_4
+ }
+ break;
+ case 235:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ }
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 111:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 63:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ break;
+ case 159:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL10_3
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ } else {
+ PIXEL02_2
+ }
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 215:
+ PIXEL00_1L
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 246:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_C
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 254:
+ PIXEL00_1M
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ } else {
+ PIXEL01_3
+ PIXEL02_4
+ }
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ } else {
+ PIXEL10_3
+ PIXEL20_4
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL21_3
+ PIXEL22_2
+ }
+ break;
+ case 253:
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_C
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 251:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ } else {
+ PIXEL00_4
+ PIXEL01_3
+ }
+ PIXEL02_1M
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL10_C
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL10_3
+ PIXEL20_2
+ PIXEL21_3
+ }
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL12_C
+ PIXEL22_C
+ } else {
+ PIXEL12_3
+ PIXEL22_4
+ }
+ break;
+ case 239:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ PIXEL22_1R
+ break;
+ case 127:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL01_C
+ PIXEL10_C
+ } else {
+ PIXEL00_2
+ PIXEL01_3
+ PIXEL10_3
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL02_4
+ PIXEL12_3
+ }
+ PIXEL11
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ PIXEL21_C
+ } else {
+ PIXEL20_4
+ PIXEL21_3
+ }
+ PIXEL22_1M
+ break;
+ case 191:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ break;
+ case 223:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ PIXEL10_C
+ } else {
+ PIXEL00_4
+ PIXEL10_3
+ }
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL01_C
+ PIXEL02_C
+ PIXEL12_C
+ } else {
+ PIXEL01_3
+ PIXEL02_2
+ PIXEL12_3
+ }
+ PIXEL11
+ PIXEL20_1M
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL21_C
+ PIXEL22_C
+ } else {
+ PIXEL21_3
+ PIXEL22_4
+ }
+ break;
+ case 247:
+ PIXEL00_1L
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_C
+ } else {
+ PIXEL22_2
+ }
+ break;
+ case 255:
+ if (diffYUV(YUV(4), YUV(2))) {
+ PIXEL00_C
+ } else {
+ PIXEL00_2
+ }
+ PIXEL01_C
+ if (diffYUV(YUV(2), YUV(6))) {
+ PIXEL02_C
+ } else {
+ PIXEL02_2
+ }
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ if (diffYUV(YUV(8), YUV(4))) {
+ PIXEL20_C
+ } else {
+ PIXEL20_2
+ }
+ PIXEL21_C
+ if (diffYUV(YUV(6), YUV(8))) {
+ PIXEL22_C
+ } else {
+ PIXEL22_2
+ }
+ break;
+ }
+
+ w1 = w2;
+ w4 = w5;
+ w7 = w8;
+
+ w2 = w3;
+ w5 = w6;
+ w8 = w9;
+
+ q += 3;
+ }
+ p += nextlineSrc - width;
+ q += (nextlineDst - width) * 3;
+ }
diff --git a/graphics/scaler/hq3x_i386.asm b/graphics/scaler/hq3x_i386.asm
new file mode 100644
index 0000000000..100482f122
--- /dev/null
+++ b/graphics/scaler/hq3x_i386.asm
@@ -0,0 +1,2434 @@
+;hq3x filter
+;16bpp output
+;----------------------------------------------------------
+;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
+;
+;This program is free software; you can redistribute it and/or
+;modify it under the terms of the GNU General Public License
+;as published by the Free Software Foundation; either
+;version 2 of the License, or (at your option) any later
+;version.
+;
+;This program is distributed in the hope that it will be useful,
+;but WITHOUT ANY WARRANTY; without even the implied warranty of
+;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;GNU General Public License for more details.
+;
+;You should have received a copy of the GNU General Public License
+;along with this program; if not, write to the Free Software
+;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+GLOBAL _hq3x_16
+
+EXTERN _LUT16to32
+EXTERN _RGBtoYUV
+
+;Uninitialised scratch storage; every entry reserves a single dword.
+;linesleft : initialised from the Yres argument on entry to _hq3x_16.
+;xcounter  : reloaded from the Xres argument at the top of each .loopy pass.
+;cross     : bit mask built in .flags (1,2,4,8 for w2,w4,w6,w8 not being
+;            bit-identical to w5); indexes FuncTable2 when no flag bit is set.
+;nextline  : srcPitch; prevline : -srcPitch (byte offsets from esi).
+;w1..w9    : zero-extended 16-bit pixels of the 3x3 source neighbourhood,
+;            w1-w3 read at [esi+prevline-2], w4-w6 at [esi-2],
+;            w7-w9 at [esi+nextline-2]; w5 is the pixel at [esi].
+SECTION .bss
+linesleft resd 1
+xcounter resd 1
+cross resd 1
+nextline resd 1
+prevline resd 1
+w1 resd 1
+w2 resd 1
+w3 resd 1
+w4 resd 1
+w5 resd 1
+w6 resd 1
+w7 resd 1
+w8 resd 1
+w9 resd 1
+
+;Constants and pitch-derived values used by the macros and _hq3x_16.
+;reg_blank   : eight zero bytes, the zero operand for punpcklbw/packuswb.
+;const7      : the word value 7 in the three low word lanes (pmullw operand).
+;threshold   : per-byte limits 0x06/0x07/0x30 (low to high byte) subtracted
+;              with saturation from a byte-wise difference of two _RGBtoYUV
+;              entries; presumably V/U/Y order - confirm against the
+;              _RGBtoYUV table layout, which is not visible here.
+;zerolowbits : mask applied to pixels before the add-and-halve averaging in
+;              Interp1/Interp2/Interp5.
+;moduloSrc   : set by _hq3x_16 to srcPitch - Xres*2.
+;moduloDst   : set by _hq3x_16 to (dstPitch - Xres*2) * 3.
+SECTION .data
+
+reg_blank dd 0,0
+const7 dd 0x00070007,0x00000007
+threshold dd 0x00300706,0x00000000
+zerolowbits dd 0xF7DEF7DE
+moduloSrc dd 0
+moduloDst dd 0
+
+SECTION .text
+
+;TestDiff a,b
+;Compares the dwords stored at [a] and [b] and leaves the verdict in ecx.
+;ecx is 0 when the two values are bit-identical, or when every byte of the
+;absolute difference of their _RGBtoYUV entries is within the matching byte
+;of [threshold]; otherwise ecx is non-zero.
+;Overwrites ecx and edx; when the values differ it also overwrites
+;mm1, mm2 and mm5.
+%macro TestDiff 2
+ xor ecx,ecx
+ mov edx,[%1]
+ cmp edx,[%2]
+ je %%fin
+ mov ecx,_RGBtoYUV
+ movd mm1,[ecx+edx*4]
+ movq mm5,mm1
+ mov edx,[%2]
+ movd mm2,[ecx+edx*4]
+;two saturating subtractions OR-ed together give |mm1 - mm2| per byte
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+;a byte stays non-zero only if its difference exceeds the threshold byte
+ psubusb mm1,[threshold]
+ movd ecx,mm1
+%%fin:
+%endmacro
+
+;DiffOrNot a,b,ifDiff,ifSame  (4-parameter overload)
+;Runs TestDiff a,b, then expands ifDiff when ecx is non-zero and ifSame
+;when it is zero. Register side effects are those of TestDiff plus whatever
+;the expanded parameter does.
+%macro DiffOrNot 4
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ jmp %%fin
+%%same:
+ %4
+%%fin
+%endmacro
+
+;DiffOrNot a,b,d1,d2,s1,s2  (6-parameter overload)
+;Runs TestDiff a,b, then expands d1,d2 when ecx is non-zero and s1,s2
+;when it is zero.
+%macro DiffOrNot 6
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ %4
+ jmp %%fin
+%%same:
+ %5
+ %6
+%%fin
+%endmacro
+
+;DiffOrNot a,b,d1,d2,d3,s1,s2,s3  (8-parameter overload)
+;Runs TestDiff a,b, then expands d1..d3 when ecx is non-zero and s1..s3
+;when it is zero.
+%macro DiffOrNot 8
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ %4
+ %5
+ jmp %%fin
+%%same:
+ %6
+ %7
+ %8
+%%fin
+%endmacro
+
+;DiffOrNot a,b,d1,d2,d3,d4,s1,s2,s3,s4  (10-parameter overload)
+;Runs TestDiff a,b, then expands d1..d4 when ecx is non-zero and s1..s4
+;when it is zero.
+%macro DiffOrNot 10
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ %4
+ %5
+ %6
+ jmp %%fin
+%%same:
+ %7
+ %8
+ %9
+ %10
+%%fin
+%endmacro
+
+;Interp1 dst,c1,c2
+;Stores a 16-bit blend weighted towards c1 into dst.
+;If c1 == c2 the value of c1 is stored unchanged. Otherwise:
+;  m   = ((c1 & mask) + (c2 & mask)) >> 1
+;  m   = (m + 0x0821) & mask
+;  dst = ((c1 & mask) + m) >> 1        ; roughly (3*c1 + c2) / 4
+;where mask is [zerolowbits]. Overwrites ecx and edx.
+%macro Interp1 3
+ mov edx,%2
+ mov ecx,%3
+ cmp edx,ecx
+ je %%fin
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1
+ add ecx,0x0821
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+%%fin
+ mov %1,dx
+%endmacro
+
+;Interp2 dst,c1,c2,c3
+;Stores a 16-bit blend of three colours into dst, roughly (2*c1 + c2 + c3) / 4.
+;Step 1: unless c2 == c3, ecx = (((c2 & mask) + (c3 & mask)) >> 1) + 0x0821;
+;        when they are equal ecx keeps the value of c3 as loaded.
+;Step 2: unless c1 == ecx, edx = ((c1 & mask) + (ecx & mask)) >> 1;
+;        when they are equal edx keeps c1 as loaded.
+;mask is [zerolowbits]. Overwrites ecx and edx.
+%macro Interp2 4
+ mov edx,%3
+ mov ecx,%4
+ cmp edx,ecx
+ je %%fin1
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1
+ add ecx,0x0821
+%%fin1
+ mov edx,%2
+ cmp edx,ecx
+ je %%fin2
+ and ecx,[zerolowbits]
+ and edx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+%%fin2
+ mov %1,dx
+%endmacro
+
+;Interp3 dst,c2
+;Stores a 16-bit blend of the colour in eax (implicit first operand, not a
+;macro parameter) and c2 into dst. Both are looked up in _LUT16to32, widened
+;from bytes to words, combined as (7*first + c2) per lane and shifted right
+;by 5, then narrowed back to bytes and repacked into a 16-bit value.
+;NOTE(review): the final shl/shr sequence appears to rebuild a 5-6-5 pixel
+;from 6-bit lanes - confirm against the _LUT16to32 layout.
+;Overwrites ecx, edx, mm1 and mm2; eax is only read.
+%macro Interp3 2
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4]
+ mov edx, %2
+ movd mm2, [ecx+edx*4]
+ punpcklbw mm1, [reg_blank]
+ punpcklbw mm2, [reg_blank]
+ pmullw mm1, [const7]
+ paddw mm1, mm2
+ psrlw mm1, 5
+ packuswb mm1, [reg_blank]
+ movd edx, mm1
+ shl dl, 2
+ shr edx, 1
+ shl dx, 3
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+;Interp4 dst,c2,c3
+;Stores a 16-bit blend of the colour in eax (implicit first operand, not a
+;macro parameter), c2 and c3 into dst. All three are looked up in _LUT16to32
+;and widened from bytes to words; the lanes are combined as
+;(2*first + 7*(c2 + c3)) and shifted right by 6, then narrowed back to bytes
+;and repacked into a 16-bit value.
+;NOTE(review): the final shl/shr sequence appears to rebuild a 5-6-5 pixel
+;from 6-bit lanes - confirm against the _LUT16to32 layout.
+;Overwrites ecx, edx, mm1, mm2 and mm3; eax is only read.
+%macro Interp4 3
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4]
+ mov edx, %2
+ movd mm2, [ecx+edx*4]
+ mov edx, %3
+ movd mm3, [ecx+edx*4]
+ punpcklbw mm1, [reg_blank]
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ psllw mm1, 1
+ paddw mm2, mm3
+ pmullw mm2, [const7]
+ paddw mm1, mm2
+ psrlw mm1, 6
+ packuswb mm1, [reg_blank]
+ movd edx, mm1
+ shl dl, 2
+ shr edx, 1
+ shl dx, 3
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+;Interp5 dst,c1,c2
+;Stores the 16-bit average of c1 and c2 into dst.
+;If c1 == c2 the value of c1 is stored unchanged; otherwise
+;dst = ((c1 & mask) + (c2 & mask)) >> 1 with mask = [zerolowbits].
+;Overwrites ecx and edx.
+%macro Interp5 3
+ mov edx,%2
+ mov ecx,%3
+ cmp edx,ecx
+ je %%fin
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+%%fin
+ mov %1,dx
+%endmacro
+
+;PIXEL00_*, PIXEL01_*, PIXEL02_* : writers for the first output row.
+;Each stores one 16-bit pixel at [edi], [edi+2] or [edi+4] respectively.
+;eax supplies the base colour (the visible part of _hq3x_16 loads w5 into
+;it); the suffix selects the blend:
+;  _C            copy eax unchanged
+;  _1M/_1U/_1L/_1R/_1   Interp1 of eax with one neighbour (w1..w9)
+;  _2            Interp2 of eax with two neighbours
+;  _3 / _4       Interp3 / Interp4 (eax is their implicit first colour)
+;  _5            Interp5 of two neighbours (eax not used)
+;  _6            Interp1 with the neighbour as first colour and eax second
+%macro PIXEL00_1M 0
+ Interp1 [edi],eax,[w1]
+%endmacro
+
+%macro PIXEL00_1U 0
+ Interp1 [edi],eax,[w2]
+%endmacro
+
+%macro PIXEL00_1L 0
+ Interp1 [edi],eax,[w4]
+%endmacro
+
+%macro PIXEL00_2 0
+ Interp2 [edi],eax,[w4],[w2]
+%endmacro
+
+%macro PIXEL00_4 0
+ Interp4 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_5 0
+ Interp5 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_C 0
+ mov [edi],ax
+%endmacro
+
+%macro PIXEL01_1 0
+ Interp1 [edi+2],eax,[w2]
+%endmacro
+
+%macro PIXEL01_3 0
+ Interp3 [edi+2],[w2]
+%endmacro
+
+%macro PIXEL01_6 0
+ Interp1 [edi+2],[w2],eax
+%endmacro
+
+%macro PIXEL01_C 0
+ mov [edi+2],ax
+%endmacro
+
+%macro PIXEL02_1M 0
+ Interp1 [edi+4],eax,[w3]
+%endmacro
+
+%macro PIXEL02_1U 0
+ Interp1 [edi+4],eax,[w2]
+%endmacro
+
+%macro PIXEL02_1R 0
+ Interp1 [edi+4],eax,[w6]
+%endmacro
+
+%macro PIXEL02_2 0
+ Interp2 [edi+4],eax,[w2],[w6]
+%endmacro
+
+%macro PIXEL02_4 0
+ Interp4 [edi+4],[w2],[w6]
+%endmacro
+
+%macro PIXEL02_5 0
+ Interp5 [edi+4],[w2],[w6]
+%endmacro
+
+%macro PIXEL02_C 0
+ mov [edi+4],ax
+%endmacro
+
+;PIXEL10_*, PIXEL11, PIXEL12_* : writers for the second output row.
+;Each stores one 16-bit pixel at [edi+ebx], [edi+ebx+2] or [edi+ebx+4];
+;ebx is used as the byte distance to the next output row (the visible part
+;of _hq3x_16 loads dstPitch into it before dispatching).
+;eax supplies the base colour. Suffixes: _C copies eax, _1 blends eax with
+;a neighbour via Interp1, _3 uses Interp3 (eax implicit), _6 is Interp1 with
+;the neighbour as first colour. PIXEL11 always copies eax to the centre.
+%macro PIXEL10_1 0
+ Interp1 [edi+ebx],eax,[w4]
+%endmacro
+
+%macro PIXEL10_3 0
+ Interp3 [edi+ebx],[w4]
+%endmacro
+
+%macro PIXEL10_6 0
+ Interp1 [edi+ebx],[w4],eax
+%endmacro
+
+%macro PIXEL10_C 0
+ mov [edi+ebx],ax
+%endmacro
+
+%macro PIXEL11 0
+ mov [edi+ebx+2],ax
+%endmacro
+
+%macro PIXEL12_1 0
+ Interp1 [edi+ebx+4],eax,[w6]
+%endmacro
+
+%macro PIXEL12_3 0
+ Interp3 [edi+ebx+4],[w6]
+%endmacro
+
+%macro PIXEL12_6 0
+ Interp1 [edi+ebx+4],[w6],eax
+%endmacro
+
+%macro PIXEL12_C 0
+ mov [edi+ebx+4],ax
+%endmacro
+
+;PIXEL20_*, PIXEL21_*, PIXEL22_* : writers for the third output row.
+;Each stores one 16-bit pixel at [edi+ebx*2], [edi+ebx*2+2] or
+;[edi+ebx*2+4]; ebx is used as the byte distance between output rows (the
+;visible part of _hq3x_16 loads dstPitch into it before dispatching).
+;eax supplies the base colour; the suffix selects the blend:
+;  _C            copy eax unchanged
+;  _1M/_1D/_1L/_1R/_1   Interp1 of eax with one neighbour (w1..w9)
+;  _2            Interp2 of eax with two neighbours
+;  _3 / _4       Interp3 / Interp4 (eax is their implicit first colour)
+;  _5            Interp5 of two neighbours (eax not used)
+;  _6            Interp1 with the neighbour as first colour and eax second
+%macro PIXEL20_1M 0
+ Interp1 [edi+ebx*2],eax,[w7]
+%endmacro
+
+%macro PIXEL20_1D 0
+ Interp1 [edi+ebx*2],eax,[w8]
+%endmacro
+
+%macro PIXEL20_1L 0
+ Interp1 [edi+ebx*2],eax,[w4]
+%endmacro
+
+%macro PIXEL20_2 0
+ Interp2 [edi+ebx*2],eax,[w8],[w4]
+%endmacro
+
+%macro PIXEL20_4 0
+ Interp4 [edi+ebx*2],[w8],[w4]
+%endmacro
+
+%macro PIXEL20_5 0
+ Interp5 [edi+ebx*2],[w8],[w4]
+%endmacro
+
+%macro PIXEL20_C 0
+ mov [edi+ebx*2],ax
+%endmacro
+
+%macro PIXEL21_1 0
+ Interp1 [edi+ebx*2+2],eax,[w8]
+%endmacro
+
+%macro PIXEL21_3 0
+ Interp3 [edi+ebx*2+2],[w8]
+%endmacro
+
+%macro PIXEL21_6 0
+ Interp1 [edi+ebx*2+2],[w8],eax
+%endmacro
+
+%macro PIXEL21_C 0
+ mov [edi+ebx*2+2],ax
+%endmacro
+
+%macro PIXEL22_1M 0
+ Interp1 [edi+ebx*2+4],eax,[w9]
+%endmacro
+
+%macro PIXEL22_1D 0
+ Interp1 [edi+ebx*2+4],eax,[w8]
+%endmacro
+
+%macro PIXEL22_1R 0
+ Interp1 [edi+ebx*2+4],eax,[w6]
+%endmacro
+
+%macro PIXEL22_2 0
+ Interp2 [edi+ebx*2+4],eax,[w6],[w8]
+%endmacro
+
+%macro PIXEL22_4 0
+ Interp4 [edi+ebx*2+4],[w6],[w8]
+%endmacro
+
+%macro PIXEL22_5 0
+ Interp5 [edi+ebx*2+4],[w6],[w8]
+%endmacro
+
+%macro PIXEL22_C 0
+ mov [edi+ebx*2+4],ax
+%endmacro
+
+;Byte offsets of the _hq3x_16 arguments relative to ebp, valid after the
+;"push ebp / mov ebp,esp" prologue. Arguments are 4 bytes apart starting
+;at ebp+8; _hq3x_16 loads inbuffer into esi and outbuffer into edi.
+inbuffer equ 8
+outbuffer equ 12
+Xres equ 16
+Yres equ 20
+srcPitch equ 24
+dstPitch equ 28
+
+_hq3x_16:
+ push ebp
+ mov ebp,esp
+ pushad
+
+ mov esi,[ebp+inbuffer]
+ mov edi,[ebp+outbuffer]
+ mov edx,[ebp+Yres]
+ mov [linesleft],edx
+ mov ecx,[ebp+Xres]
+ shl ecx,1
+ mov ebx,[ebp+dstPitch]
+ mov dword[moduloDst],ebx
+ sub dword[moduloDst],ecx
+ mov ecx,dword[moduloDst]
+ shl dword[moduloDst],1
+ add dword[moduloDst],ecx
+ mov ecx,[ebp+Xres]
+ shl ecx,1
+ mov ebx,[ebp+srcPitch]
+ mov dword[nextline],ebx
+ mov dword[moduloSrc],ebx
+ sub dword[moduloSrc],ecx
+ neg ebx
+ mov dword[prevline],ebx
+; Start of a source row: reset the per-row pixel counter.
+.loopy
+ mov ecx,[ebp+Xres]
+ mov dword[xcounter],ecx
+; Per-pixel: load the 3x3 neighbourhood around [esi] into w1..w9
+; (w1 w2 w3 = row above, w4 w5 w6 = current row, w7 w8 w9 = row below),
+; each zero-extended to 32 bits.
+; Each movq fetches 8 bytes starting one pixel to the left of the centre
+; column; only the low three words are used.
+; NOTE(review): on the first/last row and column these loads touch memory
+; outside the Xres x Yres rectangle (and always read one extra word to the
+; right) - presumably the caller guarantees a border; confirm.
+.loopx
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx-2]
+ movq mm6,[esi-2]
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx-2]
+ ; Row above: low word -> w1, next word -> w2, third word -> w3.
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx
+ shr eax,16
+ mov [w2],eax
+ psrlq mm5,32
+ movd eax,mm5
+ movzx edx,ax
+ mov [w3],edx
+ ; Current row: w4 (left), w5 (centre), w6 (right).
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx
+ shr eax,16
+ mov [w5],eax
+ psrlq mm6,32
+ movd eax,mm6
+ movzx edx,ax
+ mov [w6],edx
+ ; Row below: w7, w8, w9.
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx
+ shr eax,16
+ mov [w8],eax
+ psrlq mm7,32
+ movd eax,mm7
+ movzx edx,ax
+ mov [w9],edx
+; Build the 8-bit neighbour pattern in ecx and the 4-bit "cross" mask,
+; then dispatch to the matching handler.
+;   eax  = w5 (centre pixel), kept in eax for the handlers
+;   ebx  = base of the _RGBtoYUV lookup table (dword entries, indexed by
+;          the 16-bit pixel value)
+;   mm5  = table entry for w5
+; For each neighbour that is not bit-identical to w5, the per-byte absolute
+; difference of the two table entries is formed (psubusb in both
+; directions, OR-ed together) and [threshold] is subtracted with unsigned
+; saturation; if any byte of the low dword is still non-zero the
+; neighbour counts as "different" and its pattern bit is set.
+; Pattern bits: w1=1 w2=2 w3=4 w4=8 w6=16 w7=32 w8=64 w9=128.
+; Cross bits (set whenever the pixel values differ at all, regardless of
+; the threshold): w2=1 w4=2 w6=4 w8=8.
+.flags
+ mov ebx,_RGBtoYUV
+ mov eax,[w5]
+ xor ecx,ecx
+ movd mm5,[ebx+eax*4]
+ mov dword[cross],0
+
+ ; w2 (above): cross bit 1, pattern bit 2.
+ mov edx,[w2]
+ cmp eax,edx
+ je .noflag2
+ or dword[cross],1
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag2
+ or ecx,2
+.noflag2
+ ; w4 (left): cross bit 2, pattern bit 8.
+ mov edx,[w4]
+ cmp eax,edx
+ je .noflag4
+ or dword[cross],2
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag4
+ or ecx,8
+.noflag4
+ ; w6 (right): cross bit 4, pattern bit 16.
+ mov edx,[w6]
+ cmp eax,edx
+ je .noflag6
+ or dword[cross],4
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag6
+ or ecx,16
+.noflag6
+ ; w8 (below): cross bit 8, pattern bit 64.
+ mov edx,[w8]
+ cmp eax,edx
+ je .noflag8
+ or dword[cross],8
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag8
+ or ecx,64
+.noflag8
+ ; No edge neighbour exceeded the threshold: skip the diagonal tests and
+ ; dispatch on the cross mask instead (ebx = dstPitch for the handler).
+ test ecx,ecx
+ jnz .testflag1
+ mov ecx,[cross]
+ mov ebx,[ebp+dstPitch]
+ jmp [FuncTable2+ecx*4]
+.testflag1
+ ; w1 (above-left): pattern bit 1.
+ mov edx,[w1]
+ cmp eax,edx
+ je .noflag1
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag1
+ or ecx,1
+.noflag1
+ ; w3 (above-right): pattern bit 4.
+ mov edx,[w3]
+ cmp eax,edx
+ je .noflag3
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag3
+ or ecx,4
+.noflag3
+ ; w7 (below-left): pattern bit 32.
+ mov edx,[w7]
+ cmp eax,edx
+ je .noflag7
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag7
+ or ecx,32
+.noflag7
+ ; w9 (below-right): pattern bit 128.
+ mov edx,[w9]
+ cmp eax,edx
+ je .noflag9
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag9
+ or ecx,128
+.noflag9
+ ; Full pattern in ecx: jump to ..@flag<ecx> with ebx = dstPitch.
+ mov ebx,[ebp+dstPitch]
+ jmp [FuncTable+ecx*4]
+
+; Pattern handlers. ..@flagN is the jump target stored in FuncTable for
+; pattern value N; several patterns that produce the same output share one
+; body by stacking their labels.
+; On entry: eax = w5 (centre pixel), ebx = dstPitch, edi = top-left of the
+; 3x3 output block, w1..w9 hold the neighbourhood.
+; Each handler emits one PIXELrc_* macro per output pixel (r = row,
+; c = column, 0..2), either directly or through DiffOrNot, and then jumps
+; to .loopx_end.
+; NOTE(review): DiffOrNot is defined outside this excerpt; from its call
+; sites it presumably expands the first half of its macro list when the two
+; named pixels differ and the second half otherwise - confirm.
+..@flag0
+..@flag1
+..@flag4
+..@flag32
+..@flag128
+..@flag5
+..@flag132
+..@flag160
+..@flag33
+..@flag129
+..@flag36
+..@flag133
+..@flag164
+..@flag161
+..@flag37
+..@flag165
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag2
+..@flag34
+..@flag130
+..@flag162
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag16
+..@flag17
+..@flag48
+..@flag49
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag64
+..@flag65
+..@flag68
+..@flag69
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag8
+..@flag12
+..@flag136
+..@flag140
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag3
+..@flag35
+..@flag131
+..@flag163
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag6
+..@flag38
+..@flag134
+..@flag166
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag20
+..@flag21
+..@flag52
+..@flag53
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag144
+..@flag145
+..@flag176
+..@flag177
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag192
+..@flag193
+..@flag196
+..@flag197
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag96
+..@flag97
+..@flag100
+..@flag101
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag40
+..@flag44
+..@flag168
+..@flag172
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag9
+..@flag13
+..@flag137
+..@flag141
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag18
+..@flag50
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag80
+..@flag81
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag72
+..@flag76
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag10
+..@flag138
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag66
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag24
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag7
+..@flag39
+..@flag135
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag148
+..@flag149
+..@flag180
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag224
+..@flag228
+..@flag225
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag41
+..@flag169
+..@flag45
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag22
+..@flag54
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag208
+..@flag209
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag104
+..@flag108
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag11
+..@flag139
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag19
+..@flag51
+ DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag146
+..@flag178
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ jmp .loopx_end
+..@flag84
+..@flag85
+ DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ jmp .loopx_end
+..@flag112
+..@flag113
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_1M,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ jmp .loopx_end
+..@flag200
+..@flag204
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ jmp .loopx_end
+..@flag73
+..@flag77
+ DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag42
+..@flag170
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag14
+..@flag142
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag67
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag70
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag28
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag152
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag194
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag98
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag56
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag25
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag26
+..@flag31
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag82
+..@flag214
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag88
+..@flag248
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+ jmp .loopx_end
+..@flag74
+..@flag107
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag27
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag86
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag216
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag106
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag30
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag210
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag120
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag75
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag29
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag198
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag184
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag99
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag57
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag71
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag156
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag226
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag60
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag195
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag102
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag153
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag58
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag83
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag92
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag202
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag78
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag154
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag114
+ PIXEL00_1M
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag89
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag90
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag55
+..@flag23
+ DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag182
+..@flag150
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ jmp .loopx_end
+..@flag213
+..@flag212
+ DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ jmp .loopx_end
+..@flag241
+..@flag240
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ jmp .loopx_end
+..@flag236
+..@flag232
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ jmp .loopx_end
+..@flag109
+..@flag105
+ DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag171
+..@flag43
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag143
+..@flag15
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag124
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag203
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag62
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag211
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag118
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag217
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag110
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag155
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag188
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag185
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag61
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag157
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag103
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag227
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag230
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag199
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag220
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag158
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag234
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag242
+ PIXEL00_1M
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1L
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag59
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag121
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag87
+ PIXEL00_1L
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag79
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1R
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag122
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag94
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag218
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag91
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag229
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag167
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag173
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag181
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag186
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag115
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag93
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag206
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag205
+..@flag201
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag174
+..@flag46
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag179
+..@flag147
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag117
+..@flag116
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag189
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag231
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag126
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag219
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag125
+ DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL11
+ PIXEL12_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag221
+ DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ jmp .loopx_end
+..@flag207
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag238
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL11
+ PIXEL12_1
+ jmp .loopx_end
+..@flag190
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1D
+ PIXEL21_1
+ jmp .loopx_end
+..@flag187
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag243
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ jmp .loopx_end
+..@flag119
+ DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag237
+..@flag233
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag175
+..@flag47
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag183
+..@flag151
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag245
+..@flag244
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag250
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+ jmp .loopx_end
+..@flag123
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag95
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag222
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag252
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag249
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+ jmp .loopx_end
+..@flag235
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag111
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag63
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag159
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag215
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag246
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag254
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_2
+ jmp .loopx_end
+..@flag253
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag251
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+ PIXEL02_1M
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_2,PIXEL21_3
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+ jmp .loopx_end
+..@flag239
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag127
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_2,PIXEL01_3,PIXEL10_3
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag191
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag223
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_2,PIXEL12_3
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag247
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag255
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+
+..@cross0
+ mov edx,eax
+ shl eax,16
+ or eax,edx
+ mov [edi],eax
+ mov [edi+4],ax
+ mov [edi+ebx],eax
+ mov [edi+ebx+4],ax
+ mov [edi+ebx*2],eax
+ mov [edi+ebx*2+4],ax
+ jmp .loopx_end
+..@cross1 ; FuncTable2 entry 1: first output row gets a blend with [w2], the other two rows get the plain value
+ mov edx,eax ; edx = value from eax (NOTE(review): presumably the centre source pixel -- loaded outside this chunk)
+ shl eax,16
+ or eax,edx ; eax = the value in both 16 bit halves (assumes the upper half of eax was zero)
+ mov ecx,[w2] ; neighbour value (NOTE(review): w2 is declared outside this chunk; presumably the pixel above -- confirm)
+ and edx,[zerolowbits] ; NOTE(review): zerolowbits is defined outside this chunk; presumably it clears the lowest bit of each channel so the halving below stays inside the channel
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of neighbour and value
+ add ecx,0x0821 ; 0x0821 has bits 0, 5 and 11 set, i.e. +1 on each channel of an RGB565 value
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = average of value and the first average, i.e. roughly 3/4 value + 1/4 neighbour
+ mov [edi],dx ; first output row: all three positions get the blend
+ mov [edi+2],dx
+ mov [edi+4],dx
+ mov [edi+ebx],eax ; second output row: plain value
+ mov [edi+ebx+4],ax
+ mov [edi+ebx*2],eax ; third output row: plain value
+ mov [edi+ebx*2+4],ax
+ jmp .loopx_end
+..@cross2 ; FuncTable2 entry 2: first output column gets a blend with [w4], the other two columns get the plain value
+ mov edx,eax ; edx = value from eax (NOTE(review): presumably the centre source pixel -- loaded outside this chunk)
+ shl eax,16
+ or eax,edx ; eax = the value in both 16 bit halves (assumes the upper half of eax was zero)
+ mov ecx,[w4] ; neighbour value (NOTE(review): w4 is declared outside this chunk; presumably the pixel to the left -- confirm)
+ and edx,[zerolowbits] ; NOTE(review): zerolowbits is defined outside this chunk; presumably it clears the lowest bit of each channel
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of neighbour and value
+ add ecx,0x0821 ; +1 on each channel of an RGB565 value (bits 0, 5 and 11)
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = roughly 3/4 value + 1/4 neighbour
+ mov [edi],dx ; position 0 of each row: blend
+ mov [edi+2],eax ; positions 1 and 2 of each row: plain value (one 32 bit store)
+ mov [edi+ebx],dx
+ mov [edi+ebx+2],eax
+ mov [edi+ebx*2],dx
+ mov [edi+ebx*2+2],eax
+ jmp .loopx_end
+..@cross4 ; FuncTable2 entry 4: last output column gets a blend with [w6], the other two columns get the plain value
+ mov edx,eax ; edx = value from eax (NOTE(review): presumably the centre source pixel -- loaded outside this chunk)
+ shl eax,16
+ or eax,edx ; eax = the value in both 16 bit halves (assumes the upper half of eax was zero)
+ mov ecx,[w6] ; neighbour value (NOTE(review): w6 is declared outside this chunk; presumably the pixel to the right -- confirm)
+ and edx,[zerolowbits] ; NOTE(review): zerolowbits is defined outside this chunk; presumably it clears the lowest bit of each channel
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of neighbour and value
+ add ecx,0x0821 ; +1 on each channel of an RGB565 value (bits 0, 5 and 11)
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = roughly 3/4 value + 1/4 neighbour
+ mov [edi],eax ; positions 0 and 1 of each row: plain value
+ mov [edi+4],dx ; position 2 of each row: blend
+ mov [edi+ebx],eax
+ mov [edi+ebx+4],dx
+ mov [edi+ebx*2],eax
+ mov [edi+ebx*2+4],dx
+ jmp .loopx_end
+..@cross8 ; FuncTable2 entry 8: last output row gets a blend with [w8], the other two rows get the plain value
+ mov edx,eax ; edx = value from eax (NOTE(review): presumably the centre source pixel -- loaded outside this chunk)
+ shl eax,16
+ or eax,edx ; eax = the value in both 16 bit halves (assumes the upper half of eax was zero)
+ mov ecx,[w8] ; neighbour value (NOTE(review): w8 is declared outside this chunk; presumably the pixel below -- confirm)
+ and edx,[zerolowbits] ; NOTE(review): zerolowbits is defined outside this chunk; presumably it clears the lowest bit of each channel
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of neighbour and value
+ add ecx,0x0821 ; +1 on each channel of an RGB565 value (bits 0, 5 and 11)
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = roughly 3/4 value + 1/4 neighbour
+ mov [edi],eax ; first output row: plain value
+ mov [edi+4],ax
+ mov [edi+ebx],eax ; second output row: plain value
+ mov [edi+ebx+4],ax
+ mov [edi+ebx*2],dx ; third output row: all three positions get the blend
+ mov [edi+ebx*2+2],dx
+ mov [edi+ebx*2+4],dx
+ jmp .loopx_end
+
+.loopx_end ; shared tail of all ..@flagN / ..@crossN handlers
+ add esi,2 ; advance esi by 2 bytes (one 16 bit source pixel)
+ add edi,6 ; advance edi by 6 bytes (three 16 bit output pixels)
+ dec dword[xcounter]
+ jz .nexty ; counter reached zero: this row is finished
+ jmp .loopx
+.nexty
+ add esi,dword[moduloSrc] ; NOTE(review): moduloSrc/moduloDst are computed outside this chunk; presumably the per-row adjustment that moves each pointer to its next row -- confirm
+ add edi,dword[moduloDst]
+ dec dword[linesleft]
+ jz .fin ; no lines left
+ mov ebx,[ebp+srcPitch]
+ mov dword[nextline],ebx ; nextline = +srcPitch
+ neg ebx
+ mov dword[prevline],ebx ; prevline = -srcPitch
+ jmp .loopy
+.fin
+ emms ; leave MMX state so that x87 floating point code can run afterwards
+ popad ; NOTE(review): presumably matches a pushad in the prologue, which is outside this chunk
+ mov esp,ebp
+ pop ebp
+ ret
+
+SECTION .data
+FuncTable ; 256 code addresses, entry N is the label ..@flagN. NOTE(review): the code that indexes this table is outside this chunk; presumably the index is the 8 bit neighbour pattern -- confirm
+ dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7
+ dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15
+ dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23
+ dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31
+ dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39
+ dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47
+ dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55
+ dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63
+ dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71
+ dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79
+ dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87
+ dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95
+ dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103
+ dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111
+ dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119
+ dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127
+ dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135
+ dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143
+ dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151
+ dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159
+ dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167
+ dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175
+ dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183
+ dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191
+ dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199
+ dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207
+ dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215
+ dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223
+ dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231
+ dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239
+ dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247
+ dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255
+
+FuncTable2 ; 16 code addresses: entries 0, 1, 2, 4 and 8 point to ..@cross0/1/2/4/8, every other entry to ..@flag0. NOTE(review): the indexing code is outside this chunk; presumably a 4 bit mask -- confirm. Each dd line must list exactly four operands with no trailing comma, so that every entry stays at offset index*4
+ dd ..@cross0, ..@cross1, ..@cross2, ..@flag0
+ dd ..@cross4, ..@flag0, ..@flag0, ..@flag0
+ dd ..@cross8, ..@flag0, ..@flag0, ..@flag0
+ dd ..@flag0, ..@flag0, ..@flag0, ..@flag0
+
diff --git a/graphics/scaler/intern.h b/graphics/scaler/intern.h
new file mode 100644
index 0000000000..fa3ca1823a
--- /dev/null
+++ b/graphics/scaler/intern.h
@@ -0,0 +1,171 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+
+#ifndef COMMON_SCALER_INTERN_H
+#define COMMON_SCALER_INTERN_H
+
+#include "common/stdafx.h"
+#include "common/scummsys.h"
+
+template<int bitFormat>
+struct ColorMasks { // the primary template has no members; constants exist only in the 565 and 555 specialisations below
+};
+
+template<>
+struct ColorMasks<565> {
+ enum {
+ highBits = 0xF7DEF7DE, // all channel bits except the lowest bit of each channel, for two packed 16 bit pixels
+ lowBits = 0x08210821, // the lowest bit of each channel, for two packed pixels
+ qhighBits = 0xE79CE79C, // all channel bits except the lowest two of each channel, for two packed pixels
+ qlowBits = 0x18631863, // the lowest two bits of each channel, for two packed pixels
+ redblueMask = 0xF81F, // the two outer 5 bit channels of one pixel (bits 11-15 and 0-4)
+ greenMask = 0x07E0 // the middle 6 bit channel of one pixel (bits 5-10)
+ };
+};
+
+template<>
+struct ColorMasks<555> {
+ enum {
+ highBits = 0x7BDE7BDE, // all channel bits except the lowest bit of each channel, for two packed 16 bit pixels
+ lowBits = 0x04210421, // the lowest bit of each channel, for two packed pixels
+ qhighBits = 0x739C739C, // all channel bits except the lowest two of each channel, for two packed pixels
+ qlowBits = 0x0C630C63, // the lowest two bits of each channel, for two packed pixels
+ redblueMask = 0x7C1F, // the two outer 5 bit channels of one pixel (bits 10-14 and 0-4)
+ greenMask = 0x03E0 // the middle 5 bit channel of one pixel (bits 5-9)
+ };
+};
+
+#define highBits ColorMasks<bitFormat>::highBits // these six shorthands expand to ColorMasks<bitFormat>::..., so they only compile where a name 'bitFormat' is in scope (e.g. inside a template<int bitFormat>)
+#define lowBits ColorMasks<bitFormat>::lowBits
+#define qhighBits ColorMasks<bitFormat>::qhighBits
+#define qlowBits ColorMasks<bitFormat>::qlowBits
+#define redblueMask ColorMasks<bitFormat>::redblueMask
+#define greenMask ColorMasks<bitFormat>::greenMask
+
+
+/**
+ * Average two 32 bit words channel by channel, with equal weights.
+ * Each of A and B may hold two 16 bit pixels, one in the upper and one
+ * in the lower half; both pixels are then processed at once.
+ */
+template<int bitFormat>
+static inline uint32 INTERPOLATE(uint32 A, uint32 B) {
+ // Halve each operand with its lowest channel bits masked off, then add back 1 per channel where both operands had that lowest bit set.
+ return (((A & highBits) >> 1) + ((B & highBits) >> 1) + (A & B & lowBits));
+}
+
+/**
+ * Average four 32 bit words channel by channel, with equal weights.
+ * Each of A, B, C and D may hold two 16 bit pixels, one in the upper and
+ * one in the lower half; both pixels are then processed at once.
+ */
+template<int bitFormat>
+static inline uint32 Q_INTERPOLATE(uint32 A, uint32 B, uint32 C, uint32 D) {
+ register uint32 x = ((A & qhighBits) >> 2) + ((B & qhighBits) >> 2) + ((C & qhighBits) >> 2) + ((D & qhighBits) >> 2); // sum of the quartered upper channel bits
+ register uint32 y = ((A & qlowBits) + (B & qlowBits) + (C & qlowBits) + (D & qlowBits)) >> 2; // sum of the two lowest bits of every channel, divided by four
+ // keep only the two lowest bits of each channel; the remainder bits were shifted below them
+ y &= qlowBits;
+ return x + y;
+}
+
+
+/**
+ * Blend two 16 bit pixels as (p1 * w1 + p2 * w2) / (w1 + w2); the weights are template parameters.
+ * The redblueMask channels and the greenMask channel are computed separately. Used by the hq scaler family.
+ */
+template<int bitFormat, int w1, int w2>
+static inline uint16 interpolate16_2(uint16 p1, uint16 p2) {
+ return ((((p1 & redblueMask) * w1 + (p2 & redblueMask) * w2) / (w1 + w2)) & redblueMask) |
+ ((((p1 & greenMask) * w1 + (p2 & greenMask) * w2) / (w1 + w2)) & greenMask);
+}
+
+/**
+ * Blend three 16 bit pixels as (p1 * w1 + p2 * w2 + p3 * w3) / (w1 + w2 + w3); the weights are template parameters.
+ * The redblueMask channels and the greenMask channel are computed separately. Used by the hq scaler family.
+ */
+template<int bitFormat, int w1, int w2, int w3>
+static inline uint16 interpolate16_3(uint16 p1, uint16 p2, uint16 p3) {
+ return ((((p1 & redblueMask) * w1 + (p2 & redblueMask) * w2 + (p3 & redblueMask) * w3) / (w1 + w2 + w3)) & redblueMask) |
+ ((((p1 & greenMask) * w1 + (p2 & greenMask) * w2 + (p3 & greenMask) * w3) / (w1 + w2 + w3)) & greenMask);
+}
+
+
+/**
+ * Compare two YUV values (Y in bits 16-23, U in bits 8-15, V in bits 0-7) and return true if
+ * any component differs by more than its hard coded threshold. Used by the hq scaler family.
+ */
+static inline bool diffYUV(int yuv1, int yuv2) {
+ static const int Ymask = 0x00FF0000;
+ static const int Umask = 0x0000FF00;
+ static const int Vmask = 0x000000FF;
+ static const int trY = 0x00300000; // threshold 0x30, kept at the bit position of Y
+ static const int trU = 0x00000700; // threshold 0x07, kept at the bit position of U
+ static const int trV = 0x00000006; // threshold 0x06
+
+ int diff;
+ int mask;
+ // Branch-free absolute value; relies on int being 32 bits wide and on >> of a negative int being an arithmetic shift.
+ diff = ((yuv1 & Ymask) - (yuv2 & Ymask));
+ mask = diff >> 31; // -1 if diff < 0, 0 otherwise
+ diff = (diff ^ mask) - mask; // mask == -1: ~diff + 1 (negation); mask == 0: diff unchanged
+ if (diff > trY) return true;
+
+ diff = ((yuv1 & Umask) - (yuv2 & Umask));
+ mask = diff >> 31; // -1 if diff < 0, 0 otherwise
+ diff = (diff ^ mask) - mask; // mask == -1: ~diff + 1 (negation); mask == 0: diff unchanged
+ if (diff > trU) return true;
+
+ diff = ((yuv1 & Vmask) - (yuv2 & Vmask));
+ mask = diff >> 31; // -1 if diff < 0, 0 otherwise
+ diff = (diff ^ mask) - mask; // mask == -1: ~diff + 1 (negation); mask == 0: diff unchanged
+ if (diff > trV) return true;
+
+ return false;
+/* The same test written with ABS(), kept for reference:
+ return
+ ( ( ABS((yuv1 & Ymask) - (yuv2 & Ymask)) > trY ) ||
+ ( ABS((yuv1 & Umask) - (yuv2 & Umask)) > trU ) ||
+ ( ABS((yuv1 & Vmask) - (yuv2 & Vmask)) > trV ) );
+*/
+}
+
+/**
+ * 16bit RGB to YUV conversion table. This table is set up by InitLUT().
+ * Used by the hq scaler family.
+ */
+extern "C" uint *RGBtoYUV;
+
+/** Auxiliary macro: defines FUNC(...) as a run-time dispatcher that calls FUNCTemplate<565> when gBitFormat is 565 and FUNCTemplate<555> for every other value. */
+#define MAKE_WRAPPER(FUNC) \
+ void FUNC(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { \
+ if (gBitFormat == 565) \
+ FUNC ## Template<565>(srcPtr, srcPitch, dstPtr, dstPitch, width, height); \
+ else \
+ FUNC ## Template<555>(srcPtr, srcPitch, dstPtr, dstPitch, width, height); \
+ }
+
+/** Specifies the currently active 16bit pixel format, 555 or 565. */
+extern int gBitFormat;
+
+#endif
diff --git a/graphics/scaler/scale2x.cpp b/graphics/scaler/scale2x.cpp
new file mode 100644
index 0000000000..6d8a73f8cc
--- /dev/null
+++ b/graphics/scaler/scale2x.cpp
@@ -0,0 +1,521 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains a C and MMX implementation of the Scale2x effect.
+ *
+ * You can find a high-level description of the effect at :
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "common/scaler/intern.h"
+
+#include "scale2x.h"
+
+#include <assert.h>
+
+/***************************************************************************/
+/* Scale2x C implementation */
+
+static inline void scale2x_8_def_single(scale2x_uint8* __restrict__ dst, const scale2x_uint8* __restrict__ src0, const scale2x_uint8* __restrict__ src1, const scale2x_uint8* __restrict__ src2, unsigned count)
+{
+ /* Write two dst pixels for each of the count pixels of row src1. src1[-1] and src1[1] are read for every pixel, including the first and the last one. */
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) { /* pixel in src0 differs from pixel in src2, and left differs from right */
+ dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0]; /* first output pixel: the src0 pixel if it equals the left neighbour */
+ dst[1] = src1[1] == src0[0] ? src0[0] : src1[0]; /* second output pixel: the src0 pixel if it equals the right neighbour */
+ } else {
+ dst[0] = src1[0]; /* otherwise plain pixel doubling */
+ dst[1] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 2;
+ --count;
+ }
+}
+
+static inline void scale2x_16_def_single(scale2x_uint16* __restrict__ dst, const scale2x_uint16* __restrict__ src0, const scale2x_uint16* __restrict__ src1, const scale2x_uint16* __restrict__ src2, unsigned count)
+{
+ /* Write two dst pixels for each of the count pixels of row src1. src1[-1] and src1[1] are read for every pixel, including the first and the last one. */
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) { /* pixel in src0 differs from pixel in src2, and left differs from right */
+ dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0]; /* first output pixel: the src0 pixel if it equals the left neighbour */
+ dst[1] = src1[1] == src0[0] ? src0[0] : src1[0]; /* second output pixel: the src0 pixel if it equals the right neighbour */
+ } else {
+ dst[0] = src1[0]; /* otherwise plain pixel doubling */
+ dst[1] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 2;
+ --count;
+ }
+}
+
+static inline void scale2x_32_def_single(scale2x_uint32* __restrict__ dst, const scale2x_uint32* __restrict__ src0, const scale2x_uint32* __restrict__ src1, const scale2x_uint32* __restrict__ src2, unsigned count)
+{
+ /* Write two dst pixels for each of the count pixels of row src1. src1[-1] and src1[1] are read for every pixel, including the first and the last one. */
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) { /* pixel in src0 differs from pixel in src2, and left differs from right */
+ dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0]; /* first output pixel: the src0 pixel if it equals the left neighbour */
+ dst[1] = src1[1] == src0[0] ? src0[0] : src1[0]; /* second output pixel: the src0 pixel if it equals the right neighbour */
+ } else {
+ dst[0] = src1[0]; /* otherwise plain pixel doubling */
+ dst[1] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 2;
+ --count;
+ }
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 8 bits.
+ * The function is implemented in C.
+ * NOTE(review): no border handling is done here; the helper reads src1[-1]
+ * and src1[count], so the caller must provide one valid pixel on each side.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+ scale2x_8_def_single(dst0, src0, src1, src2, count); /* first destination row */
+ scale2x_8_def_single(dst1, src2, src1, src0, count); /* second destination row: same rule with previous and next row swapped */
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 16 bits.
+ * Same rule as scale2x_8_def() but for 16 bit pixels; src1[-1] and src1[count] are read as well.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+ scale2x_16_def_single(dst0, src0, src1, src2, count); /* first destination row */
+ scale2x_16_def_single(dst1, src2, src1, src0, count); /* second destination row: same rule with previous and next row swapped */
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 32 bits.
+ * Same rule as scale2x_8_def() but for 32 bit pixels; src1[-1] and src1[count] are read as well.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * It must be at least 2.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+ scale2x_32_def_single(dst0, src0, src1, src2, count); /* first destination row */
+ scale2x_32_def_single(dst1, src2, src1, src0, count); /* second destination row: same rule with previous and next row swapped */
+}
+
+/***************************************************************************/
+/* Scale2x MMX implementation */
+
+#if defined(__GNUC__) && defined(__i386__)
+
+/*
+ * Apply the Scale2x effect at a single row.
+ * This function must be called only by the other scale2x functions.
+ *
+ * Considering the pixel map :
+ *
+ * ABC (src0)
+ * DEF (src1)
+ * GHI (src2)
+ *
+ * this function computes 2 new pixels that replace the source pixel E,
+ * as in this map :
+ *
+ * ab (dst)
+ *
+ * with these variables :
+ *
+ * &current -> E
+ * &current_left -> D
+ * &current_right -> F
+ * &current_upper -> B
+ * &current_lower -> H
+ *
+ * %0 -> current_upper
+ * %1 -> current
+ * %2 -> current_lower
+ * %3 -> dst
+ * %4 -> counter
+ *
+ * %mm0 -> *current_left
+ * %mm1 -> *current_next
+ * %mm2 -> tmp0
+ * %mm3 -> tmp1
+ * %mm4 -> tmp2
+ * %mm5 -> tmp3
+ * %mm6 -> *current_upper
+ * %mm7 -> *current
+ */
+static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+ assert(count >= 16);
+ assert(count % 8 == 0);
+ /* One dst row from three source rows, 8 pixels (one 64 bit MMX register) per iteration. MMX state is left active; the caller must execute emms. */
+ __asm__ __volatile__(
+/* central runs */
+ "shrl $3, %4\n" /* count /= 8: number of 8 pixel groups */
+ "jz 1f\n"
+
+ "0:\n"
+
+ /* set the current, current_pre, current_next registers */
+ "movq -8(%1), %%mm0\n" /* reads the 8 bytes before the current group, also on the first iteration */
+ "movq (%1), %%mm7\n"
+ "movq 8(%1), %%mm1\n" /* reads the 8 bytes after the current group, also on the last iteration */
+ "psrlq $56, %%mm0\n"
+ "psllq $56, %%mm1\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $8, %%mm2\n"
+ "psrlq $8, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqb %%mm6, %%mm2\n"
+ "pcmpeqb %%mm6, %%mm4\n"
+ "pcmpeqb (%2), %%mm3\n"
+ "pcmpeqb (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqb %%mm1, %%mm2\n"
+ "pcmpeqb %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpcklbw %%mm4, %%mm2\n"
+ "punpckhbw %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ /* next */
+ "addl $8, %0\n"
+ "addl $8, %1\n"
+ "addl $8, %2\n"
+ "addl $16, %3\n"
+
+ "decl %4\n"
+ "jnz 0b\n"
+ "1:\n"
+
+ : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
+ :
+ : "cc", "memory" /* "memory": the asm reads the source rows and writes dst through the pointer operands */
+ );
+}
+
+static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+ assert(count >= 8);
+ assert(count % 4 == 0);
+ /* One dst row from three source rows, 4 pixels (one 64 bit MMX register) per iteration. MMX state is left active; the caller must execute emms. */
+ __asm__ __volatile__(
+/* central runs */
+ "shrl $2, %4\n" /* count /= 4: number of 4 pixel groups */
+ "jz 1f\n"
+
+ "0:\n"
+
+ /* set the current, current_pre, current_next registers */
+ "movq -8(%1), %%mm0\n" /* reads the 8 bytes before the current group, also on the first iteration */
+ "movq (%1), %%mm7\n"
+ "movq 8(%1), %%mm1\n" /* reads the 8 bytes after the current group, also on the last iteration */
+ "psrlq $48, %%mm0\n"
+ "psllq $48, %%mm1\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $16, %%mm2\n"
+ "psrlq $16, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqw %%mm6, %%mm2\n"
+ "pcmpeqw %%mm6, %%mm4\n"
+ "pcmpeqw (%2), %%mm3\n"
+ "pcmpeqw (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqw %%mm1, %%mm2\n"
+ "pcmpeqw %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpcklwd %%mm4, %%mm2\n"
+ "punpckhwd %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ /* next */
+ "addl $8, %0\n"
+ "addl $8, %1\n"
+ "addl $8, %2\n"
+ "addl $16, %3\n"
+
+ "decl %4\n"
+ "jnz 0b\n"
+ "1:\n"
+
+ : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
+ :
+ : "cc", "memory" /* "memory": the asm reads the source rows and writes dst through the pointer operands */
+ );
+}
+
+static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+ assert(count >= 4);
+ assert(count % 2 == 0);
+ /* One dst row from three source rows, 2 pixels (one 64 bit MMX register) per iteration. MMX state is left active; the caller must execute emms. */
+ __asm__ __volatile__(
+/* central runs */
+ "shrl $1, %4\n" /* count /= 2: number of 2 pixel groups */
+ "jz 1f\n"
+
+ "0:\n"
+
+ /* set the current, current_pre, current_next registers */
+ "movq -8(%1), %%mm0\n" /* reads the 8 bytes before the current group, also on the first iteration */
+ "movq (%1), %%mm7\n"
+ "movq 8(%1), %%mm1\n" /* reads the 8 bytes after the current group, also on the last iteration */
+ "psrlq $32, %%mm0\n"
+ "psllq $32, %%mm1\n"
+ "movq %%mm7, %%mm2\n"
+ "movq %%mm7, %%mm3\n"
+ "psllq $32, %%mm2\n"
+ "psrlq $32, %%mm3\n"
+ "por %%mm2, %%mm0\n"
+ "por %%mm3, %%mm1\n"
+
+ /* current_upper */
+ "movq (%0), %%mm6\n"
+
+ /* compute the upper-left pixel for dst on %%mm2 */
+ /* compute the upper-right pixel for dst on %%mm4 */
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "movq %%mm0, %%mm3\n"
+ "movq %%mm1, %%mm5\n"
+ "pcmpeqd %%mm6, %%mm2\n"
+ "pcmpeqd %%mm6, %%mm4\n"
+ "pcmpeqd (%2), %%mm3\n"
+ "pcmpeqd (%2), %%mm5\n"
+ "pandn %%mm2, %%mm3\n"
+ "pandn %%mm4, %%mm5\n"
+ "movq %%mm0, %%mm2\n"
+ "movq %%mm1, %%mm4\n"
+ "pcmpeqd %%mm1, %%mm2\n"
+ "pcmpeqd %%mm0, %%mm4\n"
+ "pandn %%mm3, %%mm2\n"
+ "pandn %%mm5, %%mm4\n"
+ "movq %%mm2, %%mm3\n"
+ "movq %%mm4, %%mm5\n"
+ "pand %%mm6, %%mm2\n"
+ "pand %%mm6, %%mm4\n"
+ "pandn %%mm7, %%mm3\n"
+ "pandn %%mm7, %%mm5\n"
+ "por %%mm3, %%mm2\n"
+ "por %%mm5, %%mm4\n"
+
+ /* set *dst */
+ "movq %%mm2, %%mm3\n"
+ "punpckldq %%mm4, %%mm2\n"
+ "punpckhdq %%mm4, %%mm3\n"
+ "movq %%mm2, (%3)\n"
+ "movq %%mm3, 8(%3)\n"
+
+ /* next */
+ "addl $8, %0\n"
+ "addl $8, %1\n"
+ "addl $8, %2\n"
+ "addl $16, %3\n"
+
+ "decl %4\n"
+ "jnz 0b\n"
+ "1:\n"
+
+ : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count)
+ :
+ : "cc", "memory" /* "memory": the asm reads the source rows and writes dst through the pointer operands */
+ );
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 8 bits.
+ * This is a very fast MMX implementation.
+ * The implementation uses a combination of cmp/and/not operations to
+ * completely remove the need for conditional jumps. This trick gives the
+ * major speed improvement.
+ * Also, using the 8 byte MMX registers, more than one pixel is computed
+ * at the same time.
+ * Before calling this function you must ensure that the current CPU supports
+ * the MMX instruction set. After calling it you must be sure to call the EMMS
+ * instruction before any floating-point operation.
+ * NOTE(review): no border handling is done here; the MMX helper loads 8 bytes
+ * before and after each group of src1, so both sides must be readable memory.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows. Values below
+ * 16 or not a multiple of 8 are handled by scale2x_8_def() instead.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_8_mmx(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+ if (count % 8 != 0 || count < 16) {
+ scale2x_8_def(dst0, dst1, src0, src1, src2, count); /* C fallback for row lengths the MMX loop cannot handle */
+ } else {
+ assert(count >= 16);
+ assert(count % 8 == 0);
+
+ scale2x_8_mmx_single(dst0, src0, src1, src2, count); /* first destination row */
+ scale2x_8_mmx_single(dst1, src2, src1, src0, count); /* second destination row: previous and next row swapped */
+ }
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 16 bits.
+ * This function operates like scale2x_8_mmx() but for 16 bit pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows. Values below
+ * 8 or not a multiple of 4 are handled by scale2x_16_def() instead.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_16_mmx(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count)
+{
+ if (count % 4 != 0 || count < 8) {
+ scale2x_16_def(dst0, dst1, src0, src1, src2, count); /* C fallback for row lengths the MMX loop cannot handle */
+ } else {
+ assert(count >= 8);
+ assert(count % 4 == 0);
+
+ scale2x_16_mmx_single(dst0, src0, src1, src2, count); /* first destination row */
+ scale2x_16_mmx_single(dst1, src2, src1, src0, count); /* second destination row: previous and next row swapped */
+ }
+}
+
+/**
+ * Scale by a factor of 2 a row of pixels of 32 bits.
+ * This function operates like scale2x_8_mmx() but for 32 bit pixels.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows. Values below
+ * 4 or not a multiple of 2 are handled by scale2x_32_def() instead.
+ * \param dst0 First destination row, double length in pixels.
+ * \param dst1 Second destination row, double length in pixels.
+ */
+void scale2x_32_mmx(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count)
+{
+ if (count % 2 != 0 || count < 4) {
+ scale2x_32_def(dst0, dst1, src0, src1, src2, count); /* C fallback for row lengths the MMX loop cannot handle */
+ } else {
+ assert(count >= 4);
+ assert(count % 2 == 0);
+
+ scale2x_32_mmx_single(dst0, src0, src1, src2, count); /* first destination row */
+ scale2x_32_mmx_single(dst1, src2, src1, src0, count); /* second destination row: previous and next row swapped */
+ }
+}
+
+#endif
+
diff --git a/graphics/scaler/scale2x.h b/graphics/scaler/scale2x.h
new file mode 100644
index 0000000000..2101790905
--- /dev/null
+++ b/graphics/scaler/scale2x.h
@@ -0,0 +1,56 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __SCALE2X_H
+#define __SCALE2X_H
+
+#if defined(_MSC_VER)
+#define __restrict__
+#endif
+
+typedef unsigned char scale2x_uint8;
+typedef unsigned short scale2x_uint16;
+typedef unsigned scale2x_uint32;
+
+void scale2x_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count);
+void scale2x_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
+void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count);
+
+#if defined(__GNUC__) && defined(__i386__)
+
+void scale2x_8_mmx(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count);
+void scale2x_16_mmx(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
+void scale2x_32_mmx(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count);
+
+/**
+ * End the use of the MMX instructions by executing EMMS.
+ * Call this after the scale2x_*_mmx() functions and before any floating-point operation.
+ */
+static inline void scale2x_mmx_emms(void)
+{
+ __asm__ __volatile__ (
+ "emms" /* marks the x87 registers, which MMX aliases, as empty again */
+ );
+}
+
+#endif
+
+#endif
+
diff --git a/graphics/scaler/scale3x.cpp b/graphics/scaler/scale3x.cpp
new file mode 100644
index 0000000000..22366d7cdb
--- /dev/null
+++ b/graphics/scaler/scale3x.cpp
@@ -0,0 +1,238 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains a C implementation of the Scale3x effect.
+ *
+ * You can find a high-level description of the effect at:
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "common/scaler/intern.h"
+
+#include "scale3x.h"
+
+#include <assert.h>
+
+/***************************************************************************/
+/* Scale3x C implementation */
+
+static inline void scale3x_8_def_border(scale3x_uint8* __restrict__ dst, const scale3x_uint8* __restrict__ src0, const scale3x_uint8* __restrict__ src1, const scale3x_uint8* __restrict__ src2, unsigned count)
+{
+ /* Writes 3 output pixels per source pixel for the output row on the src0 side of src1; every pixel, including the first and last, may read offsets -1 and +1, so each row needs one readable pixel on either side. */
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) { /* the two vertical neighbours differ and the two horizontal neighbours differ */
+ dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0]; /* left corner: left neighbour if it equals the src0 neighbour */
+ dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0]; /* middle: src0 neighbour if it equals one side neighbour and the centre differs from the src0 pixel diagonally opposite that side */
+ dst[2] = src1[1] == src0[0] ? src1[1] : src1[0]; /* right corner: right neighbour if it equals the src0 neighbour */
+ } else { /* otherwise replicate the centre pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3; /* three output pixels per source pixel */
+ --count;
+ }
+}
+
+static inline void scale3x_8_def_center(scale3x_uint8* __restrict__ dst, const scale3x_uint8* __restrict__ src0, const scale3x_uint8* __restrict__ src1, const scale3x_uint8* __restrict__ src2, unsigned count)
+{
+ /* Writes 3 output pixels per source pixel for the middle output row; every pixel, including the first and last, may read offsets -1 and +1 of all three rows, so each row needs one readable pixel on either side. */
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) { /* the two vertical neighbours differ and the two horizontal neighbours differ */
+ dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0]; /* left: left neighbour if it equals one vertical neighbour and the centre differs from the left diagonal on the other row */
+ dst[1] = src1[0]; /* the middle pixel always keeps the centre colour */
+ dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0]; /* right: mirror image of the left rule */
+ } else { /* otherwise replicate the centre pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3; /* three output pixels per source pixel */
+ --count;
+ }
+}
+
+static inline void scale3x_16_def_border(scale3x_uint16* __restrict__ dst, const scale3x_uint16* __restrict__ src0, const scale3x_uint16* __restrict__ src1, const scale3x_uint16* __restrict__ src2, unsigned count)
+{
+ /* 16-bit pixels: writes 3 output pixels per source pixel for the output row on the src0 side of src1; every pixel, including the first and last, may read offsets -1 and +1, so each row needs one readable pixel on either side. */
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) { /* the two vertical neighbours differ and the two horizontal neighbours differ */
+ dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0]; /* left corner: left neighbour if it equals the src0 neighbour */
+ dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0]; /* middle: src0 neighbour if it equals one side neighbour and the centre differs from the src0 pixel diagonally opposite that side */
+ dst[2] = src1[1] == src0[0] ? src1[1] : src1[0]; /* right corner: right neighbour if it equals the src0 neighbour */
+ } else { /* otherwise replicate the centre pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3; /* three output pixels per source pixel */
+ --count;
+ }
+}
+
+static inline void scale3x_16_def_center(scale3x_uint16* __restrict__ dst, const scale3x_uint16* __restrict__ src0, const scale3x_uint16* __restrict__ src1, const scale3x_uint16* __restrict__ src2, unsigned count)
+{
+ /* 16-bit pixels: writes 3 output pixels per source pixel for the middle output row; every pixel, including the first and last, may read offsets -1 and +1 of all three rows, so each row needs one readable pixel on either side. */
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) { /* the two vertical neighbours differ and the two horizontal neighbours differ */
+ dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0]; /* left: left neighbour if it equals one vertical neighbour and the centre differs from the left diagonal on the other row */
+ dst[1] = src1[0]; /* the middle pixel always keeps the centre colour */
+ dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0]; /* right: mirror image of the left rule */
+ } else { /* otherwise replicate the centre pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3; /* three output pixels per source pixel */
+ --count;
+ }
+}
+
+static inline void scale3x_32_def_border(scale3x_uint32* __restrict__ dst, const scale3x_uint32* __restrict__ src0, const scale3x_uint32* __restrict__ src1, const scale3x_uint32* __restrict__ src2, unsigned count)
+{
+ /* 32-bit pixels: writes 3 output pixels per source pixel for the output row on the src0 side of src1; every pixel, including the first and last, may read offsets -1 and +1, so each row needs one readable pixel on either side. */
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) { /* the two vertical neighbours differ and the two horizontal neighbours differ */
+ dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0]; /* left corner: left neighbour if it equals the src0 neighbour */
+ dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0]; /* middle: src0 neighbour if it equals one side neighbour and the centre differs from the src0 pixel diagonally opposite that side */
+ dst[2] = src1[1] == src0[0] ? src1[1] : src1[0]; /* right corner: right neighbour if it equals the src0 neighbour */
+ } else { /* otherwise replicate the centre pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3; /* three output pixels per source pixel */
+ --count;
+ }
+}
+
+static inline void scale3x_32_def_center(scale3x_uint32* __restrict__ dst, const scale3x_uint32* __restrict__ src0, const scale3x_uint32* __restrict__ src1, const scale3x_uint32* __restrict__ src2, unsigned count)
+{
+ /* 32-bit pixels: writes 3 output pixels per source pixel for the middle output row; every pixel, including the first and last, may read offsets -1 and +1 of all three rows, so each row needs one readable pixel on either side. */
+ while (count) {
+ if (src0[0] != src2[0] && src1[-1] != src1[1]) { /* the two vertical neighbours differ and the two horizontal neighbours differ */
+ dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0]; /* left: left neighbour if it equals one vertical neighbour and the centre differs from the left diagonal on the other row */
+ dst[1] = src1[0]; /* the middle pixel always keeps the centre colour */
+ dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0]; /* right: mirror image of the left rule */
+ } else { /* otherwise replicate the centre pixel */
+ dst[0] = src1[0];
+ dst[1] = src1[0];
+ dst[2] = src1[0];
+ }
+
+ ++src0;
+ ++src1;
+ ++src2;
+ dst += 3; /* three output pixels per source pixel */
+ --count;
+ }
+}
+
+/**
+ * Scale one row of 8-bit pixels by a factor of 3, producing three output rows.
+ * The function is implemented in C.
+ * The helpers may read src0, src1 and src2 at offsets -1 and count (for the
+ * first and last pixel), so one pixel on each side of every row must be readable.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * NOTE(review): previously documented as "at least 2"; nothing here checks it.
+ * \param dst0 First destination row, 3 * count pixels are written.
+ * \param dst1 Second destination row, 3 * count pixels are written.
+ * \param dst2 Third destination row, 3 * count pixels are written.
+ */
+void scale3x_8_def(scale3x_uint8* dst0, scale3x_uint8* dst1, scale3x_uint8* dst2, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count)
+{
+ scale3x_8_def_border(dst0, src0, src1, src2, count); /* first output row */
+ scale3x_8_def_center(dst1, src0, src1, src2, count); /* middle output row */
+ scale3x_8_def_border(dst2, src2, src1, src0, count); /* third output row: same border rule with previous/next rows swapped */
+}
+
+/**
+ * Scale one row of 16-bit pixels by a factor of 3, producing three output rows.
+ * Same structure as scale3x_8_def(): border, center, then border with src0/src2 swapped.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * The helpers may read offsets -1 and count of each source row.
+ * \param dst0 First destination row, 3 * count pixels are written.
+ * \param dst1 Second destination row, 3 * count pixels are written.
+ * \param dst2 Third destination row, 3 * count pixels are written.
+ */
+void scale3x_16_def(scale3x_uint16* dst0, scale3x_uint16* dst1, scale3x_uint16* dst2, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count)
+{
+ scale3x_16_def_border(dst0, src0, src1, src2, count); /* first output row */
+ scale3x_16_def_center(dst1, src0, src1, src2, count); /* middle output row */
+ scale3x_16_def_border(dst2, src2, src1, src0, count); /* third output row: same border rule with previous/next rows swapped */
+}
+
+/**
+ * Scale one row of 32-bit pixels by a factor of 3, producing three output rows.
+ * Same structure as scale3x_8_def(): border, center, then border with src0/src2 swapped.
+ * \param src0 Pointer at the first pixel of the previous row.
+ * \param src1 Pointer at the first pixel of the current row.
+ * \param src2 Pointer at the first pixel of the next row.
+ * \param count Length in pixels of the src0, src1 and src2 rows.
+ * The helpers may read offsets -1 and count of each source row.
+ * \param dst0 First destination row, 3 * count pixels are written.
+ * \param dst1 Second destination row, 3 * count pixels are written.
+ * \param dst2 Third destination row, 3 * count pixels are written.
+ */
+void scale3x_32_def(scale3x_uint32* dst0, scale3x_uint32* dst1, scale3x_uint32* dst2, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count)
+{
+ scale3x_32_def_border(dst0, src0, src1, src2, count); /* first output row */
+ scale3x_32_def_center(dst1, src0, src1, src2, count); /* middle output row */
+ scale3x_32_def_border(dst2, src2, src1, src0, count); /* third output row: same border rule with previous/next rows swapped */
+}
+
diff --git a/graphics/scaler/scale3x.h b/graphics/scaler/scale3x.h
new file mode 100644
index 0000000000..3f24d884e1
--- /dev/null
+++ b/graphics/scaler/scale3x.h
@@ -0,0 +1,37 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __SCALE3X_H
+#define __SCALE3X_H
+
+#if defined(_MSC_VER)
+#define __restrict__
+#endif
+
+typedef unsigned char scale3x_uint8; /* pixel type of scale3x_8_def() */
+typedef unsigned short scale3x_uint16; /* pixel type of scale3x_16_def(); NOTE(review): assumes unsigned short is 16 bits wide */
+typedef unsigned scale3x_uint32; /* pixel type of scale3x_32_def(); NOTE(review): assumes unsigned int is 32 bits wide */
+
+void scale3x_8_def(scale3x_uint8* dst0, scale3x_uint8* dst1, scale3x_uint8* dst2, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count);
+void scale3x_16_def(scale3x_uint16* dst0, scale3x_uint16* dst1, scale3x_uint16* dst2, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count);
+void scale3x_32_def(scale3x_uint32* dst0, scale3x_uint32* dst1, scale3x_uint32* dst2, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count);
+
+#endif
+
diff --git a/graphics/scaler/scalebit.cpp b/graphics/scaler/scalebit.cpp
new file mode 100644
index 0000000000..71d9172534
--- /dev/null
+++ b/graphics/scaler/scalebit.cpp
@@ -0,0 +1,357 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2003 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains an example implementation of the Scale effect
+ * applied to a generic bitmap.
+ *
+ * You can find a high-level description of the effect at:
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#if defined(HAVE_CONFIG_H)
+#include <config.h>
+#endif
+
+#include "common/scaler/intern.h"
+
+#include "scale2x.h"
+#include "scale3x.h"
+
+#if defined(HAVE_ALLOCA_H)
+#include <alloca.h>
+#endif
+
+#include <assert.h>
+#include <stdlib.h>
+
+#define DST(bits, num) (scale2x_uint ## bits *)dst ## num /* casts the local dst<num> pointer to a scale2x_uint<bits> pixel pointer */
+#define SRC(bits, num) (const scale2x_uint ## bits *)src ## num /* casts the local src<num> pointer to a const scale2x_uint<bits> pixel pointer; the scale3x calls use these casts as well */
+
+/**
+ * Run Scale2x on one row (src1, with neighbour rows src0 and src2) into dst0/dst1; uses the MMX routines on GCC/i386 and the C routines elsewhere. Used internally.
+ */
+static inline void stage_scale2x(void* dst0, void* dst1, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+ switch (pixel) { /* pixel is the pixel size in bytes; values other than 1, 2 and 4 do nothing */
+#if defined(__GNUC__) && defined(__i386__)
+ case 1 : scale2x_8_mmx(DST(8,0), DST(8,1), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
+ case 2 : scale2x_16_mmx(DST(16,0), DST(16,1), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
+ case 4 : scale2x_32_mmx(DST(32,0), DST(32,1), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
+#else
+ case 1 : scale2x_8_def(DST(8,0), DST(8,1), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
+ case 2 : scale2x_16_def(DST(16,0), DST(16,1), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
+ case 4 : scale2x_32_def(DST(32,0), DST(32,1), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
+#endif
+ }
+}
+
+/**
+ * Run Scale3x on one row (src1, with neighbour rows src0 and src2) into dst0/dst1/dst2 using the C routines. Used internally.
+ */
+static inline void stage_scale3x(void* dst0, void* dst1, void* dst2, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+ switch (pixel) { /* pixel is the pixel size in bytes; values other than 1, 2 and 4 do nothing */
+ case 1 : scale3x_8_def(DST(8,0), DST(8,1), DST(8,2), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
+ case 2 : scale3x_16_def(DST(16,0), DST(16,1), DST(16,2), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
+ case 4 : scale3x_32_def(DST(32,0), DST(32,1), DST(32,2), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
+ }
+}
+
+/**
+ * Run two Scale2x passes over four rows that are 2 * pixel_per_row pixels wide, producing four destination rows. Used internally.
+ */
+static inline void stage_scale4x(void* dst0, void* dst1, void* dst2, void* dst3, const void* src0, const void* src1, const void* src2, const void* src3, unsigned pixel, unsigned pixel_per_row)
+{
+ stage_scale2x(dst0, dst1, src0, src1, src2, pixel, 2 * pixel_per_row); /* src1 with neighbours src0/src2 -> dst0, dst1 */
+ stage_scale2x(dst2, dst3, src1, src2, src3, pixel, 2 * pixel_per_row); /* src2 with neighbours src1/src3 -> dst2, dst3 */
+}
+
+#define SCDST(i) (dst+(i)*dst_slice) /* address of row i counted from the local dst pointer */
+#define SCSRC(i) (src+(i)*src_slice) /* address of row i counted from the local src pointer */
+#define SCMID(i) (mid[(i)]) /* i-th entry of the local mid[] array of row pointers */
+
+/**
+ * Apply the Scale2x effect on a bitmap.
+ * Each of the height iterations reads three consecutive source rows starting
+ * at the current source pointer and writes two destination rows.
+ * The destination bitmap must be allocated by the caller before the call;
+ * 2 * height rows are written. The source bitmap isn't modified.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first of the height + 2 source rows that are read. NOTE(review): presumably the row above the first row to scale - confirm against callers.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Number of source rows to scale; asserted to be at least 2.
+ */
+static void scale2x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ unsigned char* dst = (unsigned char*)void_dst;
+ const unsigned char* src = (const unsigned char*)void_src;
+ unsigned count;
+
+ assert(height >= 2);
+
+ count = height;
+
+ while (count) {
+ stage_scale2x(SCDST(0), SCDST(1), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+
+ dst = SCDST(2); /* advance two destination rows */
+ src = SCSRC(1); /* advance one source row */
+
+ --count;
+ }
+
+#if defined(__GNUC__) && defined(__i386__)
+ scale2x_mmx_emms(); /* stage_scale2x() used the MMX routines on this target */
+#endif
+}
+
+/**
+ * Apply the Scale3x effect on a bitmap.
+ * Each of the height iterations reads three consecutive source rows starting
+ * at the current source pointer and writes three destination rows.
+ * The destination bitmap must be allocated by the caller before the call;
+ * 3 * height rows are written. The source bitmap isn't modified.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first of the height + 2 source rows that are read. NOTE(review): presumably the row above the first row to scale - confirm against callers.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Number of source rows to scale; asserted to be at least 2.
+ */
+static void scale3x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ unsigned char* dst = (unsigned char*)void_dst;
+ const unsigned char* src = (const unsigned char*)void_src;
+ unsigned count;
+
+ assert(height >= 2);
+
+ count = height;
+
+ while (count) {
+ stage_scale3x(SCDST(0), SCDST(1), SCDST(2), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+
+ dst = SCDST(3); /* advance three destination rows */
+ src = SCSRC(1); /* advance one source row */
+
+ --count;
+ }
+}
+
+/**
+ * Apply the Scale4x effect on a bitmap.
+ * Each of the height iterations scales source rows 2..4 (relative to the
+ * current source pointer) into two intermediate rows, then scales four
+ * intermediate rows into four destination rows; 4 * height rows are written.
+ * The source bitmap isn't modified.
+ * \note This function requires also a small buffer bitmap used internally to store
+ * intermediate results. This bitmap must have at least a horizontal size in bytes of 2*width*pixel,
+ * and a vertical size of 6 rows. The memory of this buffer must not be allocated
+ * in video memory because it's also read and not only written. Generally
+ * a heap (malloc) or a stack (alloca) buffer is the best choice.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_mid Pointer at the first pixel of the buffer bitmap.
+ * \param mid_slice Size in bytes of a buffer bitmap row.
+ * \param void_src Pointer at the first source row; rows up to index height + 3 are read.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Number of iterations (source rows to scale); asserted to be at least 4.
+ */
+static void scale4x_buf(void* void_dst, unsigned dst_slice, void* void_mid, unsigned mid_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ unsigned char* dst = (unsigned char*)void_dst;
+ const unsigned char* src = (const unsigned char*)void_src;
+ unsigned count;
+ unsigned char* mid[6];
+
+ assert(height >= 4);
+
+ count = height;
+
+ /* point mid[0..5] at six consecutive rows of the intermediate buffer */
+ mid[0] = (unsigned char*)void_mid;
+ mid[1] = mid[0] + mid_slice;
+ mid[2] = mid[1] + mid_slice;
+ mid[3] = mid[2] + mid_slice;
+ mid[4] = mid[3] + mid_slice;
+ mid[5] = mid[4] + mid_slice;
+
+ while (count) {
+ unsigned char* tmp;
+
+ stage_scale2x(SCMID(4), SCMID(5), SCSRC(2), SCSRC(3), SCSRC(4), pixel, width); /* fill the two newest intermediate rows */
+ stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(1), SCMID(2), SCMID(3), SCMID(4), pixel, width); /* NOTE(review): in the first two iterations this reads intermediate rows that nothing in this function has written yet - confirm this is intended */
+
+ dst = SCDST(4); /* advance four destination rows */
+ src = SCSRC(1); /* advance one source row */
+
+ tmp = SCMID(0); /* rotate the row pointers by 2 positions: old rows 2..5 become rows 0..3, old rows 0..1 become rows 4..5 */
+ SCMID(0) = SCMID(2);
+ SCMID(2) = SCMID(4);
+ SCMID(4) = tmp;
+ tmp = SCMID(1);
+ SCMID(1) = SCMID(3);
+ SCMID(3) = SCMID(5);
+ SCMID(5) = tmp;
+
+ --count;
+ }
+
+#if defined(__GNUC__) && defined(__i386__)
+ scale2x_mmx_emms(); /* stage_scale2x() used the MMX routines on this target */
+#endif
+}
+
+/**
+ * Apply the Scale4x effect on a bitmap.
+ * Allocates the six-row intermediate buffer and forwards to scale4x_buf().
+ * The buffer comes from alloca() when HAVE_ALLOCA is defined, otherwise from
+ * malloc(); in the malloc() case it is freed before returning.
+ * If malloc() fails the function returns without writing to the destination.
+ * \note The destination bitmap must be allocated by the caller; see
+ * scale4x_buf() for which source and destination rows are accessed.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+static void scale4x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ unsigned mid_slice;
+ void* mid;
+
+ mid_slice = 2 * pixel * width; /* bytes needed for one intermediate row (twice the source width) */
+
+ mid_slice = (mid_slice + 0x7) & ~0x7; /* round up to a multiple of 8 bytes */
+
+#if defined(HAVE_ALLOCA)
+ mid = alloca(6 * mid_slice); /* allocate space for 6 row buffers on the stack */
+
+ assert(mid != 0); /* alloca should never fail */
+#else
+ mid = malloc(6 * mid_slice); /* allocate space for 6 row buffers on the heap */
+
+ if (!mid)
+ return; /* allocation failed: nothing is scaled and the caller is not told */
+#endif
+
+ scale4x_buf(void_dst, dst_slice, mid, mid_slice, void_src, src_slice, pixel, width, height);
+
+#if !defined(HAVE_ALLOCA)
+ free(mid);
+#endif
+}
+
+/**
+ * Check whether the scale implementation can be applied with the given arguments.
+ * \param scale Scale factor. 2, 3 or 4.
+ * \param pixel Bytes per pixel of the source and destination bitmap. 1, 2 or 4.
+ * \param width Horizontal size in pixels of the source bitmap. At least 2; on GCC/i386, factors 2 and 4 need at least 16 / pixel and a multiple of 8 / pixel.
+ * \param height Vertical size in pixels of the source bitmap. At least 2 for factors 2 and 3, at least 4 for factor 4.
+ * \return
+ * - -1 on precondition violated.
+ * - 0 on success.
+ */
+int scale_precondition(unsigned scale, unsigned pixel, unsigned width, unsigned height)
+{
+ if (scale != 2 && scale != 3 && scale != 4)
+ return -1;
+
+ if (pixel != 1 && pixel != 2 && pixel != 4)
+ return -1;
+
+ switch (scale) { /* minimum height per factor */
+ case 2 :
+ case 3 :
+ if (height < 2)
+ return -1;
+ break;
+ case 4 :
+ if (height < 4)
+ return -1;
+ break;
+ }
+
+#if defined(__GNUC__) && defined(__i386__)
+ switch (scale) { /* width limits on the target where factors 2 and 4 use the MMX routines */
+ case 2 :
+ case 4 :
+ if (width < (16 / pixel)) /* a row must hold at least 16 bytes */
+ return -1;
+ if (width % (8 / pixel) != 0) /* and its byte length must be a multiple of 8 */
+ return -1;
+ break;
+ case 3 :
+ if (width < 2)
+ return -1;
+ break;
+ }
+#else
+ if (width < 2)
+ return -1;
+#endif
+
+ return 0;
+}
+
+/**
+ * Apply the Scale effect on a bitmap.
+ * Dispatches to scale2x(), scale3x() or scale4x(); any other factor does nothing. The arguments are not validated here.
+ * \param scale Scale factor. 2, 3 or 4.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+void scale(unsigned scale, void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+ switch (scale) { /* no default case: unsupported factors are silently ignored */
+ case 2 :
+ scale2x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+ break;
+ case 3 :
+ scale3x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+ break;
+ case 4 :
+ scale4x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+ break;
+ }
+}
+
diff --git a/graphics/scaler/scalebit.h b/graphics/scaler/scalebit.h
new file mode 100644
index 0000000000..dd46883f97
--- /dev/null
+++ b/graphics/scaler/scalebit.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2003 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains an example implementation of the Scale effect
+ * applied to a generic bitmap.
+ *
+ * You can find a high-level description of the effect at:
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#ifndef __SCALEBIT_H
+#define __SCALEBIT_H
+
+int scale_precondition(unsigned scale, unsigned pixel, unsigned width, unsigned height);
+void scale(unsigned scale, void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height);
+
+#endif
+
diff --git a/graphics/scaler/thumbnail.cpp b/graphics/scaler/thumbnail.cpp
new file mode 100644
index 0000000000..25e00363f1
--- /dev/null
+++ b/graphics/scaler/thumbnail.cpp
@@ -0,0 +1,194 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+#include "common/stdafx.h"
+#include "common/scummsys.h"
+#include "common/system.h"
+
+#include "common/scaler/intern.h"
+
+template<int bitFormat> // Combines the 2x2 block of 16-bit pixels whose top-left pixel is at src into one pixel via Q_INTERPOLATE<bitFormat>().
+uint16 quadBlockInterpolate(const uint8* src, uint32 srcPitch) { // srcPitch: byte distance between the two rows of the block
+ uint16 colorx1y1 = *(((const uint16*)src)); // top-left
+ uint16 colorx2y1 = *(((const uint16*)src) + 1); // top-right
+
+ uint16 colorx1y2 = *(((const uint16*)(src + srcPitch))); // bottom-left
+ uint16 colorx2y2 = *(((const uint16*)(src + srcPitch)) + 1); // bottom-right
+
+ return Q_INTERPOLATE<bitFormat>(colorx1y1, colorx2y1, colorx1y2, colorx2y2); // NOTE(review): Q_INTERPOLATE is defined elsewhere; presumably it averages the four colours - confirm
+}
+
+template<int bitFormat> // Downscales a 16-bit image by 2 in each direction: every 2x2 source block becomes one destination pixel.
+void createThumbnail_2(const uint8* src, uint32 srcPitch, uint8* dstPtr, uint32 dstPitch, int width, int height) { // width/height are source dimensions in pixels, pitches are in bytes
+ assert(width % 2 == 0);
+ assert(height % 2 == 0);
+ for (int y = 0; y < height; y += 2) {
+ for (int x = 0; x < width; x += 2, dstPtr += 2) { // two source pixels in, one 2-byte destination pixel out
+ *((uint16*)dstPtr) = quadBlockInterpolate<bitFormat>(src + 2 * x, srcPitch); // 2 * x: byte offset of source pixel x
+ }
+ dstPtr += (dstPitch - 2 * width / 2); // skip the rest of the destination row: width / 2 pixels of 2 bytes were written
+ src += 2 * srcPitch; // advance two source rows
+ }
+}
+
+template<int bitFormat> // Downscales a 16-bit image by 4 in each direction: every 4x4 source block becomes one destination pixel.
+void createThumbnail_4(const uint8* src, uint32 srcPitch, uint8* dstPtr, uint32 dstPitch, int width, int height) { // width/height are source dimensions in pixels, pitches are in bytes
+ assert(width % 4 == 0);
+ assert(height % 4 == 0);
+ for (int y = 0; y < height; y += 4) {
+ for (int x = 0; x < width; x += 4, dstPtr += 2) { // four source pixels in, one 2-byte destination pixel out
+ uint16 upleft = quadBlockInterpolate<bitFormat>(src + 2 * x, srcPitch); // first reduce each 2x2 quadrant of the 4x4 block ...
+ uint16 upright = quadBlockInterpolate<bitFormat>(src + 2 * (x + 2), srcPitch);
+ uint16 downleft = quadBlockInterpolate<bitFormat>(src + srcPitch * 2 + 2 * x, srcPitch);
+ uint16 downright = quadBlockInterpolate<bitFormat>(src + srcPitch * 2 + 2 * (x + 2), srcPitch);
+
+ *((uint16*)dstPtr) = Q_INTERPOLATE<bitFormat>(upleft, upright, downleft, downright); // ... then combine the four quadrant results
+ }
+ dstPtr += (dstPitch - 2 * width / 4); // skip the rest of the destination row: width / 4 pixels of 2 bytes were written
+ src += 4 * srcPitch; // advance four source rows
+ }
+}
+
+#include "common/scaler.h"
+
+void createThumbnail(const uint8* src, uint32 srcPitch, uint8* dstPtr, uint32 dstPitch, int width, int height) { // Downscales a 16-bit image into dstPtr; pitches are in bytes, width/height in source pixels.
+ // only 1/2 (width 320) and 1/4 (width 640) downscale supported; any other width leaves the destination untouched
+ if (width != 320 && width != 640)
+ return;
+
+ int downScaleMode = (width == 320) ? 2 : 4;
+
+ if (downScaleMode == 2) { // a gBitFormat other than 565 or 555 writes nothing
+ if (gBitFormat == 565)
+ createThumbnail_2<565>(src, srcPitch, dstPtr, dstPitch, width, height);
+ else if (gBitFormat == 555)
+ createThumbnail_2<555>(src, srcPitch, dstPtr, dstPitch, width, height);
+ } else if (downScaleMode == 4) { // a gBitFormat other than 565 or 555 writes nothing
+ if (gBitFormat == 565)
+ createThumbnail_4<565>(src, srcPitch, dstPtr, dstPitch, width, height);
+ else if (gBitFormat == 555)
+ createThumbnail_4<555>(src, srcPitch, dstPtr, dstPitch, width, height);
+ }
+}
+
+
+/**
+ * Copies the current screen contents to a new surface, using RGB565 format.
+ * WARNING: surf->free() must be called by the user to avoid leaking.
+ * @param surf the surface to create and store the data in
+ * @return false if g_system->grabRawScreen() fails, true once the surface has been filled
+ */
+static bool grabScreen565(Graphics::Surface *surf) {
+ Graphics::Surface screen;
+ if (!g_system->grabRawScreen(&screen))
+ return false;
+
+ assert(screen.bytesPerPixel == 1 && screen.pixels != 0); // the grabbed screen must be 8-bit and non-empty
+
+ byte palette[256 * 4]; // 4 bytes per entry; only the first three are read below, as r, g and b
+ g_system->grabPalette(&palette[0], 0, 256);
+
+ surf->create(screen.w, screen.h, 2); // 2 bytes per pixel
+
+ for (uint y = 0; y < screen.h; ++y) {
+ for (uint x = 0; x < screen.w; ++x) {
+ byte r, g, b;
+ r = palette[((uint8*)screen.pixels)[y * screen.pitch + x] * 4]; // look the 8-bit pixel up in the palette
+ g = palette[((uint8*)screen.pixels)[y * screen.pitch + x] * 4 + 1];
+ b = palette[((uint8*)screen.pixels)[y * screen.pitch + x] * 4 + 2];
+
+ ((uint16*)surf->pixels)[y * surf->w + x] = (((r >> 3) & 0x1F) << 11) | (((g >> 2) & 0x3F) << 5) | ((b >> 3) & 0x1F); // pack as 5 bits red, 6 bits green, 5 bits blue; NOTE(review): indexes rows by surf->w, i.e. assumes the surface rows are not padded
+ }
+ }
+
+ screen.free();
+ return true;
+}
+
+bool createThumbnailFromScreen(Graphics::Surface* surf) { // Grabs the screen as RGB565, adjusts narrow and 720-wide screens, then downscales into surf; returns false only when the grab fails.
+ assert(surf);
+
+ int screenWidth = g_system->getWidth();
+ int screenHeight = g_system->getHeight();
+
+ Graphics::Surface screen;
+
+ if (!grabScreen565(&screen))
+ return false;
+
+ uint16 width = screenWidth;
+
+ if (screenWidth < 320) {
+ // Special case to handle MM NES (uses a screen width of 256)
+ width = 320;
+
+ // center MM NES screen horizontally in a 320 pixel wide surface
+ Graphics::Surface newscreen;
+ newscreen.create(width, screen.h, screen.bytesPerPixel);
+
+ uint8 *dst = (uint8*)newscreen.getBasePtr((320 - screenWidth) / 2, 0);
+ uint8 *src = (uint8*)screen.getBasePtr(0, 0);
+ uint16 height = screen.h;
+
+ while (height--) {
+ memcpy(dst, src, screen.pitch); // copy one full source row
+ dst += newscreen.pitch;
+ src += screen.pitch;
+ }
+
+ screen.free();
+ screen = newscreen; // screen now refers to the padded copy; it is freed at the end
+ } else if (screenWidth == 720) {
+ // Special case to handle Hercules mode
+ width = 640;
+ screenHeight = 400;
+
+ // cut off menu and so on: copy a 640x240 region starting at (41, 28)
+ Graphics::Surface newscreen;
+ newscreen.create(width, 400, screen.bytesPerPixel);
+
+ uint8 *dst = (uint8*)newscreen.getBasePtr(0, (400 - 240) / 2); // centre the 240 copied rows vertically in the 400 rows
+ uint8 *src = (uint8*)screen.getBasePtr(41, 28);
+
+ for (int y = 0; y < 240; ++y) {
+ memcpy(dst, src, 640 * screen.bytesPerPixel);
+ dst += newscreen.pitch;
+ src += screen.pitch;
+ }
+
+ screen.free();
+ screen = newscreen; // screen now refers to the cropped copy; it is freed at the end
+ }
+
+ uint16 newHeight = !(screenHeight % 240) ? kThumbnailHeight2 : kThumbnailHeight1; // heights that are a multiple of 240 use kThumbnailHeight2, all others kThumbnailHeight1
+
+ int gBitFormatBackUp = gBitFormat;
+ gBitFormat = 565; // the grabbed data is RGB565, so force that format for the downscale and restore it afterwards
+ surf->create(kThumbnailWidth, newHeight, sizeof(uint16));
+ createThumbnail((const uint8*)screen.pixels, width * sizeof(uint16), (uint8*)surf->pixels, surf->pitch, width, screenHeight); // NOTE(review): createThumbnail() writes nothing unless width is 320 or 640, yet true is still returned - confirm other widths cannot occur
+ gBitFormat = gBitFormatBackUp;
+
+ screen.free();
+
+ return true;
+}