From 7335d348ba43c1deaf5e97f222959de3d3e30cbc Mon Sep 17 00:00:00 2001 From: Max Horn Date: Mon, 20 Feb 2006 20:29:02 +0000 Subject: Moved common/scaler* to graphics/ svn-id: r20797 --- graphics/module.mk | 36 +- graphics/scaler.cpp | 322 +++++ graphics/scaler.h | 84 ++ graphics/scaler/2xsai.cpp | 404 ++++++ graphics/scaler/aspect.cpp | 196 +++ graphics/scaler/hq2x.cpp | 175 +++ graphics/scaler/hq2x.h | 1977 +++++++++++++++++++++++++++ graphics/scaler/hq2x_i386.asm | 1842 +++++++++++++++++++++++++ graphics/scaler/hq3x.cpp | 176 +++ graphics/scaler/hq3x.h | 2951 +++++++++++++++++++++++++++++++++++++++++ graphics/scaler/hq3x_i386.asm | 2434 +++++++++++++++++++++++++++++++++ graphics/scaler/intern.h | 171 +++ graphics/scaler/scale2x.cpp | 521 ++++++++ graphics/scaler/scale2x.h | 56 + graphics/scaler/scale3x.cpp | 238 ++++ graphics/scaler/scale3x.h | 37 + graphics/scaler/scalebit.cpp | 357 +++++ graphics/scaler/scalebit.h | 43 + graphics/scaler/thumbnail.cpp | 194 +++ 19 files changed, 12209 insertions(+), 5 deletions(-) create mode 100644 graphics/scaler.cpp create mode 100644 graphics/scaler.h create mode 100644 graphics/scaler/2xsai.cpp create mode 100644 graphics/scaler/aspect.cpp create mode 100644 graphics/scaler/hq2x.cpp create mode 100644 graphics/scaler/hq2x.h create mode 100644 graphics/scaler/hq2x_i386.asm create mode 100644 graphics/scaler/hq3x.cpp create mode 100644 graphics/scaler/hq3x.h create mode 100644 graphics/scaler/hq3x_i386.asm create mode 100644 graphics/scaler/intern.h create mode 100644 graphics/scaler/scale2x.cpp create mode 100644 graphics/scaler/scale2x.h create mode 100644 graphics/scaler/scale3x.cpp create mode 100644 graphics/scaler/scale3x.h create mode 100644 graphics/scaler/scalebit.cpp create mode 100644 graphics/scaler/scalebit.h create mode 100644 graphics/scaler/thumbnail.cpp (limited to 'graphics') diff --git a/graphics/module.mk b/graphics/module.mk index 8c5697d5a1..7ded354cdb 100644 --- a/graphics/module.mk +++ 
b/graphics/module.mk @@ -6,16 +6,42 @@ MODULE_OBJS := \ font.o \ fontman.o \ ilbm.o \ - newfont.o \ + imagedec.o \ + imageman.o \ newfont_big.o \ + newfont.o \ primitives.o \ + scaler.o \ + scaler/thumbnail.o \ scummfont.o \ - surface.o \ - imageman.o \ - imagedec.o + surface.o + +ifndef DISABLE_SCALERS +MODULE_OBJS += \ + scaler/2xsai.o \ + scaler/aspect.o \ + scaler/scale2x.o \ + scaler/scale3x.o \ + scaler/scalebit.o + +ifndef DISABLE_HQ_SCALERS +MODULE_OBJS += \ + scaler/hq2x.o \ + scaler/hq3x.o + +ifdef HAVE_NASM +MODULE_OBJS += \ + scaler/hq2x_i386.o \ + scaler/hq3x_i386.o +endif + +endif + +endif MODULE_DIRS += \ - graphics + graphics \ + graphics/scaler # Include common rules include $(srcdir)/common.rules diff --git a/graphics/scaler.cpp b/graphics/scaler.cpp new file mode 100644 index 0000000000..5364e39a2f --- /dev/null +++ b/graphics/scaler.cpp @@ -0,0 +1,322 @@ +/* ScummVM - Scumm Interpreter + * Copyright (C) 2001 Ludvig Strigeus + * Copyright (C) 2001-2006 The ScummVM project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * $URL$ + * $Id$ + * + */ + +#include "graphics/scaler/intern.h" +#include "graphics/scaler/scalebit.h" +#include "common/util.h" + + +int gBitFormat = 565; + +#ifndef DISABLE_HQ_SCALERS +// RGB-to-YUV lookup table +extern "C" { + +#ifdef USE_NASM +// NOTE: if your compiler uses different mangled names, add another +// condition here + +#ifndef _WIN32 +#define RGBtoYUV _RGBtoYUV +#define LUT16to32 _LUT16to32 +#endif + +#endif + +// FIXME/TODO: The following two tables suck up 512 KB. +// They should at least be allocated on the heap, to reduce the size of the +// binary. +// +// Note: a memory lookup table is *not* necessarily faster than computing +// these things on the fly, because of its size. Both tables together, plus +// the code, plus the input/output GFX data, won't fit in the cache on many +// systems, so main memory has to be accessed, which is about the worst thing +// that can happen to code which tries to be fast... +// +// So we should think about ways to get these smaller / removed. The LUT16to32 +// is only used by the HQX asm right now; maybe somebody can modify the code +// there to work w/o it (and do some benchmarking, too?). To do that, just +// do the conversion on the fly, or even do w/o it (as the C++ code manages to), +// by making different versions of the code based on gBitFormat (or by writing +// bit masks into registers which are computed based on gBitFormat). +// +// RGBtoYUV is also used by the C(++) version of the HQX code. Maybe we can +// use the same technique which is employed by our MPEG code to reduce the +// size of the lookup tables at the cost of some additional computations? That +// might actually result in a speedup, too, if done right (and the code +// might actually be suitable for AltiVec/MMX/SSE speedup). +// +// Of course, the above is largely a conjecture, and the actual speed +// differences are likely to vary a lot between different architectures and +// CPUs. 
+uint RGBtoYUVstorage[65536]; +uint *RGBtoYUV = RGBtoYUVstorage; +uint LUT16to32[65536]; +} +#endif + +static const uint16 dotmatrix_565[16] = { + 0x01E0, 0x0007, 0x3800, 0x0000, + 0x39E7, 0x0000, 0x39E7, 0x0000, + 0x3800, 0x0000, 0x01E0, 0x0007, + 0x39E7, 0x0000, 0x39E7, 0x0000 +}; +static const uint16 dotmatrix_555[16] = { + 0x00E0, 0x0007, 0x1C00, 0x0000, + 0x1CE7, 0x0000, 0x1CE7, 0x0000, + 0x1C00, 0x0000, 0x00E0, 0x0007, + 0x1CE7, 0x0000, 0x1CE7, 0x0000 +}; +static const uint16 *dotmatrix; + +static void InitLUT(uint32 BitFormat); + +void InitScalers(uint32 BitFormat) { + if (BitFormat == 565) { + dotmatrix = dotmatrix_565; + } else if (BitFormat == 555) { + dotmatrix = dotmatrix_555; + } else { + error("Unknown bit format %d", BitFormat); + } + + gBitFormat = BitFormat; + InitLUT(BitFormat); +} + +void InitLUT(uint32 BitFormat) { +#ifndef DISABLE_HQ_SCALERS + int r, g, b; + int Y, u, v; + int gInc, gShift; + + for (int i = 0; i < 65536; i++) { + LUT16to32[i] = ((i & 0xF800) << 8) + ((i & 0x07E0) << 5) + ((i & 0x001F) << 3); + } + + if (BitFormat == 565) { + gInc = 256 >> 6; + gShift = 6 - 3; + } else { + gInc = 256 >> 5; + gShift = 5 - 3; + } + + for (r = 0; r < 256; r += 8) { + for (g = 0; g < 256; g += gInc) { + for (b = 0; b < 256; b += 8) { + Y = (r + g + b) >> 2; + u = 128 + ((r - b) >> 2); + v = 128 + ((-r + 2 * g - b) >> 3); + RGBtoYUV[ (r << (5 + gShift)) + (g << gShift) + (b >> 3) ] = (Y << 16) + (u << 8) + v; + } + } + } +#endif +} + +/** + * Trivial 'scaler' - in fact it doesn't do any scaling but just copies the + * source to the destination. + */ +void Normal1x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, + int width, int height) { + while (height--) { + memcpy(dstPtr, srcPtr, 2 * width); + srcPtr += srcPitch; + dstPtr += dstPitch; + } +} + +#ifndef DISABLE_SCALERS +/** + * Trivial nearest-neighbour 2x scaler. 
+ */ +void Normal2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, + int width, int height) { + uint8 *r; + + assert(((long)dstPtr & 3) == 0); + while (height--) { + r = dstPtr; + for (int i = 0; i < width; ++i, r += 4) { + uint32 color = *(((const uint16 *)srcPtr) + i); + + color |= color << 16; + + *(uint32 *)(r) = color; + *(uint32 *)(r + dstPitch) = color; + } + srcPtr += srcPitch; + dstPtr += dstPitch << 1; + } +} + +/** + * Trivial nearest-neighbour 3x scaler. + */ +void Normal3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, + int width, int height) { + uint8 *r; + const uint32 dstPitch2 = dstPitch * 2; + const uint32 dstPitch3 = dstPitch * 3; + + assert(((long)dstPtr & 1) == 0); + while (height--) { + r = dstPtr; + for (int i = 0; i < width; ++i, r += 6) { + uint16 color = *(((const uint16 *)srcPtr) + i); + + *(uint16 *)(r + 0) = color; + *(uint16 *)(r + 2) = color; + *(uint16 *)(r + 4) = color; + *(uint16 *)(r + 0 + dstPitch) = color; + *(uint16 *)(r + 2 + dstPitch) = color; + *(uint16 *)(r + 4 + dstPitch) = color; + *(uint16 *)(r + 0 + dstPitch2) = color; + *(uint16 *)(r + 2 + dstPitch2) = color; + *(uint16 *)(r + 4 + dstPitch2) = color; + } + srcPtr += srcPitch; + dstPtr += dstPitch3; + } +} + +#define INTERPOLATE INTERPOLATE +#define Q_INTERPOLATE Q_INTERPOLATE + +/** + * Trivial nearest-neighbour 1.5x scaler. 
+ */ +template +void Normal1o5xTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, + int width, int height) { + uint8 *r; + const uint32 dstPitch2 = dstPitch * 2; + const uint32 dstPitch3 = dstPitch * 3; + const uint32 srcPitch2 = srcPitch * 2; + + assert(((long)dstPtr & 1) == 0); + while (height > 0) { + r = dstPtr; + for (int i = 0; i < width; i += 2, r += 6) { + uint16 color0 = *(((const uint16 *)srcPtr) + i); + uint16 color1 = *(((const uint16 *)srcPtr) + i + 1); + uint16 color2 = *(((const uint16 *)(srcPtr + srcPitch)) + i); + uint16 color3 = *(((const uint16 *)(srcPtr + srcPitch)) + i + 1); + + *(uint16 *)(r + 0) = color0; + *(uint16 *)(r + 2) = INTERPOLATE(color0, color1); + *(uint16 *)(r + 4) = color1; + *(uint16 *)(r + 0 + dstPitch) = INTERPOLATE(color0, color2); + *(uint16 *)(r + 2 + dstPitch) = Q_INTERPOLATE(color0, color1, color2, color3); + *(uint16 *)(r + 4 + dstPitch) = INTERPOLATE(color1, color3); + *(uint16 *)(r + 0 + dstPitch2) = color2; + *(uint16 *)(r + 2 + dstPitch2) = INTERPOLATE(color2, color3); + *(uint16 *)(r + 4 + dstPitch2) = color3; + } + srcPtr += srcPitch2; + dstPtr += dstPitch3; + height -= 2; + } +} +MAKE_WRAPPER(Normal1o5x) + +/** + * The Scale2x filter, also known as AdvMame2x. + * See also http://scale2x.sourceforge.net + */ +void AdvMame2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, + int width, int height) { + scale(2, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height); +} + +/** + * The Scale3x filter, also known as AdvMame3x. 
+ * See also http://scale2x.sourceforge.net + */ +void AdvMame3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, + int width, int height) { + scale(3, dstPtr, dstPitch, srcPtr - srcPitch, srcPitch, 2, width, height); +} + +template +void TV2xTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, + int width, int height) { + const uint32 nextlineSrc = srcPitch / sizeof(uint16); + const uint16 *p = (const uint16 *)srcPtr; + + const uint32 nextlineDst = dstPitch / sizeof(uint16); + uint16 *q = (uint16 *)dstPtr; + + while (height--) { + for (int i = 0, j = 0; i < width; ++i, j += 2) { + uint16 p1 = *(p + i); + uint32 pi; + + pi = (((p1 & redblueMask) * 7) >> 3) & redblueMask; + pi |= (((p1 & greenMask) * 7) >> 3) & greenMask; + + *(q + j) = p1; + *(q + j + 1) = p1; + *(q + j + nextlineDst) = (uint16)pi; + *(q + j + nextlineDst + 1) = (uint16)pi; + } + p += nextlineSrc; + q += nextlineDst << 1; + } +} +MAKE_WRAPPER(TV2x) + +static inline uint16 DOT_16(uint16 c, int j, int i) { + return c - ((c >> 2) & *(dotmatrix + ((j & 3) << 2) + (i & 3))); +} + +// FIXME: This scaler doesn't quite work. Either it needs to know where on the +// screen it's drawing, or the dirty rects will have to be adjusted so that +// access to the dotmatrix array are made in a consistent way. (Doing that in +// a way that also works together with aspect-ratio correction is left as an +// exercise for the reader.) 
+ +void DotMatrix(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, + int width, int height) { + const uint32 nextlineSrc = srcPitch / sizeof(uint16); + const uint16 *p = (const uint16 *)srcPtr; + + const uint32 nextlineDst = dstPitch / sizeof(uint16); + uint16 *q = (uint16 *)dstPtr; + + for (int j = 0, jj = 0; j < height; ++j, jj += 2) { + for (int i = 0, ii = 0; i < width; ++i, ii += 2) { + uint16 c = *(p + i); + *(q + ii) = DOT_16(c, jj, ii); + *(q + ii + 1) = DOT_16(c, jj, ii + 1); + *(q + ii + nextlineDst) = DOT_16(c, jj + 1, ii); + *(q + ii + nextlineDst + 1) = DOT_16(c, jj + 1, ii + 1); + } + p += nextlineSrc; + q += nextlineDst << 1; + } +} + +#endif diff --git a/graphics/scaler.h b/graphics/scaler.h new file mode 100644 index 0000000000..9f743216ac --- /dev/null +++ b/graphics/scaler.h @@ -0,0 +1,84 @@ +/* ScummVM - Scumm Interpreter + * Copyright (C) 2002-2006 The ScummVM project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * $URL$ + * $Id$ + */ + +#ifndef COMMON_SCALER_H +#define COMMON_SCALER_H + +#include "common/stdafx.h" +#include "common/scummsys.h" +#include "graphics/surface.h" + +extern void InitScalers(uint32 BitFormat); + +typedef void ScalerProc(const uint8 *srcPtr, uint32 srcPitch, + uint8 *dstPtr, uint32 dstPitch, int width, int height); + +#define DECLARE_SCALER(x) \ + extern void x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, \ + uint32 dstPitch, int width, int height) + +DECLARE_SCALER(_2xSaI); +DECLARE_SCALER(Super2xSaI); +DECLARE_SCALER(SuperEagle); +DECLARE_SCALER(AdvMame2x); +DECLARE_SCALER(AdvMame3x); +DECLARE_SCALER(Normal1x); +DECLARE_SCALER(Normal2x); +DECLARE_SCALER(Normal3x); +DECLARE_SCALER(Normal1o5x); +DECLARE_SCALER(TV2x); +DECLARE_SCALER(DotMatrix); + +#ifndef DISABLE_HQ_SCALERS +DECLARE_SCALER(HQ2x); +DECLARE_SCALER(HQ3x); +#endif + +FORCEINLINE int real2Aspect(int y) { + return y + (y + 1) / 5; +} + +FORCEINLINE int aspect2Real(int y) { + return (y * 5 + 4) / 6; +} + +extern void makeRectStretchable(int &x, int &y, int &w, int &h); + +extern int stretch200To240(uint8 *buf, uint32 pitch, int width, int height, int srcX, int srcY, int origSrcY); + +// creates a 160x100 thumbnail for 320x200 games +// and 160x120 thumbnail for 320x240 and 640x480 games +// only 565 mode +enum { + kThumbnailWidth = 160, + kThumbnailHeight1 = 100, + kThumbnailHeight2 = 120 +}; +extern void createThumbnail(const uint8* src, uint32 srcPitch, uint8* dstPtr, uint32 dstPitch, int width, int height); + +/** + * creates a thumbnail from the current screen (without overlay) + * @param surf a surface (will always have 16 bpp after this for now) + * @return false if an error occurred + */ +extern bool createThumbnailFromScreen(Graphics::Surface* surf); + +#endif diff --git a/graphics/scaler/2xsai.cpp b/graphics/scaler/2xsai.cpp new file mode 100644 index 0000000000..c8e360083f --- /dev/null +++ b/graphics/scaler/2xsai.cpp @@ -0,0 +1,404 @@ +/* ScummVM - Scumm Interpreter 
+ * Copyright (C) 2001 Ludvig Strigeus + * Copyright (C) 2001-2006 The ScummVM project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * $URL$ + * $Id$ + * + */ + +#include "common/scaler/intern.h" + + + +static inline int GetResult(uint32 A, uint32 B, uint32 C, uint32 D) { + const bool ac = (A==C); + const bool bc = (B==C); + const int x1 = ac; + const int y1 = (bc & !ac); + const bool ad = (A==D); + const bool bd = (B==D); + const int x2 = ad; + const int y2 = (bd & !ad); + const int x = x1+x2; + const int y = y1+y2; + static const int rmap[3][3] = { + {0, 0, -1}, + {0, 0, -1}, + {1, 1, 0} + }; + return rmap[y][x]; +} + +#define INTERPOLATE INTERPOLATE +#define Q_INTERPOLATE Q_INTERPOLATE + +template +void Super2xSaITemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + const uint16 *bP; + uint16 *dP; + const uint32 nextlineSrc = srcPitch >> 1; + + while (height--) { + bP = (const uint16 *)srcPtr; + dP = (uint16 *)dstPtr; + + for (int i = 0; i < width; ++i) { + uint32 color4, color5, color6; + uint32 color1, color2, color3; + uint32 colorA0, colorA1, colorA2, colorA3; + uint32 colorB0, colorB1, colorB2, colorB3; + uint32 colorS1, colorS2; + uint32 product1a, product1b, product2a, product2b; + +//--------------------------------------- B1 
B2 +// 4 5 6 S2 +// 1 2 3 S1 +// A1 A2 + + colorB0 = *(bP - nextlineSrc - 1); + colorB1 = *(bP - nextlineSrc); + colorB2 = *(bP - nextlineSrc + 1); + colorB3 = *(bP - nextlineSrc + 2); + + color4 = *(bP - 1); + color5 = *(bP); + color6 = *(bP + 1); + colorS2 = *(bP + 2); + + color1 = *(bP + nextlineSrc - 1); + color2 = *(bP + nextlineSrc); + color3 = *(bP + nextlineSrc + 1); + colorS1 = *(bP + nextlineSrc + 2); + + colorA0 = *(bP + 2 * nextlineSrc - 1); + colorA1 = *(bP + 2 * nextlineSrc); + colorA2 = *(bP + 2 * nextlineSrc + 1); + colorA3 = *(bP + 2 * nextlineSrc + 2); + +//-------------------------------------- + if (color2 == color6 && color5 != color3) { + product2b = product1b = color2; + } else if (color5 == color3 && color2 != color6) { + product2b = product1b = color5; + } else if (color5 == color3 && color2 == color6) { + register int r = 0; + + r += GetResult(color6, color5, color1, colorA1); + r += GetResult(color6, color5, color4, colorB1); + r += GetResult(color6, color5, colorA2, colorS1); + r += GetResult(color6, color5, colorB2, colorS2); + + if (r > 0) + product2b = product1b = color6; + else if (r < 0) + product2b = product1b = color5; + else { + product2b = product1b = INTERPOLATE(color5, color6); + } + } else { + if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0) + product2b = Q_INTERPOLATE(color3, color3, color3, color2); + else if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3) + product2b = Q_INTERPOLATE(color2, color2, color2, color3); + else + product2b = INTERPOLATE(color2, color3); + + if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0) + product1b = Q_INTERPOLATE(color6, color6, color6, color5); + else if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3) + product1b = Q_INTERPOLATE(color6, color5, color5, color5); + else + product1b = INTERPOLATE(color5, color6); + } + + if (color5 == color3 && color2 != 
color6 && color4 == color5 && color5 != colorA2) + product2a = INTERPOLATE(color2, color5); + else if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0) + product2a = INTERPOLATE(color2, color5); + else + product2a = color2; + + if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2) + product1a = INTERPOLATE(color2, color5); + else if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0) + product1a = INTERPOLATE(color2, color5); + else + product1a = color5; + + *(dP + 0) = (uint16) product1a; + *(dP + 1) = (uint16) product1b; + *(dP + dstPitch/2 + 0) = (uint16) product2a; + *(dP + dstPitch/2 + 1) = (uint16) product2b; + + bP += 1; + dP += 2; + } + + srcPtr += srcPitch; + dstPtr += dstPitch * 2; + } +} + +MAKE_WRAPPER(Super2xSaI) + +template +void SuperEagleTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + const uint16 *bP; + uint16 *dP; + const uint32 nextlineSrc = srcPitch >> 1; + + while (height--) { + bP = (const uint16 *)srcPtr; + dP = (uint16 *)dstPtr; + for (int i = 0; i < width; ++i) { + uint32 color4, color5, color6; + uint32 color1, color2, color3; + uint32 colorA1, colorA2, colorB1, colorB2, colorS1, colorS2; + uint32 product1a, product1b, product2a, product2b; + + colorB1 = *(bP - nextlineSrc); + colorB2 = *(bP - nextlineSrc + 1); + + color4 = *(bP - 1); + color5 = *(bP); + color6 = *(bP + 1); + colorS2 = *(bP + 2); + + color1 = *(bP + nextlineSrc - 1); + color2 = *(bP + nextlineSrc); + color3 = *(bP + nextlineSrc + 1); + colorS1 = *(bP + nextlineSrc + 2); + + colorA1 = *(bP + 2 * nextlineSrc); + colorA2 = *(bP + 2 * nextlineSrc + 1); + + // -------------------------------------- + if (color5 != color3) { + if (color2 == color6) { + product1b = product2a = color2; + if ((color1 == color2) || (color6 == colorB2)) { + product1a = INTERPOLATE(color2, color5); + product1a = INTERPOLATE(color2, product1a); + } else { + 
product1a = INTERPOLATE(color5, color6); + } + + if ((color6 == colorS2) || (color2 == colorA1)) { + product2b = INTERPOLATE(color2, color3); + product2b = INTERPOLATE(color2, product2b); + } else { + product2b = INTERPOLATE(color2, color3); + } + } else { + product2b = product1a = INTERPOLATE(color2, color6); + product2b = Q_INTERPOLATE(color3, color3, color3, product2b); + product1a = Q_INTERPOLATE(color5, color5, color5, product1a); + + product2a = product1b = INTERPOLATE(color5, color3); + product2a = Q_INTERPOLATE(color2, color2, color2, product2a); + product1b = Q_INTERPOLATE(color6, color6, color6, product1b); + } + } else { + if (color2 != color6) { + product2b = product1a = color5; + + if ((colorB1 == color5) || (color3 == colorS1)) { + product1b = INTERPOLATE(color5, color6); + product1b = INTERPOLATE(color5, product1b); + } else { + product1b = INTERPOLATE(color5, color6); + } + + if ((color3 == colorA2) || (color4 == color5)) { + product2a = INTERPOLATE(color5, color2); + product2a = INTERPOLATE(color5, product2a); + } else { + product2a = INTERPOLATE(color2, color3); + } + } else { + register int r = 0; + + r += GetResult(color6, color5, color1, colorA1); + r += GetResult(color6, color5, color4, colorB1); + r += GetResult(color6, color5, colorA2, colorS1); + r += GetResult(color6, color5, colorB2, colorS2); + + if (r > 0) { + product1b = product2a = color2; + product1a = product2b = INTERPOLATE(color5, color6); + } else if (r < 0) { + product2b = product1a = color5; + product1b = product2a = INTERPOLATE(color5, color6); + } else { + product2b = product1a = color5; + product1b = product2a = color2; + } + } + } + + *(dP + 0) = (uint16) product1a; + *(dP + 1) = (uint16) product1b; + *(dP + dstPitch/2 + 0) = (uint16) product2a; + *(dP + dstPitch/2 + 1) = (uint16) product2b; + + bP += 1; + dP += 2; + } + + srcPtr += srcPitch; + dstPtr += dstPitch * 2; + } +} + +MAKE_WRAPPER(SuperEagle) + +template +void _2xSaITemplate(const uint8 *srcPtr, uint32 srcPitch, 
uint8 *dstPtr, uint32 dstPitch, int width, int height) { + const uint16 *bP; + uint16 *dP; + const uint32 nextlineSrc = srcPitch >> 1; + + while (height--) { + bP = (const uint16 *)srcPtr; + dP = (uint16 *)dstPtr; + + for (int i = 0; i < width; ++i) { + + register uint32 colorA, colorB; + uint32 colorC, colorD, + colorE, colorF, colorG, colorH, colorI, colorJ, colorK, colorL, colorM, colorN, colorO, colorP; + uint32 product, product1, product2; + +//--------------------------------------- +// Map of the pixels: I|E F|J +// G|A B|K +// H|C D|L +// M|N O|P + colorI = *(bP - nextlineSrc - 1); + colorE = *(bP - nextlineSrc); + colorF = *(bP - nextlineSrc + 1); + colorJ = *(bP - nextlineSrc + 2); + + colorG = *(bP - 1); + colorA = *(bP); + colorB = *(bP + 1); + colorK = *(bP + 2); + + colorH = *(bP + nextlineSrc - 1); + colorC = *(bP + nextlineSrc); + colorD = *(bP + nextlineSrc + 1); + colorL = *(bP + nextlineSrc + 2); + + colorM = *(bP + 2 * nextlineSrc - 1); + colorN = *(bP + 2 * nextlineSrc); + colorO = *(bP + 2 * nextlineSrc + 1); + colorP = *(bP + 2 * nextlineSrc + 2); + + if ((colorA == colorD) && (colorB != colorC)) { + if (((colorA == colorE) && (colorB == colorL)) || + ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ))) { + product = colorA; + } else { + product = INTERPOLATE(colorA, colorB); + } + + if (((colorA == colorG) && (colorC == colorO)) || + ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM))) { + product1 = colorA; + } else { + product1 = INTERPOLATE(colorA, colorC); + } + product2 = colorA; + } else if ((colorB == colorC) && (colorA != colorD)) { + if (((colorB == colorF) && (colorA == colorH)) || + ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI))) { + product = colorB; + } else { + product = INTERPOLATE(colorA, colorB); + } + + if (((colorC == colorH) && (colorA == colorF)) || + ((colorC == colorG) && (colorC == colorD) && (colorA != 
colorH) && (colorA == colorI))) { + product1 = colorC; + } else { + product1 = INTERPOLATE(colorA, colorC); + } + product2 = colorB; + } else if ((colorA == colorD) && (colorB == colorC)) { + if (colorA == colorB) { + product = colorA; + product1 = colorA; + product2 = colorA; + } else { + register int r = 0; + + product1 = INTERPOLATE(colorA, colorC); + product = INTERPOLATE(colorA, colorB); + + r += GetResult(colorA, colorB, colorG, colorE); + r -= GetResult(colorB, colorA, colorK, colorF); + r -= GetResult(colorB, colorA, colorH, colorN); + r += GetResult(colorA, colorB, colorL, colorO); + + if (r > 0) + product2 = colorA; + else if (r < 0) + product2 = colorB; + else { + product2 = Q_INTERPOLATE(colorA, colorB, colorC, colorD); + } + } + } else { + product2 = Q_INTERPOLATE(colorA, colorB, colorC, colorD); + + if ((colorA == colorC) && (colorA == colorF) + && (colorB != colorE) && (colorB == colorJ)) { + product = colorA; + } else if ((colorB == colorE) && (colorB == colorD) + && (colorA != colorF) && (colorA == colorI)) { + product = colorB; + } else { + product = INTERPOLATE(colorA, colorB); + } + + if ((colorA == colorB) && (colorA == colorH) + && (colorG != colorC) && (colorC == colorM)) { + product1 = colorA; + } else if ((colorC == colorG) && (colorC == colorD) + && (colorA != colorH) && (colorA == colorI)) { + product1 = colorC; + } else { + product1 = INTERPOLATE(colorA, colorC); + } + } + + *(dP + 0) = (uint16) colorA; + *(dP + 1) = (uint16) product; + *(dP + dstPitch/2 + 0) = (uint16) product1; + *(dP + dstPitch/2 + 1) = (uint16) product2; + + bP += 1; + dP += 2; + } + + srcPtr += srcPitch; + dstPtr += dstPitch * 2; + } +} + +MAKE_WRAPPER(_2xSaI) diff --git a/graphics/scaler/aspect.cpp b/graphics/scaler/aspect.cpp new file mode 100644 index 0000000000..6e55236850 --- /dev/null +++ b/graphics/scaler/aspect.cpp @@ -0,0 +1,196 @@ +/* ScummVM - Scumm Interpreter + * Copyright (C) 2001 Ludvig Strigeus + * Copyright (C) 2001-2006 The ScummVM project + * + * 
This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * $URL$ + * $Id$ + * + */ + +#include "common/scaler/intern.h" +#include "common/scaler.h" + + +#define kVeryFastAndUglyAspectMode 0 // No interpolation at all, but super-fast +#define kFastAndNiceAspectMode 1 // Quite good quality with good speed +#define kSlowAndPerfectAspectMode 2 // Accurate but slow code + +#define ASPECT_MODE kFastAndNiceAspectMode + + +#if ASPECT_MODE == kSlowAndPerfectAspectMode + +template +static inline uint16 interpolate5(uint16 A, uint16 B) { + uint16 r = (uint16)(((A & redblueMask & 0xFF00) * scale + (B & redblueMask & 0xFF00) * (5 - scale)) / 5); + uint16 g = (uint16)(((A & greenMask) * scale + (B & greenMask) * (5 - scale)) / 5); + uint16 b = (uint16)(((A & redblueMask & 0x00FF) * scale + (B & redblueMask & 0x00FF) * (5 - scale)) / 5); + + return (uint16)((r & redblueMask & 0xFF00) | (g & greenMask) | (b & redblueMask & 0x00FF)); +} + + +template +static inline void interpolate5Line(uint16 *dst, const uint16 *srcA, const uint16 *srcB, int width) { + // Accurate but slightly slower code + while (width--) { + *dst++ = interpolate5(*srcA++, *srcB++); + } +} +#endif + +#if ASPECT_MODE == kFastAndNiceAspectMode + +template +static inline uint32 INTERPOLATE_1_1(uint32 A, uint32 B) { + return (((A & highBits) >> 1) + ((B & 
highBits) >> 1) + (A & B & lowBits)); +} + +template +static inline uint32 INTERPOLATE_1_3(uint32 A, uint32 B) { + register uint32 x = ((A & qhighBits) >> 2) + ((B & qhighBits) >> 2) * 3; + register uint32 y = ((A & qlowBits) + (B & qlowBits) * 3) >> 2; + + y &= qlowBits; + return x + y; +} + +template +static inline void interpolate5Line(uint16 *dst, const uint16 *srcA, const uint16 *srcB, int width) { + // For efficiency reasons we blit two pixels at a time, so it is important + // that makeRectStretchable() guarantees that the width is even and that + // the rect starts on a well-aligned address. (Even where unaligned memory + // access is allowed there may be a speed penalty for it.) + + // These asserts are disabled for maximal speed; but I leave them in here + // in case other people want to test if the memory alignment (to an + // address divisible by 4) is really working properly. + //assert(((int)dst & 3) == 0); + //assert(((int)srcA & 3) == 0); + //assert(((int)srcB & 3) == 0); + //assert((width & 1) == 0); + + width /= 2; + const uint32 *sA = (const uint32 *)srcA; + const uint32 *sB = (const uint32 *)srcB; + uint32 *d = (uint32 *)dst; + if (scale == 1) { + while (width--) { + *d++ = INTERPOLATE_1_3(*sA++, *sB++); + } + } else { + while (width--) { + *d++ = INTERPOLATE_1_1(*sA++, *sB++); + } + } +} +#endif + +void makeRectStretchable(int &x, int &y, int &w, int &h) { +#if ASPECT_MODE != kVeryFastAndUglyAspectMode + int m = real2Aspect(y) % 6; + + // Ensure that the rect will start on a line that won't have its + // colours changed by the stretching function. + if (m != 0 && m != 5) { + y -= m; + h += m; + } + + #if ASPECT_MODE == kFastAndNiceAspectMode + // Force x to be even, to ensure aligned memory access (this assumes + // that each line starts at an even memory location, but that should + // be the case on every target anyway). + if (x & 1) { + x--; + w++; + } + + // Finally force the width to be even, since we blit 2 pixels at a time. 
+ // While this means we may sometimes blit one column more than necessary, + // this should actually be faster than checking for an odd width while blitting. + if (w & 1) + w++; + #endif +#endif +} + +/** + * Stretch a 16bpp image vertically by factor 1.2. Used to correct the + * aspect-ratio in games using 320x200 pixel graphics with non-square + * pixels. Applying this method effectively turns that into 320x240, which + * provides the correct aspect-ratio on modern displays. + * + * The image would normally have occupied y coordinates origSrcY through + * origSrcY + height - 1. + * + * However, we have already placed it at srcY - the aspect-corrected y + * coordinate - to allow in-place stretching. + * + * Therefore, the source image now occupies Y coordinates srcY through + * srcY + height - 1, and it should be stretched to Y coordinates srcY + * through real2Aspect(srcY + height - 1). + */ +template +int stretch200To240(uint8 *buf, uint32 pitch, int width, int height, int srcX, int srcY, int origSrcY) { + int maxDstY = real2Aspect(origSrcY + height - 1); + int y; + const uint8 *startSrcPtr = buf + srcX * 2 + (srcY - origSrcY) * pitch; + uint8 *dstPtr = buf + srcX * 2 + maxDstY * pitch; + + for (y = maxDstY; y >= srcY; y--) { + const uint8 *srcPtr = startSrcPtr + aspect2Real(y) * pitch; + +#if ASPECT_MODE == kVeryFastAndUglyAspectMode + if (srcPtr == dstPtr) + break; + memcpy(dstPtr, srcPtr, width * 2); +#else + // Bilinear filter + switch (y % 6) { + case 0: + case 5: + if (srcPtr != dstPtr) + memcpy(dstPtr, srcPtr, width * 2); + break; + case 1: + interpolate5Line((uint16 *)dstPtr, (const uint16 *)(srcPtr - pitch), (const uint16 *)srcPtr, width); + break; + case 2: + interpolate5Line((uint16 *)dstPtr, (const uint16 *)(srcPtr - pitch), (const uint16 *)srcPtr, width); + break; + case 3: + interpolate5Line((uint16 *)dstPtr, (const uint16 *)srcPtr, (const uint16 *)(srcPtr - pitch), width); + break; + case 4: + interpolate5Line((uint16 *)dstPtr, (const uint16 *)srcPtr, (const 
uint16 *)(srcPtr - pitch), width); + break; + } +#endif + dstPtr -= pitch; + } + + return 1 + maxDstY - srcY; +} + +int stretch200To240(uint8 *buf, uint32 pitch, int width, int height, int srcX, int srcY, int origSrcY) { + if (gBitFormat == 565) + return stretch200To240<565>(buf, pitch, width, height, srcX, srcY, origSrcY); + else // gBitFormat == 555 + return stretch200To240<555>(buf, pitch, width, height, srcX, srcY, origSrcY); +} + diff --git a/graphics/scaler/hq2x.cpp b/graphics/scaler/hq2x.cpp new file mode 100644 index 0000000000..cd15e7f97b --- /dev/null +++ b/graphics/scaler/hq2x.cpp @@ -0,0 +1,175 @@ +/* ScummVM - Scumm Interpreter + * Copyright (C) 2001 Ludvig Strigeus + * Copyright (C) 2001-2006 The ScummVM project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * $URL$ + * $Id$ + * + */ + +#include "common/scaler/intern.h" + +#ifdef USE_NASM +// Assembly version of HQ2x + +extern "C" { + +#ifndef _WIN32 +#define hq2x_16 _hq2x_16 +#endif + +void hq2x_16(const byte *, byte *, uint32, uint32, uint32, uint32); + +} + +void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + hq2x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch); +} + +#else + +#ifdef HAS_ALTIVEC + +#ifdef __amigaos4__ +#include +#include +static bool isAltiVecAvailable() { + uint32 vecUnit; + IExec->GetCPUInfo(GCIT_VectorUnit, &vecUnit, TAG_DONE); + if (vecUnit == VECTORTYPE_NONE) + return false; + else + return true; +} +#else + +#include + +static bool isAltiVecAvailable() { + int selectors[2] = { CTL_HW, HW_VECTORUNIT }; + int hasVectorUnit = 0; + size_t length = sizeof(hasVectorUnit); + int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); + if ( 0 == error ) + return hasVectorUnit != 0; + return false; +} +#endif +#endif + +#define PIXEL00_0 *(q) = w5; +#define PIXEL00_10 *(q) = interpolate16_2(w5, w1); +#define PIXEL00_11 *(q) = interpolate16_2(w5, w4); +#define PIXEL00_12 *(q) = interpolate16_2(w5, w2); +#define PIXEL00_20 *(q) = interpolate16_3(w5, w4, w2); +#define PIXEL00_21 *(q) = interpolate16_3(w5, w1, w2); +#define PIXEL00_22 *(q) = interpolate16_3(w5, w1, w4); +#define PIXEL00_60 *(q) = interpolate16_3(w5, w2, w4); +#define PIXEL00_61 *(q) = interpolate16_3(w5, w4, w2); +#define PIXEL00_70 *(q) = interpolate16_3(w5, w4, w2); +#define PIXEL00_90 *(q) = interpolate16_3(w5, w4, w2); +#define PIXEL00_100 *(q) = interpolate16_3(w5, w4, w2); + +#define PIXEL01_0 *(q+1) = w5; +#define PIXEL01_10 *(q+1) = interpolate16_2(w5, w3); +#define PIXEL01_11 *(q+1) = interpolate16_2(w5, w2); +#define PIXEL01_12 *(q+1) = interpolate16_2(w5, w6); +#define PIXEL01_20 *(q+1) = interpolate16_3(w5, w2, w6); +#define PIXEL01_21 *(q+1) = interpolate16_3(w5, w3, w6); +#define PIXEL01_22 *(q+1) = 
interpolate16_3(w5, w3, w2); +#define PIXEL01_60 *(q+1) = interpolate16_3(w5, w6, w2); +#define PIXEL01_61 *(q+1) = interpolate16_3(w5, w2, w6); +#define PIXEL01_70 *(q+1) = interpolate16_3(w5, w2, w6); +#define PIXEL01_90 *(q+1) = interpolate16_3(w5, w2, w6); +#define PIXEL01_100 *(q+1) = interpolate16_3(w5, w2, w6); + +#define PIXEL10_0 *(q+nextlineDst) = w5; +#define PIXEL10_10 *(q+nextlineDst) = interpolate16_2(w5, w7); +#define PIXEL10_11 *(q+nextlineDst) = interpolate16_2(w5, w8); +#define PIXEL10_12 *(q+nextlineDst) = interpolate16_2(w5, w4); +#define PIXEL10_20 *(q+nextlineDst) = interpolate16_3(w5, w8, w4); +#define PIXEL10_21 *(q+nextlineDst) = interpolate16_3(w5, w7, w4); +#define PIXEL10_22 *(q+nextlineDst) = interpolate16_3(w5, w7, w8); +#define PIXEL10_60 *(q+nextlineDst) = interpolate16_3(w5, w4, w8); +#define PIXEL10_61 *(q+nextlineDst) = interpolate16_3(w5, w8, w4); +#define PIXEL10_70 *(q+nextlineDst) = interpolate16_3(w5, w8, w4); +#define PIXEL10_90 *(q+nextlineDst) = interpolate16_3(w5, w8, w4); +#define PIXEL10_100 *(q+nextlineDst) = interpolate16_3(w5, w8, w4); + +#define PIXEL11_0 *(q+1+nextlineDst) = w5; +#define PIXEL11_10 *(q+1+nextlineDst) = interpolate16_2(w5, w9); +#define PIXEL11_11 *(q+1+nextlineDst) = interpolate16_2(w5, w6); +#define PIXEL11_12 *(q+1+nextlineDst) = interpolate16_2(w5, w8); +#define PIXEL11_20 *(q+1+nextlineDst) = interpolate16_3(w5, w6, w8); +#define PIXEL11_21 *(q+1+nextlineDst) = interpolate16_3(w5, w9, w8); +#define PIXEL11_22 *(q+1+nextlineDst) = interpolate16_3(w5, w9, w6); +#define PIXEL11_60 *(q+1+nextlineDst) = interpolate16_3(w5, w8, w6); +#define PIXEL11_61 *(q+1+nextlineDst) = interpolate16_3(w5, w6, w8); +#define PIXEL11_70 *(q+1+nextlineDst) = interpolate16_3(w5, w6, w8); +#define PIXEL11_90 *(q+1+nextlineDst) = interpolate16_3(w5, w6, w8); +#define PIXEL11_100 *(q+1+nextlineDst) = interpolate16_3(w5, w6, w8); + +#define YUV(x) RGBtoYUV[w ## x] + + +#define bitFormat 565 +void HQ2x_565(const uint8 
*srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + #include "common/scaler/hq2x.h" +} +#undef bitFormat + +#define bitFormat 555 +void HQ2x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + #include "common/scaler/hq2x.h" +} +#undef bitFormat + + +#ifdef HAS_ALTIVEC + #define USE_ALTIVEC 1 + + #define bitFormat 565 + void HQ2x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + #include "common/scaler/hq2x.h" + } + #undef bitFormat + + #define bitFormat 555 + void HQ2x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + #include "common/scaler/hq2x.h" + } + #undef bitFormat +#endif + +void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { +#ifdef HAS_ALTIVEC + if (isAltiVecAvailable()) { + if (gBitFormat == 565) + HQ2x_565_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height); + else + HQ2x_555_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height); + return; + } +#endif + + if (gBitFormat == 565) + HQ2x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height); + else + HQ2x_555(srcPtr, srcPitch, dstPtr, dstPitch, width, height); +} + +#endif //Assembly version diff --git a/graphics/scaler/hq2x.h b/graphics/scaler/hq2x.h new file mode 100644 index 0000000000..a59f108048 --- /dev/null +++ b/graphics/scaler/hq2x.h @@ -0,0 +1,1977 @@ +/* ScummVM - Scumm Interpreter + * Copyright (C) 2001 Ludvig Strigeus + * Copyright (C) 2001-2006 The ScummVM project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * $URL$ + * $Id$ + * + */ + +/* + * The HQ2x high quality 2x graphics filter. + * Original author Maxim Stepin (see http://www.hiend3d.com/hq2x.html). + * Adapted for ScummVM to 16 bit output and optimized by Max Horn. + */ + + register int w1, w2, w3, w4, w5, w6, w7, w8, w9; + + const uint32 nextlineSrc = srcPitch / sizeof(uint16); + const uint16 *p = (const uint16 *)srcPtr; + + const uint32 nextlineDst = dstPitch / sizeof(uint16); + uint16 *q = (uint16 *)dstPtr; + + // +----+----+----+ + // | | | | + // | w1 | w2 | w3 | + // +----+----+----+ + // | | | | + // | w4 | w5 | w6 | + // +----+----+----+ + // | | | | + // | w7 | w8 | w9 | + // +----+----+----+ + +#ifdef USE_ALTIVEC + // The YUV threshold. + static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706); + + // Bit pattern mask. + static const vector signed int vPatternMask1 = (vector signed int)(0x01, 0x02, 0x04, 0x08); + static const vector signed int vPatternMask2 = (vector signed int)(0x10, 0x20, 0x40, 0x80); + + // Permutation masks for the incremental vector loading (see below for more information). + static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7, 8,9,10,11, 20,21,22,23, 16,17,18,19); + static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27, 8,9,10,11, 12,13,14,15, 28,29,30,31); + + // The YUV vectors. 
+ vector signed char vecYUV5555; + vector signed char vecYUV1234; + vector signed char vecYUV6789; +#endif + + while (height--) { + w1 = *(p - 1 - nextlineSrc); + w4 = *(p - 1); + w7 = *(p - 1 + nextlineSrc); + + w2 = *(p - nextlineSrc); + w5 = *(p); + w8 = *(p + nextlineSrc); + +#ifdef USE_ALTIVEC + // Load initial values of vecYUV1234 / vecYUV6789 + const int arr1234[4] = {0, YUV(1), YUV(2), 0}; + const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)}; + + vecYUV1234 = *(const vector signed char *)arr1234; + vecYUV6789 = *(const vector signed char *)arr6789; +#endif + + int tmpWidth = width; + while (tmpWidth--) { + p++; + + w3 = *(p - nextlineSrc); + w6 = *(p); + w9 = *(p + nextlineSrc); + + int pattern = 0; + +#ifdef USE_ALTIVEC + /* + Consider this peephole into the image buffer: + +----+----+----+----+ + | | | | | + | w00| w01| w02| w03| + +----+----+----+----+ + | | | | | + | w10| w11| w12| w13| + +----+----+----+----+ + | | | | | + | w20| w21| w22| w23| + +----+----+----+----+ + + In the previous loop iteration, w11 was the center point, and our + vectors contain the following data from the previous iteration: + vecYUV5555 = { w11, w11, w11, w11 } + vecYUV1234 = { w00, w01, w02, w10 } + vecYUV6789 = { w12, w20, w21, w22 } + + Now we have the new center point w12, and we would like to have + the following values in our vectors: + vecYUV5555 = { w12, w12, w12, w12 } + vecYUV1234 = { w01, w02, w03, w11 } + vecYUV6789 = { w13, w21, w22, w23 } + + To this end we load a single new vector: + vTmp = { w11, w03, w13, w23 } + + We then can compute all the new vector values using permutations only: + vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] } + vecYUV1234 = { vecYUV1234[1], vecYUV1234[2], vTmp[1], vTmp[0] } + vecYUV6789 = { vTmp[2], vecYUV6789[2], vecYUV6789[3], vTmp[3] } + + Beautiful, isn't it? 
:-) + */ + + // Load the new values into a temporary vector (see above for an explanation) + const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)}; + vector signed char vTmp = *(const vector signed char *)tmpArr; + + // Next update the data vectors + vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0); + vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234); + vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789); + + // Compute the absolute difference between the center point's YUV and the outer points + const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234)); + const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789)); + + // Compare the difference to the threshold (byte-wise) + const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold); + const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold); + + // Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0. + // Then AND in the pattern masks. The idea is that for 0 components, we get 0, + // while for the other components we get exactly the mask value. + const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1); + const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2); + + // Now sum up the components of all vectors. Since our pattern mask values + // are all "orthogonal", this is effectively the same as ORing them all + // together. 
In the end, the rightmost word of vSum contains the 'pattern' + vector signed int vSum = vec_sums(vPattern1, (vector signed int)0); + vSum = vec_sums(vPattern2, vSum); + pattern = ((int *)&vSum)[3]; +#else + const int yuv5 = YUV(5); + if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001; + if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002; + if (w5 != w3 && diffYUV(yuv5, YUV(3))) pattern |= 0x0004; + if (w5 != w4 && diffYUV(yuv5, YUV(4))) pattern |= 0x0008; + if (w5 != w6 && diffYUV(yuv5, YUV(6))) pattern |= 0x0010; + if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020; + if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040; + if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080; +#endif + + switch (pattern) { + case 0: + case 1: + case 4: + case 32: + case 128: + case 5: + case 132: + case 160: + case 33: + case 129: + case 36: + case 133: + case 164: + case 161: + case 37: + case 165: + PIXEL00_20 + PIXEL01_20 + PIXEL10_20 + PIXEL11_20 + break; + case 2: + case 34: + case 130: + case 162: + PIXEL00_22 + PIXEL01_21 + PIXEL10_20 + PIXEL11_20 + break; + case 16: + case 17: + case 48: + case 49: + PIXEL00_20 + PIXEL01_22 + PIXEL10_20 + PIXEL11_21 + break; + case 64: + case 65: + case 68: + case 69: + PIXEL00_20 + PIXEL01_20 + PIXEL10_21 + PIXEL11_22 + break; + case 8: + case 12: + case 136: + case 140: + PIXEL00_21 + PIXEL01_20 + PIXEL10_22 + PIXEL11_20 + break; + case 3: + case 35: + case 131: + case 163: + PIXEL00_11 + PIXEL01_21 + PIXEL10_20 + PIXEL11_20 + break; + case 6: + case 38: + case 134: + case 166: + PIXEL00_22 + PIXEL01_12 + PIXEL10_20 + PIXEL11_20 + break; + case 20: + case 21: + case 52: + case 53: + PIXEL00_20 + PIXEL01_11 + PIXEL10_20 + PIXEL11_21 + break; + case 144: + case 145: + case 176: + case 177: + PIXEL00_20 + PIXEL01_22 + PIXEL10_20 + PIXEL11_12 + break; + case 192: + case 193: + case 196: + case 197: + PIXEL00_20 + PIXEL01_20 + PIXEL10_21 + PIXEL11_11 + break; + case 96: + case 97: + case 100: + case 101: + 
PIXEL00_20 + PIXEL01_20 + PIXEL10_12 + PIXEL11_22 + break; + case 40: + case 44: + case 168: + case 172: + PIXEL00_21 + PIXEL01_20 + PIXEL10_11 + PIXEL11_20 + break; + case 9: + case 13: + case 137: + case 141: + PIXEL00_12 + PIXEL01_20 + PIXEL10_22 + PIXEL11_20 + break; + case 18: + case 50: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_20 + } + PIXEL10_20 + PIXEL11_21 + break; + case 80: + case 81: + PIXEL00_20 + PIXEL01_22 + PIXEL10_21 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_20 + } + break; + case 72: + case 76: + PIXEL00_21 + PIXEL01_20 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_20 + } + PIXEL11_22 + break; + case 10: + case 138: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_20 + } + PIXEL01_21 + PIXEL10_22 + PIXEL11_20 + break; + case 66: + PIXEL00_22 + PIXEL01_21 + PIXEL10_21 + PIXEL11_22 + break; + case 24: + PIXEL00_21 + PIXEL01_22 + PIXEL10_22 + PIXEL11_21 + break; + case 7: + case 39: + case 135: + PIXEL00_11 + PIXEL01_12 + PIXEL10_20 + PIXEL11_20 + break; + case 148: + case 149: + case 180: + PIXEL00_20 + PIXEL01_11 + PIXEL10_20 + PIXEL11_12 + break; + case 224: + case 228: + case 225: + PIXEL00_20 + PIXEL01_20 + PIXEL10_12 + PIXEL11_11 + break; + case 41: + case 169: + case 45: + PIXEL00_12 + PIXEL01_20 + PIXEL10_11 + PIXEL11_20 + break; + case 22: + case 54: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_20 + PIXEL11_21 + break; + case 208: + case 209: + PIXEL00_20 + PIXEL01_22 + PIXEL10_21 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 104: + case 108: + PIXEL00_21 + PIXEL01_20 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_22 + break; + case 11: + case 139: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_21 + PIXEL10_22 + PIXEL11_20 + break; + case 19: + case 51: + if (diffYUV(YUV(2), 
YUV(6))) { + PIXEL00_11 + PIXEL01_10 + } else { + PIXEL00_60 + PIXEL01_90 + } + PIXEL10_20 + PIXEL11_21 + break; + case 146: + case 178: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + PIXEL11_12 + } else { + PIXEL01_90 + PIXEL11_61 + } + PIXEL10_20 + break; + case 84: + case 85: + PIXEL00_20 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL01_11 + PIXEL11_10 + } else { + PIXEL01_60 + PIXEL11_90 + } + PIXEL10_21 + break; + case 112: + case 113: + PIXEL00_20 + PIXEL01_22 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL10_12 + PIXEL11_10 + } else { + PIXEL10_61 + PIXEL11_90 + } + break; + case 200: + case 204: + PIXEL00_21 + PIXEL01_20 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + PIXEL11_11 + } else { + PIXEL10_90 + PIXEL11_60 + } + break; + case 73: + case 77: + if (diffYUV(YUV(8), YUV(4))) { + PIXEL00_12 + PIXEL10_10 + } else { + PIXEL00_61 + PIXEL10_90 + } + PIXEL01_20 + PIXEL11_22 + break; + case 42: + case 170: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + PIXEL10_11 + } else { + PIXEL00_90 + PIXEL10_60 + } + PIXEL01_21 + PIXEL11_20 + break; + case 14: + case 142: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + PIXEL01_12 + } else { + PIXEL00_90 + PIXEL01_61 + } + PIXEL10_22 + PIXEL11_20 + break; + case 67: + PIXEL00_11 + PIXEL01_21 + PIXEL10_21 + PIXEL11_22 + break; + case 70: + PIXEL00_22 + PIXEL01_12 + PIXEL10_21 + PIXEL11_22 + break; + case 28: + PIXEL00_21 + PIXEL01_11 + PIXEL10_22 + PIXEL11_21 + break; + case 152: + PIXEL00_21 + PIXEL01_22 + PIXEL10_22 + PIXEL11_12 + break; + case 194: + PIXEL00_22 + PIXEL01_21 + PIXEL10_21 + PIXEL11_11 + break; + case 98: + PIXEL00_22 + PIXEL01_21 + PIXEL10_12 + PIXEL11_22 + break; + case 56: + PIXEL00_21 + PIXEL01_22 + PIXEL10_11 + PIXEL11_21 + break; + case 25: + PIXEL00_12 + PIXEL01_22 + PIXEL10_22 + PIXEL11_21 + break; + case 26: + case 31: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_22 + PIXEL11_21 + 
break; + case 82: + case 214: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_21 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 88: + case 248: + PIXEL00_21 + PIXEL01_22 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 74: + case 107: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_21 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_22 + break; + case 27: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_10 + PIXEL10_22 + PIXEL11_21 + break; + case 86: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_21 + PIXEL11_10 + break; + case 216: + PIXEL00_21 + PIXEL01_22 + PIXEL10_10 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 106: + PIXEL00_10 + PIXEL01_21 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_22 + break; + case 30: + PIXEL00_10 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_22 + PIXEL11_21 + break; + case 210: + PIXEL00_22 + PIXEL01_10 + PIXEL10_21 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 120: + PIXEL00_21 + PIXEL01_22 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_10 + break; + case 75: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_21 + PIXEL10_10 + PIXEL11_22 + break; + case 29: + PIXEL00_12 + PIXEL01_11 + PIXEL10_22 + PIXEL11_21 + break; + case 198: + PIXEL00_22 + PIXEL01_12 + PIXEL10_21 + PIXEL11_11 + break; + case 184: + PIXEL00_21 + PIXEL01_22 + PIXEL10_11 + PIXEL11_12 + break; + case 99: + PIXEL00_11 + PIXEL01_21 + PIXEL10_12 + PIXEL11_22 + break; + case 57: + PIXEL00_12 + 
PIXEL01_22 + PIXEL10_11 + PIXEL11_21 + break; + case 71: + PIXEL00_11 + PIXEL01_12 + PIXEL10_21 + PIXEL11_22 + break; + case 156: + PIXEL00_21 + PIXEL01_11 + PIXEL10_22 + PIXEL11_12 + break; + case 226: + PIXEL00_22 + PIXEL01_21 + PIXEL10_12 + PIXEL11_11 + break; + case 60: + PIXEL00_21 + PIXEL01_11 + PIXEL10_11 + PIXEL11_21 + break; + case 195: + PIXEL00_11 + PIXEL01_21 + PIXEL10_21 + PIXEL11_11 + break; + case 102: + PIXEL00_22 + PIXEL01_12 + PIXEL10_12 + PIXEL11_22 + break; + case 153: + PIXEL00_12 + PIXEL01_22 + PIXEL10_22 + PIXEL11_12 + break; + case 58: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + PIXEL10_11 + PIXEL11_21 + break; + case 83: + PIXEL00_11 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + PIXEL10_21 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 92: + PIXEL00_21 + PIXEL01_11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 202: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + PIXEL01_21 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + PIXEL11_11 + break; + case 78: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + PIXEL01_12 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + PIXEL11_22 + break; + case 154: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + PIXEL10_22 + PIXEL11_12 + break; + case 114: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + PIXEL10_12 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 89: + PIXEL00_12 + PIXEL01_22 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 
+ } else { + PIXEL10_70 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 90: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 55: + case 23: + if (diffYUV(YUV(2), YUV(6))) { + PIXEL00_11 + PIXEL01_0 + } else { + PIXEL00_60 + PIXEL01_90 + } + PIXEL10_20 + PIXEL11_21 + break; + case 182: + case 150: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + PIXEL11_12 + } else { + PIXEL01_90 + PIXEL11_61 + } + PIXEL10_20 + break; + case 213: + case 212: + PIXEL00_20 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL01_11 + PIXEL11_0 + } else { + PIXEL01_60 + PIXEL11_90 + } + PIXEL10_21 + break; + case 241: + case 240: + PIXEL00_20 + PIXEL01_22 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL10_12 + PIXEL11_0 + } else { + PIXEL10_61 + PIXEL11_90 + } + break; + case 236: + case 232: + PIXEL00_21 + PIXEL01_20 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + PIXEL11_11 + } else { + PIXEL10_90 + PIXEL11_60 + } + break; + case 109: + case 105: + if (diffYUV(YUV(8), YUV(4))) { + PIXEL00_12 + PIXEL10_0 + } else { + PIXEL00_61 + PIXEL10_90 + } + PIXEL01_20 + PIXEL11_22 + break; + case 171: + case 43: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + PIXEL10_11 + } else { + PIXEL00_90 + PIXEL10_60 + } + PIXEL01_21 + PIXEL11_20 + break; + case 143: + case 15: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + PIXEL01_12 + } else { + PIXEL00_90 + PIXEL01_61 + } + PIXEL10_22 + PIXEL11_20 + break; + case 124: + PIXEL00_21 + PIXEL01_11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_10 + break; + case 203: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_21 + PIXEL10_10 + PIXEL11_11 + break; + case 62: + PIXEL00_10 + if (diffYUV(YUV(2), 
YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_11 + PIXEL11_21 + break; + case 211: + PIXEL00_11 + PIXEL01_10 + PIXEL10_21 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 118: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_12 + PIXEL11_10 + break; + case 217: + PIXEL00_12 + PIXEL01_22 + PIXEL10_10 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 110: + PIXEL00_10 + PIXEL01_12 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_22 + break; + case 155: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_10 + PIXEL10_22 + PIXEL11_12 + break; + case 188: + PIXEL00_21 + PIXEL01_11 + PIXEL10_11 + PIXEL11_12 + break; + case 185: + PIXEL00_12 + PIXEL01_22 + PIXEL10_11 + PIXEL11_12 + break; + case 61: + PIXEL00_12 + PIXEL01_11 + PIXEL10_11 + PIXEL11_21 + break; + case 157: + PIXEL00_12 + PIXEL01_11 + PIXEL10_22 + PIXEL11_12 + break; + case 103: + PIXEL00_11 + PIXEL01_12 + PIXEL10_12 + PIXEL11_22 + break; + case 227: + PIXEL00_11 + PIXEL01_21 + PIXEL10_12 + PIXEL11_11 + break; + case 230: + PIXEL00_22 + PIXEL01_12 + PIXEL10_12 + PIXEL11_11 + break; + case 199: + PIXEL00_11 + PIXEL01_12 + PIXEL10_21 + PIXEL11_11 + break; + case 220: + PIXEL00_21 + PIXEL01_11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 158: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_22 + PIXEL11_12 + break; + case 234: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + PIXEL01_21 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_11 + break; + case 242: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + 
PIXEL01_70 + } + PIXEL10_12 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 59: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + PIXEL10_11 + PIXEL11_21 + break; + case 121: + PIXEL00_12 + PIXEL01_22 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 87: + PIXEL00_11 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_21 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 79: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_12 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + PIXEL11_22 + break; + case 122: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 94: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 218: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 91: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + if 
(diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 229: + PIXEL00_20 + PIXEL01_20 + PIXEL10_12 + PIXEL11_11 + break; + case 167: + PIXEL00_11 + PIXEL01_12 + PIXEL10_20 + PIXEL11_20 + break; + case 173: + PIXEL00_12 + PIXEL01_20 + PIXEL10_11 + PIXEL11_20 + break; + case 181: + PIXEL00_20 + PIXEL01_11 + PIXEL10_20 + PIXEL11_12 + break; + case 186: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + PIXEL10_11 + PIXEL11_12 + break; + case 115: + PIXEL00_11 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + PIXEL10_12 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 93: + PIXEL00_12 + PIXEL01_11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 206: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + PIXEL01_12 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + PIXEL11_11 + break; + case 205: + case 201: + PIXEL00_12 + PIXEL01_20 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_10 + } else { + PIXEL10_70 + } + PIXEL11_11 + break; + case 174: + case 46: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_10 + } else { + PIXEL00_70 + } + PIXEL01_12 + PIXEL10_11 + PIXEL11_20 + break; + case 179: + case 147: + PIXEL00_11 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_10 + } else { + PIXEL01_70 + } + PIXEL10_20 + PIXEL11_12 + break; + case 117: + case 116: + PIXEL00_20 + PIXEL01_11 + PIXEL10_12 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_10 + } else { + PIXEL11_70 + } + break; + case 189: + PIXEL00_12 + PIXEL01_11 + PIXEL10_11 + PIXEL11_12 + break; + case 231: + PIXEL00_11 + PIXEL01_12 + PIXEL10_12 + PIXEL11_11 + break; + case 126: + PIXEL00_10 + if (diffYUV(YUV(2), 
YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_10 + break; + case 219: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_10 + PIXEL10_10 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 125: + if (diffYUV(YUV(8), YUV(4))) { + PIXEL00_12 + PIXEL10_0 + } else { + PIXEL00_61 + PIXEL10_90 + } + PIXEL01_11 + PIXEL11_10 + break; + case 221: + PIXEL00_12 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL01_11 + PIXEL11_0 + } else { + PIXEL01_60 + PIXEL11_90 + } + PIXEL10_10 + break; + case 207: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + PIXEL01_12 + } else { + PIXEL00_90 + PIXEL01_61 + } + PIXEL10_10 + PIXEL11_11 + break; + case 238: + PIXEL00_10 + PIXEL01_12 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + PIXEL11_11 + } else { + PIXEL10_90 + PIXEL11_60 + } + break; + case 190: + PIXEL00_10 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + PIXEL11_12 + } else { + PIXEL01_90 + PIXEL11_61 + } + PIXEL10_11 + break; + case 187: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + PIXEL10_11 + } else { + PIXEL00_90 + PIXEL10_60 + } + PIXEL01_10 + PIXEL11_12 + break; + case 243: + PIXEL00_11 + PIXEL01_10 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL10_12 + PIXEL11_0 + } else { + PIXEL10_61 + PIXEL11_90 + } + break; + case 119: + if (diffYUV(YUV(2), YUV(6))) { + PIXEL00_11 + PIXEL01_0 + } else { + PIXEL00_60 + PIXEL01_90 + } + PIXEL10_12 + PIXEL11_10 + break; + case 237: + case 233: + PIXEL00_12 + PIXEL01_20 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_100 + } + PIXEL11_11 + break; + case 175: + case 47: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_100 + } + PIXEL01_12 + PIXEL10_11 + PIXEL11_20 + break; + case 183: + case 151: + PIXEL00_11 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_100 + } + PIXEL10_20 + PIXEL11_12 + break; + case 245: + case 244: + PIXEL00_20 + PIXEL01_11 + 
PIXEL10_12 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_100 + } + break; + case 250: + PIXEL00_10 + PIXEL01_10 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 123: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_10 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_10 + break; + case 95: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_10 + PIXEL11_10 + break; + case 222: + PIXEL00_10 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_10 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 252: + PIXEL00_21 + PIXEL01_11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_100 + } + break; + case 249: + PIXEL00_12 + PIXEL01_22 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_100 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 235: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_21 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_100 + } + PIXEL11_11 + break; + case 111: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_100 + } + PIXEL01_12 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_22 + break; + case 63: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_100 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_11 + PIXEL11_21 + break; + case 159: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_100 + } + 
PIXEL10_22 + PIXEL11_12 + break; + case 215: + PIXEL00_11 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_100 + } + PIXEL10_21 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 246: + PIXEL00_22 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + PIXEL10_12 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_100 + } + break; + case 254: + PIXEL00_10 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_100 + } + break; + case 253: + PIXEL00_12 + PIXEL01_11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_100 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_100 + } + break; + case 251: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + PIXEL01_10 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_100 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_20 + } + break; + case 239: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_100 + } + PIXEL01_12 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_100 + } + PIXEL11_11 + break; + case 127: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_100 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_20 + } + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_20 + } + PIXEL11_10 + break; + case 191: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_100 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_100 + } + PIXEL10_11 + PIXEL11_12 + break; + case 223: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_20 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_100 + } + PIXEL10_10 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + 
PIXEL11_20 + } + break; + case 247: + PIXEL00_11 + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_100 + } + PIXEL10_12 + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_100 + } + break; + case 255: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_0 + } else { + PIXEL00_100 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_0 + } else { + PIXEL01_100 + } + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_0 + } else { + PIXEL10_100 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL11_0 + } else { + PIXEL11_100 + } + break; + } + + w1 = w2; + w4 = w5; + w7 = w8; + + w2 = w3; + w5 = w6; + w8 = w9; + + q += 2; + } + p += nextlineSrc - width; + q += (nextlineDst - width) * 2; + } diff --git a/graphics/scaler/hq2x_i386.asm b/graphics/scaler/hq2x_i386.asm new file mode 100644 index 0000000000..ef49b590f5 --- /dev/null +++ b/graphics/scaler/hq2x_i386.asm @@ -0,0 +1,1842 @@ +;hq2x filter +;16bpp output +;---------------------------------------------------------- +;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com ) +; +;This program is free software; you can redistribute it and/or +;modify it under the terms of the GNU General Public License +;as published by the Free Software Foundation; either +;version 2 of the License, or (at your option) any later +;version. +; +;This program is distributed in the hope that it will be useful, +;but WITHOUT ANY WARRANTY; without even the implied warranty of +;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;GNU General Public License for more details. +; +;You should have received a copy of the GNU General Public License +;along with this program; if not, write to the Free Software +;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ +GLOBAL _hq2x_16 + +EXTERN _LUT16to32 +EXTERN _RGBtoYUV + +SECTION .bss +linesleft resd 1 +xcounter resd 1 +cross resd 1 +nextline resd 1 +prevline resd 1 +w1 resd 1 +w2 resd 1 +w3 resd 1 +w4 resd 1 +w5 resd 1 +w6 resd 1 +w7 resd 1 +w8 resd 1 +w9 resd 1 + +SECTION .data + +reg_blank dd 0,0 +const3 dd 0x00030003,0x00000003 +const5 dd 0x00050005,0x00000005 +const6 dd 0x00060006,0x00000006 +const14 dd 0x000E000E,0x0000000E +threshold dd 0x00300706,0x00000000 +zerolowbits dd 0xF7DEF7DE +moduloSrc dd 0 +moduloDst dd 0 + +SECTION .text + +%macro TestDiff 2 + xor ecx,ecx + mov edx,[%1] + cmp edx,[%2] + je %%fin + mov ecx,_RGBtoYUV + movd mm1,[ecx+edx*4] + movq mm5,mm1 + mov edx,[%2] + movd mm2,[ecx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd ecx,mm1 +%%fin: +%endmacro + +%macro DiffOrNot 4 + TestDiff %1,%2 + test ecx,ecx + jz %%same + %3 + jmp %%fin +%%same: + %4 +%%fin +%endmacro + +%macro DiffOrNot 6 + TestDiff %1,%2 + test ecx,ecx + jz %%same + %3 + %4 + jmp %%fin +%%same: + %5 + %6 +%%fin +%endmacro + +%macro DiffOrNot 8 + TestDiff %1,%2 + test ecx,ecx + jz %%same + %3 + %4 + %5 + jmp %%fin +%%same: + %6 + %7 + %8 +%%fin +%endmacro + +%macro DiffOrNot 10 + TestDiff %1,%2 + test ecx,ecx + jz %%same + %3 + %4 + %5 + %6 + jmp %%fin +%%same: + %7 + %8 + %9 + %10 +%%fin +%endmacro + +%macro Interp1 3 + mov edx,%2 + mov ecx,%3 + cmp edx,ecx + je %%fin + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 +%%fin + mov %1,dx +%endmacro + +%macro Interp2 4 + mov edx,%3 + mov ecx,%4 + cmp edx,ecx + je %%fin1 + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 +%%fin1 + mov edx,%2 + cmp edx,ecx + je %%fin2 + and ecx,[zerolowbits] + and edx,[zerolowbits] + add edx,ecx + shr edx,1 +%%fin2 + mov %1,dx +%endmacro + +%macro Interp5 3 + mov edx,%2 + mov ecx,%3 + cmp edx,ecx + je %%fin + and edx,[zerolowbits] + and 
ecx,[zerolowbits] + add edx,ecx + shr edx,1 +%%fin + mov %1,dx +%endmacro + +%macro Interp6 3 + mov ecx, _LUT16to32 + movd mm1, [ecx+eax*4] + mov edx, %2 + movd mm2, [ecx+edx*4] + mov edx, %3 + movd mm3, [ecx+edx*4] + punpcklbw mm1, [reg_blank] + punpcklbw mm2, [reg_blank] + punpcklbw mm3, [reg_blank] + pmullw mm1, [const5] + psllw mm2, 1 + paddw mm1, mm3 + paddw mm1, mm2 + psrlw mm1, 5 + packuswb mm1, [reg_blank] + movd edx, mm1 + shl dl, 2 + shr edx, 1 + shl dx, 3 + shr edx, 5 + mov %1, dx +%endmacro + +%macro Interp7 3 + mov ecx, _LUT16to32 + movd mm1, [ecx+eax*4] + mov edx, %2 + movd mm2, [ecx+edx*4] + mov edx, %3 + movd mm3, [ecx+edx*4] + punpcklbw mm1, [reg_blank] + punpcklbw mm2, [reg_blank] + punpcklbw mm3, [reg_blank] + pmullw mm1, [const6] + paddw mm2, mm3 + paddw mm1, mm2 + psrlw mm1, 5 + packuswb mm1, [reg_blank] + movd edx, mm1 + shl dl, 2 + shr edx, 1 + shl dx, 3 + shr edx, 5 + mov %1, dx +%endmacro + +%macro Interp9 3 + mov ecx, _LUT16to32 + movd mm1, [ecx+eax*4] + mov edx, %2 + movd mm2, [ecx+edx*4] + mov edx, %3 + movd mm3, [ecx+edx*4] + punpcklbw mm1, [reg_blank] + punpcklbw mm2, [reg_blank] + punpcklbw mm3, [reg_blank] + psllw mm1, 1 + paddw mm2, mm3 + pmullw mm2, [const3] + paddw mm1, mm2 + psrlw mm1, 5 + packuswb mm1, [reg_blank] + movd edx, mm1 + shl dl, 2 + shr edx, 1 + shl dx, 3 + shr edx, 5 + mov %1, dx +%endmacro + +%macro Interp10 3 + mov ecx, _LUT16to32 + movd mm1, [ecx+eax*4] + mov edx, %2 + movd mm2, [ecx+edx*4] + mov edx, %3 + movd mm3, [ecx+edx*4] + punpcklbw mm1, [reg_blank] + punpcklbw mm2, [reg_blank] + punpcklbw mm3, [reg_blank] + pmullw mm1, [const14] + paddw mm2, mm3 + paddw mm1, mm2 + psrlw mm1, 6 + packuswb mm1, [reg_blank] + movd edx, mm1 + shl dl, 2 + shr edx, 1 + shl dx, 3 + shr edx, 5 + mov %1, dx +%endmacro + +%macro PIXEL00_0 0 + mov [edi],ax +%endmacro + +%macro PIXEL00_10 0 + Interp1 [edi],eax,[w1] +%endmacro + +%macro PIXEL00_11 0 + Interp1 [edi],eax,[w4] +%endmacro + +%macro PIXEL00_12 0 + Interp1 [edi],eax,[w2] 
+%endmacro + +%macro PIXEL00_20 0 + Interp2 [edi],eax,[w4],[w2] +%endmacro + +%macro PIXEL00_21 0 + Interp2 [edi],eax,[w1],[w2] +%endmacro + +%macro PIXEL00_22 0 + Interp2 [edi],eax,[w1],[w4] +%endmacro + +%macro PIXEL00_60 0 + Interp6 [edi],[w2],[w4] +%endmacro + +%macro PIXEL00_61 0 + Interp6 [edi],[w4],[w2] +%endmacro + +%macro PIXEL00_70 0 + Interp7 [edi],[w4],[w2] +%endmacro + +%macro PIXEL00_90 0 + Interp9 [edi],[w4],[w2] +%endmacro + +%macro PIXEL00_100 0 + Interp10 [edi],[w4],[w2] +%endmacro + +%macro PIXEL01_0 0 + mov [edi+2],ax +%endmacro + +%macro PIXEL01_10 0 + Interp1 [edi+2],eax,[w3] +%endmacro + +%macro PIXEL01_11 0 + Interp1 [edi+2],eax,[w2] +%endmacro + +%macro PIXEL01_12 0 + Interp1 [edi+2],eax,[w6] +%endmacro + +%macro PIXEL01_20 0 + Interp2 [edi+2],eax,[w2],[w6] +%endmacro + +%macro PIXEL01_21 0 + Interp2 [edi+2],eax,[w3],[w6] +%endmacro + +%macro PIXEL01_22 0 + Interp2 [edi+2],eax,[w3],[w2] +%endmacro + +%macro PIXEL01_60 0 + Interp6 [edi+2],[w6],[w2] +%endmacro + +%macro PIXEL01_61 0 + Interp6 [edi+2],[w2],[w6] +%endmacro + +%macro PIXEL01_70 0 + Interp7 [edi+2],[w2],[w6] +%endmacro + +%macro PIXEL01_90 0 + Interp9 [edi+2],[w2],[w6] +%endmacro + +%macro PIXEL01_100 0 + Interp10 [edi+2],[w2],[w6] +%endmacro + +%macro PIXEL10_0 0 + mov [edi+ebx],ax +%endmacro + +%macro PIXEL10_10 0 + Interp1 [edi+ebx],eax,[w7] +%endmacro + +%macro PIXEL10_11 0 + Interp1 [edi+ebx],eax,[w8] +%endmacro + +%macro PIXEL10_12 0 + Interp1 [edi+ebx],eax,[w4] +%endmacro + +%macro PIXEL10_20 0 + Interp2 [edi+ebx],eax,[w8],[w4] +%endmacro + +%macro PIXEL10_21 0 + Interp2 [edi+ebx],eax,[w7],[w4] +%endmacro + +%macro PIXEL10_22 0 + Interp2 [edi+ebx],eax,[w7],[w8] +%endmacro + +%macro PIXEL10_60 0 + Interp6 [edi+ebx],[w4],[w8] +%endmacro + +%macro PIXEL10_61 0 + Interp6 [edi+ebx],[w8],[w4] +%endmacro + +%macro PIXEL10_70 0 + Interp7 [edi+ebx],[w8],[w4] +%endmacro + +%macro PIXEL10_90 0 + Interp9 [edi+ebx],[w8],[w4] +%endmacro + +%macro PIXEL10_100 0 + Interp10 
[edi+ebx],[w8],[w4] +%endmacro + +%macro PIXEL11_0 0 + mov [edi+ebx+2],ax +%endmacro + +%macro PIXEL11_10 0 + Interp1 [edi+ebx+2],eax,[w9] +%endmacro + +%macro PIXEL11_11 0 + Interp1 [edi+ebx+2],eax,[w6] +%endmacro + +%macro PIXEL11_12 0 + Interp1 [edi+ebx+2],eax,[w8] +%endmacro + +%macro PIXEL11_20 0 + Interp2 [edi+ebx+2],eax,[w6],[w8] +%endmacro + +%macro PIXEL11_21 0 + Interp2 [edi+ebx+2],eax,[w9],[w8] +%endmacro + +%macro PIXEL11_22 0 + Interp2 [edi+ebx+2],eax,[w9],[w6] +%endmacro + +%macro PIXEL11_60 0 + Interp6 [edi+ebx+2],[w8],[w6] +%endmacro + +%macro PIXEL11_61 0 + Interp6 [edi+ebx+2],[w6],[w8] +%endmacro + +%macro PIXEL11_70 0 + Interp7 [edi+ebx+2],[w6],[w8] +%endmacro + +%macro PIXEL11_90 0 + Interp9 [edi+ebx+2],[w6],[w8] +%endmacro + +%macro PIXEL11_100 0 + Interp10 [edi+ebx+2],[w6],[w8] +%endmacro + +inbuffer equ 8 +outbuffer equ 12 +Xres equ 16 +Yres equ 20 +srcPitch equ 24 +dstPitch equ 28 + +_hq2x_16: + push ebp + mov ebp,esp + pushad + + mov esi,[ebp+inbuffer] + mov edi,[ebp+outbuffer] + mov edx,[ebp+Yres] + mov [linesleft],edx + mov ecx,[ebp+Xres] + shl ecx,1 + mov ebx,[ebp+dstPitch] + mov dword[moduloDst],ebx + sub dword[moduloDst],ecx + shl dword[moduloDst],1 + mov ebx,[ebp+srcPitch] + mov dword[nextline],ebx + mov dword[moduloSrc],ebx + sub dword[moduloSrc],ecx + neg ebx + mov dword[prevline],ebx +.loopy + mov ecx,[ebp+Xres] + mov dword[xcounter],ecx +.loopx + mov ebx,[prevline] + movq mm5,[esi+ebx-2] + movq mm6,[esi-2] + mov ebx,[nextline] + movq mm7,[esi+ebx-2] + movd eax,mm5 + movzx edx,ax + mov [w1],edx + shr eax,16 + mov [w2],eax + psrlq mm5,32 + movd eax,mm5 + movzx edx,ax + mov [w3],edx + movd eax,mm6 + movzx edx,ax + mov [w4],edx + shr eax,16 + mov [w5],eax + psrlq mm6,32 + movd eax,mm6 + movzx edx,ax + mov [w6],edx + movd eax,mm7 + movzx edx,ax + mov [w7],edx + shr eax,16 + mov [w8],eax + psrlq mm7,32 + movd eax,mm7 + movzx edx,ax + mov [w9],edx +.flags + mov ebx,_RGBtoYUV + mov eax,[w5] + xor ecx,ecx + movd mm5,[ebx+eax*4] + mov 
dword[cross],0 + + mov edx,[w2] + cmp eax,edx + je .noflag2 + or dword[cross],1 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag2 + or ecx,2 +.noflag2 + mov edx,[w4] + cmp eax,edx + je .noflag4 + or dword[cross],2 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag4 + or ecx,8 +.noflag4 + mov edx,[w6] + cmp eax,edx + je .noflag6 + or dword[cross],4 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag6 + or ecx,16 +.noflag6 + mov edx,[w8] + cmp eax,edx + je .noflag8 + or dword[cross],8 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag8 + or ecx,64 +.noflag8 + test ecx,ecx + jnz .testflag1 + mov ecx,[cross] + mov ebx,[ebp+dstPitch] + jmp [FuncTable2+ecx*4] +.testflag1 + mov edx,[w1] + cmp eax,edx + je .noflag1 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag1 + or ecx,1 +.noflag1 + mov edx,[w3] + cmp eax,edx + je .noflag3 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag3 + or ecx,4 +.noflag3 + mov edx,[w7] + cmp eax,edx + je .noflag7 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag7 + or ecx,32 +.noflag7 + mov edx,[w9] + cmp eax,edx + je .noflag9 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag9 + or ecx,128 +.noflag9 + mov 
ebx,[ebp+dstPitch] + jmp [FuncTable+ecx*4] + +..@flag0 +..@flag1 +..@flag4 +..@flag32 +..@flag128 +..@flag5 +..@flag132 +..@flag160 +..@flag33 +..@flag129 +..@flag36 +..@flag133 +..@flag164 +..@flag161 +..@flag37 +..@flag165 + PIXEL00_20 + PIXEL01_20 + PIXEL10_20 + PIXEL11_20 + jmp .loopx_end +..@flag2 +..@flag34 +..@flag130 +..@flag162 + PIXEL00_22 + PIXEL01_21 + PIXEL10_20 + PIXEL11_20 + jmp .loopx_end +..@flag16 +..@flag17 +..@flag48 +..@flag49 + PIXEL00_20 + PIXEL01_22 + PIXEL10_20 + PIXEL11_21 + jmp .loopx_end +..@flag64 +..@flag65 +..@flag68 +..@flag69 + PIXEL00_20 + PIXEL01_20 + PIXEL10_21 + PIXEL11_22 + jmp .loopx_end +..@flag8 +..@flag12 +..@flag136 +..@flag140 + PIXEL00_21 + PIXEL01_20 + PIXEL10_22 + PIXEL11_20 + jmp .loopx_end +..@flag3 +..@flag35 +..@flag131 +..@flag163 + PIXEL00_11 + PIXEL01_21 + PIXEL10_20 + PIXEL11_20 + jmp .loopx_end +..@flag6 +..@flag38 +..@flag134 +..@flag166 + PIXEL00_22 + PIXEL01_12 + PIXEL10_20 + PIXEL11_20 + jmp .loopx_end +..@flag20 +..@flag21 +..@flag52 +..@flag53 + PIXEL00_20 + PIXEL01_11 + PIXEL10_20 + PIXEL11_21 + jmp .loopx_end +..@flag144 +..@flag145 +..@flag176 +..@flag177 + PIXEL00_20 + PIXEL01_22 + PIXEL10_20 + PIXEL11_12 + jmp .loopx_end +..@flag192 +..@flag193 +..@flag196 +..@flag197 + PIXEL00_20 + PIXEL01_20 + PIXEL10_21 + PIXEL11_11 + jmp .loopx_end +..@flag96 +..@flag97 +..@flag100 +..@flag101 + PIXEL00_20 + PIXEL01_20 + PIXEL10_12 + PIXEL11_22 + jmp .loopx_end +..@flag40 +..@flag44 +..@flag168 +..@flag172 + PIXEL00_21 + PIXEL01_20 + PIXEL10_11 + PIXEL11_20 + jmp .loopx_end +..@flag9 +..@flag13 +..@flag137 +..@flag141 + PIXEL00_12 + PIXEL01_20 + PIXEL10_22 + PIXEL11_20 + jmp .loopx_end +..@flag18 +..@flag50 + PIXEL00_22 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_20 + PIXEL10_20 + PIXEL11_21 + jmp .loopx_end +..@flag80 +..@flag81 + PIXEL00_20 + PIXEL01_22 + PIXEL10_21 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_20 + jmp .loopx_end +..@flag72 +..@flag76 + PIXEL00_21 + PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_20 + 
PIXEL11_22 + jmp .loopx_end +..@flag10 +..@flag138 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_20 + PIXEL01_21 + PIXEL10_22 + PIXEL11_20 + jmp .loopx_end +..@flag66 + PIXEL00_22 + PIXEL01_21 + PIXEL10_21 + PIXEL11_22 + jmp .loopx_end +..@flag24 + PIXEL00_21 + PIXEL01_22 + PIXEL10_22 + PIXEL11_21 + jmp .loopx_end +..@flag7 +..@flag39 +..@flag135 + PIXEL00_11 + PIXEL01_12 + PIXEL10_20 + PIXEL11_20 + jmp .loopx_end +..@flag148 +..@flag149 +..@flag180 + PIXEL00_20 + PIXEL01_11 + PIXEL10_20 + PIXEL11_12 + jmp .loopx_end +..@flag224 +..@flag228 +..@flag225 + PIXEL00_20 + PIXEL01_20 + PIXEL10_12 + PIXEL11_11 + jmp .loopx_end +..@flag41 +..@flag169 +..@flag45 + PIXEL00_12 + PIXEL01_20 + PIXEL10_11 + PIXEL11_20 + jmp .loopx_end +..@flag22 +..@flag54 + PIXEL00_22 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_20 + PIXEL11_21 + jmp .loopx_end +..@flag208 +..@flag209 + PIXEL00_20 + PIXEL01_22 + PIXEL10_21 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag104 +..@flag108 + PIXEL00_21 + PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_22 + jmp .loopx_end +..@flag11 +..@flag139 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_21 + PIXEL10_22 + PIXEL11_20 + jmp .loopx_end +..@flag19 +..@flag51 + DiffOrNot w2,w6,PIXEL00_11,PIXEL01_10,PIXEL00_60,PIXEL01_90 + PIXEL10_20 + PIXEL11_21 + jmp .loopx_end +..@flag146 +..@flag178 + PIXEL00_22 + DiffOrNot w2,w6,PIXEL01_10,PIXEL11_12,PIXEL01_90,PIXEL11_61 + PIXEL10_20 + jmp .loopx_end +..@flag84 +..@flag85 + PIXEL00_20 + DiffOrNot w6,w8,PIXEL01_11,PIXEL11_10,PIXEL01_60,PIXEL11_90 + PIXEL10_21 + jmp .loopx_end +..@flag112 +..@flag113 + PIXEL00_20 + PIXEL01_22 + DiffOrNot w6,w8,PIXEL10_12,PIXEL11_10,PIXEL10_61,PIXEL11_90 + jmp .loopx_end +..@flag200 +..@flag204 + PIXEL00_21 + PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_10,PIXEL11_11,PIXEL10_90,PIXEL11_60 + jmp .loopx_end +..@flag73 +..@flag77 + DiffOrNot w8,w4,PIXEL00_12,PIXEL10_10,PIXEL00_61,PIXEL10_90 + PIXEL01_20 + PIXEL11_22 + jmp .loopx_end +..@flag42 +..@flag170 + DiffOrNot 
w4,w2,PIXEL00_10,PIXEL10_11,PIXEL00_90,PIXEL10_60 + PIXEL01_21 + PIXEL11_20 + jmp .loopx_end +..@flag14 +..@flag142 + DiffOrNot w4,w2,PIXEL00_10,PIXEL01_12,PIXEL00_90,PIXEL01_61 + PIXEL10_22 + PIXEL11_20 + jmp .loopx_end +..@flag67 + PIXEL00_11 + PIXEL01_21 + PIXEL10_21 + PIXEL11_22 + jmp .loopx_end +..@flag70 + PIXEL00_22 + PIXEL01_12 + PIXEL10_21 + PIXEL11_22 + jmp .loopx_end +..@flag28 + PIXEL00_21 + PIXEL01_11 + PIXEL10_22 + PIXEL11_21 + jmp .loopx_end +..@flag152 + PIXEL00_21 + PIXEL01_22 + PIXEL10_22 + PIXEL11_12 + jmp .loopx_end +..@flag194 + PIXEL00_22 + PIXEL01_21 + PIXEL10_21 + PIXEL11_11 + jmp .loopx_end +..@flag98 + PIXEL00_22 + PIXEL01_21 + PIXEL10_12 + PIXEL11_22 + jmp .loopx_end +..@flag56 + PIXEL00_21 + PIXEL01_22 + PIXEL10_11 + PIXEL11_21 + jmp .loopx_end +..@flag25 + PIXEL00_12 + PIXEL01_22 + PIXEL10_22 + PIXEL11_21 + jmp .loopx_end +..@flag26 +..@flag31 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_22 + PIXEL11_21 + jmp .loopx_end +..@flag82 +..@flag214 + PIXEL00_22 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_21 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag88 +..@flag248 + PIXEL00_21 + PIXEL01_22 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag74 +..@flag107 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_21 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_22 + jmp .loopx_end +..@flag27 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_10 + PIXEL10_22 + PIXEL11_21 + jmp .loopx_end +..@flag86 + PIXEL00_22 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_21 + PIXEL11_10 + jmp .loopx_end +..@flag216 + PIXEL00_21 + PIXEL01_22 + PIXEL10_10 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag106 + PIXEL00_10 + PIXEL01_21 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_22 + jmp .loopx_end +..@flag30 + PIXEL00_10 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_22 + PIXEL11_21 + jmp .loopx_end +..@flag210 + PIXEL00_22 + 
PIXEL01_10 + PIXEL10_21 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag120 + PIXEL00_21 + PIXEL01_22 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_10 + jmp .loopx_end +..@flag75 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_21 + PIXEL10_10 + PIXEL11_22 + jmp .loopx_end +..@flag29 + PIXEL00_12 + PIXEL01_11 + PIXEL10_22 + PIXEL11_21 + jmp .loopx_end +..@flag198 + PIXEL00_22 + PIXEL01_12 + PIXEL10_21 + PIXEL11_11 + jmp .loopx_end +..@flag184 + PIXEL00_21 + PIXEL01_22 + PIXEL10_11 + PIXEL11_12 + jmp .loopx_end +..@flag99 + PIXEL00_11 + PIXEL01_21 + PIXEL10_12 + PIXEL11_22 + jmp .loopx_end +..@flag57 + PIXEL00_12 + PIXEL01_22 + PIXEL10_11 + PIXEL11_21 + jmp .loopx_end +..@flag71 + PIXEL00_11 + PIXEL01_12 + PIXEL10_21 + PIXEL11_22 + jmp .loopx_end +..@flag156 + PIXEL00_21 + PIXEL01_11 + PIXEL10_22 + PIXEL11_12 + jmp .loopx_end +..@flag226 + PIXEL00_22 + PIXEL01_21 + PIXEL10_12 + PIXEL11_11 + jmp .loopx_end +..@flag60 + PIXEL00_21 + PIXEL01_11 + PIXEL10_11 + PIXEL11_21 + jmp .loopx_end +..@flag195 + PIXEL00_11 + PIXEL01_21 + PIXEL10_21 + PIXEL11_11 + jmp .loopx_end +..@flag102 + PIXEL00_22 + PIXEL01_12 + PIXEL10_12 + PIXEL11_22 + jmp .loopx_end +..@flag153 + PIXEL00_12 + PIXEL01_22 + PIXEL10_22 + PIXEL11_12 + jmp .loopx_end +..@flag58 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + PIXEL10_11 + PIXEL11_21 + jmp .loopx_end +..@flag83 + PIXEL00_11 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + PIXEL10_21 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag92 + PIXEL00_21 + PIXEL01_11 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag202 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + PIXEL01_21 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + PIXEL11_11 + jmp .loopx_end +..@flag78 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + PIXEL01_12 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + PIXEL11_22 + jmp .loopx_end +..@flag154 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + DiffOrNot 
w2,w6,PIXEL01_10,PIXEL01_70 + PIXEL10_22 + PIXEL11_12 + jmp .loopx_end +..@flag114 + PIXEL00_22 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + PIXEL10_12 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag89 + PIXEL00_12 + PIXEL01_22 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag90 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag55 +..@flag23 + DiffOrNot w2,w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90 + PIXEL10_20 + PIXEL11_21 + jmp .loopx_end +..@flag182 +..@flag150 + PIXEL00_22 + DiffOrNot w2,w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61 + PIXEL10_20 + jmp .loopx_end +..@flag213 +..@flag212 + PIXEL00_20 + DiffOrNot w6,w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90 + PIXEL10_21 + jmp .loopx_end +..@flag241 +..@flag240 + PIXEL00_20 + PIXEL01_22 + DiffOrNot w6,w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90 + jmp .loopx_end +..@flag236 +..@flag232 + PIXEL00_21 + PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60 + jmp .loopx_end +..@flag109 +..@flag105 + DiffOrNot w8,w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90 + PIXEL01_20 + PIXEL11_22 + jmp .loopx_end +..@flag171 +..@flag43 + DiffOrNot w4,w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60 + PIXEL01_21 + PIXEL11_20 + jmp .loopx_end +..@flag143 +..@flag15 + DiffOrNot w4,w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61 + PIXEL10_22 + PIXEL11_20 + jmp .loopx_end +..@flag124 + PIXEL00_21 + PIXEL01_11 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_10 + jmp .loopx_end +..@flag203 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_21 + PIXEL10_10 + PIXEL11_11 + jmp .loopx_end +..@flag62 + PIXEL00_10 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_11 + PIXEL11_21 + jmp .loopx_end +..@flag211 + PIXEL00_11 + PIXEL01_10 + PIXEL10_21 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag118 + PIXEL00_22 
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_12 + PIXEL11_10 + jmp .loopx_end +..@flag217 + PIXEL00_12 + PIXEL01_22 + PIXEL10_10 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag110 + PIXEL00_10 + PIXEL01_12 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_22 + jmp .loopx_end +..@flag155 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_10 + PIXEL10_22 + PIXEL11_12 + jmp .loopx_end +..@flag188 + PIXEL00_21 + PIXEL01_11 + PIXEL10_11 + PIXEL11_12 + jmp .loopx_end +..@flag185 + PIXEL00_12 + PIXEL01_22 + PIXEL10_11 + PIXEL11_12 + jmp .loopx_end +..@flag61 + PIXEL00_12 + PIXEL01_11 + PIXEL10_11 + PIXEL11_21 + jmp .loopx_end +..@flag157 + PIXEL00_12 + PIXEL01_11 + PIXEL10_22 + PIXEL11_12 + jmp .loopx_end +..@flag103 + PIXEL00_11 + PIXEL01_12 + PIXEL10_12 + PIXEL11_22 + jmp .loopx_end +..@flag227 + PIXEL00_11 + PIXEL01_21 + PIXEL10_12 + PIXEL11_11 + jmp .loopx_end +..@flag230 + PIXEL00_22 + PIXEL01_12 + PIXEL10_12 + PIXEL11_11 + jmp .loopx_end +..@flag199 + PIXEL00_11 + PIXEL01_12 + PIXEL10_21 + PIXEL11_11 + jmp .loopx_end +..@flag220 + PIXEL00_21 + PIXEL01_11 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag158 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_22 + PIXEL11_12 + jmp .loopx_end +..@flag234 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + PIXEL01_21 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_11 + jmp .loopx_end +..@flag242 + PIXEL00_22 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + PIXEL10_12 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag59 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + PIXEL10_11 + PIXEL11_21 + jmp .loopx_end +..@flag121 + PIXEL00_12 + PIXEL01_22 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag87 + PIXEL00_11 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_21 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag79 + 
DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_12 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + PIXEL11_22 + jmp .loopx_end +..@flag122 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag94 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag218 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag91 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag229 + PIXEL00_20 + PIXEL01_20 + PIXEL10_12 + PIXEL11_11 + jmp .loopx_end +..@flag167 + PIXEL00_11 + PIXEL01_12 + PIXEL10_20 + PIXEL11_20 + jmp .loopx_end +..@flag173 + PIXEL00_12 + PIXEL01_20 + PIXEL10_11 + PIXEL11_20 + jmp .loopx_end +..@flag181 + PIXEL00_20 + PIXEL01_11 + PIXEL10_20 + PIXEL11_12 + jmp .loopx_end +..@flag186 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + PIXEL10_11 + PIXEL11_12 + jmp .loopx_end +..@flag115 + PIXEL00_11 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + PIXEL10_12 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag93 + PIXEL00_12 + PIXEL01_11 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag206 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + PIXEL01_12 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + PIXEL11_11 + jmp .loopx_end +..@flag205 +..@flag201 + PIXEL00_12 + PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70 + PIXEL11_11 + jmp .loopx_end +..@flag174 +..@flag46 + DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70 + PIXEL01_12 + PIXEL10_11 + PIXEL11_20 + jmp .loopx_end +..@flag179 +..@flag147 + 
PIXEL00_11 + DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70 + PIXEL10_20 + PIXEL11_12 + jmp .loopx_end +..@flag117 +..@flag116 + PIXEL00_20 + PIXEL01_11 + PIXEL10_12 + DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70 + jmp .loopx_end +..@flag189 + PIXEL00_12 + PIXEL01_11 + PIXEL10_11 + PIXEL11_12 + jmp .loopx_end +..@flag231 + PIXEL00_11 + PIXEL01_12 + PIXEL10_12 + PIXEL11_11 + jmp .loopx_end +..@flag126 + PIXEL00_10 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_10 + jmp .loopx_end +..@flag219 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_10 + PIXEL10_10 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag125 + DiffOrNot w8,w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90 + PIXEL01_11 + PIXEL11_10 + jmp .loopx_end +..@flag221 + PIXEL00_12 + DiffOrNot w6,w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90 + PIXEL10_10 + jmp .loopx_end +..@flag207 + DiffOrNot w4,w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61 + PIXEL10_10 + PIXEL11_11 + jmp .loopx_end +..@flag238 + PIXEL00_10 + PIXEL01_12 + DiffOrNot w8,w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60 + jmp .loopx_end +..@flag190 + PIXEL00_10 + DiffOrNot w2,w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61 + PIXEL10_11 + jmp .loopx_end +..@flag187 + DiffOrNot w4,w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60 + PIXEL01_10 + PIXEL11_12 + jmp .loopx_end +..@flag243 + PIXEL00_11 + PIXEL01_10 + DiffOrNot w6,w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90 + jmp .loopx_end +..@flag119 + DiffOrNot w2,w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90 + PIXEL10_12 + PIXEL11_10 + jmp .loopx_end +..@flag237 +..@flag233 + PIXEL00_12 + PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100 + PIXEL11_11 + jmp .loopx_end +..@flag175 +..@flag47 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100 + PIXEL01_12 + PIXEL10_11 + PIXEL11_20 + jmp .loopx_end +..@flag183 +..@flag151 + PIXEL00_11 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100 + PIXEL10_20 + PIXEL11_12 + jmp .loopx_end +..@flag245 +..@flag244 + PIXEL00_20 + PIXEL01_11 + PIXEL10_12 + 
DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100 + jmp .loopx_end +..@flag250 + PIXEL00_10 + PIXEL01_10 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag123 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_10 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_10 + jmp .loopx_end +..@flag95 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_10 + PIXEL11_10 + jmp .loopx_end +..@flag222 + PIXEL00_10 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_10 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag252 + PIXEL00_21 + PIXEL01_11 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100 + jmp .loopx_end +..@flag249 + PIXEL00_12 + PIXEL01_22 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag235 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_21 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100 + PIXEL11_11 + jmp .loopx_end +..@flag111 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100 + PIXEL01_12 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_22 + jmp .loopx_end +..@flag63 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_11 + PIXEL11_21 + jmp .loopx_end +..@flag159 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100 + PIXEL10_22 + PIXEL11_12 + jmp .loopx_end +..@flag215 + PIXEL00_11 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100 + PIXEL10_21 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag246 + PIXEL00_22 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + PIXEL10_12 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100 + jmp .loopx_end +..@flag254 + PIXEL00_10 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100 + jmp .loopx_end +..@flag253 + PIXEL00_12 + PIXEL01_11 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100 + jmp .loopx_end +..@flag251 + DiffOrNot 
w4,w2,PIXEL00_0,PIXEL00_20 + PIXEL01_10 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag239 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100 + PIXEL01_12 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100 + PIXEL11_11 + jmp .loopx_end +..@flag127 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20 + PIXEL11_10 + jmp .loopx_end +..@flag191 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100 + PIXEL10_11 + PIXEL11_12 + jmp .loopx_end +..@flag223 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100 + PIXEL10_10 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20 + jmp .loopx_end +..@flag247 + PIXEL00_11 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100 + PIXEL10_12 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100 + jmp .loopx_end +..@flag255 + DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100 + DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100 + DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100 + DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100 + jmp .loopx_end + + +..@cross0 + mov edx,eax + shl eax,16 + or eax,edx + mov [edi],eax + mov [edi+ebx],eax + jmp .loopx_end +..@cross1 + mov edx,eax + shl eax,16 + or eax,edx + mov ecx,[w2] + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 + mov ecx,edx + shl edx,16 + or edx,ecx + mov [edi],edx + mov [edi+ebx],eax + jmp .loopx_end +..@cross2 + shl eax,16 + mov ecx,[w4] + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 + or eax,edx + mov [edi],eax + mov [edi+ebx],eax + jmp .loopx_end +..@cross4 + mov ecx,[w6] + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 + shl edx,16 + or eax,edx + mov [edi],eax + mov [edi+ebx],eax + jmp .loopx_end +..@cross8 + mov edx,eax + shl eax,16 + or 
eax,edx + mov ecx,[w8] + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 + mov ecx,edx + shl edx,16 + or edx,ecx + mov [edi],eax + mov [edi+ebx],edx + jmp .loopx_end + +.loopx_end + add esi,2 + add edi,4 + dec dword[xcounter] + jz .nexty + jmp .loopx +.nexty + add esi,dword[moduloSrc] + add edi,dword[moduloDst] + dec dword[linesleft] + jz .fin + mov ebx,[ebp+srcPitch] + mov dword[nextline],ebx + neg ebx + mov dword[prevline],ebx + jmp .loopy +.fin + emms + popad + mov esp,ebp + pop ebp + ret + +SECTION .data +FuncTable + dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7 + dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15 + dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23 + dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31 + dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39 + dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47 + dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55 + dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63 + dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71 + dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79 + dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87 + dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95 + dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103 + dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111 + dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, 
..@flag119 + dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127 + dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135 + dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143 + dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151 + dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159 + dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167 + dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175 + dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183 + dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191 + dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199 + dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207 + dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215 + dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223 + dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231 + dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239 + dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247 + dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255 + +FuncTable2 + dd ..@cross0, ..@cross1, ..@cross2, ..@flag0, + dd ..@cross4, ..@flag0, ..@flag0, ..@flag0, + dd ..@cross8, ..@flag0, ..@flag0, ..@flag0, + dd ..@flag0, ..@flag0, ..@flag0, ..@flag0 + diff --git a/graphics/scaler/hq3x.cpp b/graphics/scaler/hq3x.cpp new file mode 100644 
index 0000000000..497fe69be1 --- /dev/null +++ b/graphics/scaler/hq3x.cpp @@ -0,0 +1,176 @@ +/* ScummVM - Scumm Interpreter + * Copyright (C) 2001 Ludvig Strigeus + * Copyright (C) 2001-2006 The ScummVM project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * $URL$ + * $Id$ + * + */ + +#include "common/scaler/intern.h" + +#ifdef USE_NASM +// Assembly version of HQ3x + +extern "C" { + +#ifndef _WIN32 +#define hq3x_16 _hq3x_16 +#endif + +void hq3x_16(const byte *, byte *, uint32, uint32, uint32, uint32); + +} + +void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + hq3x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch); +} + +#else + +#ifdef HAS_ALTIVEC + +#ifdef __amigaos4__ +#include +static bool isAltiVecAvailable() { + uint32 vecUnit; + IExec->GetCPUInfo(GCIT_VectorUnit, &vecUnit, TAG_DONE); + if (vecUnit == VECTORTYPE_NONE) + return false; + else + return true; +} +#else + +#include + +static bool isAltiVecAvailable() { + int selectors[2] = { CTL_HW, HW_VECTORUNIT }; + int hasVectorUnit = 0; + size_t length = sizeof(hasVectorUnit); + int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); + if ( 0 == error ) + return hasVectorUnit != 0; + return false; +} +#endif +#endif + +#define PIXEL00_1M *(q) = interpolate16_2(w5, 
w1); +#define PIXEL00_1U *(q) = interpolate16_2(w5, w2); +#define PIXEL00_1L *(q) = interpolate16_2(w5, w4); +#define PIXEL00_2 *(q) = interpolate16_3(w5, w4, w2); +#define PIXEL00_4 *(q) = interpolate16_3(w5, w4, w2); +#define PIXEL00_5 *(q) = interpolate16_2(w4, w2); +#define PIXEL00_C *(q) = w5; + +#define PIXEL01_1 *(q+1) = interpolate16_2(w5, w2); +#define PIXEL01_3 *(q+1) = interpolate16_2(w5, w2); +#define PIXEL01_6 *(q+1) = interpolate16_2(w2, w5); +#define PIXEL01_C *(q+1) = w5; + +#define PIXEL02_1M *(q+2) = interpolate16_2(w5, w3); +#define PIXEL02_1U *(q+2) = interpolate16_2(w5, w2); +#define PIXEL02_1R *(q+2) = interpolate16_2(w5, w6); +#define PIXEL02_2 *(q+2) = interpolate16_3(w5, w2, w6); +#define PIXEL02_4 *(q+2) = interpolate16_3(w5, w2, w6); +#define PIXEL02_5 *(q+2) = interpolate16_2(w2, w6); +#define PIXEL02_C *(q+2) = w5; + +#define PIXEL10_1 *(q+nextlineDst) = interpolate16_2(w5, w4); +#define PIXEL10_3 *(q+nextlineDst) = interpolate16_2(w5, w4); +#define PIXEL10_6 *(q+nextlineDst) = interpolate16_2(w4, w5); +#define PIXEL10_C *(q+nextlineDst) = w5; + +#define PIXEL11 *(q+1+nextlineDst) = w5; + +#define PIXEL12_1 *(q+2+nextlineDst) = interpolate16_2(w5, w6); +#define PIXEL12_3 *(q+2+nextlineDst) = interpolate16_2(w5, w6); +#define PIXEL12_6 *(q+2+nextlineDst) = interpolate16_2(w6, w5); +#define PIXEL12_C *(q+2+nextlineDst) = w5; + +#define PIXEL20_1M *(q+nextlineDst2) = interpolate16_2(w5, w7); +#define PIXEL20_1D *(q+nextlineDst2) = interpolate16_2(w5, w8); +#define PIXEL20_1L *(q+nextlineDst2) = interpolate16_2(w5, w4); +#define PIXEL20_2 *(q+nextlineDst2) = interpolate16_3(w5, w8, w4); +#define PIXEL20_4 *(q+nextlineDst2) = interpolate16_3(w5, w8, w4); +#define PIXEL20_5 *(q+nextlineDst2) = interpolate16_2(w8, w4); +#define PIXEL20_C *(q+nextlineDst2) = w5; + +#define PIXEL21_1 *(q+1+nextlineDst2) = interpolate16_2(w5, w8); +#define PIXEL21_3 *(q+1+nextlineDst2) = interpolate16_2(w5, w8); +#define PIXEL21_6 *(q+1+nextlineDst2) = 
interpolate16_2(w8, w5); +#define PIXEL21_C *(q+1+nextlineDst2) = w5; + +#define PIXEL22_1M *(q+2+nextlineDst2) = interpolate16_2(w5, w9); +#define PIXEL22_1D *(q+2+nextlineDst2) = interpolate16_2(w5, w8); +#define PIXEL22_1R *(q+2+nextlineDst2) = interpolate16_2(w5, w6); +#define PIXEL22_2 *(q+2+nextlineDst2) = interpolate16_3(w5, w6, w8); +#define PIXEL22_4 *(q+2+nextlineDst2) = interpolate16_3(w5, w6, w8); +#define PIXEL22_5 *(q+2+nextlineDst2) = interpolate16_2(w6, w8); +#define PIXEL22_C *(q+2+nextlineDst2) = w5; + +#define YUV(x) RGBtoYUV[w ## x] + + +#define bitFormat 565 +void HQ3x_565(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + #include "common/scaler/hq3x.h" +} +#undef bitFormat + +#define bitFormat 555 +void HQ3x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + #include "common/scaler/hq3x.h" +} +#undef bitFormat + + +#ifdef HAS_ALTIVEC + #define USE_ALTIVEC 1 + + #define bitFormat 565 + void HQ3x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + #include "common/scaler/hq3x.h" + } + #undef bitFormat + + #define bitFormat 555 + void HQ3x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { + #include "common/scaler/hq3x.h" + } + #undef bitFormat +#endif + +void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { +#ifdef HAS_ALTIVEC + if (isAltiVecAvailable()) { + if (gBitFormat == 565) + HQ3x_565_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height); + else + HQ3x_555_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height); + return; + } +#endif + + if (gBitFormat == 565) + HQ3x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height); + else + HQ3x_555(srcPtr, srcPitch, dstPtr, dstPitch, width, height); +} + +#endif diff --git a/graphics/scaler/hq3x.h b/graphics/scaler/hq3x.h new file 
mode 100644 index 0000000000..7fda8d5105 --- /dev/null +++ b/graphics/scaler/hq3x.h @@ -0,0 +1,2951 @@ +/* ScummVM - Scumm Interpreter + * Copyright (C) 2001 Ludvig Strigeus + * Copyright (C) 2001-2006 The ScummVM project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * $URL$ + * $Id$ + * + */ + +/* + * The HQ3x high quality 3x graphics filter. + * Original author Maxim Stepin (see http://www.hiend3d.com/hq3x.html). + * Adapted for ScummVM to 16 bit output and optimized by Max Horn. + */ + + register int w1, w2, w3, w4, w5, w6, w7, w8, w9; + + const uint32 nextlineSrc = srcPitch / sizeof(uint16); + const uint16 *p = (const uint16 *)srcPtr; + + const uint32 nextlineDst = dstPitch / sizeof(uint16); + const uint32 nextlineDst2 = 2 * nextlineDst; + uint16 *q = (uint16 *)dstPtr; + + // +----+----+----+ + // | | | | + // | w1 | w2 | w3 | + // +----+----+----+ + // | | | | + // | w4 | w5 | w6 | + // +----+----+----+ + // | | | | + // | w7 | w8 | w9 | + // +----+----+----+ + +#ifdef USE_ALTIVEC + // The YUV threshold. + static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706); + + // Bit pattern mask. 
+ static const vector signed int vPatternMask1 = (vector signed int)(0x01,0x02,0x04,0x08); + static const vector signed int vPatternMask2 = (vector signed int)(0x10,0x20,0x40,0x80); + + // Permutation masks for the incremental vector loading (see below for more information). + static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7, 8,9,10,11, 20,21,22,23, 16,17,18,19); + static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27, 8,9,10,11, 12,13,14,15, 28,29,30,31); + + // The YUV vectors. + vector signed char vecYUV5555; + vector signed char vecYUV1234; + vector signed char vecYUV6789; +#endif + + while (height--) { + w1 = *(p - 1 - nextlineSrc); + w4 = *(p - 1); + w7 = *(p - 1 + nextlineSrc); + + w2 = *(p - nextlineSrc); + w5 = *(p); + w8 = *(p + nextlineSrc); + +#ifdef USE_ALTIVEC + // Load inital values of vecYUV1234 / vecYUV6789 + const int arr1234[4] = {0, YUV(1), YUV(2), 0}; + const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)}; + + vecYUV1234 = *(const vector signed char *)arr1234; + vecYUV6789 = *(const vector signed char *)arr6789; +#endif + + int tmpWidth = width; + while (tmpWidth--) { + p++; + + w3 = *(p - nextlineSrc); + w6 = *(p); + w9 = *(p + nextlineSrc); + + int pattern = 0; + +#ifdef USE_ALTIVEC + /* + Consider this peephole into the image buffer: + +----+----+----+----+ + | | | | | + | w00| w01| w02| w03| + +----+----+----+----+ + | | | | | + | w10| w11| w12| w13| + +----+----+----+----+ + | | | | | + | w20| w21| w22| w23| + +----+----+----+----+ + + In the previous loop iteration, w11 was the center point, and our + vectors contain the following data from the previous iteration: + vecYUV5555 = { w11, w11, w11, w11 } + vecYUV1234 = { w00, w01, w02, w10 } + vecYUV6789 = { w12, w20, w21, w22 } + + Now we have the new center point w12, and we would like to have + the following values in our vectors: + vecYUV5555 = { w12, w12, w12, w12 } + vecYUV1234 = { w01, w02, w03, w11 } + vecYUV6789 = 
{ w13, w21, w22, w23 } + + To this end we load a single new vector: + vTmp = { w11, w03, w13, w23 } + + We then can compute all the new vector values using permutations only: + vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] } + vecYUV1234 = { vecYUV1234[1], vecYUV1234[2], vTmp[1], vTmp[0] } + vecYUV6789 = { vTmp[2], vecYUV6789[2], vecYUV6789[3], vTmp[3] } + + Beautiful, isn't it? :-) + */ + + // Load the new values into a temporary vector (see above for an explanation) + const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)}; + vector signed char vTmp = *(const vector signed char *)tmpArr; + + // Next update the data vectors + vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0); + vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234); + vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789); + + // Compute the absolute difference between the center point's YUV and the outer points + const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234)); + const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789)); + + // Compare the difference to the threshold (byte-wise) + const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold); + const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold); + + // Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0. + // Then and in the patter masks. The idea is that for 0 components, we get 0, + // while for the other components we get exactly the mask value. + const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1); + const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2); + + // Now sum up the components of all vectors. Since our pattern mask values + // are all "orthogonal", this is effectively the same as ORing them all + // together. 
In the end, the rightmost word of vSum contains the 'pattern' + vector signed int vSum = vec_sums(vPattern1, (vector signed int)0); + vSum = vec_sums(vPattern2, vSum); + pattern = ((int *)&vSum)[3]; +#else + const int yuv5 = YUV(5); + if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001; + if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002; + if (w5 != w3 && diffYUV(yuv5, YUV(3))) pattern |= 0x0004; + if (w5 != w4 && diffYUV(yuv5, YUV(4))) pattern |= 0x0008; + if (w5 != w6 && diffYUV(yuv5, YUV(6))) pattern |= 0x0010; + if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020; + if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040; + if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080; +#endif + + switch (pattern) { + case 0: + case 1: + case 4: + case 32: + case 128: + case 5: + case 132: + case 160: + case 33: + case 129: + case 36: + case 133: + case 164: + case 161: + case 37: + case 165: + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + case 2: + case 34: + case 130: + case 162: + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + case 16: + case 17: + case 48: + case 49: + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + case 64: + case 65: + case 68: + case 69: + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + case 8: + case 12: + case 136: + case 140: + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + case 3: + case 35: + case 131: + case 163: + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + case 6: + case 38: + case 134: + case 166: + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 
+ break; + case 20: + case 21: + case 52: + case 53: + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + case 144: + case 145: + case 176: + case 177: + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + case 192: + case 193: + case 196: + case 197: + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + case 96: + case 97: + case 100: + case 101: + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + case 40: + case 44: + case 168: + case 172: + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + case 9: + case 13: + case 137: + case 141: + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + case 18: + case 50: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_1M + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + case 80: + case 81: + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_1M + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 72: + case 76: + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_1M + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 10: + case 138: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + case 66: + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + 
PIXEL22_1M + break; + case 24: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + case 7: + case 39: + case 135: + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + case 148: + case 149: + case 180: + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + case 224: + case 228: + case 225: + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + case 41: + case 169: + case 45: + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + case 22: + case 54: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + case 208: + case 209: + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 104: + case 108: + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 11: + case 139: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + case 19: + case 51: + if (diffYUV(YUV(2), YUV(6))) { + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL12_C + } else { + PIXEL00_2 + PIXEL01_6 + PIXEL02_5 + PIXEL12_1 + } + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + case 146: + case 178: + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_1M + PIXEL12_C + PIXEL22_1D + 
} else { + PIXEL01_1 + PIXEL02_5 + PIXEL12_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + break; + case 84: + case 85: + if (diffYUV(YUV(6), YUV(8))) { + PIXEL02_1U + PIXEL12_C + PIXEL21_C + PIXEL22_1M + } else { + PIXEL02_2 + PIXEL12_6 + PIXEL21_1 + PIXEL22_5 + } + PIXEL00_2 + PIXEL01_1 + PIXEL10_1 + PIXEL11 + PIXEL20_1M + break; + case 112: + case 113: + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + } else { + PIXEL12_1 + PIXEL20_2 + PIXEL21_6 + PIXEL22_5 + } + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + break; + case 200: + case 204: + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + } else { + PIXEL10_1 + PIXEL20_5 + PIXEL21_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + break; + case 73: + case 77: + if (diffYUV(YUV(8), YUV(4))) { + PIXEL00_1U + PIXEL10_C + PIXEL20_1M + PIXEL21_C + } else { + PIXEL00_2 + PIXEL10_6 + PIXEL20_5 + PIXEL21_1 + } + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + PIXEL22_1M + break; + case 42: + case 170: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + PIXEL01_C + PIXEL10_C + PIXEL20_1D + } else { + PIXEL00_5 + PIXEL01_1 + PIXEL10_6 + PIXEL20_2 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL21_1 + PIXEL22_2 + break; + case 14: + case 142: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_C + } else { + PIXEL00_5 + PIXEL01_6 + PIXEL02_2 + PIXEL10_1 + } + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + case 67: + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + case 70: + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + case 28: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + case 152: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + 
PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + case 194: + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + case 98: + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + case 56: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + case 25: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + case 26: + case 31: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL10_3 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + PIXEL12_C + } else { + PIXEL02_4 + PIXEL12_3 + } + PIXEL11 + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + case 82: + case 214: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + } else { + PIXEL01_3 + PIXEL02_4 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL21_C + PIXEL22_C + } else { + PIXEL21_3 + PIXEL22_4 + } + break; + case 88: + case 248: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + } else { + PIXEL10_3 + PIXEL20_4 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL22_4 + } + break; + case 74: + case 107: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + } else { + PIXEL00_4 + PIXEL01_3 + } + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + PIXEL21_C + } else { + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 27: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + case 86: + PIXEL00_1M + if (diffYUV(YUV(2), 
YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + case 216: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 106: + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 30: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + case 210: + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 120: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 75: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + case 29: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + break; + case 198: + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + case 184: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + case 99: + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + case 57: + PIXEL00_1U + 
PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + case 71: + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + case 156: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + case 226: + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + case 60: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + case 195: + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + case 102: + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + case 153: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + case 58: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + case 83: + PIXEL00_1L + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 92: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 202: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + 
PIXEL22_1R + break; + case 78: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1M + break; + case 154: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + case 114: + PIXEL00_1M + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 89: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 90: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 55: + case 23: + if (diffYUV(YUV(2), YUV(6))) { + PIXEL00_1L + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL00_2 + PIXEL01_6 + PIXEL02_5 + PIXEL12_1 + } + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + break; + case 182: + case 150: + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + PIXEL22_1D + } else { + PIXEL01_1 + PIXEL02_5 + PIXEL12_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + break; + case 213: + case 212: + if (diffYUV(YUV(6), YUV(8))) { + PIXEL02_1U + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL02_2 + 
PIXEL12_6 + PIXEL21_1 + PIXEL22_5 + } + PIXEL00_2 + PIXEL01_1 + PIXEL10_1 + PIXEL11 + PIXEL20_1M + break; + case 241: + case 240: + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL20_1L + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_1 + PIXEL20_2 + PIXEL21_6 + PIXEL22_5 + } + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + break; + case 236: + case 232: + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + PIXEL22_1R + } else { + PIXEL10_1 + PIXEL20_5 + PIXEL21_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + break; + case 109: + case 105: + if (diffYUV(YUV(8), YUV(4))) { + PIXEL00_1U + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL00_2 + PIXEL10_6 + PIXEL20_5 + PIXEL21_1 + } + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + PIXEL22_1M + break; + case 171: + case 43: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + PIXEL20_1D + } else { + PIXEL00_5 + PIXEL01_1 + PIXEL10_6 + PIXEL20_2 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL21_1 + PIXEL22_2 + break; + case 143: + case 15: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL02_1R + PIXEL10_C + } else { + PIXEL00_5 + PIXEL01_6 + PIXEL02_2 + PIXEL10_1 + } + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + break; + case 124: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 203: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + case 62: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + case 211: + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + 
PIXEL11 + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 118: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + case 217: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 110: + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 155: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + case 188: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + case 185: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + case 61: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + case 157: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + case 103: + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + case 227: + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + case 230: + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + case 199: + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + 
PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + case 220: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 158: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + case 234: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1M + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1R + break; + case 242: + PIXEL00_1M + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1L + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 59: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + case 121: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 87: + PIXEL00_1L + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1M + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 79: + if 
(diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1R + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1M + break; + case 122: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 94: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 218: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 91: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 229: + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + case 167: + PIXEL00_1L + PIXEL01_C + 
PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + break; + case 173: + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + case 181: + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + case 186: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + case 115: + PIXEL00_1L + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 93: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 206: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + case 205: + case 201: + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_1M + } else { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + case 174: + case 46: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_1M + } else { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + case 179: + case 147: + PIXEL00_1L + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_1M + } else { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + case 117: + case 116: + PIXEL00_2 + 
PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_1M + } else { + PIXEL22_2 + } + break; + case 189: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + case 231: + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + break; + case 126: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_4 + PIXEL12_3 + } + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 219: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL01_3 + PIXEL10_3 + } + PIXEL02_1M + PIXEL11 + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_4 + } + break; + case 125: + if (diffYUV(YUV(8), YUV(4))) { + PIXEL00_1U + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL00_2 + PIXEL10_6 + PIXEL20_5 + PIXEL21_1 + } + PIXEL01_1 + PIXEL02_1U + PIXEL11 + PIXEL12_C + PIXEL22_1M + break; + case 221: + if (diffYUV(YUV(6), YUV(8))) { + PIXEL02_1U + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL02_2 + PIXEL12_6 + PIXEL21_1 + PIXEL22_5 + } + PIXEL00_1U + PIXEL01_1 + PIXEL10_C + PIXEL11 + PIXEL20_1M + break; + case 207: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL02_1R + PIXEL10_C + } else { + PIXEL00_5 + PIXEL01_6 + PIXEL02_2 + PIXEL10_1 + } + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + break; + case 238: + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + PIXEL22_1R + } else { + PIXEL10_1 + PIXEL20_5 + PIXEL21_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL11 + PIXEL12_1 + break; + case 190: + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + 
PIXEL12_C + PIXEL22_1D + } else { + PIXEL01_1 + PIXEL02_5 + PIXEL12_6 + PIXEL22_2 + } + PIXEL00_1M + PIXEL10_C + PIXEL11 + PIXEL20_1D + PIXEL21_1 + break; + case 187: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + PIXEL20_1D + } else { + PIXEL00_5 + PIXEL01_1 + PIXEL10_6 + PIXEL20_2 + } + PIXEL02_1M + PIXEL11 + PIXEL12_C + PIXEL21_1 + PIXEL22_1D + break; + case 243: + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL20_1L + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_1 + PIXEL20_2 + PIXEL21_6 + PIXEL22_5 + } + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + break; + case 119: + if (diffYUV(YUV(2), YUV(6))) { + PIXEL00_1L + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL00_2 + PIXEL01_6 + PIXEL02_5 + PIXEL12_1 + } + PIXEL10_1 + PIXEL11 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + break; + case 237: + case 233: + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + } else { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + case 175: + case 47: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + } else { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + break; + case 183: + case 151: + PIXEL00_1L + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + } else { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + break; + case 245: + case 244: + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_C + } else { + PIXEL22_2 + } + break; + case 250: + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + } else { + PIXEL10_3 + PIXEL20_4 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL22_4 + } + break; + case 123: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + } else { + 
PIXEL00_4 + PIXEL01_3 + } + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + PIXEL21_C + } else { + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 95: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL10_3 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + PIXEL12_C + } else { + PIXEL02_4 + PIXEL12_3 + } + PIXEL11 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + break; + case 222: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + } else { + PIXEL01_3 + PIXEL02_4 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL21_C + PIXEL22_C + } else { + PIXEL21_3 + PIXEL22_4 + } + break; + case 252: + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + } else { + PIXEL10_3 + PIXEL20_4 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_C + } else { + PIXEL22_2 + } + break; + case 249: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + } else { + PIXEL20_2 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL22_4 + } + break; + case 235: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + } else { + PIXEL00_4 + PIXEL01_3 + } + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + } else { + PIXEL20_2 + } + PIXEL21_C + PIXEL22_1R + break; + case 111: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + } else { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + PIXEL21_C + } else { + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 63: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + } else { + PIXEL00_2 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + PIXEL12_C + } else { + PIXEL02_4 + PIXEL12_3 + } + PIXEL10_C + PIXEL11 + 
PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + break; + case 159: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL10_3 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + } else { + PIXEL02_2 + } + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + break; + case 215: + PIXEL00_1L + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + } else { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL21_C + PIXEL22_C + } else { + PIXEL21_3 + PIXEL22_4 + } + break; + case 246: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + } else { + PIXEL01_3 + PIXEL02_4 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_C + } else { + PIXEL22_2 + } + break; + case 254: + PIXEL00_1M + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + } else { + PIXEL01_3 + PIXEL02_4 + } + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + } else { + PIXEL10_3 + PIXEL20_4 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL21_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL21_3 + PIXEL22_2 + } + break; + case 253: + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + } else { + PIXEL20_2 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_C + } else { + PIXEL22_2 + } + break; + case 251: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + } else { + PIXEL00_4 + PIXEL01_3 + } + PIXEL02_1M + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL10_C + PIXEL20_C + PIXEL21_C + } else { + PIXEL10_3 + PIXEL20_2 + PIXEL21_3 + } + if (diffYUV(YUV(6), YUV(8))) { + PIXEL12_C + PIXEL22_C + } else { + PIXEL12_3 + PIXEL22_4 + } + break; + case 239: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + } else { + PIXEL00_2 + } + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + } else { + PIXEL20_2 
+ } + PIXEL21_C + PIXEL22_1R + break; + case 127: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL01_C + PIXEL10_C + } else { + PIXEL00_2 + PIXEL01_3 + PIXEL10_3 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + PIXEL12_C + } else { + PIXEL02_4 + PIXEL12_3 + } + PIXEL11 + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + PIXEL21_C + } else { + PIXEL20_4 + PIXEL21_3 + } + PIXEL22_1M + break; + case 191: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + } else { + PIXEL00_2 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + } else { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + break; + case 223: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + PIXEL10_C + } else { + PIXEL00_4 + PIXEL10_3 + } + if (diffYUV(YUV(2), YUV(6))) { + PIXEL01_C + PIXEL02_C + PIXEL12_C + } else { + PIXEL01_3 + PIXEL02_2 + PIXEL12_3 + } + PIXEL11 + PIXEL20_1M + if (diffYUV(YUV(6), YUV(8))) { + PIXEL21_C + PIXEL22_C + } else { + PIXEL21_3 + PIXEL22_4 + } + break; + case 247: + PIXEL00_1L + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + } else { + PIXEL02_2 + } + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_C + } else { + PIXEL22_2 + } + break; + case 255: + if (diffYUV(YUV(4), YUV(2))) { + PIXEL00_C + } else { + PIXEL00_2 + } + PIXEL01_C + if (diffYUV(YUV(2), YUV(6))) { + PIXEL02_C + } else { + PIXEL02_2 + } + PIXEL10_C + PIXEL11 + PIXEL12_C + if (diffYUV(YUV(8), YUV(4))) { + PIXEL20_C + } else { + PIXEL20_2 + } + PIXEL21_C + if (diffYUV(YUV(6), YUV(8))) { + PIXEL22_C + } else { + PIXEL22_2 + } + break; + } + + w1 = w2; + w4 = w5; + w7 = w8; + + w2 = w3; + w5 = w6; + w8 = w9; + + q += 3; + } + p += nextlineSrc - width; + q += (nextlineDst - width) * 3; + } diff --git a/graphics/scaler/hq3x_i386.asm b/graphics/scaler/hq3x_i386.asm new file mode 100644 index 0000000000..100482f122 --- /dev/null +++ b/graphics/scaler/hq3x_i386.asm @@ -0,0 +1,2434 @@ +;hq3x filter +;16bpp output 
+;---------------------------------------------------------- +;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com ) +; +;This program is free software; you can redistribute it and/or +;modify it under the terms of the GNU General Public License +;as published by the Free Software Foundation; either +;version 2 of the License, or (at your option) any later +;version. +; +;This program is distributed in the hope that it will be useful, +;but WITHOUT ANY WARRANTY; without even the implied warranty of +;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;GNU General Public License for more details. +; +;You should have received a copy of the GNU General Public License +;along with this program; if not, write to the Free Software +;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +GLOBAL _hq3x_16 + +EXTERN _LUT16to32 +EXTERN _RGBtoYUV + +SECTION .bss +linesleft resd 1 +xcounter resd 1 +cross resd 1 +nextline resd 1 +prevline resd 1 +w1 resd 1 +w2 resd 1 +w3 resd 1 +w4 resd 1 +w5 resd 1 +w6 resd 1 +w7 resd 1 +w8 resd 1 +w9 resd 1 + +SECTION .data + +reg_blank dd 0,0 +const7 dd 0x00070007,0x00000007 +threshold dd 0x00300706,0x00000000 +zerolowbits dd 0xF7DEF7DE +moduloSrc dd 0 +moduloDst dd 0 + +SECTION .text + +%macro TestDiff 2 + xor ecx,ecx + mov edx,[%1] + cmp edx,[%2] + je %%fin + mov ecx,_RGBtoYUV + movd mm1,[ecx+edx*4] + movq mm5,mm1 + mov edx,[%2] + movd mm2,[ecx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd ecx,mm1 +%%fin: +%endmacro + +%macro DiffOrNot 4 + TestDiff %1,%2 + test ecx,ecx + jz %%same + %3 + jmp %%fin +%%same: + %4 +%%fin +%endmacro + +%macro DiffOrNot 6 + TestDiff %1,%2 + test ecx,ecx + jz %%same + %3 + %4 + jmp %%fin +%%same: + %5 + %6 +%%fin +%endmacro + +%macro DiffOrNot 8 + TestDiff %1,%2 + test ecx,ecx + jz %%same + %3 + %4 + %5 + jmp %%fin +%%same: + %6 + %7 + %8 +%%fin +%endmacro + +%macro DiffOrNot 10 + TestDiff %1,%2 + test ecx,ecx + jz %%same + %3 + %4 + %5 + %6 + jmp %%fin 
+%%same: + %7 + %8 + %9 + %10 +%%fin +%endmacro + +%macro Interp1 3 + mov edx,%2 + mov ecx,%3 + cmp edx,ecx + je %%fin + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 +%%fin + mov %1,dx +%endmacro + +%macro Interp2 4 + mov edx,%3 + mov ecx,%4 + cmp edx,ecx + je %%fin1 + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 +%%fin1 + mov edx,%2 + cmp edx,ecx + je %%fin2 + and ecx,[zerolowbits] + and edx,[zerolowbits] + add edx,ecx + shr edx,1 +%%fin2 + mov %1,dx +%endmacro + +%macro Interp3 2 + mov ecx, _LUT16to32 + movd mm1, [ecx+eax*4] + mov edx, %2 + movd mm2, [ecx+edx*4] + punpcklbw mm1, [reg_blank] + punpcklbw mm2, [reg_blank] + pmullw mm1, [const7] + paddw mm1, mm2 + psrlw mm1, 5 + packuswb mm1, [reg_blank] + movd edx, mm1 + shl dl, 2 + shr edx, 1 + shl dx, 3 + shr edx, 5 + mov %1, dx +%endmacro + +%macro Interp4 3 + mov ecx, _LUT16to32 + movd mm1, [ecx+eax*4] + mov edx, %2 + movd mm2, [ecx+edx*4] + mov edx, %3 + movd mm3, [ecx+edx*4] + punpcklbw mm1, [reg_blank] + punpcklbw mm2, [reg_blank] + punpcklbw mm3, [reg_blank] + psllw mm1, 1 + paddw mm2, mm3 + pmullw mm2, [const7] + paddw mm1, mm2 + psrlw mm1, 6 + packuswb mm1, [reg_blank] + movd edx, mm1 + shl dl, 2 + shr edx, 1 + shl dx, 3 + shr edx, 5 + mov %1, dx +%endmacro + +%macro Interp5 3 + mov edx,%2 + mov ecx,%3 + cmp edx,ecx + je %%fin + and edx,[zerolowbits] + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 +%%fin + mov %1,dx +%endmacro + +%macro PIXEL00_1M 0 + Interp1 [edi],eax,[w1] +%endmacro + +%macro PIXEL00_1U 0 + Interp1 [edi],eax,[w2] +%endmacro + +%macro PIXEL00_1L 0 + Interp1 [edi],eax,[w4] +%endmacro + +%macro PIXEL00_2 0 + Interp2 [edi],eax,[w4],[w2] +%endmacro + +%macro PIXEL00_4 0 + Interp4 [edi],[w4],[w2] +%endmacro + +%macro PIXEL00_5 0 + Interp5 [edi],[w4],[w2] +%endmacro + +%macro PIXEL00_C 0 + mov [edi],ax +%endmacro + +%macro PIXEL01_1 0 + Interp1 
[edi+2],eax,[w2] +%endmacro + +%macro PIXEL01_3 0 + Interp3 [edi+2],[w2] +%endmacro + +%macro PIXEL01_6 0 + Interp1 [edi+2],[w2],eax +%endmacro + +%macro PIXEL01_C 0 + mov [edi+2],ax +%endmacro + +%macro PIXEL02_1M 0 + Interp1 [edi+4],eax,[w3] +%endmacro + +%macro PIXEL02_1U 0 + Interp1 [edi+4],eax,[w2] +%endmacro + +%macro PIXEL02_1R 0 + Interp1 [edi+4],eax,[w6] +%endmacro + +%macro PIXEL02_2 0 + Interp2 [edi+4],eax,[w2],[w6] +%endmacro + +%macro PIXEL02_4 0 + Interp4 [edi+4],[w2],[w6] +%endmacro + +%macro PIXEL02_5 0 + Interp5 [edi+4],[w2],[w6] +%endmacro + +%macro PIXEL02_C 0 + mov [edi+4],ax +%endmacro + +%macro PIXEL10_1 0 + Interp1 [edi+ebx],eax,[w4] +%endmacro + +%macro PIXEL10_3 0 + Interp3 [edi+ebx],[w4] +%endmacro + +%macro PIXEL10_6 0 + Interp1 [edi+ebx],[w4],eax +%endmacro + +%macro PIXEL10_C 0 + mov [edi+ebx],ax +%endmacro + +%macro PIXEL11 0 + mov [edi+ebx+2],ax +%endmacro + +%macro PIXEL12_1 0 + Interp1 [edi+ebx+4],eax,[w6] +%endmacro + +%macro PIXEL12_3 0 + Interp3 [edi+ebx+4],[w6] +%endmacro + +%macro PIXEL12_6 0 + Interp1 [edi+ebx+4],[w6],eax +%endmacro + +%macro PIXEL12_C 0 + mov [edi+ebx+4],ax +%endmacro + +%macro PIXEL20_1M 0 + Interp1 [edi+ebx*2],eax,[w7] +%endmacro + +%macro PIXEL20_1D 0 + Interp1 [edi+ebx*2],eax,[w8] +%endmacro + +%macro PIXEL20_1L 0 + Interp1 [edi+ebx*2],eax,[w4] +%endmacro + +%macro PIXEL20_2 0 + Interp2 [edi+ebx*2],eax,[w8],[w4] +%endmacro + +%macro PIXEL20_4 0 + Interp4 [edi+ebx*2],[w8],[w4] +%endmacro + +%macro PIXEL20_5 0 + Interp5 [edi+ebx*2],[w8],[w4] +%endmacro + +%macro PIXEL20_C 0 + mov [edi+ebx*2],ax +%endmacro + +%macro PIXEL21_1 0 + Interp1 [edi+ebx*2+2],eax,[w8] +%endmacro + +%macro PIXEL21_3 0 + Interp3 [edi+ebx*2+2],[w8] +%endmacro + +%macro PIXEL21_6 0 + Interp1 [edi+ebx*2+2],[w8],eax +%endmacro + +%macro PIXEL21_C 0 + mov [edi+ebx*2+2],ax +%endmacro + +%macro PIXEL22_1M 0 + Interp1 [edi+ebx*2+4],eax,[w9] +%endmacro + +%macro PIXEL22_1D 0 + Interp1 [edi+ebx*2+4],eax,[w8] +%endmacro + +%macro PIXEL22_1R 0 + 
Interp1 [edi+ebx*2+4],eax,[w6] +%endmacro + +%macro PIXEL22_2 0 + Interp2 [edi+ebx*2+4],eax,[w6],[w8] +%endmacro + +%macro PIXEL22_4 0 + Interp4 [edi+ebx*2+4],[w6],[w8] +%endmacro + +%macro PIXEL22_5 0 + Interp5 [edi+ebx*2+4],[w6],[w8] +%endmacro + +%macro PIXEL22_C 0 + mov [edi+ebx*2+4],ax +%endmacro + +inbuffer equ 8 +outbuffer equ 12 +Xres equ 16 +Yres equ 20 +srcPitch equ 24 +dstPitch equ 28 + +_hq3x_16: + push ebp + mov ebp,esp + pushad + + mov esi,[ebp+inbuffer] + mov edi,[ebp+outbuffer] + mov edx,[ebp+Yres] + mov [linesleft],edx + mov ecx,[ebp+Xres] + shl ecx,1 + mov ebx,[ebp+dstPitch] + mov dword[moduloDst],ebx + sub dword[moduloDst],ecx + mov ecx,dword[moduloDst] + shl dword[moduloDst],1 + add dword[moduloDst],ecx + mov ecx,[ebp+Xres] + shl ecx,1 + mov ebx,[ebp+srcPitch] + mov dword[nextline],ebx + mov dword[moduloSrc],ebx + sub dword[moduloSrc],ecx + neg ebx + mov dword[prevline],ebx +.loopy + mov ecx,[ebp+Xres] + mov dword[xcounter],ecx +.loopx + mov ebx,[prevline] + movq mm5,[esi+ebx-2] + movq mm6,[esi-2] + mov ebx,[nextline] + movq mm7,[esi+ebx-2] + movd eax,mm5 + movzx edx,ax + mov [w1],edx + shr eax,16 + mov [w2],eax + psrlq mm5,32 + movd eax,mm5 + movzx edx,ax + mov [w3],edx + movd eax,mm6 + movzx edx,ax + mov [w4],edx + shr eax,16 + mov [w5],eax + psrlq mm6,32 + movd eax,mm6 + movzx edx,ax + mov [w6],edx + movd eax,mm7 + movzx edx,ax + mov [w7],edx + shr eax,16 + mov [w8],eax + psrlq mm7,32 + movd eax,mm7 + movzx edx,ax + mov [w9],edx +.flags + mov ebx,_RGBtoYUV + mov eax,[w5] + xor ecx,ecx + movd mm5,[ebx+eax*4] + mov dword[cross],0 + + mov edx,[w2] + cmp eax,edx + je .noflag2 + or dword[cross],1 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag2 + or ecx,2 +.noflag2 + mov edx,[w4] + cmp eax,edx + je .noflag4 + or dword[cross],2 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + 
movd edx,mm1 + test edx,edx + jz .noflag4 + or ecx,8 +.noflag4 + mov edx,[w6] + cmp eax,edx + je .noflag6 + or dword[cross],4 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag6 + or ecx,16 +.noflag6 + mov edx,[w8] + cmp eax,edx + je .noflag8 + or dword[cross],8 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag8 + or ecx,64 +.noflag8 + test ecx,ecx + jnz .testflag1 + mov ecx,[cross] + mov ebx,[ebp+dstPitch] + jmp [FuncTable2+ecx*4] +.testflag1 + mov edx,[w1] + cmp eax,edx + je .noflag1 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag1 + or ecx,1 +.noflag1 + mov edx,[w3] + cmp eax,edx + je .noflag3 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag3 + or ecx,4 +.noflag3 + mov edx,[w7] + cmp eax,edx + je .noflag7 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag7 + or ecx,32 +.noflag7 + mov edx,[w9] + cmp eax,edx + je .noflag9 + movq mm1,mm5 + movd mm2,[ebx+edx*4] + psubusb mm1,mm2 + psubusb mm2,mm5 + por mm1,mm2 + psubusb mm1,[threshold] + movd edx,mm1 + test edx,edx + jz .noflag9 + or ecx,128 +.noflag9 + mov ebx,[ebp+dstPitch] + jmp [FuncTable+ecx*4] + +..@flag0 +..@flag1 +..@flag4 +..@flag32 +..@flag128 +..@flag5 +..@flag132 +..@flag160 +..@flag33 +..@flag129 +..@flag36 +..@flag133 +..@flag164 +..@flag161 +..@flag37 +..@flag165 + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag2 +..@flag34 +..@flag130 +..@flag162 + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + 
PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag16 +..@flag17 +..@flag48 +..@flag49 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag64 +..@flag65 +..@flag68 +..@flag69 + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag8 +..@flag12 +..@flag136 +..@flag140 + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag3 +..@flag35 +..@flag131 +..@flag163 + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag6 +..@flag38 +..@flag134 +..@flag166 + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag20 +..@flag21 +..@flag52 +..@flag53 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag144 +..@flag145 +..@flag176 +..@flag177 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag192 +..@flag193 +..@flag196 +..@flag197 + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag96 +..@flag97 +..@flag100 +..@flag101 + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag40 +..@flag44 +..@flag168 +..@flag172 + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag9 +..@flag13 +..@flag137 +..@flag141 + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag18 +..@flag50 + PIXEL00_1M + 
DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag80 +..@flag81 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag72 +..@flag76 + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag10 +..@flag138 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag66 + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag24 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag7 +..@flag39 +..@flag135 + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag148 +..@flag149 +..@flag180 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag224 +..@flag228 +..@flag225 + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag41 +..@flag169 +..@flag45 + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag22 +..@flag54 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag208 +..@flag209 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + DiffOrNot 
w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag104 +..@flag108 + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag11 +..@flag139 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag19 +..@flag51 + DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1 + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag146 +..@flag178 + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2 + PIXEL00_1M + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + jmp .loopx_end +..@flag84 +..@flag85 + DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5 + PIXEL00_2 + PIXEL01_1 + PIXEL10_1 + PIXEL11 + PIXEL20_1M + jmp .loopx_end +..@flag112 +..@flag113 + DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_1M,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + jmp .loopx_end +..@flag200 +..@flag204 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2 + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + jmp .loopx_end +..@flag73 +..@flag77 + DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1 + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + PIXEL22_1M + jmp .loopx_end +..@flag42 +..@flag170 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2 + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag14 +..@flag142 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1 + PIXEL11 + 
PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag67 + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag70 + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag28 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag152 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag194 + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag98 + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag56 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag25 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag26 +..@flag31 + DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3 + PIXEL11 + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag82 +..@flag214 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4 + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1M + DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag88 +..@flag248 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL11 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4 + PIXEL21_C + DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4 + jmp .loopx_end +..@flag74 +..@flag107 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_1 + DiffOrNot 
w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag27 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + PIXEL02_1M + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag86 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL10_1 + PIXEL11 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag216 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL20_1M + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag106 + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag30 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL10_C + PIXEL11 + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag210 + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag120 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag75 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag29 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag198 + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag184 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag99 + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 
+ PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag57 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag71 + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag156 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag226 + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag60 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag195 + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag102 + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag153 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag58 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag83 + PIXEL00_1L + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag92 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag202 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag78 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C 
+ PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag154 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag114 + PIXEL00_1M + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag89 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag90 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag55 +..@flag23 + DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1 + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag182 +..@flag150 + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2 + PIXEL00_1M + PIXEL10_1 + PIXEL11 + PIXEL20_2 + PIXEL21_1 + jmp .loopx_end +..@flag213 +..@flag212 + DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5 + PIXEL00_2 + PIXEL01_1 + PIXEL10_1 + PIXEL11 + PIXEL20_1M + jmp .loopx_end +..@flag241 +..@flag240 + DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1M + PIXEL10_1 + PIXEL11 + jmp .loopx_end +..@flag236 +..@flag232 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2 + PIXEL00_1M + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + jmp .loopx_end +..@flag109 +..@flag105 + DiffOrNot 
w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1 + PIXEL01_1 + PIXEL02_2 + PIXEL11 + PIXEL12_1 + PIXEL22_1M + jmp .loopx_end +..@flag171 +..@flag43 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2 + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag143 +..@flag15 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag124 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag203 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + PIXEL02_1M + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag62 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL10_C + PIXEL11 + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag211 + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL20_1M + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag118 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL10_1 + PIXEL11 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag217 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL20_1M + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag110 + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag155 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + PIXEL02_1M + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end 
+..@flag188 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag185 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag61 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag157 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag103 + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag227 + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag230 + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag199 + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag220 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag158 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL10_C + PIXEL11 + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag234 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + PIXEL02_1M + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + PIXEL22_1R + jmp .loopx_end +..@flag242 + PIXEL00_1M + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL20_1L + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag59 + DiffOrNot 
w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag121 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag87 + PIXEL00_1L + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL10_1 + PIXEL11 + PIXEL20_1M + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag79 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + PIXEL02_1R + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag122 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag94 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL10_C + PIXEL11 + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag218 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_C + PIXEL11 + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag91 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag229 + PIXEL00_2 + PIXEL01_1 + PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag167 + PIXEL00_1L + 
PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_2 + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag173 + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag181 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag186 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag115 + PIXEL00_1L + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag93 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag206 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag205 +..@flag201 + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2 + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag174 +..@flag46 + DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2 + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag179 +..@flag147 + PIXEL00_1L + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag117 +..@flag116 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2 + jmp .loopx_end +..@flag189 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end 
+..@flag231 + PIXEL00_1L + PIXEL01_C + PIXEL02_1R + PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1L + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag126 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3 + PIXEL11 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag219 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3 + PIXEL02_1M + PIXEL11 + PIXEL20_1M + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag125 + DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1 + PIXEL01_1 + PIXEL02_1U + PIXEL11 + PIXEL12_C + PIXEL22_1M + jmp .loopx_end +..@flag221 + DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5 + PIXEL00_1U + PIXEL01_1 + PIXEL10_C + PIXEL11 + PIXEL20_1M + jmp .loopx_end +..@flag207 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1 + PIXEL11 + PIXEL12_1 + PIXEL20_1M + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag238 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2 + PIXEL00_1M + PIXEL01_C + PIXEL02_1R + PIXEL11 + PIXEL12_1 + jmp .loopx_end +..@flag190 + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2 + PIXEL00_1M + PIXEL10_C + PIXEL11 + PIXEL20_1D + PIXEL21_1 + jmp .loopx_end +..@flag187 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2 + PIXEL02_1M + PIXEL11 + PIXEL12_C + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag243 + DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5 + PIXEL00_1L + PIXEL01_C + PIXEL02_1M + PIXEL10_1 + PIXEL11 + jmp .loopx_end +..@flag119 + DiffOrNot 
w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1 + PIXEL10_1 + PIXEL11 + PIXEL20_1L + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag237 +..@flag233 + PIXEL00_1U + PIXEL01_1 + PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2 + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag175 +..@flag47 + DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2 + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + PIXEL20_1D + PIXEL21_1 + PIXEL22_2 + jmp .loopx_end +..@flag183 +..@flag151 + PIXEL00_1L + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_2 + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag245 +..@flag244 + PIXEL00_2 + PIXEL01_1 + PIXEL02_1U + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2 + jmp .loopx_end +..@flag250 + PIXEL00_1M + PIXEL01_C + PIXEL02_1M + PIXEL11 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4 + PIXEL21_C + DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4 + jmp .loopx_end +..@flag123 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag95 + DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3 + PIXEL11 + PIXEL20_1M + PIXEL21_C + PIXEL22_1M + jmp .loopx_end +..@flag222 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4 + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1M + DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag252 + PIXEL00_1M + PIXEL01_1 + PIXEL02_1U + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4 + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2 + jmp .loopx_end +..@flag249 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1M + PIXEL10_C + PIXEL11 + DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2 + 
PIXEL21_C + DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4 + jmp .loopx_end +..@flag235 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3 + PIXEL02_1M + PIXEL10_C + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2 + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag111 + DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2 + PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag63 + DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3 + PIXEL10_C + PIXEL11 + PIXEL20_1D + PIXEL21_1 + PIXEL22_1M + jmp .loopx_end +..@flag159 + DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2 + PIXEL11 + PIXEL12_C + PIXEL20_1M + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag215 + PIXEL00_1L + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1M + DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag246 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4 + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2 + jmp .loopx_end +..@flag254 + PIXEL00_1M + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4 + PIXEL11 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4 + DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_2 + jmp .loopx_end +..@flag253 + PIXEL00_1U + PIXEL01_1 + PIXEL02_1U + PIXEL10_C + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2 + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2 + jmp .loopx_end +..@flag251 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3 + PIXEL02_1M + PIXEL11 + DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_2,PIXEL21_3 + DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4 + jmp .loopx_end +..@flag239 + DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2 + 
PIXEL01_C + PIXEL02_1R + PIXEL10_C + PIXEL11 + PIXEL12_1 + DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2 + PIXEL21_C + PIXEL22_1R + jmp .loopx_end +..@flag127 + DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_2,PIXEL01_3,PIXEL10_3 + DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3 + PIXEL11 + DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3 + PIXEL22_1M + jmp .loopx_end +..@flag191 + DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_C + PIXEL20_1D + PIXEL21_1 + PIXEL22_1D + jmp .loopx_end +..@flag223 + DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3 + DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_2,PIXEL12_3 + PIXEL11 + PIXEL20_1M + DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4 + jmp .loopx_end +..@flag247 + PIXEL00_1L + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2 + PIXEL10_1 + PIXEL11 + PIXEL12_C + PIXEL20_1L + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2 + jmp .loopx_end +..@flag255 + DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2 + PIXEL01_C + DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2 + PIXEL10_C + PIXEL11 + PIXEL12_C + DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2 + PIXEL21_C + DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2 + jmp .loopx_end + +..@cross0 + mov edx,eax + shl eax,16 + or eax,edx + mov [edi],eax + mov [edi+4],ax + mov [edi+ebx],eax + mov [edi+ebx+4],ax + mov [edi+ebx*2],eax + mov [edi+ebx*2+4],ax + jmp .loopx_end +..@cross1 + mov edx,eax + shl eax,16 + or eax,edx + mov ecx,[w2] + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 + mov [edi],dx + mov [edi+2],dx + mov [edi+4],dx + mov [edi+ebx],eax + mov [edi+ebx+4],ax + mov [edi+ebx*2],eax + mov [edi+ebx*2+4],ax + jmp .loopx_end +..@cross2 + mov edx,eax + shl eax,16 + or eax,edx + mov ecx,[w4] + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr 
edx,1 + mov [edi],dx + mov [edi+2],eax + mov [edi+ebx],dx + mov [edi+ebx+2],eax + mov [edi+ebx*2],dx + mov [edi+ebx*2+2],eax + jmp .loopx_end +..@cross4 + mov edx,eax + shl eax,16 + or eax,edx + mov ecx,[w6] + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 + mov [edi],eax + mov [edi+4],dx + mov [edi+ebx],eax + mov [edi+ebx+4],dx + mov [edi+ebx*2],eax + mov [edi+ebx*2+4],dx + jmp .loopx_end +..@cross8 + mov edx,eax + shl eax,16 + or eax,edx + mov ecx,[w8] + and edx,[zerolowbits] + and ecx,[zerolowbits] + add ecx,edx + shr ecx,1 + add ecx,0x0821 + and ecx,[zerolowbits] + add edx,ecx + shr edx,1 + mov [edi],eax + mov [edi+4],ax + mov [edi+ebx],eax + mov [edi+ebx+4],ax + mov [edi+ebx*2],dx + mov [edi+ebx*2+2],dx + mov [edi+ebx*2+4],dx + jmp .loopx_end + +.loopx_end + add esi,2 + add edi,6 + dec dword[xcounter] + jz .nexty + jmp .loopx +.nexty + add esi,dword[moduloSrc] + add edi,dword[moduloDst] + dec dword[linesleft] + jz .fin + mov ebx,[ebp+srcPitch] + mov dword[nextline],ebx + neg ebx + mov dword[prevline],ebx + jmp .loopy +.fin + emms + popad + mov esp,ebp + pop ebp + ret + +SECTION .data +FuncTable + dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7 + dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15 + dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23 + dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31 + dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39 + dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47 + dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55 + dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63 + dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, 
..@flag69, ..@flag70, ..@flag71 + dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79 + dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87 + dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95 + dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103 + dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111 + dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119 + dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127 + dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135 + dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143 + dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151 + dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159 + dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167 + dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175 + dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183 + dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191 + dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199 + dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207 + dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215 + dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223 + dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, 
..@flag231 + dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239 + dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247 + dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255 + +FuncTable2 + dd ..@cross0, ..@cross1, ..@cross2, ..@flag0, + dd ..@cross4, ..@flag0, ..@flag0, ..@flag0, + dd ..@cross8, ..@flag0, ..@flag0, ..@flag0, + dd ..@flag0, ..@flag0, ..@flag0, ..@flag0 + diff --git a/graphics/scaler/intern.h b/graphics/scaler/intern.h new file mode 100644 index 0000000000..fa3ca1823a --- /dev/null +++ b/graphics/scaler/intern.h @@ -0,0 +1,171 @@ +/* ScummVM - Scumm Interpreter + * Copyright (C) 2001 Ludvig Strigeus + * Copyright (C) 2001-2006 The ScummVM project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * $URL$ + * $Id$ + * + */ + + +#ifndef COMMON_SCALER_INTERN_H +#define COMMON_SCALER_INTERN_H + +#include "common/stdafx.h" +#include "common/scummsys.h" + +template<int bitFormat> +struct ColorMasks { +}; + +template<> +struct ColorMasks<565> { + enum { + highBits = 0xF7DEF7DE, + lowBits = 0x08210821, + qhighBits = 0xE79CE79C, + qlowBits = 0x18631863, + redblueMask = 0xF81F, + greenMask = 0x07E0 + }; +}; + +template<> +struct ColorMasks<555> { + enum { + highBits = 0x7BDE7BDE, + lowBits = 0x04210421, + qhighBits = 0x739C739C, + qlowBits = 0x0C630C63, + redblueMask = 0x7C1F, + greenMask = 0x03E0 + }; +}; + +#define highBits ColorMasks<bitFormat>::highBits +#define lowBits ColorMasks<bitFormat>::lowBits +#define qhighBits ColorMasks<bitFormat>::qhighBits +#define qlowBits ColorMasks<bitFormat>::qlowBits +#define redblueMask ColorMasks<bitFormat>::redblueMask +#define greenMask ColorMasks<bitFormat>::greenMask + + +/** + * Interpolate two 16 bit pixel pairs at once with equal weights 1. + * In particular, A and B can contain two pixels/each in the upper + * and lower halves. + */ +template<int bitFormat> +static inline uint32 INTERPOLATE(uint32 A, uint32 B) { + + return (((A & highBits) >> 1) + ((B & highBits) >> 1) + (A & B & lowBits)); +} + +/** + * Interpolate four 16 bit pixel pairs at once with equal weights 1. + * In particular, A, B, C and D can each contain two pixels in the upper + * and lower halves. + */ +template<int bitFormat> +static inline uint32 Q_INTERPOLATE(uint32 A, uint32 B, uint32 C, uint32 D) { + register uint32 x = ((A & qhighBits) >> 2) + ((B & qhighBits) >> 2) + ((C & qhighBits) >> 2) + ((D & qhighBits) >> 2); + register uint32 y = ((A & qlowBits) + (B & qlowBits) + (C & qlowBits) + (D & qlowBits)) >> 2; + + y &= qlowBits; + return x + y; +} + + +/** + * Interpolate two 16 bit pixels with the weights specified in the template + * parameters. Used by the hq scaler family. 
+ */ +template +static inline uint16 interpolate16_2(uint16 p1, uint16 p2) { + return ((((p1 & redblueMask) * w1 + (p2 & redblueMask) * w2) / (w1 + w2)) & redblueMask) | + ((((p1 & greenMask) * w1 + (p2 & greenMask) * w2) / (w1 + w2)) & greenMask); +} + +/** + * Interpolate three 16 bit pixels with the weights specified in the template + * parameters. Used by the hq scaler family. + */ +template +static inline uint16 interpolate16_3(uint16 p1, uint16 p2, uint16 p3) { + return ((((p1 & redblueMask) * w1 + (p2 & redblueMask) * w2 + (p3 & redblueMask) * w3) / (w1 + w2 + w3)) & redblueMask) | + ((((p1 & greenMask) * w1 + (p2 & greenMask) * w2 + (p3 & greenMask) * w3) / (w1 + w2 + w3)) & greenMask); +} + + +/** + * Compare two YUV values (encoded 8-8-8) and check if they differ by more than + * a certain hard coded threshold. Used by the hq scaler family. + */ +static inline bool diffYUV(int yuv1, int yuv2) { + static const int Ymask = 0x00FF0000; + static const int Umask = 0x0000FF00; + static const int Vmask = 0x000000FF; + static const int trY = 0x00300000; + static const int trU = 0x00000700; + static const int trV = 0x00000006; + + int diff; + int mask; + + diff = ((yuv1 & Ymask) - (yuv2 & Ymask)); + mask = diff >> 31; // -1 if value < 0, 0 otherwise + diff = (diff ^ mask) - mask; //-1: ~value + 1; 0: value + if (diff > trY) return true; + + diff = ((yuv1 & Umask) - (yuv2 & Umask)); + mask = diff >> 31; // -1 if value < 0, 0 otherwise + diff = (diff ^ mask) - mask; //-1: ~value + 1; 0: value + if (diff > trU) return true; + + diff = ((yuv1 & Vmask) - (yuv2 & Vmask)); + mask = diff >> 31; // -1 if value < 0, 0 otherwise + diff = (diff ^ mask) - mask; //-1: ~value + 1; 0: value + if (diff > trV) return true; + + return false; +/* + return + ( ( ABS((yuv1 & Ymask) - (yuv2 & Ymask)) > trY ) || + ( ABS((yuv1 & Umask) - (yuv2 & Umask)) > trU ) || + ( ABS((yuv1 & Vmask) - (yuv2 & Vmask)) > trV ) ); +*/ +} + +/** + * 16bit RGB to YUV conversion table. 
This table is setup by InitLUT(). + * Used by the hq scaler family. + */ +extern "C" uint *RGBtoYUV; + +/** Auxiliary macro to simplify creating those template function wrappers. */ +#define MAKE_WRAPPER(FUNC) \ + void FUNC(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) { \ + if (gBitFormat == 565) \ + FUNC ## Template<565>(srcPtr, srcPitch, dstPtr, dstPitch, width, height); \ + else \ + FUNC ## Template<555>(srcPtr, srcPitch, dstPtr, dstPitch, width, height); \ + } + +/** Specifies the currently active 16bit pixel format, 555 or 565. */ +extern int gBitFormat; + +#endif diff --git a/graphics/scaler/scale2x.cpp b/graphics/scaler/scale2x.cpp new file mode 100644 index 0000000000..6d8a73f8cc --- /dev/null +++ b/graphics/scaler/scale2x.cpp @@ -0,0 +1,521 @@ +/* + * This file is part of the Scale2x project. + * + * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * This file contains a C and MMX implementation of the Scale2x effect. 
+ * + * You can find an high level description of the effect at : + * + * http://scale2x.sourceforge.net/ + * + * Alternatively at the previous license terms, you are allowed to use this + * code in your program with these conditions: + * - the program is not used in commercial activities. + * - the whole source code of the program is released with the binary. + * - derivative works of the program are allowed. + */ + +#if HAVE_CONFIG_H +#include +#endif + +#include "common/scaler/intern.h" + +#include "scale2x.h" + +#include + +/***************************************************************************/ +/* Scale2x C implementation */ + +static inline void scale2x_8_def_single(scale2x_uint8* __restrict__ dst, const scale2x_uint8* __restrict__ src0, const scale2x_uint8* __restrict__ src1, const scale2x_uint8* __restrict__ src2, unsigned count) +{ + /* central pixels */ + while (count) { + if (src0[0] != src2[0] && src1[-1] != src1[1]) { + dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0]; + dst[1] = src1[1] == src0[0] ? src0[0] : src1[0]; + } else { + dst[0] = src1[0]; + dst[1] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 2; + --count; + } +} + +static inline void scale2x_16_def_single(scale2x_uint16* __restrict__ dst, const scale2x_uint16* __restrict__ src0, const scale2x_uint16* __restrict__ src1, const scale2x_uint16* __restrict__ src2, unsigned count) +{ + /* central pixels */ + while (count) { + if (src0[0] != src2[0] && src1[-1] != src1[1]) { + dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0]; + dst[1] = src1[1] == src0[0] ? 
src0[0] : src1[0]; + } else { + dst[0] = src1[0]; + dst[1] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 2; + --count; + } +} + +static inline void scale2x_32_def_single(scale2x_uint32* __restrict__ dst, const scale2x_uint32* __restrict__ src0, const scale2x_uint32* __restrict__ src1, const scale2x_uint32* __restrict__ src2, unsigned count) +{ + /* central pixels */ + while (count) { + if (src0[0] != src2[0] && src1[-1] != src1[1]) { + dst[0] = src1[-1] == src0[0] ? src0[0] : src1[0]; + dst[1] = src1[1] == src0[0] ? src0[0] : src1[0]; + } else { + dst[0] = src1[0]; + dst[1] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 2; + --count; + } +} + +/** + * Scale by a factor of 2 a row of pixels of 8 bits. + * The function is implemented in C. + * The pixels over the left and right borders are assumed of the same color of + * the pixels on the border. + * \param src0 Pointer at the first pixel of the previous row. + * \param src1 Pointer at the first pixel of the current row. + * \param src2 Pointer at the first pixel of the next row. + * \param count Length in pixels of the src0, src1 and src2 rows. + * It must be at least 2. + * \param dst0 First destination row, double length in pixels. + * \param dst1 Second destination row, double length in pixels. + */ +void scale2x_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count) +{ + scale2x_8_def_single(dst0, src0, src1, src2, count); + scale2x_8_def_single(dst1, src2, src1, src0, count); +} + +/** + * Scale by a factor of 2 a row of pixels of 16 bits. + * This function operates like scale2x_8_def() but for 16 bits pixels. + * \param src0 Pointer at the first pixel of the previous row. + * \param src1 Pointer at the first pixel of the current row. + * \param src2 Pointer at the first pixel of the next row. + * \param count Length in pixels of the src0, src1 and src2 rows. + * It must be at least 2. 
+ * \param dst0 First destination row, double length in pixels. + * \param dst1 Second destination row, double length in pixels. + */ +void scale2x_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count) +{ + scale2x_16_def_single(dst0, src0, src1, src2, count); + scale2x_16_def_single(dst1, src2, src1, src0, count); +} + +/** + * Scale by a factor of 2 a row of pixels of 32 bits. + * This function operates like scale2x_8_def() but for 32 bits pixels. + * \param src0 Pointer at the first pixel of the previous row. + * \param src1 Pointer at the first pixel of the current row. + * \param src2 Pointer at the first pixel of the next row. + * \param count Length in pixels of the src0, src1 and src2 rows. + * It must be at least 2. + * \param dst0 First destination row, double length in pixels. + * \param dst1 Second destination row, double length in pixels. + */ +void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count) +{ + scale2x_32_def_single(dst0, src0, src1, src2, count); + scale2x_32_def_single(dst1, src2, src1, src0, count); +} + +/***************************************************************************/ +/* Scale2x MMX implementation */ + +#if defined(__GNUC__) && defined(__i386__) + +/* + * Apply the Scale2x effect at a single row. + * This function must be called only by the other scale2x functions. 
+ * + * Considering the pixel map : + * + * ABC (src0) + * DEF (src1) + * GHI (src2) + * + * this function computes 2 new pixels in substitution of the source pixel E + * like this map : + * + * ab (dst) + * + * with these variables : + * + * &current -> E + * &current_left -> D + * &current_right -> F + * &current_upper -> B + * &current_lower -> H + * + * %0 -> current_upper + * %1 -> current + * %2 -> current_lower + * %3 -> dst + * %4 -> counter + * + * %mm0 -> *current_left + * %mm1 -> *current_next + * %mm2 -> tmp0 + * %mm3 -> tmp1 + * %mm4 -> tmp2 + * %mm5 -> tmp3 + * %mm6 -> *current_upper + * %mm7 -> *current + */ +static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count) +{ + assert(count >= 16); + assert(count % 8 == 0); + + __asm__ __volatile__( +/* central runs */ + "shrl $3, %4\n" + "jz 1f\n" + + "0:\n" + + /* set the current, current_pre, current_next registers */ + "movq -8(%1), %%mm0\n" + "movq (%1), %%mm7\n" + "movq 8(%1), %%mm1\n" + "psrlq $56, %%mm0\n" + "psllq $56, %%mm1\n" + "movq %%mm7, %%mm2\n" + "movq %%mm7, %%mm3\n" + "psllq $8, %%mm2\n" + "psrlq $8, %%mm3\n" + "por %%mm2, %%mm0\n" + "por %%mm3, %%mm1\n" + + /* current_upper */ + "movq (%0), %%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm4\n" + "movq %%mm0, %%mm3\n" + "movq %%mm1, %%mm5\n" + "pcmpeqb %%mm6, %%mm2\n" + "pcmpeqb %%mm6, %%mm4\n" + "pcmpeqb (%2), %%mm3\n" + "pcmpeqb (%2), %%mm5\n" + "pandn %%mm2, %%mm3\n" + "pandn %%mm4, %%mm5\n" + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm4\n" + "pcmpeqb %%mm1, %%mm2\n" + "pcmpeqb %%mm0, %%mm4\n" + "pandn %%mm3, %%mm2\n" + "pandn %%mm5, %%mm4\n" + "movq %%mm2, %%mm3\n" + "movq %%mm4, %%mm5\n" + "pand %%mm6, %%mm2\n" + "pand %%mm6, %%mm4\n" + "pandn %%mm7, %%mm3\n" + "pandn %%mm7, %%mm5\n" + "por %%mm3, %%mm2\n" + "por %%mm5, %%mm4\n" + + /* set *dst */ + "movq 
%%mm2, %%mm3\n" + "punpcklbw %%mm4, %%mm2\n" + "punpckhbw %%mm4, %%mm3\n" + "movq %%mm2, (%3)\n" + "movq %%mm3, 8(%3)\n" + + /* next */ + "addl $8, %0\n" + "addl $8, %1\n" + "addl $8, %2\n" + "addl $16, %3\n" + + "decl %4\n" + "jnz 0b\n" + "1:\n" + + : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count) + : + : "cc" + ); +} + +static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count) +{ + assert(count >= 8); + assert(count % 4 == 0); + + __asm__ __volatile__( +/* central runs */ + "shrl $2, %4\n" + "jz 1f\n" + + "0:\n" + + /* set the current, current_pre, current_next registers */ + "movq -8(%1), %%mm0\n" + "movq (%1), %%mm7\n" + "movq 8(%1), %%mm1\n" + "psrlq $48, %%mm0\n" + "psllq $48, %%mm1\n" + "movq %%mm7, %%mm2\n" + "movq %%mm7, %%mm3\n" + "psllq $16, %%mm2\n" + "psrlq $16, %%mm3\n" + "por %%mm2, %%mm0\n" + "por %%mm3, %%mm1\n" + + /* current_upper */ + "movq (%0), %%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm4\n" + "movq %%mm0, %%mm3\n" + "movq %%mm1, %%mm5\n" + "pcmpeqw %%mm6, %%mm2\n" + "pcmpeqw %%mm6, %%mm4\n" + "pcmpeqw (%2), %%mm3\n" + "pcmpeqw (%2), %%mm5\n" + "pandn %%mm2, %%mm3\n" + "pandn %%mm4, %%mm5\n" + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm4\n" + "pcmpeqw %%mm1, %%mm2\n" + "pcmpeqw %%mm0, %%mm4\n" + "pandn %%mm3, %%mm2\n" + "pandn %%mm5, %%mm4\n" + "movq %%mm2, %%mm3\n" + "movq %%mm4, %%mm5\n" + "pand %%mm6, %%mm2\n" + "pand %%mm6, %%mm4\n" + "pandn %%mm7, %%mm3\n" + "pandn %%mm7, %%mm5\n" + "por %%mm3, %%mm2\n" + "por %%mm5, %%mm4\n" + + /* set *dst */ + "movq %%mm2, %%mm3\n" + "punpcklwd %%mm4, %%mm2\n" + "punpckhwd %%mm4, %%mm3\n" + "movq %%mm2, (%3)\n" + "movq %%mm3, 8(%3)\n" + + /* next */ + "addl $8, %0\n" + "addl $8, %1\n" + "addl $8, %2\n" + "addl $16, %3\n" + + "decl %4\n" + "jnz 0b\n" + "1:\n" + + : 
"+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count) + : + : "cc" + ); +} + +static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count) +{ + assert(count >= 4); + assert(count % 2 == 0); + + __asm__ __volatile__( +/* central runs */ + "shrl $1, %4\n" + "jz 1f\n" + + "0:\n" + + /* set the current, current_pre, current_next registers */ + "movq -8(%1), %%mm0\n" + "movq (%1), %%mm7\n" + "movq 8(%1), %%mm1\n" + "psrlq $32, %%mm0\n" + "psllq $32, %%mm1\n" + "movq %%mm7, %%mm2\n" + "movq %%mm7, %%mm3\n" + "psllq $32, %%mm2\n" + "psrlq $32, %%mm3\n" + "por %%mm2, %%mm0\n" + "por %%mm3, %%mm1\n" + + /* current_upper */ + "movq (%0), %%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm4\n" + "movq %%mm0, %%mm3\n" + "movq %%mm1, %%mm5\n" + "pcmpeqd %%mm6, %%mm2\n" + "pcmpeqd %%mm6, %%mm4\n" + "pcmpeqd (%2), %%mm3\n" + "pcmpeqd (%2), %%mm5\n" + "pandn %%mm2, %%mm3\n" + "pandn %%mm4, %%mm5\n" + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm4\n" + "pcmpeqd %%mm1, %%mm2\n" + "pcmpeqd %%mm0, %%mm4\n" + "pandn %%mm3, %%mm2\n" + "pandn %%mm5, %%mm4\n" + "movq %%mm2, %%mm3\n" + "movq %%mm4, %%mm5\n" + "pand %%mm6, %%mm2\n" + "pand %%mm6, %%mm4\n" + "pandn %%mm7, %%mm3\n" + "pandn %%mm7, %%mm5\n" + "por %%mm3, %%mm2\n" + "por %%mm5, %%mm4\n" + + /* set *dst */ + "movq %%mm2, %%mm3\n" + "punpckldq %%mm4, %%mm2\n" + "punpckhdq %%mm4, %%mm3\n" + "movq %%mm2, (%3)\n" + "movq %%mm3, 8(%3)\n" + + /* next */ + "addl $8, %0\n" + "addl $8, %1\n" + "addl $8, %2\n" + "addl $16, %3\n" + + "decl %4\n" + "jnz 0b\n" + "1:\n" + + : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count) + : + : "cc" + ); +} + +/** + * Scale by a factor of 2 a row of pixels of 8 bits. + * This is a very fast MMX implementation. 
+ * The implementation uses a combination of cmp/and/not operations to + * completely remove the need for conditional jumps. This trick gives the + * major speed improvement. + * Also, using the 8-byte MMX registers more than one pixel is computed + * at the same time. + * Before calling this function you must ensure that the current CPU supports + * the MMX instruction set. After calling it you must be sure to call the EMMS + * instruction before any floating-point operation. + * The pixels over the left and right borders are assumed of the same color of + * the pixels on the border. + * \param src0 Pointer at the first pixel of the previous row. + * \param src1 Pointer at the first pixel of the current row. + * \param src2 Pointer at the first pixel of the next row. + * \param count Length in pixels of the src0, src1 and src2 rows. It must + * be at least 16 and a multiple of 8. + * \param dst0 First destination row, double length in pixels. + * \param dst1 Second destination row, double length in pixels. + */ +void scale2x_8_mmx(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count) +{ + if (count % 8 != 0 || count < 16) { + scale2x_8_def(dst0, dst1, src0, src1, src2, count); + } else { + assert(count >= 16); + assert(count % 8 == 0); + + scale2x_8_mmx_single(dst0, src0, src1, src2, count); + scale2x_8_mmx_single(dst1, src2, src1, src0, count); + } +} + +/** + * Scale by a factor of 2 a row of pixels of 16 bits. + * This function operates like scale2x_8_mmx() but for 16 bits pixels. + * \param src0 Pointer at the first pixel of the previous row. + * \param src1 Pointer at the first pixel of the current row. + * \param src2 Pointer at the first pixel of the next row. + * \param count Length in pixels of the src0, src1 and src2 rows. It must + * be at least 8 and a multiple of 4. + * \param dst0 First destination row, double length in pixels. 
+ * \param dst1 Second destination row, double length in pixels. + */ +void scale2x_16_mmx(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count) +{ + if (count % 4 != 0 || count < 8) { + scale2x_16_def(dst0, dst1, src0, src1, src2, count); + } else { + assert(count >= 8); + assert(count % 4 == 0); + + scale2x_16_mmx_single(dst0, src0, src1, src2, count); + scale2x_16_mmx_single(dst1, src2, src1, src0, count); + } +} + +/** + * Scale by a factor of 2 a row of pixels of 32 bits. + * This function operates like scale2x_8_mmx() but for 32 bits pixels. + * \param src0 Pointer at the first pixel of the previous row. + * \param src1 Pointer at the first pixel of the current row. + * \param src2 Pointer at the first pixel of the next row. + * \param count Length in pixels of the src0, src1 and src2 rows. It must + * be at least 4 and a multiple of 2. + * \param dst0 First destination row, double length in pixels. + * \param dst1 Second destination row, double length in pixels. + */ +void scale2x_32_mmx(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count) +{ + if (count % 2 != 0 || count < 4) { + scale2x_32_def(dst0, dst1, src0, src1, src2, count); + } else { + assert(count >= 4); + assert(count % 2 == 0); + + scale2x_32_mmx_single(dst0, src0, src1, src2, count); + scale2x_32_mmx_single(dst1, src2, src1, src0, count); + } +} + +#endif + diff --git a/graphics/scaler/scale2x.h b/graphics/scaler/scale2x.h new file mode 100644 index 0000000000..2101790905 --- /dev/null +++ b/graphics/scaler/scale2x.h @@ -0,0 +1,56 @@ +/* + * This file is part of the Scale2x project. 
+ * + * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __SCALE2X_H +#define __SCALE2X_H + +#if defined(_MSC_VER) +#define __restrict__ +#endif + +typedef unsigned char scale2x_uint8; +typedef unsigned short scale2x_uint16; +typedef unsigned scale2x_uint32; + +void scale2x_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count); +void scale2x_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count); +void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count); + +#if defined(__GNUC__) && defined(__i386__) + +void scale2x_8_mmx(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count); +void scale2x_16_mmx(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count); +void scale2x_32_mmx(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const 
scale2x_uint32* src2, unsigned count); + +/** + * End the use of the MMX instructions. + * This function must be called before using any floating-point operations. + */ +static inline void scale2x_mmx_emms(void) +{ + __asm__ __volatile__ ( + "emms" + ); +} + +#endif + +#endif + diff --git a/graphics/scaler/scale3x.cpp b/graphics/scaler/scale3x.cpp new file mode 100644 index 0000000000..22366d7cdb --- /dev/null +++ b/graphics/scaler/scale3x.cpp @@ -0,0 +1,238 @@ +/* + * This file is part of the Scale2x project. + * + * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * This file contains a C and MMX implementation of the Scale2x effect. + * + * You can find an high level description of the effect at : + * + * http://scale2x.sourceforge.net/ + * + * Alternatively at the previous license terms, you are allowed to use this + * code in your program with these conditions: + * - the program is not used in commercial activities. + * - the whole source code of the program is released with the binary. + * - derivative works of the program are allowed. 
+ */ + +#if HAVE_CONFIG_H +#include +#endif + +#include "common/scaler/intern.h" + +#include "scale3x.h" + +#include + +/***************************************************************************/ +/* Scale3x C implementation */ + +static inline void scale3x_8_def_border(scale3x_uint8* __restrict__ dst, const scale3x_uint8* __restrict__ src0, const scale3x_uint8* __restrict__ src1, const scale3x_uint8* __restrict__ src2, unsigned count) +{ + /* central pixels */ + while (count) { + if (src0[0] != src2[0] && src1[-1] != src1[1]) { + dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0]; + dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0]; + dst[2] = src1[1] == src0[0] ? src1[1] : src1[0]; + } else { + dst[0] = src1[0]; + dst[1] = src1[0]; + dst[2] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 3; + --count; + } +} + +static inline void scale3x_8_def_center(scale3x_uint8* __restrict__ dst, const scale3x_uint8* __restrict__ src0, const scale3x_uint8* __restrict__ src1, const scale3x_uint8* __restrict__ src2, unsigned count) +{ + /* central pixels */ + while (count) { + if (src0[0] != src2[0] && src1[-1] != src1[1]) { + dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0]; + dst[1] = src1[0]; + dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0]; + } else { + dst[0] = src1[0]; + dst[1] = src1[0]; + dst[2] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 3; + --count; + } +} + +static inline void scale3x_16_def_border(scale3x_uint16* __restrict__ dst, const scale3x_uint16* __restrict__ src0, const scale3x_uint16* __restrict__ src1, const scale3x_uint16* __restrict__ src2, unsigned count) +{ + /* central pixels */ + while (count) { + if (src0[0] != src2[0] && src1[-1] != src1[1]) { + dst[0] = src1[-1] == src0[0] ? 
src1[-1] : src1[0]; + dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0]; + dst[2] = src1[1] == src0[0] ? src1[1] : src1[0]; + } else { + dst[0] = src1[0]; + dst[1] = src1[0]; + dst[2] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 3; + --count; + } +} + +static inline void scale3x_16_def_center(scale3x_uint16* __restrict__ dst, const scale3x_uint16* __restrict__ src0, const scale3x_uint16* __restrict__ src1, const scale3x_uint16* __restrict__ src2, unsigned count) +{ + /* central pixels */ + while (count) { + if (src0[0] != src2[0] && src1[-1] != src1[1]) { + dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0]; + dst[1] = src1[0]; + dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0]; + } else { + dst[0] = src1[0]; + dst[1] = src1[0]; + dst[2] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 3; + --count; + } +} + +static inline void scale3x_32_def_border(scale3x_uint32* __restrict__ dst, const scale3x_uint32* __restrict__ src0, const scale3x_uint32* __restrict__ src1, const scale3x_uint32* __restrict__ src2, unsigned count) +{ + /* central pixels */ + while (count) { + if (src0[0] != src2[0] && src1[-1] != src1[1]) { + dst[0] = src1[-1] == src0[0] ? src1[-1] : src1[0]; + dst[1] = (src1[-1] == src0[0] && src1[0] != src0[1]) || (src1[1] == src0[0] && src1[0] != src0[-1]) ? src0[0] : src1[0]; + dst[2] = src1[1] == src0[0] ? 
src1[1] : src1[0]; + } else { + dst[0] = src1[0]; + dst[1] = src1[0]; + dst[2] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 3; + --count; + } +} + +static inline void scale3x_32_def_center(scale3x_uint32* __restrict__ dst, const scale3x_uint32* __restrict__ src0, const scale3x_uint32* __restrict__ src1, const scale3x_uint32* __restrict__ src2, unsigned count) +{ + /* central pixels */ + while (count) { + if (src0[0] != src2[0] && src1[-1] != src1[1]) { + dst[0] = (src1[-1] == src0[0] && src1[0] != src2[-1]) || (src1[-1] == src2[0] && src1[0] != src0[-1]) ? src1[-1] : src1[0]; + dst[1] = src1[0]; + dst[2] = (src1[1] == src0[0] && src1[0] != src2[1]) || (src1[1] == src2[0] && src1[0] != src0[1]) ? src1[1] : src1[0]; + } else { + dst[0] = src1[0]; + dst[1] = src1[0]; + dst[2] = src1[0]; + } + + ++src0; + ++src1; + ++src2; + dst += 3; + --count; + } +} + +/** + * Scale by a factor of 3 a row of pixels of 8 bits. + * The function is implemented in C. + * The pixels over the left and right borders are assumed of the same color of + * the pixels on the border. + * \param src0 Pointer at the first pixel of the previous row. + * \param src1 Pointer at the first pixel of the current row. + * \param src2 Pointer at the first pixel of the next row. + * \param count Length in pixels of the src0, src1 and src2 rows. + * It must be at least 2. + * \param dst0 First destination row, triple length in pixels. + * \param dst1 Second destination row, triple length in pixels. + * \param dst2 Third destination row, triple length in pixels. + */ +void scale3x_8_def(scale3x_uint8* dst0, scale3x_uint8* dst1, scale3x_uint8* dst2, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count) +{ + scale3x_8_def_border(dst0, src0, src1, src2, count); + scale3x_8_def_center(dst1, src0, src1, src2, count); + scale3x_8_def_border(dst2, src2, src1, src0, count); +} + +/** + * Scale by a factor of 3 a row of pixels of 16 bits. 
+ * This function operates like scale3x_8_def() but for 16 bits pixels. + * \param src0 Pointer at the first pixel of the previous row. + * \param src1 Pointer at the first pixel of the current row. + * \param src2 Pointer at the first pixel of the next row. + * \param count Length in pixels of the src0, src1 and src2 rows. + * It must be at least 2. + * \param dst0 First destination row, triple length in pixels. + * \param dst1 Second destination row, triple length in pixels. + * \param dst2 Third destination row, triple length in pixels. + */ +void scale3x_16_def(scale3x_uint16* dst0, scale3x_uint16* dst1, scale3x_uint16* dst2, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count) +{ + scale3x_16_def_border(dst0, src0, src1, src2, count); + scale3x_16_def_center(dst1, src0, src1, src2, count); + scale3x_16_def_border(dst2, src2, src1, src0, count); +} + +/** + * Scale by a factor of 3 a row of pixels of 32 bits. + * This function operates like scale3x_8_def() but for 32 bits pixels. + * \param src0 Pointer at the first pixel of the previous row. + * \param src1 Pointer at the first pixel of the current row. + * \param src2 Pointer at the first pixel of the next row. + * \param count Length in pixels of the src0, src1 and src2 rows. + * It must be at least 2. + * \param dst0 First destination row, triple length in pixels. + * \param dst1 Second destination row, triple length in pixels. + * \param dst2 Third destination row, triple length in pixels. 
+ */ +void scale3x_32_def(scale3x_uint32* dst0, scale3x_uint32* dst1, scale3x_uint32* dst2, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count) +{ + scale3x_32_def_border(dst0, src0, src1, src2, count); + scale3x_32_def_center(dst1, src0, src1, src2, count); + scale3x_32_def_border(dst2, src2, src1, src0, count); +} + diff --git a/graphics/scaler/scale3x.h b/graphics/scaler/scale3x.h new file mode 100644 index 0000000000..3f24d884e1 --- /dev/null +++ b/graphics/scaler/scale3x.h @@ -0,0 +1,37 @@ +/* + * This file is part of the Scale2x project. + * + * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __SCALE3X_H +#define __SCALE3X_H + +#if defined(_MSC_VER) +#define __restrict__ +#endif + +typedef unsigned char scale3x_uint8; +typedef unsigned short scale3x_uint16; +typedef unsigned scale3x_uint32; + +void scale3x_8_def(scale3x_uint8* dst0, scale3x_uint8* dst1, scale3x_uint8* dst2, const scale3x_uint8* src0, const scale3x_uint8* src1, const scale3x_uint8* src2, unsigned count); +void scale3x_16_def(scale3x_uint16* dst0, scale3x_uint16* dst1, scale3x_uint16* dst2, const scale3x_uint16* src0, const scale3x_uint16* src1, const scale3x_uint16* src2, unsigned count); +void scale3x_32_def(scale3x_uint32* dst0, scale3x_uint32* dst1, scale3x_uint32* dst2, const scale3x_uint32* src0, const scale3x_uint32* src1, const scale3x_uint32* src2, unsigned count); + +#endif + diff --git a/graphics/scaler/scalebit.cpp b/graphics/scaler/scalebit.cpp new file mode 100644 index 0000000000..71d9172534 --- /dev/null +++ b/graphics/scaler/scalebit.cpp @@ -0,0 +1,357 @@ +/* + * This file is part of the Scale2x project. + * + * Copyright (C) 2003 Andrea Mazzoleni + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * This file contains an example implementation of the Scale effect + * applied to a generic bitmap. 
+ *
+ * You can find a high-level description of the effect at :
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#if defined(HAVE_CONFIG_H)
+#include <config.h>
+#endif
+
+#include "graphics/scaler/intern.h"
+
+#include "scale2x.h"
+#include "scale3x.h"
+
+#if defined(HAVE_ALLOCA_H)
+#include <alloca.h>
+#endif
+
+#include <assert.h>
+#include <stdlib.h>
+
+/*
+ * Cast helpers for the stage_* functions below: DST(bits, num) expands to the
+ * parameter dst<num> cast to a pointer to scale2x_uint<bits>, SRC(bits, num)
+ * does the same for src<num> with a const pointer. They only work inside a
+ * function whose parameters are named dst0, dst1, ... and src0, src1, ...
+ */
+#define DST(bits, num) (scale2x_uint ## bits *)dst ## num
+#define SRC(bits, num) (const scale2x_uint ## bits *)src ## num
+
+/**
+ * Apply the Scale2x effect on a group of rows. Used internally.
+ * \param dst0 First destination row.
+ * \param dst1 Second destination row.
+ * \param src0 First source row.
+ * \param src1 Second source row.
+ * \param src2 Third source row.
+ * \param pixel Bytes per pixel; selects the 8, 16 or 32 bit row routine. Any value other than 1, 2 or 4 does nothing.
+ * \param pixel_per_row Passed to the row routine as its count argument.
+ */
+static inline void stage_scale2x(void* dst0, void* dst1, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+	switch (pixel) {
+#if defined(__GNUC__) && defined(__i386__)
+		/* GCC on i386 uses the MMX row routines */
+		case 1 : scale2x_8_mmx(DST(8,0), DST(8,1), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
+		case 2 : scale2x_16_mmx(DST(16,0), DST(16,1), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
+		case 4 : scale2x_32_mmx(DST(32,0), DST(32,1), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
+#else
+		case 1 : scale2x_8_def(DST(8,0), DST(8,1), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
+		case 2 : scale2x_16_def(DST(16,0), DST(16,1), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
+		case 4 : scale2x_32_def(DST(32,0), DST(32,1), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
+#endif
+	}
+}
+
+/**
+ * Apply the Scale3x effect on a group of rows. Used internally.
+ */
+static inline void stage_scale3x(void* dst0, void* dst1, void* dst2, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
+{
+	/* pick the row routine matching the pixel size; any other size does nothing */
+	if (pixel == 1) {
+		scale3x_8_def(DST(8,0), DST(8,1), DST(8,2), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row);
+	} else if (pixel == 2) {
+		scale3x_16_def(DST(16,0), DST(16,1), DST(16,2), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row);
+	} else if (pixel == 4) {
+		scale3x_32_def(DST(32,0), DST(32,1), DST(32,2), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row);
+	}
+}
+
+/**
+ * Run one Scale4x step over a group of rows. Used internally.
+ * The four source rows are handed to stage_scale2x() as two overlapping
+ * triples (src0..src2, then src1..src3), each with twice pixel_per_row
+ * as the row width.
+ */
+static inline void stage_scale4x(void* dst0, void* dst1, void* dst2, void* dst3, const void* src0, const void* src1, const void* src2, const void* src3, unsigned pixel, unsigned pixel_per_row)
+{
+	const unsigned doubled_row = 2 * pixel_per_row;
+
+	stage_scale2x(dst0, dst1, src0, src1, src2, pixel, doubled_row);
+	stage_scale2x(dst2, dst3, src1, src2, src3, pixel, doubled_row);
+}
+
+#define SCDST(i) (dst+(i)*dst_slice)
+#define SCSRC(i) (src+(i)*src_slice)
+#define SCMID(i) (mid[(i)])
+
+/**
+ * Apply the Scale2x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 2x2 times the size of the source bitmap.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+static void scale2x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned char* out = (unsigned char*)void_dst;
+	const unsigned char* in = (const unsigned char*)void_src;
+	unsigned row;
+
+	assert(height >= 2);
+
+	/*
+	 * The loop runs height times. Each pass reads the rows at in,
+	 * in + src_slice and in + 2 * src_slice, writes two destination rows,
+	 * then moves the input down by one row and the output by two.
+	 * In total height + 2 source rows starting at void_src are read.
+	 */
+	for (row = 0; row < height; ++row) {
+		stage_scale2x(out, out + dst_slice, in, in + src_slice, in + 2 * src_slice, pixel, width);
+
+		out += 2 * dst_slice;
+		in += src_slice;
+	}
+
+#if defined(__GNUC__) && defined(__i386__)
+	scale2x_mmx_emms();
+#endif
+}
+
+/**
+ * Apply the Scale3x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 3x3 times the size of the source bitmap.
+ * The loop reads height + 2 source rows starting at void_src.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+static void scale3x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned char* out = (unsigned char*)void_dst;
+	const unsigned char* in = (const unsigned char*)void_src;
+	unsigned row;
+
+	assert(height >= 2);
+
+	/* same walk as scale2x(), but three destination rows are written per pass */
+	for (row = 0; row < height; ++row) {
+		stage_scale3x(out, out + dst_slice, out + 2 * dst_slice, in, in + src_slice, in + 2 * src_slice, pixel, width);
+
+		out += 3 * dst_slice;
+		in += src_slice;
+	}
+}
+
+/**
+ * Apply the Scale4x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified. + * The destination bitmap must be manually allocated before calling the function, + * note that the resulting size is exactly 4x4 times the size of the source bitmap. + * \note This function also requires a small buffer bitmap used internally to store + * intermediate results. This bitmap must have a horizontal size of at least 2*width*pixel bytes, + * and a vertical size of 6 rows. The memory of this buffer must not be allocated + * in video memory because it's also read and not only written. Generally + * a heap (malloc) or a stack (alloca) buffer is the best choice. + * \param void_dst Pointer at the first pixel of the destination bitmap. + * \param dst_slice Size in bytes of a destination bitmap row. + * \param void_mid Pointer at the first pixel of the buffer bitmap. + * \param mid_slice Size in bytes of a buffer bitmap row. + * \param void_src Pointer at the first pixel of the source bitmap. + * \param src_slice Size in bytes of a source bitmap row. + * \param pixel Bytes per pixel of the source and destination bitmap. + * \param width Horizontal size in pixels of the source bitmap. + * \param height Vertical size in pixels of the source bitmap. 
+ */
+static void scale4x_buf(void* void_dst, unsigned dst_slice, void* void_mid, unsigned mid_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned char* dst = (unsigned char*)void_dst;
+	const unsigned char* src = (const unsigned char*)void_src;
+	unsigned count;
+	unsigned char* mid[6];
+
+	assert(height >= 4);
+
+	count = height;
+
+	/* set the 6 buffer pointers */
+	mid[0] = (unsigned char*)void_mid;
+	mid[1] = mid[0] + mid_slice;
+	mid[2] = mid[1] + mid_slice;
+	mid[3] = mid[2] + mid_slice;
+	mid[4] = mid[3] + mid_slice;
+	mid[5] = mid[4] + mid_slice;
+
+	/*
+	 * Prime the intermediate window. Every pass of the loop below reads
+	 * mid[1]..mid[4] but only writes mid[4] and mid[5], so without this the
+	 * first two passes would read rows of the buffer that were never written.
+	 * The two calls continue the loop's own pattern one and two rows earlier
+	 * (source rows 0..2 and 1..3, where the first pass uses 2..4).
+	 */
+	stage_scale2x(SCMID(0), SCMID(1), SCSRC(0), SCSRC(1), SCSRC(2), pixel, width);
+	stage_scale2x(SCMID(2), SCMID(3), SCSRC(1), SCSRC(2), SCSRC(3), pixel, width);
+
+	while (count) {
+		unsigned char* tmp;
+
+		/* expand the next source row triple into the two spare buffer rows */
+		stage_scale2x(SCMID(4), SCMID(5), SCSRC(2), SCSRC(3), SCSRC(4), pixel, width);
+		/* turn four consecutive buffer rows into four destination rows */
+		stage_scale4x(SCDST(0), SCDST(1), SCDST(2), SCDST(3), SCMID(1), SCMID(2), SCMID(3), SCMID(4), pixel, width);
+
+		dst = SCDST(4);
+		src = SCSRC(1);
+
+		tmp = SCMID(0); /* shift by 2 position */
+		SCMID(0) = SCMID(2);
+		SCMID(2) = SCMID(4);
+		SCMID(4) = tmp;
+		tmp = SCMID(1);
+		SCMID(1) = SCMID(3);
+		SCMID(3) = SCMID(5);
+		SCMID(5) = tmp;
+
+		--count;
+	}
+
+#if defined(__GNUC__) && defined(__i386__)
+	scale2x_mmx_emms();
+#endif
+}
+
+/**
+ * Apply the Scale4x effect on a bitmap.
+ * The destination bitmap is filled with the scaled version of the source bitmap.
+ * The source bitmap isn't modified.
+ * The destination bitmap must be manually allocated before calling the function,
+ * note that the resulting size is exactly 4x4 times the size of the source bitmap.
+ * \note This function operates like ::scale4x_buf() but the intermediate buffer is
+ * automatically allocated in the stack.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+static void scale4x(void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	/* one buffer row holds 2 * width pixels; the size is rounded up to a multiple of 8 bytes */
+	const unsigned mid_slice = (2 * pixel * width + 0x7) & ~0x7;
+	void* mid;
+
+#if defined(HAVE_ALLOCA)
+	mid = alloca(6 * mid_slice); /* six row buffers */
+
+	assert(mid != 0); /* alloca is not expected to fail */
+#else
+	mid = malloc(6 * mid_slice); /* six row buffers */
+
+	if (mid == 0)
+		return;
+#endif
+
+	scale4x_buf(void_dst, dst_slice, mid, mid_slice, void_src, src_slice, pixel, width, height);
+
+#if !defined(HAVE_ALLOCA)
+	free(mid);
+#endif
+}
+
+/**
+ * Tell whether scale() can be used with the given arguments.
+ * \param scale Scale factor. 2, 3 or 4.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ * \return
+ *   - -1 on precondition violated.
+ *   - 0 on success.
+ */
+int scale_precondition(unsigned scale, unsigned pixel, unsigned width, unsigned height)
+{
+	unsigned min_height;
+
+	if (scale < 2 || scale > 4)
+		return -1;
+
+	if (pixel != 1 && pixel != 2 && pixel != 4)
+		return -1;
+
+	/* factor 4 needs at least four rows, factors 2 and 3 need two */
+	min_height = (scale == 4) ? 4 : 2;
+	if (height < min_height)
+		return -1;
+
+#if defined(__GNUC__) && defined(__i386__)
+	if (scale == 3) {
+		if (width < 2)
+			return -1;
+	} else {
+		/* factors 2 and 4: at least 16 bytes per row, and a multiple of 8 bytes */
+		if (width < (16 / pixel))
+			return -1;
+		if (width % (8 / pixel) != 0)
+			return -1;
+	}
+#else
+	if (width < 2)
+		return -1;
+#endif
+
+	return 0;
+}
+
+/**
+ * Apply the Scale effect on a bitmap.
+ * This function is simply a common interface for ::scale2x(), ::scale3x() and ::scale4x().
+ * \param scale Scale factor. 2, 3 or 4.
+ * \param void_dst Pointer at the first pixel of the destination bitmap.
+ * \param dst_slice Size in bytes of a destination bitmap row.
+ * \param void_src Pointer at the first pixel of the source bitmap.
+ * \param src_slice Size in bytes of a source bitmap row.
+ * \param pixel Bytes per pixel of the source and destination bitmap.
+ * \param width Horizontal size in pixels of the source bitmap.
+ * \param height Vertical size in pixels of the source bitmap.
+ */
+void scale(unsigned scale, void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height)
+{
+	/* forward to the routine for the requested factor; any other factor does nothing */
+	if (scale == 2) {
+		scale2x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+	} else if (scale == 3) {
+		scale3x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+	} else if (scale == 4) {
+		scale4x(void_dst, dst_slice, void_src, src_slice, pixel, width, height);
+	}
+}
+
diff --git a/graphics/scaler/scalebit.h b/graphics/scaler/scalebit.h
new file mode 100644
index 0000000000..dd46883f97
--- /dev/null
+++ b/graphics/scaler/scalebit.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2003 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file contains an example implementation of the Scale effect
+ * applied to a generic bitmap.
+ *
+ * You can find a high-level description of the effect at :
+ *
+ * http://scale2x.sourceforge.net/
+ *
+ * Alternatively at the previous license terms, you are allowed to use this
+ * code in your program with these conditions:
+ * - the program is not used in commercial activities.
+ * - the whole source code of the program is released with the binary.
+ * - derivative works of the program are allowed.
+ */
+
+#ifndef __SCALEBIT_H
+#define __SCALEBIT_H
+/*
+ * scale_precondition() returns 0 when the given scale factor (2, 3 or 4),
+ * bytes per pixel (1, 2 or 4), width and height are accepted, and -1 otherwise.
+ * scale() forwards to the 2x, 3x or 4x routine selected by its first argument
+ * and does nothing for any other factor. It performs no argument checking of
+ * its own, so callers are presumably expected to call scale_precondition()
+ * first -- verify against callers.
+ */
+int scale_precondition(unsigned scale, unsigned pixel, unsigned width, unsigned height);
+void scale(unsigned scale, void* void_dst, unsigned dst_slice, const void* void_src, unsigned src_slice, unsigned pixel, unsigned width, unsigned height);
+
+#endif
+
diff --git a/graphics/scaler/thumbnail.cpp b/graphics/scaler/thumbnail.cpp
new file mode 100644
index 0000000000..25e00363f1
--- /dev/null
+++ b/graphics/scaler/thumbnail.cpp
@@ -0,0 +1,194 @@
+/* ScummVM - Scumm Interpreter
+ * Copyright (C) 2001 Ludvig Strigeus
+ * Copyright (C) 2001-2006 The ScummVM project
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *
+ */
+
+#include "common/stdafx.h"
+#include "common/scummsys.h"
+#include "common/system.h"
+
+#include "graphics/scaler/intern.h"
+
+/**
+ * Blend a 2x2 block of 16-bit pixels into one pixel.
+ * NOTE(review): Q_INTERPOLATE comes from scaler/intern.h, which is not visible
+ * here; the explicit <bitFormat> argument assumes it is a template on the
+ * pixel format -- confirm.
+ *
+ * @param src		address of the top-left pixel of the block
+ * @param srcPitch	distance in bytes between the two rows of the block
+ * @return the value Q_INTERPOLATE() yields for the four pixels
+ */
+template<int bitFormat>
+uint16 quadBlockInterpolate(const uint8* src, uint32 srcPitch) {
+	uint16 colorx1y1 = *(((const uint16*)src));
+	uint16 colorx2y1 = *(((const uint16*)src) + 1);
+
+	uint16 colorx1y2 = *(((const uint16*)(src + srcPitch)));
+	uint16 colorx2y2 = *(((const uint16*)(src + srcPitch)) + 1);
+
+	return Q_INTERPOLATE<bitFormat>(colorx1y1, colorx2y1, colorx1y2, colorx2y2);
+}
+
+/**
+ * Downscale a 16-bit image to half its width and height by blending each
+ * 2x2 block into one destination pixel.
+ *
+ * @param src		first source pixel
+ * @param srcPitch	size in bytes of a source row
+ * @param dstPtr	first destination pixel
+ * @param dstPitch	size in bytes of a destination row
+ * @param width		source width in pixels, must be even
+ * @param height	source height in pixels, must be even
+ */
+template<int bitFormat>
+void createThumbnail_2(const uint8* src, uint32 srcPitch, uint8* dstPtr, uint32 dstPitch, int width, int height) {
+	assert(width % 2 == 0);
+	assert(height % 2 == 0);
+	for (int y = 0; y < height; y += 2) {
+		for (int x = 0; x < width; x += 2, dstPtr += 2) {
+			*((uint16*)dstPtr) = quadBlockInterpolate<bitFormat>(src + 2 * x, srcPitch);
+		}
+		// skip whatever is left of the destination row after width / 2 pixels
+		dstPtr += (dstPitch - 2 * width / 2);
+		src += 2 * srcPitch;
+	}
+}
+
+/**
+ * Downscale a 16-bit image to a quarter of its width and height: each 4x4
+ * block is blended as four 2x2 blocks whose results are blended once more.
+ *
+ * @param src		first source pixel
+ * @param srcPitch	size in bytes of a source row
+ * @param dstPtr	first destination pixel
+ * @param dstPitch	size in bytes of a destination row
+ * @param width		source width in pixels, must be a multiple of 4
+ * @param height	source height in pixels, must be a multiple of 4
+ */
+template<int bitFormat>
+void createThumbnail_4(const uint8* src, uint32 srcPitch, uint8* dstPtr, uint32 dstPitch, int width, int height) {
+	assert(width % 4 == 0);
+	assert(height % 4 == 0);
+	for (int y = 0; y < height; y += 4) {
+		for (int x = 0; x < width; x += 4, dstPtr += 2) {
+			uint16 upleft = quadBlockInterpolate<bitFormat>(src + 2 * x, srcPitch);
+			uint16 upright = quadBlockInterpolate<bitFormat>(src + 2 * (x + 2), srcPitch);
+			uint16 downleft = quadBlockInterpolate<bitFormat>(src + srcPitch * 2 + 2 * x, srcPitch);
+			uint16 downright = quadBlockInterpolate<bitFormat>(src + srcPitch * 2 + 2 * (x + 2), srcPitch);
+
+			*((uint16*)dstPtr) = Q_INTERPOLATE<bitFormat>(upleft, upright, downleft, downright);
+		}
+		// skip whatever is left of the destination row after width / 4 pixels
+		dstPtr += (dstPitch - 2 * width / 4);
+		src += 4 * srcPitch;
+	}
+}
+
+#include "graphics/scaler.h"
+
+/**
+ * Downscale a 16-bit image: a 320 pixel wide source is halved, a 640 pixel
+ * wide source is quartered. Any other width, or a gBitFormat other than
+ * 565 or 555, leaves the destination untouched.
+ */
+void createThumbnail(const uint8* src, uint32 srcPitch, uint8* dstPtr, uint32 dstPitch, int width, int height) {
+	// only 1/2 and 1/4 downscale supported
+	if (width != 320 && width != 640)
+		return;
+
+	int downScaleMode = (width == 320) ? 2 : 4;
+
+	if (downScaleMode == 2) {
+		if (gBitFormat == 565)
+			createThumbnail_2<565>(src, srcPitch, dstPtr, dstPitch, width, height);
+		else if (gBitFormat == 555)
+			createThumbnail_2<555>(src, srcPitch, dstPtr, dstPitch, width, height);
+	} else if (downScaleMode == 4) {
+		if (gBitFormat == 565)
+			createThumbnail_4<565>(src, srcPitch, dstPtr, dstPitch, width, height);
+		else if (gBitFormat == 555)
+			createThumbnail_4<555>(src, srcPitch, dstPtr, dstPitch, width, height);
+	}
+}
+
+
+/**
+ * Copies the current screen contents to a new surface, using RGB565 format.
+ * WARNING: surf->free() must be called by the user to avoid leaking.
+ *
+ * @param surf		the surface to store the data in
+ * @return false if the raw screen could not be grabbed, true otherwise
+ */
+static bool grabScreen565(Graphics::Surface *surf) {
+	Graphics::Surface screen;
+	if (!g_system->grabRawScreen(&screen))
+		return false;
+
+	assert(screen.bytesPerPixel == 1 && screen.pixels != 0);
+
+	byte palette[256 * 4];
+	g_system->grabPalette(&palette[0], 0, 256);
+
+	surf->create(screen.w, screen.h, 2);
+
+	for (uint y = 0; y < screen.h; ++y) {
+		for (uint x = 0; x < screen.w; ++x) {
+			// each palette entry takes four bytes, of which the first three are read
+			const byte *entry = &palette[((uint8*)screen.pixels)[y * screen.pitch + x] * 4];
+			byte r = entry[0];
+			byte g = entry[1];
+			byte b = entry[2];
+
+			((uint16*)surf->pixels)[y * surf->w + x] = (((r >> 3) & 0x1F) << 11) | (((g >> 2) & 0x3F) << 5) | ((b >> 3) & 0x1F);
+		}
+	}
+
+	screen.free();
+	return true;
+}
+
+/**
+ * Grab the current screen and store a downscaled RGB565 copy of it in surf.
+ * Screens narrower than 320 pixels are centered on a 320 pixel wide surface,
+ * 720 pixel wide screens are cropped to 640 pixels, before the downscale.
+ *
+ * @param surf		the surface that receives the thumbnail, must not be null
+ * @return false if the screen could not be grabbed, true otherwise
+ */
+bool createThumbnailFromScreen(Graphics::Surface* surf) {
+	assert(surf);
+
+	int screenWidth = g_system->getWidth();
+	int screenHeight = g_system->getHeight();
+
+	Graphics::Surface screen;
+
+	if (!grabScreen565(&screen))
+		return false;
+
+	uint16 width = screenWidth;
+
+	if (screenWidth < 320) {
+		// Special case to handle MM NES (uses a screen width of 256)
+		width = 320;
+
+		// center MM NES screen
+		Graphics::Surface newscreen;
+		newscreen.create(width, screen.h, screen.bytesPerPixel);
+
+		uint8 *dst = (uint8*)newscreen.getBasePtr((320 - screenWidth) / 2, 0);
+		uint8 *src = (uint8*)screen.getBasePtr(0, 0);
+		uint16 height = screen.h;
+
+		while (height--) {
+			memcpy(dst, src, screen.pitch);
+			dst += newscreen.pitch;
+			src += screen.pitch;
+		}
+
+		screen.free();
+		screen = newscreen;
+	} else if (screenWidth == 720) {
+		// Special case to handle Hercules mode
+		width = 640;
+		screenHeight = 400;
+
+		// cut off menu and so on..
+		Graphics::Surface newscreen;
+		newscreen.create(width, 400, screen.bytesPerPixel);
+
+		uint8 *dst = (uint8*)newscreen.getBasePtr(0, (400 - 240) / 2);
+		uint8 *src = (uint8*)screen.getBasePtr(41, 28);
+
+		for (int y = 0; y < 240; ++y) {
+			memcpy(dst, src, 640 * screen.bytesPerPixel);
+			dst += newscreen.pitch;
+			src += screen.pitch;
+		}
+
+		screen.free();
+		screen = newscreen;
+	}
+
+	uint16 newHeight = !(screenHeight % 240) ? kThumbnailHeight2 : kThumbnailHeight1;
+
+	// createThumbnail() picks its pixel format from gBitFormat, so force 565
+	// while it runs and put the previous value back afterwards
+	int gBitFormatBackUp = gBitFormat;
+	gBitFormat = 565;
+	surf->create(kThumbnailWidth, newHeight, sizeof(uint16));
+	createThumbnail((const uint8*)screen.pixels, width * sizeof(uint16), (uint8*)surf->pixels, surf->pitch, width, screenHeight);
+	gBitFormat = gBitFormatBackUp;
+
+	screen.free();
+
+	return true;
+}
-- cgit v1.2.3