aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEugene Sandulenko2004-05-21 02:08:48 +0000
committerEugene Sandulenko2004-05-21 02:08:48 +0000
commitd33b24036e7a0753a6de8a416c5c649f7ba84180 (patch)
tree03ce5d46129d6731a81eb894a4ddfdd3902aab00
parent5464e0951bf5145faa6f4dbe54d012da2364ce27 (diff)
downloadscummvm-rg350-d33b24036e7a0753a6de8a416c5c649f7ba84180.tar.gz
scummvm-rg350-d33b24036e7a0753a6de8a416c5c649f7ba84180.tar.bz2
scummvm-rg350-d33b24036e7a0753a6de8a416c5c649f7ba84180.zip
Added assembly versions of HQ2x and HQ3x scalers.
svn-id: r13844
-rw-r--r--Makefile.common7
-rw-r--r--NEWS1
-rw-r--r--README5
-rw-r--r--common/module.mk6
-rw-r--r--common/scaler.cpp25
-rw-r--r--common/scaler/hq2x.cpp23
-rw-r--r--common/scaler/hq2x_i386.asm1941
-rw-r--r--common/scaler/hq3x.cpp23
-rw-r--r--common/scaler/hq3x_i386.asm2533
-rw-r--r--common/scaler/intern.h2
-rwxr-xr-xconfigure83
-rw-r--r--doc/09.tex5
12 files changed, 4645 insertions, 9 deletions
diff --git a/Makefile.common b/Makefile.common
index 78968451bb..c4f4f9b1e0 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -158,6 +158,13 @@ else
$(CXX) -Wp,-MMD,"$(*D)/$(DEPDIR)/$(*F).d",-MQ,"$@",-MP $(CXXFLAGS) $(CPPFLAGS) -c $(<) -o $*.o
endif
+ifdef HAVE_NASM
+.SUFFIXES: .asm  # register the .asm suffix so the suffix rule below is recognised
+
+.asm.o:  # suffix rule: assemble foo.asm into foo.o with $(NASM)
+ $(NASM) -O1 $(NASMFLAGS) -g -o $*.o $(<)
+endif
+
# Include the dependency tracking files. We add /dev/null at the end
# of the list to avoid a warning/error if no .d file exist
-include $(wildcard $(addsuffix /*.d,$(DEPDIRS))) /dev/null
diff --git a/NEWS b/NEWS
index a16dd64613..752a1fae46 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,7 @@ For a more comprehensive changelog for the latest experimental CVS code, see:
- Added an 'On Screen Display' to the SDL backend
- Rewrote the backend API partially
- Comments in config files are preserved now
+ - Added MMX i386 assembler HQ2x and HQ3x scalers
SCUMM:
- Added graphics decoders for 3DO Humongous Entertainment games
diff --git a/README b/README
index d659b5f8cb..0be4baec96 100644
--- a/README
+++ b/README
@@ -999,6 +999,11 @@ Visual C++ are supported. If you wish to use MP3-compressed CD tracks or
USE_MAD. Tools for compressing .SOU files to .SO3 files can be
found in the 'tools' CVS module, or in the 'scummvm-tools' package.
+Some parts of ScummVM, particularly scalers, have highly optimized versions
+written in assembler. If you wish to use this option, you will need to install
+the nasm assembler (see http://nasm.sf.net). Note that currently we have only x86
+MMX optimized versions, and they will not compile on other processors.
+
On Win9x/NT/XP you can define USE_WINDBG and attach WinDbg to browse debug
messages (see http://www.sysinternals.com/ntw2k/freeware/debugview.shtml).
diff --git a/common/module.mk b/common/module.mk
index 715b735882..6d9d8a8079 100644
--- a/common/module.mk
+++ b/common/module.mk
@@ -16,6 +16,12 @@ MODULE_OBJS := \
common/scaler/hq2x.o \
common/scaler/hq3x.o
+ifdef HAVE_NASM
+MODULE_OBJS += \
+ common/scaler/hq2x_i386.o \
+ common/scaler/hq3x_i386.o
+endif
+
MODULE_DIRS += \
common \
common/scaler
diff --git a/common/scaler.cpp b/common/scaler.cpp
index bf5da84ca1..92d557fc7f 100644
--- a/common/scaler.cpp
+++ b/common/scaler.cpp
@@ -27,7 +27,22 @@
int gBitFormat = 565;
// RGB-to-YUV lookup table
-int RGBtoYUV[65536];
+extern "C" {  // C linkage: the NASM sources import these tables by plain symbol name
+
+#ifdef USE_NASM
+// NOTE: the assembly sources import the tables as _RGBtoYUV and _LUT16to32.
+// If your compiler uses different mangled names, add another condition here.
+
+#ifndef _MSC_VER
+#define RGBtoYUV _RGBtoYUV  // presumably needed where the compiler adds no leading underscore itself -- confirm per platform
+#define LUT16to32 _LUT16to32
+#endif
+
+#endif
+
+uint RGBtoYUV[65536];  // filled by InitLUT(): (Y << 16) + (u << 8) + v, indexed by 16-bit pixel value
+uint LUT16to32[65536];  // filled by InitLUT(): 16-bit pixel expanded to one colour field per byte
+}
static const uint16 dotmatrix_565[16] = {
0x01E0, 0x0007, 0x3800, 0x0000,
@@ -63,6 +78,10 @@ void InitLUT(uint32 BitFormat) {
int Y, u, v;
int gInc, gShift;
+ for (int i = 0; i < 65536; i++) {
+ LUT16to32[i] = ((i & 0xF800) << 8) + ((i & 0x07E0) << 5) + ((i & 0x001F) << 3);
+ }
+
if (BitFormat == 565) {
gInc = 256 >> 6;
gShift = 6 - 3;
@@ -76,8 +95,8 @@ void InitLUT(uint32 BitFormat) {
for (b = 0; b < 256; b += 8) {
Y = (r + g + b) >> 2;
u = 128 + ((r - b) >> 2);
- v = 128 + ((-r + 2 * g -b) >> 3);
- RGBtoYUV[ (r << (5+gShift)) + (g << gShift) + (b >> 3) ] = (Y << 16) + (u << 8) + v;
+ v = 128 + ((-r + 2 * g - b) >> 3);
+ RGBtoYUV[ (r << (5 + gShift)) + (g << gShift) + (b >> 3) ] = (Y << 16) + (u << 8) + v;
}
}
}
diff --git a/common/scaler/hq2x.cpp b/common/scaler/hq2x.cpp
index 98790e1383..ab4d7b371f 100644
--- a/common/scaler/hq2x.cpp
+++ b/common/scaler/hq2x.cpp
@@ -22,6 +22,25 @@
#include "common/scaler/intern.h"
+#ifdef USE_NASM
+// Assembly version of HQ2x: HQ2x() simply forwards to the NASM routine hq2x_16.
+
+extern "C" {
+
+#ifndef _MSC_VER
+#define hq2x_16 _hq2x_16  // the assembly file exports the routine as _hq2x_16
+#endif
+
+void hq2x_16(const byte *, byte *, uint32, uint32, uint32, uint32);  // (src, dst, width, height, srcPitch, dstPitch)
+
+}
+
+void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+ hq2x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch);  // NOTE(review): gBitFormat is not consulted here, unlike the C++ HQ2x() -- confirm 555 mode is handled
+}
+
+#else
+
#ifdef HAS_ALTIVEC
#include <sys/sysctl.h>
@@ -120,7 +139,6 @@ void HQ2x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
#undef bitFormat
#endif
-
void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
#ifdef HAS_ALTIVEC
if (isAltiVecAvailable()) {
@@ -131,8 +149,11 @@ void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
return;
}
#endif
+
if (gBitFormat == 565)
HQ2x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
else
HQ2x_555(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
}
+
+#endif //Assembly version
diff --git a/common/scaler/hq2x_i386.asm b/common/scaler/hq2x_i386.asm
new file mode 100644
index 0000000000..64beb4ec62
--- /dev/null
+++ b/common/scaler/hq2x_i386.asm
@@ -0,0 +1,1941 @@
+;hq2x filter
+;16bpp output
+;----------------------------------------------------------
+;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
+;
+;This program is free software; you can redistribute it and/or
+;modify it under the terms of the GNU General Public License
+;as published by the Free Software Foundation; either
+;version 2 of the License, or (at your option) any later
+;version.
+;
+;This program is distributed in the hope that it will be useful,
+;but WITHOUT ANY WARRANTY; without even the implied warranty of
+;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;GNU General Public License for more details.
+;
+;You should have received a copy of the GNU General Public License
+;along with this program; if not, write to the Free Software
+;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+GLOBAL _hq2x_16
+
+EXTERN _LUT16to32
+EXTERN _RGBtoYUV
+
+SECTION .bss  ; scratch state in static storage; NOTE(review): presumably makes _hq2x_16 non-reentrant
+linesleft resd 1  ; initialised from the Yres argument
+xcounter resd 1  ; set to Xres-2 at the start of each row
+cross resd 1  ; bits 1/2/4/8 set when w2/w4/w6/w8 is not equal to w5
+nextline resd 1  ; byte offset added to esi to read the row below (set to srcPitch at entry)
+prevline resd 1  ; byte offset added to esi to read the row above (set to 0 at entry)
+w1 resd 1  ; w1..w3: pixels taken from the prevline row
+w2 resd 1
+w3 resd 1
+w4 resd 1  ; w4..w6: pixels taken from the current row
+w5 resd 1  ; centre pixel, the one being scaled
+w6 resd 1
+w7 resd 1  ; w7..w9: pixels taken from the nextline row
+w8 resd 1
+w9 resd 1
+
+SECTION .data
+
+reg_blank dd 0,0  ; all-zero qword, used as the zero operand of punpcklbw/packuswb
+const3 dd 0x00030003,0x00000003  ; multiplier 3 in three 16-bit lanes (pmullw)
+const5 dd 0x00050005,0x00000005  ; multiplier 5 in three 16-bit lanes
+const6 dd 0x00060006,0x00000006  ; multiplier 6 in three 16-bit lanes
+const14 dd 0x000E000E,0x0000000E  ; multiplier 14 in three 16-bit lanes
+threshold dd 0x00300706,0x00000000  ; per-byte limits subtracted from a _RGBtoYUV difference: 0x30, 0x07, 0x06
+zerolowbits dd 0xF7DEF7DE  ; 0xF7DE clears bits 0, 5 and 11, the lowest bit of each 5-6-5 colour field
+moduloSrc dd 0  ; set at entry to srcPitch - 2*Xres
+moduloDst dd 0  ; set at entry to (dstPitch - 2*Xres) * 2
+
+SECTION .text
+
+%macro TestDiff 2  ; TestDiff a,b: ecx = 0 if pixels [a] and [b] match within [threshold], else non-zero; clobbers edx, mm1, mm2, mm5
+ xor ecx,ecx  ; default result: no difference
+ mov edx,[%1]
+ cmp edx,[%2]
+ je %%fin  ; identical pixel values, no table lookup needed
+ mov ecx,_RGBtoYUV
+ movd mm1,[ecx+edx*4]  ; table entry for pixel a
+ movq mm5,mm1  ; keep a copy for the reverse subtraction
+ mov edx,[%2]
+ movd mm2,[ecx+edx*4]  ; table entry for pixel b
+ psubusb mm1,mm2  ; saturating a-b per byte
+ psubusb mm2,mm5  ; saturating b-a per byte
+ por mm1,mm2  ; per-byte absolute difference
+ psubusb mm1,[threshold]  ; bytes that stay within the threshold become 0
+ movd ecx,mm1  ; non-zero if any byte exceeded its threshold
+%%fin:
+%endmacro
+
+%macro DiffOrNot 4  ; DiffOrNot a,b,X,Y: expand X if TestDiff a,b reports a difference, otherwise Y
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same  ; ecx == 0: pixels are considered equal
+ %3
+ jmp %%fin
+%%same:
+ %4
+%%fin
+%endmacro
+
+%macro DiffOrNot 6  ; same, two statements per branch: %3 %4 if different, otherwise %5 %6
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ %4
+ jmp %%fin
+%%same:
+ %5
+ %6
+%%fin
+%endmacro
+
+%macro DiffOrNot 8  ; same, three statements per branch: %3-%5 if different, otherwise %6-%8
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ %4
+ %5
+ jmp %%fin
+%%same:
+ %6
+ %7
+ %8
+%%fin
+%endmacro
+
+%macro DiffOrNot 10  ; same, four statements per branch: %3-%6 if different, otherwise %7-%10
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ %4
+ %5
+ %6
+ jmp %%fin
+%%same:
+ %7
+ %8
+ %9
+ %10
+%%fin
+%endmacro
+
+%macro Interp1 3  ; Interp1 dst,c1,c2: store roughly (3*c1 + c2)/4 as a 16-bit pixel to dst; clobbers ecx, edx
+ mov edx,%2
+ mov ecx,%3
+ cmp edx,ecx
+ je %%fin  ; equal inputs: result is c1 unchanged
+ and edx,[zerolowbits]  ; clear each field's lowest bit so the shifts below cannot leak between fields
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1  ; ecx = average of c1 and c2
+ add ecx,0x0821  ; add 1 to each colour field (bits 0, 5, 11) before masking again
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1  ; edx = average of c1 and the first average
+%%fin
+ mov %1,dx
+%endmacro
+
+%macro Interp2 4  ; Interp2 dst,c1,c2,c3: store roughly (2*c1 + c2 + c3)/4 as a 16-bit pixel to dst; clobbers ecx, edx
+ mov edx,%3
+ mov ecx,%4
+ cmp edx,ecx
+ je %%fin1  ; c2 == c3: their average is just that value
+ and edx,[zerolowbits]  ; clear each field's lowest bit so the shift cannot leak between fields
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1  ; ecx = average of c2 and c3
+ add ecx,0x0821  ; add 1 to each colour field (bits 0, 5, 11)
+%%fin1
+ mov edx,%2
+ cmp edx,ecx
+ je %%fin2  ; c1 equals the intermediate value: store c1 unchanged
+ and ecx,[zerolowbits]
+ and edx,[zerolowbits]
+ add edx,ecx
+ shr edx,1  ; edx = average of c1 and the c2/c3 average
+%%fin2
+ mov %1,dx
+%endmacro
+
+%macro Interp5 3  ; Interp5 dst,c1,c2: store the per-field average of c1 and c2 as a 16-bit pixel to dst; clobbers ecx, edx
+ mov edx,%2
+ mov ecx,%3
+ cmp edx,ecx
+ je %%fin  ; equal inputs: store c1 unchanged
+ and edx,[zerolowbits]  ; clear each field's lowest bit so the shift cannot leak between fields
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+%%fin
+ mov %1,dx
+%endmacro
+
+%macro Interp6 3  ; Interp6 dst,c2,c3: blend 5*eax + 2*c2 + c3 (eax is loaded with w5 at .flags); clobbers ecx, edx, mm1-mm3
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4]  ; expanded form of the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4]  ; expanded c2
+ mov edx, %3
+ movd mm3, [ecx+edx*4]  ; expanded c3
+ punpcklbw mm1, [reg_blank]  ; widen each byte to a 16-bit lane
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ pmullw mm1, [const5]  ; 5 * eax pixel
+ psllw mm2, 1  ; 2 * c2
+ paddw mm1, mm3
+ paddw mm1, mm2  ; weighted sum, total weight 8
+ psrlw mm1, 5
+ packuswb mm1, [reg_blank]  ; back to one byte per colour field
+ movd edx, mm1
+ shl dl, 2  ; these four shifts repack the three byte lanes into one 16-bit 5-6-5 pixel
+ shr edx, 1
+ shl dx, 3
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+%macro Interp7 3  ; Interp7 dst,c2,c3: blend 6*eax + c2 + c3 (eax is loaded with w5 at .flags); clobbers ecx, edx, mm1-mm3
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4]  ; expanded form of the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4]  ; expanded c2
+ mov edx, %3
+ movd mm3, [ecx+edx*4]  ; expanded c3
+ punpcklbw mm1, [reg_blank]  ; widen each byte to a 16-bit lane
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ pmullw mm1, [const6]  ; 6 * eax pixel
+ paddw mm2, mm3  ; c2 + c3
+ paddw mm1, mm2  ; weighted sum, total weight 8
+ psrlw mm1, 5
+ packuswb mm1, [reg_blank]  ; back to one byte per colour field
+ movd edx, mm1
+ shl dl, 2  ; these four shifts repack the three byte lanes into one 16-bit 5-6-5 pixel
+ shr edx, 1
+ shl dx, 3
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+%macro Interp9 3  ; Interp9 dst,c2,c3: blend 2*eax + 3*c2 + 3*c3 (eax is loaded with w5 at .flags); clobbers ecx, edx, mm1-mm3
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4]  ; expanded form of the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4]  ; expanded c2
+ mov edx, %3
+ movd mm3, [ecx+edx*4]  ; expanded c3
+ punpcklbw mm1, [reg_blank]  ; widen each byte to a 16-bit lane
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ psllw mm1, 1  ; 2 * eax pixel
+ paddw mm2, mm3  ; c2 + c3
+ pmullw mm2, [const3]  ; 3 * (c2 + c3)
+ paddw mm1, mm2  ; weighted sum, total weight 8
+ psrlw mm1, 5
+ packuswb mm1, [reg_blank]  ; back to one byte per colour field
+ movd edx, mm1
+ shl dl, 2  ; these four shifts repack the three byte lanes into one 16-bit 5-6-5 pixel
+ shr edx, 1
+ shl dx, 3
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+%macro Interp10 3  ; Interp10 dst,c2,c3: blend 14*eax + c2 + c3 (eax is loaded with w5 at .flags); clobbers ecx, edx, mm1-mm3
+ mov ecx, _LUT16to32
+ movd mm1, [ecx+eax*4]  ; expanded form of the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4]  ; expanded c2
+ mov edx, %3
+ movd mm3, [ecx+edx*4]  ; expanded c3
+ punpcklbw mm1, [reg_blank]  ; widen each byte to a 16-bit lane
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ pmullw mm1, [const14]  ; 14 * eax pixel
+ paddw mm2, mm3  ; c2 + c3
+ paddw mm1, mm2  ; weighted sum, total weight 16
+ psrlw mm1, 6  ; one more bit than the weight-8 blends
+ packuswb mm1, [reg_blank]  ; back to one byte per colour field
+ movd edx, mm1
+ shl dl, 2  ; these four shifts repack the three byte lanes into one 16-bit 5-6-5 pixel
+ shr edx, 1
+ shl dx, 3
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+%macro PIXEL00_0 0  ; PIXEL00_*: write the output pixel at [edi]; _0 stores ax unchanged
+ mov [edi],ax
+%endmacro
+
+%macro PIXEL00_10 0  ; Interp1: about 3/4 eax + 1/4 w1
+ Interp1 [edi],eax,[w1]
+%endmacro
+
+%macro PIXEL00_11 0  ; Interp1: about 3/4 eax + 1/4 w4
+ Interp1 [edi],eax,[w4]
+%endmacro
+
+%macro PIXEL00_12 0  ; Interp1: about 3/4 eax + 1/4 w2
+ Interp1 [edi],eax,[w2]
+%endmacro
+
+%macro PIXEL00_20 0  ; Interp2: about 1/2 eax + 1/4 w4 + 1/4 w2
+ Interp2 [edi],eax,[w4],[w2]
+%endmacro
+
+%macro PIXEL00_21 0  ; Interp2: about 1/2 eax + 1/4 w1 + 1/4 w2
+ Interp2 [edi],eax,[w1],[w2]
+%endmacro
+
+%macro PIXEL00_22 0  ; Interp2: about 1/2 eax + 1/4 w1 + 1/4 w4
+ Interp2 [edi],eax,[w1],[w4]
+%endmacro
+
+%macro PIXEL00_60 0  ; Interp6: weights 5 eax : 2 w2 : 1 w4
+ Interp6 [edi],[w2],[w4]
+%endmacro
+
+%macro PIXEL00_61 0  ; Interp6: weights 5 eax : 2 w4 : 1 w2
+ Interp6 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_70 0  ; Interp7: weights 6 eax : 1 w4 : 1 w2
+ Interp7 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_90 0  ; Interp9: weights 2 eax : 3 w4 : 3 w2
+ Interp9 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_100 0  ; Interp10: weights 14 eax : 1 w4 : 1 w2
+ Interp10 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL01_0 0  ; PIXEL01_*: write the output pixel at [edi+2]; _0 stores ax unchanged
+ mov [edi+2],ax
+%endmacro
+
+%macro PIXEL01_10 0  ; Interp1: about 3/4 eax + 1/4 w3
+ Interp1 [edi+2],eax,[w3]
+%endmacro
+
+%macro PIXEL01_11 0  ; Interp1: about 3/4 eax + 1/4 w2
+ Interp1 [edi+2],eax,[w2]
+%endmacro
+
+%macro PIXEL01_12 0  ; Interp1: about 3/4 eax + 1/4 w6
+ Interp1 [edi+2],eax,[w6]
+%endmacro
+
+%macro PIXEL01_20 0  ; Interp2: about 1/2 eax + 1/4 w2 + 1/4 w6
+ Interp2 [edi+2],eax,[w2],[w6]
+%endmacro
+
+%macro PIXEL01_21 0  ; Interp2: about 1/2 eax + 1/4 w3 + 1/4 w6
+ Interp2 [edi+2],eax,[w3],[w6]
+%endmacro
+
+%macro PIXEL01_22 0  ; Interp2: about 1/2 eax + 1/4 w3 + 1/4 w2
+ Interp2 [edi+2],eax,[w3],[w2]
+%endmacro
+
+%macro PIXEL01_60 0  ; Interp6: weights 5 eax : 2 w6 : 1 w2
+ Interp6 [edi+2],[w6],[w2]
+%endmacro
+
+%macro PIXEL01_61 0  ; Interp6: weights 5 eax : 2 w2 : 1 w6
+ Interp6 [edi+2],[w2],[w6]
+%endmacro
+
+%macro PIXEL01_70 0  ; Interp7: weights 6 eax : 1 w2 : 1 w6
+ Interp7 [edi+2],[w2],[w6]
+%endmacro
+
+%macro PIXEL01_90 0  ; Interp9: weights 2 eax : 3 w2 : 3 w6
+ Interp9 [edi+2],[w2],[w6]
+%endmacro
+
+%macro PIXEL01_100 0  ; Interp10: weights 14 eax : 1 w2 : 1 w6
+ Interp10 [edi+2],[w2],[w6]
+%endmacro
+
+%macro PIXEL10_0 0  ; PIXEL10_*: write the output pixel at [edi+ebx] (ebx is loaded with dstPitch before dispatch); _0 stores ax unchanged
+ mov [edi+ebx],ax
+%endmacro
+
+%macro PIXEL10_10 0  ; Interp1: about 3/4 eax + 1/4 w7
+ Interp1 [edi+ebx],eax,[w7]
+%endmacro
+
+%macro PIXEL10_11 0  ; Interp1: about 3/4 eax + 1/4 w8
+ Interp1 [edi+ebx],eax,[w8]
+%endmacro
+
+%macro PIXEL10_12 0  ; Interp1: about 3/4 eax + 1/4 w4
+ Interp1 [edi+ebx],eax,[w4]
+%endmacro
+
+%macro PIXEL10_20 0  ; Interp2: about 1/2 eax + 1/4 w8 + 1/4 w4
+ Interp2 [edi+ebx],eax,[w8],[w4]
+%endmacro
+
+%macro PIXEL10_21 0  ; Interp2: about 1/2 eax + 1/4 w7 + 1/4 w4
+ Interp2 [edi+ebx],eax,[w7],[w4]
+%endmacro
+
+%macro PIXEL10_22 0  ; Interp2: about 1/2 eax + 1/4 w7 + 1/4 w8
+ Interp2 [edi+ebx],eax,[w7],[w8]
+%endmacro
+
+%macro PIXEL10_60 0  ; Interp6: weights 5 eax : 2 w4 : 1 w8
+ Interp6 [edi+ebx],[w4],[w8]
+%endmacro
+
+%macro PIXEL10_61 0  ; Interp6: weights 5 eax : 2 w8 : 1 w4
+ Interp6 [edi+ebx],[w8],[w4]
+%endmacro
+
+%macro PIXEL10_70 0  ; Interp7: weights 6 eax : 1 w8 : 1 w4
+ Interp7 [edi+ebx],[w8],[w4]
+%endmacro
+
+%macro PIXEL10_90 0  ; Interp9: weights 2 eax : 3 w8 : 3 w4
+ Interp9 [edi+ebx],[w8],[w4]
+%endmacro
+
+%macro PIXEL10_100 0  ; Interp10: weights 14 eax : 1 w8 : 1 w4
+ Interp10 [edi+ebx],[w8],[w4]
+%endmacro
+
+%macro PIXEL11_0 0  ; PIXEL11_*: write the output pixel at [edi+ebx+2] (ebx is loaded with dstPitch before dispatch); _0 stores ax unchanged
+ mov [edi+ebx+2],ax
+%endmacro
+
+%macro PIXEL11_10 0  ; Interp1: about 3/4 eax + 1/4 w9
+ Interp1 [edi+ebx+2],eax,[w9]
+%endmacro
+
+%macro PIXEL11_11 0  ; Interp1: about 3/4 eax + 1/4 w6
+ Interp1 [edi+ebx+2],eax,[w6]
+%endmacro
+
+%macro PIXEL11_12 0  ; Interp1: about 3/4 eax + 1/4 w8
+ Interp1 [edi+ebx+2],eax,[w8]
+%endmacro
+
+%macro PIXEL11_20 0  ; Interp2: about 1/2 eax + 1/4 w6 + 1/4 w8
+ Interp2 [edi+ebx+2],eax,[w6],[w8]
+%endmacro
+
+%macro PIXEL11_21 0  ; Interp2: about 1/2 eax + 1/4 w9 + 1/4 w8
+ Interp2 [edi+ebx+2],eax,[w9],[w8]
+%endmacro
+
+%macro PIXEL11_22 0  ; Interp2: about 1/2 eax + 1/4 w9 + 1/4 w6
+ Interp2 [edi+ebx+2],eax,[w9],[w6]
+%endmacro
+
+%macro PIXEL11_60 0  ; Interp6: weights 5 eax : 2 w8 : 1 w6
+ Interp6 [edi+ebx+2],[w8],[w6]
+%endmacro
+
+%macro PIXEL11_61 0  ; Interp6: weights 5 eax : 2 w6 : 1 w8
+ Interp6 [edi+ebx+2],[w6],[w8]
+%endmacro
+
+%macro PIXEL11_70 0  ; Interp7: weights 6 eax : 1 w6 : 1 w8
+ Interp7 [edi+ebx+2],[w6],[w8]
+%endmacro
+
+%macro PIXEL11_90 0  ; Interp9: weights 2 eax : 3 w6 : 3 w8
+ Interp9 [edi+ebx+2],[w6],[w8]
+%endmacro
+
+%macro PIXEL11_100 0  ; Interp10: weights 14 eax : 1 w6 : 1 w8
+ Interp10 [edi+ebx+2],[w6],[w8]
+%endmacro
+
+inbuffer equ 8  ; offsets from ebp of the stack arguments read by _hq2x_16; this one is the source pointer (loaded into esi)
+outbuffer equ 12  ; destination pointer (loaded into edi)
+Xres equ 16  ; width argument; doubled at entry to get the bytes per row of 16-bit pixels
+Yres equ 20  ; height argument; copied to linesleft
+srcPitch equ 24  ; source pitch; stored in nextline as the byte offset to the row below
+dstPitch equ 28  ; destination pitch; loaded into ebx to address the second output row
+
+_hq2x_16:
+ push ebp
+ mov ebp,esp
+ pushad
+
+ mov esi,[ebp+inbuffer]
+ mov edi,[ebp+outbuffer]
+ mov edx,[ebp+Yres]
+ mov [linesleft],edx
+ mov ecx,[ebp+Xres]
+ shl ecx,1
+ mov ebx,[ebp+dstPitch]
+ mov dword[moduloDst],ebx
+ sub dword[moduloDst],ecx
+ shl dword[moduloDst],1
+ mov ebx,[ebp+srcPitch]
+ mov dword[prevline],0
+ mov dword[nextline],ebx
+ mov dword[moduloSrc],ebx
+ sub dword[moduloSrc],ecx
+.loopy
+ mov ecx,[ebp+Xres]
+ sub ecx,2 ; x={Xres-2, Xres-1} are special cases.
+ mov dword[xcounter],ecx
+ ; x=0 - special case
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx]
+ movq mm6,[esi]
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx]
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx
+ mov [w2],edx
+ shr eax,16
+ mov [w3],eax
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx
+ mov [w5],edx
+ shr eax,16
+ mov [w6],eax
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx
+ mov [w8],edx
+ shr eax,16
+ mov [w9],eax
+ jmp .flags
+.loopx
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx-2]
+ movq mm6,[esi-2]
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx-2]
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx
+ shr eax,16
+ mov [w2],eax
+ psrlq mm5,32
+ movd eax,mm5
+ movzx edx,ax
+ mov [w3],edx
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx
+ shr eax,16
+ mov [w5],eax
+ psrlq mm6,32
+ movd eax,mm6
+ movzx edx,ax
+ mov [w6],edx
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx
+ shr eax,16
+ mov [w8],eax
+ psrlq mm7,32
+ movd eax,mm7
+ movzx edx,ax
+ mov [w9],edx
+.flags
+ mov ebx,_RGBtoYUV
+ mov eax,[w5]
+ xor ecx,ecx
+ movd mm5,[ebx+eax*4]
+ mov dword[cross],0
+
+ mov edx,[w2]
+ cmp eax,edx
+ je .noflag2
+ or dword[cross],1
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag2
+ or ecx,2
+.noflag2
+ mov edx,[w4]
+ cmp eax,edx
+ je .noflag4
+ or dword[cross],2
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag4
+ or ecx,8
+.noflag4
+ mov edx,[w6]
+ cmp eax,edx
+ je .noflag6
+ or dword[cross],4
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag6
+ or ecx,16
+.noflag6
+ mov edx,[w8]
+ cmp eax,edx
+ je .noflag8
+ or dword[cross],8
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag8
+ or ecx,64
+.noflag8
+ test ecx,ecx
+ jnz .testflag1
+ mov ecx,[cross]
+ mov ebx,[ebp+dstPitch]
+ jmp [FuncTable2+ecx*4]
+.testflag1
+ mov edx,[w1]
+ cmp eax,edx
+ je .noflag1
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag1
+ or ecx,1
+.noflag1
+ mov edx,[w3]
+ cmp eax,edx
+ je .noflag3
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag3
+ or ecx,4
+.noflag3
+ mov edx,[w7]
+ cmp eax,edx
+ je .noflag7
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag7
+ or ecx,32
+.noflag7
+ mov edx,[w9]
+ cmp eax,edx
+ je .noflag9
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag9
+ or ecx,128
+.noflag9
+ mov ebx,[ebp+dstPitch]
+ jmp [FuncTable+ecx*4]
+
+..@flag0
+..@flag1
+..@flag4
+..@flag32
+..@flag128
+..@flag5
+..@flag132
+..@flag160
+..@flag33
+..@flag129
+..@flag36
+..@flag133
+..@flag164
+..@flag161
+..@flag37
+..@flag165
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag2
+..@flag34
+..@flag130
+..@flag162
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag16
+..@flag17
+..@flag48
+..@flag49
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag64
+..@flag65
+..@flag68
+..@flag69
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag8
+..@flag12
+..@flag136
+..@flag140
+ PIXEL00_21
+ PIXEL01_20
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag3
+..@flag35
+..@flag131
+..@flag163
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag6
+..@flag38
+..@flag134
+..@flag166
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag20
+..@flag21
+..@flag52
+..@flag53
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag144
+..@flag145
+..@flag176
+..@flag177
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag192
+..@flag193
+..@flag196
+..@flag197
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag96
+..@flag97
+..@flag100
+..@flag101
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag40
+..@flag44
+..@flag168
+..@flag172
+ PIXEL00_21
+ PIXEL01_20
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag9
+..@flag13
+..@flag137
+..@flag141
+ PIXEL00_12
+ PIXEL01_20
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag18
+..@flag50
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_20
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag80
+..@flag81
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_20
+ jmp .loopx_end
+..@flag72
+..@flag76
+ PIXEL00_21
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag10
+..@flag138
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_20
+ PIXEL01_21
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag66
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag24
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag7
+..@flag39
+..@flag135
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag148
+..@flag149
+..@flag180
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag224
+..@flag228
+..@flag225
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag41
+..@flag169
+..@flag45
+ PIXEL00_12
+ PIXEL01_20
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag22
+..@flag54
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag208
+..@flag209
+ PIXEL00_20
+ PIXEL01_22
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag104
+..@flag108
+ PIXEL00_21
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag11
+..@flag139
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag19
+..@flag51
+ DiffOrNot w2,w6,PIXEL00_11,PIXEL01_10,PIXEL00_60,PIXEL01_90
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag146
+..@flag178
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL11_12,PIXEL01_90,PIXEL11_61
+ PIXEL10_20
+ jmp .loopx_end
+..@flag84
+..@flag85
+ PIXEL00_20
+ DiffOrNot w6,w8,PIXEL01_11,PIXEL11_10,PIXEL01_60,PIXEL11_90
+ PIXEL10_21
+ jmp .loopx_end
+..@flag112
+..@flag113
+ PIXEL00_20
+ PIXEL01_22
+ DiffOrNot w6,w8,PIXEL10_12,PIXEL11_10,PIXEL10_61,PIXEL11_90
+ jmp .loopx_end
+..@flag200
+..@flag204
+ PIXEL00_21
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL11_11,PIXEL10_90,PIXEL11_60
+ jmp .loopx_end
+..@flag73
+..@flag77
+ DiffOrNot w8,w4,PIXEL00_12,PIXEL10_10,PIXEL00_61,PIXEL10_90
+ PIXEL01_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag42
+..@flag170
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL10_11,PIXEL00_90,PIXEL10_60
+ PIXEL01_21
+ PIXEL11_20
+ jmp .loopx_end
+..@flag14
+..@flag142
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL01_12,PIXEL00_90,PIXEL01_61
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag67
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag70
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag28
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag152
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag194
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag98
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag56
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag25
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag26
+..@flag31
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag82
+..@flag214
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag88
+..@flag248
+ PIXEL00_21
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag74
+..@flag107
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag27
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag86
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_21
+ PIXEL11_10
+ jmp .loopx_end
+..@flag216
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag106
+ PIXEL00_10
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag30
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag210
+ PIXEL00_22
+ PIXEL01_10
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag120
+ PIXEL00_21
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag75
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ PIXEL10_10
+ PIXEL11_22
+ jmp .loopx_end
+..@flag29
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_21
+ jmp .loopx_end
+..@flag198
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag184
+ PIXEL00_21
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag99
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag57
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag71
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_22
+ jmp .loopx_end
+..@flag156
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag226
+ PIXEL00_22
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag60
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag195
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag102
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag153
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag58
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag83
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag92
+ PIXEL00_21
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag202
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_11
+ jmp .loopx_end
+..@flag78
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_22
+ jmp .loopx_end
+..@flag154
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag114
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag89
+ PIXEL00_12
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag90
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag55
+..@flag23
+ DiffOrNot w2,w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90
+ PIXEL10_20
+ PIXEL11_21
+ jmp .loopx_end
+..@flag182
+..@flag150
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61
+ PIXEL10_20
+ jmp .loopx_end
+..@flag213
+..@flag212
+ PIXEL00_20
+ DiffOrNot w6,w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90
+ PIXEL10_21
+ jmp .loopx_end
+..@flag241
+..@flag240
+ PIXEL00_20
+ PIXEL01_22
+ DiffOrNot w6,w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90
+ jmp .loopx_end
+..@flag236
+..@flag232
+ PIXEL00_21
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60
+ jmp .loopx_end
+..@flag109
+..@flag105
+ DiffOrNot w8,w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90
+ PIXEL01_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag171
+..@flag43
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60
+ PIXEL01_21
+ PIXEL11_20
+ jmp .loopx_end
+..@flag143
+..@flag15
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61
+ PIXEL10_22
+ PIXEL11_20
+ jmp .loopx_end
+..@flag124
+ PIXEL00_21
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag203
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ PIXEL10_10
+ PIXEL11_11
+ jmp .loopx_end
+..@flag62
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag211
+ PIXEL00_11
+ PIXEL01_10
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag118
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_12
+ PIXEL11_10
+ jmp .loopx_end
+..@flag217
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag110
+ PIXEL00_10
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag155
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag188
+ PIXEL00_21
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag185
+ PIXEL00_12
+ PIXEL01_22
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag61
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag157
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag103
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_22
+ jmp .loopx_end
+..@flag227
+ PIXEL00_11
+ PIXEL01_21
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag230
+ PIXEL00_22
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag199
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_21
+ PIXEL11_11
+ jmp .loopx_end
+..@flag220
+ PIXEL00_21
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag158
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag234
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_11
+ jmp .loopx_end
+..@flag242
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag59
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag121
+ PIXEL00_12
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag87
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag79
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_22
+ jmp .loopx_end
+..@flag122
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag94
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag218
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag91
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag229
+ PIXEL00_20
+ PIXEL01_20
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag167
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_20
+ PIXEL11_20
+ jmp .loopx_end
+..@flag173
+ PIXEL00_12
+ PIXEL01_20
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag181
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag186
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag115
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag93
+ PIXEL00_12
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag206
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_11
+ jmp .loopx_end
+..@flag205
+..@flag201
+ PIXEL00_12
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+ PIXEL11_11
+ jmp .loopx_end
+..@flag174
+..@flag46
+ DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+ PIXEL01_12
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag179
+..@flag147
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag117
+..@flag116
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+ jmp .loopx_end
+..@flag189
+ PIXEL00_12
+ PIXEL01_11
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag231
+ PIXEL00_11
+ PIXEL01_12
+ PIXEL10_12
+ PIXEL11_11
+ jmp .loopx_end
+..@flag126
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag219
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag125
+ DiffOrNot w8,w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90
+ PIXEL01_11
+ PIXEL11_10
+ jmp .loopx_end
+..@flag221
+ PIXEL00_12
+ DiffOrNot w6,w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90
+ PIXEL10_10
+ jmp .loopx_end
+..@flag207
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61
+ PIXEL10_10
+ PIXEL11_11
+ jmp .loopx_end
+..@flag238
+ PIXEL00_10
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60
+ jmp .loopx_end
+..@flag190
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61
+ PIXEL10_11
+ jmp .loopx_end
+..@flag187
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60
+ PIXEL01_10
+ PIXEL11_12
+ jmp .loopx_end
+..@flag243
+ PIXEL00_11
+ PIXEL01_10
+ DiffOrNot w6,w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90
+ jmp .loopx_end
+..@flag119
+ DiffOrNot w2,w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90
+ PIXEL10_12
+ PIXEL11_10
+ jmp .loopx_end
+..@flag237
+..@flag233
+ PIXEL00_12
+ PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ PIXEL11_11
+ jmp .loopx_end
+..@flag175
+..@flag47
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ PIXEL01_12
+ PIXEL10_11
+ PIXEL11_20
+ jmp .loopx_end
+..@flag183
+..@flag151
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_20
+ PIXEL11_12
+ jmp .loopx_end
+..@flag245
+..@flag244
+ PIXEL00_20
+ PIXEL01_11
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag250
+ PIXEL00_10
+ PIXEL01_10
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag123
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag95
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_10
+ PIXEL11_10
+ jmp .loopx_end
+..@flag222
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag252
+ PIXEL00_21
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag249
+ PIXEL00_12
+ PIXEL01_22
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag235
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_21
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ PIXEL11_11
+ jmp .loopx_end
+..@flag111
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_22
+ jmp .loopx_end
+..@flag63
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_11
+ PIXEL11_21
+ jmp .loopx_end
+..@flag159
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_22
+ PIXEL11_12
+ jmp .loopx_end
+..@flag215
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_21
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag246
+ PIXEL00_22
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag254
+ PIXEL00_10
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag253
+ PIXEL00_12
+ PIXEL01_11
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag251
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ PIXEL01_10
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag239
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ PIXEL01_12
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ PIXEL11_11
+ jmp .loopx_end
+..@flag127
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+ PIXEL11_10
+ jmp .loopx_end
+..@flag191
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_11
+ PIXEL11_12
+ jmp .loopx_end
+..@flag223
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_10
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+ jmp .loopx_end
+..@flag247
+ PIXEL00_11
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ PIXEL10_12
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+..@flag255
+ DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+ DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+ DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+ DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+ jmp .loopx_end
+
+
+..@cross0 ; FuncTable2 entry 0. NOTE(review): presumably reached when no direct neighbour differs from the centre pixel - confirm in the dispatch code
+ mov edx,eax ; eax is assumed to hold the centre pixel (w5) on entry
+ shl eax,16
+ or eax,edx ; eax = the same 16-bit pixel in both halves
+ mov [edi],eax ; two output pixels of the upper row in one dword store
+ mov [edi+ebx],eax ; two output pixels of the lower row (ebx presumably = dstPitch)
+ jmp .loopx_end
+..@cross1 ; FuncTable2 entry 1: upper output row is blended towards w2, lower row is the plain centre pixel
+ mov edx,eax ; eax is assumed to hold the centre pixel (w5) on entry
+ shl eax,16
+ or eax,edx ; eax = centre pixel duplicated in both 16-bit halves
+ mov ecx,[w2]
+ and edx,[zerolowbits] ; clear the lowest bit of each colour field so the sums below can be halved
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of centre and w2
+ add ecx,0x0821 ; add 1 to each colour field before re-masking
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = average of centre and the first average, i.e. roughly (3*centre + w2)/4
+ mov ecx,edx
+ shl edx,16
+ or edx,ecx ; blended pixel duplicated in both halves
+ mov [edi],edx ; upper output row: blended
+ mov [edi+ebx],eax ; lower output row: centre pixel (ebx presumably = dstPitch)
+ jmp .loopx_end
+..@cross2 ; FuncTable2 entry 2: left output column is blended towards w4, right column is the plain centre pixel
+ shl eax,16 ; centre pixel moved to the high half (right-hand output pixel)
+ mov ecx,[w4]
+ and edx,[zerolowbits] ; edx is not loaded in this handler: it must already equal the centre pixel on entry - NOTE(review): confirm against the dispatch code
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of edx and w4
+ add ecx,0x0821 ; add 1 to each colour field before re-masking
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = roughly (3*edx + w4)/4
+ or eax,edx ; low half = blended pixel, high half = centre pixel
+ mov [edi],eax ; upper output row
+ mov [edi+ebx],eax ; lower output row gets the same pair
+ jmp .loopx_end
+..@cross4 ; FuncTable2 entry 4: right output column is blended towards w6, left column is the plain centre pixel
+ mov ecx,[w6]
+ and edx,[zerolowbits] ; edx is not loaded in this handler: it must already equal the centre pixel on entry - NOTE(review): confirm against the dispatch code
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of edx and w6
+ add ecx,0x0821 ; add 1 to each colour field before re-masking
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = roughly (3*edx + w6)/4
+ shl edx,16 ; blended pixel goes to the high half (right-hand output pixel)
+ or eax,edx ; low half = eax as it was on entry (assumed: centre pixel with a zero high half)
+ mov [edi],eax ; upper output row
+ mov [edi+ebx],eax ; lower output row gets the same pair
+ jmp .loopx_end
+..@cross8 ; FuncTable2 entry 8: lower output row is blended towards w8, upper row is the plain centre pixel
+ mov edx,eax ; eax is assumed to hold the centre pixel (w5) on entry
+ shl eax,16
+ or eax,edx ; eax = centre pixel duplicated in both 16-bit halves
+ mov ecx,[w8]
+ and edx,[zerolowbits] ; clear the lowest bit of each colour field so the sums below can be halved
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of centre and w8
+ add ecx,0x0821 ; add 1 to each colour field before re-masking
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = roughly (3*centre + w8)/4
+ mov ecx,edx
+ shl edx,16
+ or edx,ecx ; blended pixel duplicated in both halves
+ mov [edi],eax ; upper output row: centre pixel
+ mov [edi+ebx],edx ; lower output row: blended (ebx presumably = dstPitch)
+ jmp .loopx_end
+
+.loopx_end ; common tail of every handler: step to the next source pixel
+ add esi,2 ; one 16-bit source pixel
+ add edi,4 ; two 16-bit output pixels
+ dec dword[xcounter] ; the flags set here are also tested by the jl at .xres_2
+ jle .xres_2 ; counter <= 0: one of the end-of-row cases (or the row is finished)
+ jmp .loopx
+.xres_2 ; entered with the flags of the dec in .loopx_end still live
+ ; x=Xres-2 - special case
+ jl .xres_1 ; counter already negative: not this case
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx-4] ; 8-byte load starting 2 pixels before esi, so it ends 1 pixel after esi (assuming esi addresses the current pixel)
+ movq mm6,[esi-4]
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx-4]
+ psrlq mm5,16 ; drop the lowest pixel: three pixels remain in each register
+ psrlq mm6,16
+ psrlq mm7,16
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx ; lowest remaining pixel of the prevline row
+ shr eax,16
+ mov [w2],eax ; next pixel of that row
+ psrlq mm5,32
+ movd eax,mm5
+ mov [w3],eax ; third pixel; the bits above it were zeroed by the shifts
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx
+ shr eax,16
+ mov [w5],eax
+ psrlq mm6,32
+ movd eax,mm6
+ mov [w6],eax
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx
+ shr eax,16
+ mov [w8],eax
+ psrlq mm7,32
+ movd eax,mm7
+ mov [w9],eax
+ jmp .flags
+.xres_1 ; reached from .xres_2 when the counter is negative
+ cmp dword[xcounter],-1
+ jl .nexty ; counter below -1: the row is finished
+ ; x=Xres-1 - special case
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx-6] ; 8-byte load starting 3 pixels before esi, so it ends exactly at the pixel esi addresses
+ movq mm6,[esi-6]
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx-6]
+ psrlq mm5,32 ; keep only the two highest pixels of each load
+ psrlq mm6,32
+ psrlq mm7,32
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx
+ shr eax,16
+ mov [w2],eax
+ mov [w3],eax ; w3 is a copy of w2: there is no pixel to the right
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx
+ shr eax,16
+ mov [w5],eax
+ mov [w6],eax ; w6 is a copy of w5
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx
+ shr eax,16
+ mov [w8],eax
+ mov [w9],eax ; w9 is a copy of w8
+ jmp .flags
+.nexty ; end of a source row: advance both pointers and set up the row offsets for the next row
+ add esi,dword[moduloSrc]
+ add edi,dword[moduloDst]
+ dec dword[linesleft]
+ jz .fin ; no rows left
+ mov ebx,[ebp+srcPitch]
+ cmp dword[linesleft],1
+ je .lastline ; exactly one row left: it has no row below it
+ mov dword[nextline],ebx ; row below is srcPitch bytes ahead
+ neg ebx
+ mov dword[prevline],ebx ; row above is srcPitch bytes back
+ jmp .loopy
+.lastline
+ mov dword[nextline],0 ; offset 0: the final row is read again in place of a row below
+ neg ebx
+ mov dword[prevline],ebx
+ jmp .loopy
+.fin ; function epilogue
+ emms ; leave MMX state so the FPU can be used again
+ popad ; restores the general-purpose registers (presumably saved by a pushad in the prologue, which is outside this view)
+ mov esp,ebp
+ pop ebp
+ ret
+
+SECTION .data
+FuncTable ; 256 handler addresses, entry N = ..@flagN. NOTE(review): presumably indexed by the 8-bit neighbour-difference pattern built in .flags - confirm at the jmp site
+ dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7
+ dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15
+ dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23
+ dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31
+ dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39
+ dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47
+ dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55
+ dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63
+ dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71
+ dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79
+ dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87
+ dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95
+ dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103
+ dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111
+ dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119
+ dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127
+ dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135
+ dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143
+ dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151
+ dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159
+ dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167
+ dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175
+ dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183
+ dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191
+ dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199
+ dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207
+ dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215
+ dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223
+ dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231
+ dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239
+ dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247
+ dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255
+
+; 16-entry jump table. Entries 0, 1, 2, 4 and 8 select the ..@crossN
+; shortcut handlers; every other index falls back to ..@flag0.
+; NOTE(review): presumably indexed by the 4-bit "cross" mask - confirm at
+; the jmp site. The operand lists carry no trailing comma, so each dd line
+; emits exactly four dwords on every assembler version.
+FuncTable2
+ dd ..@cross0, ..@cross1, ..@cross2, ..@flag0
+ dd ..@cross4, ..@flag0, ..@flag0, ..@flag0
+ dd ..@cross8, ..@flag0, ..@flag0, ..@flag0
+ dd ..@flag0, ..@flag0, ..@flag0, ..@flag0
+
diff --git a/common/scaler/hq3x.cpp b/common/scaler/hq3x.cpp
index 39156275d8..60f8b7b30a 100644
--- a/common/scaler/hq3x.cpp
+++ b/common/scaler/hq3x.cpp
@@ -22,6 +22,25 @@
#include "common/scaler/intern.h"
+#ifdef USE_NASM
+// Assembly version of HQ3x: HQ3x() forwards to the NASM routine that hq3x_i386.asm exports as _hq3x_16.
+
+extern "C" {
+
+#ifndef _MSC_VER
+#define hq3x_16 _hq3x_16 // the asm file declares GLOBAL _hq3x_16; NOTE(review): presumably MSVC prepends the underscore itself - confirm for other Windows toolchains
+#endif
+
+void hq3x_16(const byte *, byte *, uint32, uint32, uint32, uint32); // argument order: src, dst, width, height, srcPitch, dstPitch (see the call below)
+
+}
+
+void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+ hq3x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch); // NOTE(review): gBitFormat is not consulted on this path, unlike the non-NASM HQ3x() - confirm the asm copes with 555 mode
+}
+
+#else
+
#ifdef HAS_ALTIVEC
#include <sys/sysctl.h>
@@ -122,7 +141,6 @@ void HQ3x_555(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
#undef bitFormat
#endif
-
void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
#ifdef HAS_ALTIVEC
if (isAltiVecAvailable()) {
@@ -133,8 +151,11 @@ void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
return;
}
#endif
+
if (gBitFormat == 565)
HQ3x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
else
HQ3x_555(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
}
+
+#endif
diff --git a/common/scaler/hq3x_i386.asm b/common/scaler/hq3x_i386.asm
new file mode 100644
index 0000000000..5f31138246
--- /dev/null
+++ b/common/scaler/hq3x_i386.asm
@@ -0,0 +1,2533 @@
+;hq3x filter
+;16bpp output
+;----------------------------------------------------------
+;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
+;
+;This program is free software; you can redistribute it and/or
+;modify it under the terms of the GNU General Public License
+;as published by the Free Software Foundation; either
+;version 2 of the License, or (at your option) any later
+;version.
+;
+;This program is distributed in the hope that it will be useful,
+;but WITHOUT ANY WARRANTY; without even the implied warranty of
+;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;GNU General Public License for more details.
+;
+;You should have received a copy of the GNU General Public License
+;along with this program; if not, write to the Free Software
+;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+GLOBAL _hq3x_16
+
+EXTERN _LUT16to32
+EXTERN _RGBtoYUV
+
+SECTION .bss
+linesleft resd 1 ; source rows still to process; initialised from the Yres argument
+xcounter resd 1 ; per-row column counter; set to Xres-2 at .loopy
+cross resd 1 ; bit mask built in .flags: 1/2/4/8 set when w2/w4/w6/w8 is not bit-identical to w5
+nextline resd 1 ; byte offset added to esi to read the row below; initialised to srcPitch
+prevline resd 1 ; byte offset added to esi to read the row above; initialised to 0
+w1 resd 1 ; w1..w9: the 3x3 source neighbourhood, one 16-bit pixel per dword; w1 w2 w3 come from the row above
+w2 resd 1
+w3 resd 1
+w4 resd 1 ; w4 w5 w6 come from the current row
+w5 resd 1 ; centre pixel
+w6 resd 1
+w7 resd 1 ; w7 w8 w9 come from the row below
+w8 resd 1
+w9 resd 1
+
+SECTION .data
+
+reg_blank dd 0,0 ; all-zero qword: second operand of punpcklbw/packuswb in Interp3/Interp4
+const7 dd 0x00070007,0x00000007 ; 16-bit words 7,7,7,0: pmullw multiplier in Interp3/Interp4
+threshold dd 0x00300706,0x00000000 ; per-byte limits 0x30/0x07/0x06, subtracted with saturation from the byte-wise difference of two _RGBtoYUV entries
+zerolowbits dd 0xF7DEF7DE ; mask clearing bits 0, 5 and 11 of each 16-bit half, applied before two packed pixels are added and halved
+moduloSrc dd 0 ; written at run time: srcPitch - 2*Xres
+moduloDst dd 0 ; written at run time: 3*(dstPitch - 2*Xres)
+
+SECTION .text
+
+%macro TestDiff 2
+ xor ecx,ecx ; TestDiff a,b: leaves ecx = 0 when pixels [a] and [b] are equal or within threshold, non-zero otherwise; clobbers edx, mm1, mm2, mm5
+ mov edx,[%1]
+ cmp edx,[%2]
+ je %%fin ; bit-identical pixels: skip the table lookups, ecx stays 0
+ mov ecx,_RGBtoYUV
+ movd mm1,[ecx+edx*4] ; table entry for pixel a
+ movq mm5,mm1
+ mov edx,[%2]
+ movd mm2,[ecx+edx*4] ; table entry for pixel b
+ psubusb mm1,mm2 ; a-b, saturating at 0
+ psubusb mm2,mm5 ; b-a, saturating at 0
+ por mm1,mm2 ; per-byte absolute difference
+ psubusb mm1,[threshold] ; bytes not exceeding their limit become 0
+ movd ecx,mm1
+%%fin:
+%endmacro
+
+; DiffOrNot a,b,D,S (4-parameter form)
+; Runs TestDiff a,b, then expands macro D when the two pixels differ
+; (ecx != 0) or macro S when they do not. D and S are names of
+; parameterless macros. Clobbers everything TestDiff, D and S clobber.
+; Both local labels carry a colon so no orphan label is left per expansion.
+%macro DiffOrNot 4
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ jmp %%fin
+%%same:
+ %4
+%%fin:
+%endmacro
+
+; DiffOrNot a,b,D1,D2,S1,S2 (6-parameter form)
+; Runs TestDiff a,b, then expands macros D1,D2 when the two pixels differ
+; (ecx != 0) or S1,S2 when they do not. All four are names of
+; parameterless macros. Clobbers everything TestDiff and they clobber.
+; Both local labels carry a colon so no orphan label is left per expansion.
+%macro DiffOrNot 6
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ %4
+ jmp %%fin
+%%same:
+ %5
+ %6
+%%fin:
+%endmacro
+
+; DiffOrNot a,b,D1,D2,D3,S1,S2,S3 (8-parameter form)
+; Runs TestDiff a,b, then expands macros D1..D3 when the two pixels differ
+; (ecx != 0) or S1..S3 when they do not. All six are names of
+; parameterless macros. Clobbers everything TestDiff and they clobber.
+; Both local labels carry a colon so no orphan label is left per expansion.
+%macro DiffOrNot 8
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ %4
+ %5
+ jmp %%fin
+%%same:
+ %6
+ %7
+ %8
+%%fin:
+%endmacro
+
+; DiffOrNot a,b,D1,D2,D3,D4,S1,S2,S3,S4 (10-parameter form)
+; Runs TestDiff a,b, then expands macros D1..D4 when the two pixels differ
+; (ecx != 0) or S1..S4 when they do not. All eight are names of
+; parameterless macros. Clobbers everything TestDiff and they clobber.
+; Both local labels carry a colon so no orphan label is left per expansion.
+%macro DiffOrNot 10
+ TestDiff %1,%2
+ test ecx,ecx
+ jz %%same
+ %3
+ %4
+ %5
+ %6
+ jmp %%fin
+%%same:
+ %7
+ %8
+ %9
+ %10
+%%fin:
+%endmacro
+
+; Interp1 dst,c1,c2
+; Stores roughly (3*c1 + c2)/4 to the 16-bit destination dst, computed on
+; the packed pixel as avg(c1, avg(c1,c2)). When c1 == c2 the value is
+; stored unchanged. c1/c2 may be registers or memory operands holding a
+; zero-extended 16-bit pixel. Clobbers ecx and edx.
+%macro Interp1 3
+ mov edx,%2
+ mov ecx,%3
+ cmp edx,ecx
+ je %%fin
+ ; clear the lowest bit of every colour field so the sums can be halved
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1
+ ; add 1 to each colour field, then re-mask before the second average
+ add ecx,0x0821
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+%%fin:
+ mov %1,dx
+%endmacro
+
+; Interp2 dst,c1,c2,c3
+; Stores roughly (2*c1 + c2 + c3)/4 to the 16-bit destination dst: first
+; avg(c2,c3) (skipped when c2 == c3), then the average of that with c1
+; (skipped when both values are equal). Operands may be registers or
+; memory operands holding a zero-extended 16-bit pixel.
+; Clobbers ecx and edx.
+%macro Interp2 4
+ mov edx,%3
+ mov ecx,%4
+ cmp edx,ecx
+ je %%fin1
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1
+ ; add 1 to each colour field; the mask is re-applied below
+ add ecx,0x0821
+%%fin1:
+ mov edx,%2
+ cmp edx,ecx
+ je %%fin2
+ and ecx,[zerolowbits]
+ and edx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+%%fin2:
+ mov %1,dx
+%endmacro
+
+%macro Interp3 2
+ mov ecx, _LUT16to32 ; Interp3 dst,c2: stores (7*pixel in eax + c2)/8 per channel to the 16-bit destination; clobbers ecx, edx, mm1, mm2
+ movd mm1, [ecx+eax*4] ; 32-bit table entry for the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4] ; 32-bit table entry for c2
+ punpcklbw mm1, [reg_blank] ; widen every byte to a 16-bit word
+ punpcklbw mm2, [reg_blank]
+ pmullw mm1, [const7] ; 7 * first colour
+ paddw mm1, mm2
+ psrlw mm1, 5 ; divide by 8 and drop two more bits: 6 significant bits per word for 8-bit inputs
+ packuswb mm1, [reg_blank] ; back to one byte per channel
+ movd edx, mm1
+ shl dl, 2 ; the four shifts below pack bytes 0/1/2 into 5/6/5-bit fields of dx
+ shr edx, 1
+ shl dx, 3 ; 16-bit shift: discards the lowest bit carried down from byte 2
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+%macro Interp4 3
+ mov ecx, _LUT16to32 ; Interp4 dst,c2,c3: stores (2*pixel in eax + 7*(c2+c3))/16 per channel to the 16-bit destination; clobbers ecx, edx, mm1, mm2, mm3
+ movd mm1, [ecx+eax*4] ; 32-bit table entry for the pixel in eax
+ mov edx, %2
+ movd mm2, [ecx+edx*4] ; 32-bit table entry for c2
+ mov edx, %3
+ movd mm3, [ecx+edx*4] ; 32-bit table entry for c3
+ punpcklbw mm1, [reg_blank] ; widen every byte to a 16-bit word
+ punpcklbw mm2, [reg_blank]
+ punpcklbw mm3, [reg_blank]
+ psllw mm1, 1 ; 2 * first colour
+ paddw mm2, mm3
+ pmullw mm2, [const7] ; 7 * (c2 + c3)
+ paddw mm1, mm2
+ psrlw mm1, 6 ; divide by 16 and drop two more bits: 6 significant bits per word for 8-bit inputs
+ packuswb mm1, [reg_blank] ; back to one byte per channel
+ movd edx, mm1
+ shl dl, 2 ; the four shifts below pack bytes 0/1/2 into 5/6/5-bit fields of dx
+ shr edx, 1
+ shl dx, 3 ; 16-bit shift: discards the lowest bit carried down from byte 2
+ shr edx, 5
+ mov %1, dx
+%endmacro
+
+; Interp5 dst,c1,c2
+; Stores the average (c1 + c2)/2 of two packed 16-bit pixels to the
+; 16-bit destination dst; when c1 == c2 the value is stored unchanged.
+; Clobbers ecx and edx.
+%macro Interp5 3
+ mov edx,%2
+ mov ecx,%3
+ cmp edx,ecx
+ je %%fin
+ ; clear the lowest bit of every colour field so the sum can be halved
+ and edx,[zerolowbits]
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1
+%%fin:
+ mov %1,dx
+%endmacro
+
+%macro PIXEL00_1M 0
+ Interp1 [edi],eax,[w1] ; output row 0, column 0 = (3*eax + w1)/4; the dispatch code loads eax with w5
+%endmacro
+
+%macro PIXEL00_1U 0
+ Interp1 [edi],eax,[w2] ; row 0, column 0 = (3*eax + w2)/4
+%endmacro
+
+%macro PIXEL00_1L 0
+ Interp1 [edi],eax,[w4] ; row 0, column 0 = (3*eax + w4)/4
+%endmacro
+
+%macro PIXEL00_2 0
+ Interp2 [edi],eax,[w4],[w2] ; row 0, column 0 = (2*eax + w4 + w2)/4
+%endmacro
+
+%macro PIXEL00_4 0
+ Interp4 [edi],[w4],[w2] ; row 0, column 0 = (2*eax + 7*(w4 + w2))/16
+%endmacro
+
+%macro PIXEL00_5 0
+ Interp5 [edi],[w4],[w2] ; row 0, column 0 = (w4 + w2)/2
+%endmacro
+
+%macro PIXEL00_C 0
+ mov [edi],ax ; row 0, column 0 = eax unchanged
+%endmacro
+
+%macro PIXEL01_1 0
+ Interp1 [edi+2],eax,[w2] ; row 0, column 1 = (3*eax + w2)/4
+%endmacro
+
+%macro PIXEL01_3 0
+ Interp3 [edi+2],[w2] ; row 0, column 1 = (7*eax + w2)/8
+%endmacro
+
+%macro PIXEL01_6 0
+ Interp1 [edi+2],[w2],eax ; row 0, column 1 = (3*w2 + eax)/4 (weights swapped)
+%endmacro
+
+%macro PIXEL01_C 0
+ mov [edi+2],ax ; row 0, column 1 = eax unchanged
+%endmacro
+
+%macro PIXEL02_1M 0
+ Interp1 [edi+4],eax,[w3] ; row 0, column 2 = (3*eax + w3)/4
+%endmacro
+
+%macro PIXEL02_1U 0
+ Interp1 [edi+4],eax,[w2] ; row 0, column 2 = (3*eax + w2)/4
+%endmacro
+
+%macro PIXEL02_1R 0
+ Interp1 [edi+4],eax,[w6] ; row 0, column 2 = (3*eax + w6)/4
+%endmacro
+
+%macro PIXEL02_2 0
+ Interp2 [edi+4],eax,[w2],[w6] ; row 0, column 2 = (2*eax + w2 + w6)/4
+%endmacro
+
+%macro PIXEL02_4 0
+ Interp4 [edi+4],[w2],[w6] ; row 0, column 2 = (2*eax + 7*(w2 + w6))/16
+%endmacro
+
+%macro PIXEL02_5 0
+ Interp5 [edi+4],[w2],[w6] ; row 0, column 2 = (w2 + w6)/2
+%endmacro
+
+%macro PIXEL02_C 0
+ mov [edi+4],ax ; row 0, column 2 = eax unchanged
+%endmacro
+
+%macro PIXEL10_1 0
+ Interp1 [edi+ebx],eax,[w4] ; output row 1, column 0 = (3*eax + w4)/4; the dispatch code loads eax with w5 and ebx with dstPitch
+%endmacro
+
+%macro PIXEL10_3 0
+ Interp3 [edi+ebx],[w4] ; row 1, column 0 = (7*eax + w4)/8
+%endmacro
+
+%macro PIXEL10_6 0
+ Interp1 [edi+ebx],[w4],eax ; row 1, column 0 = (3*w4 + eax)/4 (weights swapped)
+%endmacro
+
+%macro PIXEL10_C 0
+ mov [edi+ebx],ax ; row 1, column 0 = eax unchanged
+%endmacro
+
+%macro PIXEL11 0
+ mov [edi+ebx+2],ax ; row 1, column 1 (middle of the 3x3 output block) = eax unchanged
+%endmacro
+
+%macro PIXEL12_1 0
+ Interp1 [edi+ebx+4],eax,[w6] ; row 1, column 2 = (3*eax + w6)/4
+%endmacro
+
+%macro PIXEL12_3 0
+ Interp3 [edi+ebx+4],[w6] ; row 1, column 2 = (7*eax + w6)/8
+%endmacro
+
+%macro PIXEL12_6 0
+ Interp1 [edi+ebx+4],[w6],eax ; row 1, column 2 = (3*w6 + eax)/4 (weights swapped)
+%endmacro
+
+%macro PIXEL12_C 0
+ mov [edi+ebx+4],ax ; row 1, column 2 = eax unchanged
+%endmacro
+
+%macro PIXEL20_1M 0
+ Interp1 [edi+ebx*2],eax,[w7] ; output row 2, column 0 = (3*eax + w7)/4; the dispatch code loads eax with w5 and ebx with dstPitch
+%endmacro
+
+%macro PIXEL20_1D 0
+ Interp1 [edi+ebx*2],eax,[w8] ; row 2, column 0 = (3*eax + w8)/4
+%endmacro
+
+%macro PIXEL20_1L 0
+ Interp1 [edi+ebx*2],eax,[w4] ; row 2, column 0 = (3*eax + w4)/4
+%endmacro
+
+%macro PIXEL20_2 0
+ Interp2 [edi+ebx*2],eax,[w8],[w4] ; row 2, column 0 = (2*eax + w8 + w4)/4
+%endmacro
+
+%macro PIXEL20_4 0
+ Interp4 [edi+ebx*2],[w8],[w4] ; row 2, column 0 = (2*eax + 7*(w8 + w4))/16
+%endmacro
+
+%macro PIXEL20_5 0
+ Interp5 [edi+ebx*2],[w8],[w4] ; row 2, column 0 = (w8 + w4)/2
+%endmacro
+
+%macro PIXEL20_C 0
+ mov [edi+ebx*2],ax ; row 2, column 0 = eax unchanged
+%endmacro
+
+%macro PIXEL21_1 0
+ Interp1 [edi+ebx*2+2],eax,[w8] ; row 2, column 1 = (3*eax + w8)/4
+%endmacro
+
+%macro PIXEL21_3 0
+ Interp3 [edi+ebx*2+2],[w8] ; row 2, column 1 = (7*eax + w8)/8
+%endmacro
+
+%macro PIXEL21_6 0
+ Interp1 [edi+ebx*2+2],[w8],eax ; row 2, column 1 = (3*w8 + eax)/4 (weights swapped)
+%endmacro
+
+%macro PIXEL21_C 0
+ mov [edi+ebx*2+2],ax ; row 2, column 1 = eax unchanged
+%endmacro
+
+%macro PIXEL22_1M 0
+ Interp1 [edi+ebx*2+4],eax,[w9] ; row 2, column 2 = (3*eax + w9)/4
+%endmacro
+
+%macro PIXEL22_1D 0
+ Interp1 [edi+ebx*2+4],eax,[w8] ; row 2, column 2 = (3*eax + w8)/4
+%endmacro
+
+%macro PIXEL22_1R 0
+ Interp1 [edi+ebx*2+4],eax,[w6] ; row 2, column 2 = (3*eax + w6)/4
+%endmacro
+
+%macro PIXEL22_2 0
+ Interp2 [edi+ebx*2+4],eax,[w6],[w8] ; row 2, column 2 = (2*eax + w6 + w8)/4
+%endmacro
+
+%macro PIXEL22_4 0
+ Interp4 [edi+ebx*2+4],[w6],[w8] ; row 2, column 2 = (2*eax + 7*(w6 + w8))/16
+%endmacro
+
+%macro PIXEL22_5 0
+ Interp5 [edi+ebx*2+4],[w6],[w8] ; row 2, column 2 = (w6 + w8)/2
+%endmacro
+
+%macro PIXEL22_C 0
+ mov [edi+ebx*2+4],ax ; row 2, column 2 = eax unchanged
+%endmacro
+
+inbuffer equ 8 ; offsets of the six stack arguments from ebp (after push ebp / mov ebp,esp): source pointer
+outbuffer equ 12 ; destination pointer
+Xres equ 16 ; width in pixels
+Yres equ 20 ; height in rows
+srcPitch equ 24 ; source row stride in bytes
+dstPitch equ 28 ; destination row stride in bytes
+
+_hq3x_16: ; entry point; arguments are read from the stack at the equ offsets above
+ push ebp
+ mov ebp,esp
+ pushad ; save all general-purpose registers
+
+ mov esi,[ebp+inbuffer] ; esi = source pointer
+ mov edi,[ebp+outbuffer] ; edi = destination pointer
+ mov edx,[ebp+Yres]
+ mov [linesleft],edx
+ mov ecx,[ebp+Xres]
+ shl ecx,1 ; ecx = 2*Xres
+ mov ebx,[ebp+dstPitch]
+ mov dword[moduloDst],ebx
+ sub dword[moduloDst],ecx ; dstPitch - 2*Xres
+ mov ecx,dword[moduloDst]
+ shl dword[moduloDst],1
+ add dword[moduloDst],ecx ; moduloDst = 3*(dstPitch - 2*Xres)
+ mov ecx,[ebp+Xres]
+ shl ecx,1
+ mov ebx,[ebp+srcPitch]
+ mov dword[prevline],0 ; first row: offset 0, so the row above is the row itself
+ mov dword[nextline],ebx
+ mov dword[moduloSrc],ebx
+ sub dword[moduloSrc],ecx ; moduloSrc = srcPitch - 2*Xres
+.loopy ; start of a source row: set the column counter and build the 3x3 window for x=0
+ mov ecx,[ebp+Xres]
+ sub ecx,2 ; x={Xres-2, Xres-1} are special cases.
+ mov dword[xcounter],ecx
+ ; x=0 - special case
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx] ; 8-byte loads starting at the first pixel of each of the three rows
+ movq mm6,[esi]
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx]
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx ; no pixel to the left: w1 is a copy of w2
+ mov [w2],edx
+ shr eax,16
+ mov [w3],eax
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx ; w4 is a copy of w5
+ mov [w5],edx
+ shr eax,16
+ mov [w6],eax
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx ; w7 is a copy of w8
+ mov [w8],edx
+ shr eax,16
+ mov [w9],eax
+ jmp .flags
+.loopx ; general column: build the 3x3 window w1..w9 around the pixel at esi
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx-2] ; 8-byte loads starting one pixel to the left of esi
+ movq mm6,[esi-2]
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx-2]
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx ; first pixel of the load (left neighbour column)
+ shr eax,16
+ mov [w2],eax ; second pixel (same column as esi)
+ psrlq mm5,32
+ movd eax,mm5
+ movzx edx,ax
+ mov [w3],edx ; third pixel (right neighbour column); the fourth loaded pixel is ignored
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx
+ shr eax,16
+ mov [w5],eax ; centre pixel
+ psrlq mm6,32
+ movd eax,mm6
+ movzx edx,ax
+ mov [w6],edx
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx
+ shr eax,16
+ mov [w8],eax
+ psrlq mm7,32
+ movd eax,mm7
+ movzx edx,ax
+ mov [w9],edx
+.flags ; classify the 8 neighbours against the centre pixel and jump to the matching handler
+ mov ebx,_RGBtoYUV
+ mov eax,[w5] ; eax = centre pixel, kept for the handlers
+ xor ecx,ecx ; ecx collects one bit per neighbour that differs by more than the threshold
+ movd mm5,[ebx+eax*4] ; mm5 = table entry of the centre pixel
+ mov dword[cross],0 ; cross collects one bit per direct neighbour that is not bit-identical
+
+ mov edx,[w2]
+ cmp eax,edx
+ je .noflag2
+ or dword[cross],1 ; w2 is not identical to the centre
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2 ; per-byte absolute difference of the two table entries
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag2 ; within threshold: treated as equal
+ or ecx,2 ; pattern bit for w2
+.noflag2
+ mov edx,[w4]
+ cmp eax,edx
+ je .noflag4
+ or dword[cross],2 ; w4 is not identical to the centre
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag4
+ or ecx,8 ; pattern bit for w4
+.noflag4
+ mov edx,[w6]
+ cmp eax,edx
+ je .noflag6
+ or dword[cross],4 ; w6 is not identical to the centre
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag6
+ or ecx,16 ; pattern bit for w6
+.noflag6
+ mov edx,[w8]
+ cmp eax,edx
+ je .noflag8
+ or dword[cross],8 ; w8 is not identical to the centre
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag8
+ or ecx,64 ; pattern bit for w8
+.noflag8
+ test ecx,ecx
+ jnz .testflag1 ; some direct neighbour differs: also test the diagonals
+ mov ecx,[cross] ; no direct neighbour exceeds the threshold: dispatch on the exact-equality mask instead
+ mov ebx,[ebp+dstPitch] ; handlers address the lower output rows through ebx
+ jmp [FuncTable2+ecx*4]
+.testflag1
+ mov edx,[w1]
+ cmp eax,edx
+ je .noflag1
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag1
+ or ecx,1 ; pattern bit for w1
+.noflag1
+ mov edx,[w3]
+ cmp eax,edx
+ je .noflag3
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag3
+ or ecx,4 ; pattern bit for w3
+.noflag3
+ mov edx,[w7]
+ cmp eax,edx
+ je .noflag7
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag7
+ or ecx,32 ; pattern bit for w7
+.noflag7
+ mov edx,[w9]
+ cmp eax,edx
+ je .noflag9
+ movq mm1,mm5
+ movd mm2,[ebx+edx*4]
+ psubusb mm1,mm2
+ psubusb mm2,mm5
+ por mm1,mm2
+ psubusb mm1,[threshold]
+ movd edx,mm1
+ test edx,edx
+ jz .noflag9
+ or ecx,128 ; pattern bit for w9
+.noflag9
+ mov ebx,[ebp+dstPitch] ; handlers address the lower output rows through ebx
+ jmp [FuncTable+ecx*4] ; ecx = 8-bit pattern: bits 0..7 stand for w1,w2,w3,w4,w6,w7,w8,w9
+
+..@flag0
+..@flag1
+..@flag4
+..@flag32
+..@flag128
+..@flag5
+..@flag132
+..@flag160
+..@flag33
+..@flag129
+..@flag36
+..@flag133
+..@flag164
+..@flag161
+..@flag37
+..@flag165
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag2
+..@flag34
+..@flag130
+..@flag162
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag16
+..@flag17
+..@flag48
+..@flag49
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag64
+..@flag65
+..@flag68
+..@flag69
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag8
+..@flag12
+..@flag136
+..@flag140
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag3
+..@flag35
+..@flag131
+..@flag163
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag6
+..@flag38
+..@flag134
+..@flag166
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag20
+..@flag21
+..@flag52
+..@flag53
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag144
+..@flag145
+..@flag176
+..@flag177
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag192
+..@flag193
+..@flag196
+..@flag197
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag96
+..@flag97
+..@flag100
+..@flag101
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag40
+..@flag44
+..@flag168
+..@flag172
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag9
+..@flag13
+..@flag137
+..@flag141
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag18
+..@flag50
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag80
+..@flag81
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag72
+..@flag76
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag10
+..@flag138
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag66
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag24
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag7
+..@flag39
+..@flag135
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag148
+..@flag149
+..@flag180
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag224
+..@flag228
+..@flag225
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag41
+..@flag169
+..@flag45
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag22
+..@flag54
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag208
+..@flag209
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag104
+..@flag108
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag11
+..@flag139
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag19
+..@flag51
+ DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag146
+..@flag178
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ jmp .loopx_end
+..@flag84
+..@flag85
+ DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ jmp .loopx_end
+..@flag112
+..@flag113
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_1M,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ jmp .loopx_end
+..@flag200
+..@flag204
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ jmp .loopx_end
+..@flag73
+..@flag77
+ DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag42
+..@flag170
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag14
+..@flag142
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag67
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag70
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag28
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag152
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag194
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag98
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag56
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag25
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag26
+..@flag31
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag82
+..@flag214
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag88
+..@flag248
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+ jmp .loopx_end
+..@flag74
+..@flag107
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag27
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag86
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag216
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag106
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag30
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag210
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag120
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag75
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag29
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag198
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag184
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag99
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag57
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag71
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag156
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag226
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag60
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag195
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag102
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag153
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag58
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag83
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag92
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag202
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag78
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag154
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag114
+ PIXEL00_1M
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag89
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag90
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag55
+..@flag23
+ DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag182
+..@flag150
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_2
+ PIXEL21_1
+ jmp .loopx_end
+..@flag213
+..@flag212
+ DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ jmp .loopx_end
+..@flag241
+..@flag240
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ jmp .loopx_end
+..@flag236
+..@flag232
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ jmp .loopx_end
+..@flag109
+..@flag105
+ DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL11
+ PIXEL12_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag171
+..@flag43
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag143
+..@flag15
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag124
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag203
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag62
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag211
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag118
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag217
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag110
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag155
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag188
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag185
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag61
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag157
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag103
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag227
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag230
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag199
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag220
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag158
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag234
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag242
+ PIXEL00_1M
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1L
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag59
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag121
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag87
+ PIXEL00_1L
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag79
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1R
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag122
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag94
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag218
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag91
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag229
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag167
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag173
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag181
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag186
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag115
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag93
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag206
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag205
+..@flag201
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag174
+..@flag46
+ DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag179
+..@flag147
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag117
+..@flag116
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+ jmp .loopx_end
+..@flag189
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag231
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag126
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag219
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+ PIXEL02_1M
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag125
+ DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL11
+ PIXEL12_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag221
+ DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1M
+ jmp .loopx_end
+..@flag207
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag238
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL11
+ PIXEL12_1
+ jmp .loopx_end
+..@flag190
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+ PIXEL00_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1D
+ PIXEL21_1
+ jmp .loopx_end
+..@flag187
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+ PIXEL02_1M
+ PIXEL11
+ PIXEL12_C
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag243
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+ PIXEL00_1L
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL10_1
+ PIXEL11
+ jmp .loopx_end
+..@flag119
+ DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+ PIXEL10_1
+ PIXEL11
+ PIXEL20_1L
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag237
+..@flag233
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag175
+..@flag47
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_2
+ jmp .loopx_end
+..@flag183
+..@flag151
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_2
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag245
+..@flag244
+ PIXEL00_2
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag250
+ PIXEL00_1M
+ PIXEL01_C
+ PIXEL02_1M
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+ jmp .loopx_end
+..@flag123
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag95
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+ PIXEL11
+ PIXEL20_1M
+ PIXEL21_C
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag222
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag252
+ PIXEL00_1M
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag249
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+ jmp .loopx_end
+..@flag235
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+ PIXEL02_1M
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag111
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag63
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+ PIXEL10_C
+ PIXEL11
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag159
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag215
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag246
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag254
+ PIXEL00_1M
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_2
+ jmp .loopx_end
+..@flag253
+ PIXEL00_1U
+ PIXEL01_1
+ PIXEL02_1U
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag251
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+ PIXEL02_1M
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_2,PIXEL21_3
+ DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+ jmp .loopx_end
+..@flag239
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ PIXEL02_1R
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_1
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ PIXEL22_1R
+ jmp .loopx_end
+..@flag127
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_2,PIXEL01_3,PIXEL10_3
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+ PIXEL11
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+ PIXEL22_1M
+ jmp .loopx_end
+..@flag191
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1D
+ PIXEL21_1
+ PIXEL22_1D
+ jmp .loopx_end
+..@flag223
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+ DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_2,PIXEL12_3
+ PIXEL11
+ PIXEL20_1M
+ DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+ jmp .loopx_end
+..@flag247
+ PIXEL00_1L
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_1
+ PIXEL11
+ PIXEL12_C
+ PIXEL20_1L
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+..@flag255
+ DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+ PIXEL01_C
+ DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+ PIXEL10_C
+ PIXEL11
+ PIXEL12_C
+ DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+ PIXEL21_C
+ DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+ jmp .loopx_end
+
+..@cross0 ; FuncTable2 entry 0: writes one colour to all nine pixels of the 3x3 output block
+ mov edx,eax ; copy of the 16-bit pixel in ax (presumably the centre pixel w5 - confirm at the dispatch site)
+ shl eax,16
+ or eax,edx ; eax = the pixel in both 16-bit halves (assumes the upper half of eax was zero on entry)
+ mov [edi],eax ; first output row: pixels 0 and 1
+ mov [edi+4],ax ; first output row: pixel 2
+ mov [edi+ebx],eax ; second output row (ebx is used as the byte offset between output rows)
+ mov [edi+ebx+4],ax
+ mov [edi+ebx*2],eax ; third output row
+ mov [edi+ebx*2+4],ax
+ jmp .loopx_end
+..@cross1 ; FuncTable2 entry 1: first output row is a blend with w2, the other two rows are plain copies
+ mov edx,eax ; edx = the 16-bit pixel in ax (presumably the centre pixel w5 - confirm at the dispatch site)
+ shl eax,16
+ or eax,edx ; eax = the pixel in both 16-bit halves (assumes the upper half of eax was zero on entry)
+ mov ecx,[w2] ; w2 is filled from the previous source line by the loaders in this file
+ and edx,[zerolowbits] ; mask is defined outside this chunk; presumably clears each channel's low bit so the halving shift cannot leak across channels
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of the pixel and w2
+ add ecx,0x0821 ; bits 0, 5 and 11 set - looks like +1 per RGB565 channel for rounding; TODO confirm
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = average of the pixel and that average, i.e. roughly 3/4 pixel + 1/4 w2
+ mov [edi],dx ; first output row: all three pixels get the blend
+ mov [edi+2],dx
+ mov [edi+4],dx
+ mov [edi+ebx],eax ; second and third output rows: unblended pixel
+ mov [edi+ebx+4],ax
+ mov [edi+ebx*2],eax
+ mov [edi+ebx*2+4],ax
+ jmp .loopx_end
+..@cross2 ; FuncTable2 entry 2: first output column is a blend with w4, the other two columns are plain copies
+ mov edx,eax ; edx = the 16-bit pixel in ax (presumably the centre pixel w5 - confirm at the dispatch site)
+ shl eax,16
+ or eax,edx ; eax = the pixel in both 16-bit halves (assumes the upper half of eax was zero on entry)
+ mov ecx,[w4] ; w4 is filled from the pixel to the left on the current source line by the loaders in this file
+ and edx,[zerolowbits] ; mask is defined outside this chunk; presumably clears each channel's low bit before the halving shift
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of the pixel and w4
+ add ecx,0x0821 ; bits 0, 5 and 11 set - looks like +1 per RGB565 channel for rounding; TODO confirm
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = roughly 3/4 pixel + 1/4 w4
+ mov [edi],dx ; each row: blended pixel first ...
+ mov [edi+2],eax ; ... then two unblended pixels
+ mov [edi+ebx],dx
+ mov [edi+ebx+2],eax
+ mov [edi+ebx*2],dx
+ mov [edi+ebx*2+2],eax
+ jmp .loopx_end
+..@cross4 ; FuncTable2 entry 4: last output column is a blend with w6, the other two columns are plain copies
+ mov edx,eax ; edx = the 16-bit pixel in ax (presumably the centre pixel w5 - confirm at the dispatch site)
+ shl eax,16
+ or eax,edx ; eax = the pixel in both 16-bit halves (assumes the upper half of eax was zero on entry)
+ mov ecx,[w6] ; w6 is filled from the pixel to the right on the current source line by the loaders in this file
+ and edx,[zerolowbits] ; mask is defined outside this chunk; presumably clears each channel's low bit before the halving shift
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of the pixel and w6
+ add ecx,0x0821 ; bits 0, 5 and 11 set - looks like +1 per RGB565 channel for rounding; TODO confirm
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = roughly 3/4 pixel + 1/4 w6
+ mov [edi],eax ; each row: two unblended pixels ...
+ mov [edi+4],dx ; ... then the blended pixel
+ mov [edi+ebx],eax
+ mov [edi+ebx+4],dx
+ mov [edi+ebx*2],eax
+ mov [edi+ebx*2+4],dx
+ jmp .loopx_end
+..@cross8 ; FuncTable2 entry 8: third output row is a blend with w8, the first two rows are plain copies
+ mov edx,eax ; edx = the 16-bit pixel in ax (presumably the centre pixel w5 - confirm at the dispatch site)
+ shl eax,16
+ or eax,edx ; eax = the pixel in both 16-bit halves (assumes the upper half of eax was zero on entry)
+ mov ecx,[w8] ; w8 is filled from the next source line by the loaders in this file
+ and edx,[zerolowbits] ; mask is defined outside this chunk; presumably clears each channel's low bit before the halving shift
+ and ecx,[zerolowbits]
+ add ecx,edx
+ shr ecx,1 ; ecx = average of the pixel and w8
+ add ecx,0x0821 ; bits 0, 5 and 11 set - looks like +1 per RGB565 channel for rounding; TODO confirm
+ and ecx,[zerolowbits]
+ add edx,ecx
+ shr edx,1 ; edx = roughly 3/4 pixel + 1/4 w8
+ mov [edi],eax ; first and second output rows: unblended pixel
+ mov [edi+4],ax
+ mov [edi+ebx],eax
+ mov [edi+ebx+4],ax
+ mov [edi+ebx*2],dx ; third output row: all three pixels get the blend
+ mov [edi+ebx*2+2],dx
+ mov [edi+ebx*2+4],dx
+ jmp .loopx_end
+
+.loopx_end ; every pixel handler above jumps here: step to the next column
+ add esi,2 ; source pointer advances by one 16-bit pixel
+ add edi,6 ; destination pointer advances by three 16-bit pixels
+ dec dword[xcounter]
+ jle .xres_2 ; counter <= 0: handle the end-of-row special cases
+ jmp .loopx ; otherwise continue with the regular column loop
+.xres_2 ; loads w1..w9 for the second-to-last column, then re-enters the common .flags path
+ ; x=Xres-2 - special case
+ jl .xres_1 ; when reached from .loopx_end the flags are still those of its dec: counter < 0 goes on to .xres_1
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx-4] ; 8 bytes starting two pixels left of x on the previous line, so the load ends at pixel x+1 (presumably to avoid reading past the row end)
+ movq mm6,[esi-4] ; same window on the current line
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx-4] ; same window on the next line
+ psrlq mm5,16 ; drop pixel x-2: low 48 bits now hold pixels x-1, x, x+1
+ psrlq mm6,16
+ psrlq mm7,16
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx ; w1 = previous line, x-1
+ shr eax,16
+ mov [w2],eax ; w2 = previous line, x
+ psrlq mm5,32
+ movd eax,mm5
+ mov [w3],eax ; w3 = previous line, x+1
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx ; w4 = current line, x-1
+ shr eax,16
+ mov [w5],eax ; w5 = current line, x
+ psrlq mm6,32
+ movd eax,mm6
+ mov [w6],eax ; w6 = current line, x+1
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx ; w7 = next line, x-1
+ shr eax,16
+ mov [w8],eax ; w8 = next line, x
+ psrlq mm7,32
+ movd eax,mm7
+ mov [w9],eax ; w9 = next line, x+1
+ jmp .flags
+.xres_1 ; loads w1..w9 for the last column of a row, or leaves the row when it is already done
+ cmp dword[xcounter],-1
+ jl .nexty ; counter below -1: the last column has been processed, move to the next row
+ ; x=Xres-1 - special case
+ mov ebx,[prevline]
+ movq mm5,[esi+ebx-6] ; 8 bytes starting three pixels left of x on the previous line, so the load ends at pixel x
+ movq mm6,[esi-6] ; same window on the current line
+ mov ebx,[nextline]
+ movq mm7,[esi+ebx-6] ; same window on the next line
+ psrlq mm5,32 ; drop pixels x-3 and x-2: low 32 bits now hold pixels x-1 and x
+ psrlq mm6,32
+ psrlq mm7,32
+ movd eax,mm5
+ movzx edx,ax
+ mov [w1],edx ; w1 = previous line, x-1
+ shr eax,16
+ mov [w2],eax ; w2 = previous line, x
+ mov [w3],eax ; no pixel to the right: w3 repeats w2
+ movd eax,mm6
+ movzx edx,ax
+ mov [w4],edx ; w4 = current line, x-1
+ shr eax,16
+ mov [w5],eax ; w5 = current line, x
+ mov [w6],eax ; w6 repeats w5
+ movd eax,mm7
+ movzx edx,ax
+ mov [w7],edx ; w7 = next line, x-1
+ shr eax,16
+ mov [w8],eax ; w8 = next line, x
+ mov [w9],eax ; w9 repeats w8
+ jmp .flags
+.nexty ; end of a row: advance both pointers and set up prevline/nextline for the following row
+ add esi,dword[moduloSrc] ; moduloSrc/moduloDst are set outside this chunk; presumably the distance from row end to the next row start
+ add edi,dword[moduloDst]
+ dec dword[linesleft]
+ jz .fin ; no rows left
+ mov ebx,[ebp+srcPitch]
+ cmp dword[linesleft],1
+ je .lastline
+ mov dword[nextline],ebx ; ordinary row: next line is +srcPitch
+ neg ebx
+ mov dword[prevline],ebx ; previous line is -srcPitch
+ jmp .loopy
+.lastline ; final row: there is no row below it
+ mov dword[nextline],0 ; offset 0 makes the "next line" loads read the current row again
+ neg ebx
+ mov dword[prevline],ebx ; previous line is -srcPitch
+ jmp .loopy
+.fin ; routine exit
+ emms ; leave MMX state so x87 floating point code can run afterwards
+ popad ; restore the general registers (presumably matching a pushad in the prologue outside this chunk)
+ mov esp,ebp ; drop the stack frame
+ pop ebp
+ ret
+
+SECTION .data
+FuncTable ; 256 dword addresses: entry N is the ..@flagN handler (presumably indexed by the 8-bit neighbour pattern at the dispatch site - confirm)
+ dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7
+ dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15
+ dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23
+ dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31
+ dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39
+ dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47
+ dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55
+ dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63
+ dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71
+ dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79
+ dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87
+ dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95
+ dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103
+ dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111
+ dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119
+ dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127
+ dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135
+ dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143
+ dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151
+ dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159
+ dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167
+ dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175
+ dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183
+ dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191
+ dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199
+ dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207
+ dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215
+ dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223
+ dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231
+ dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239
+ dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247
+ dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255
+
+FuncTable2 ; 16 dword handler addresses: entries 0, 1, 2, 4 and 8 go to the ..@cross handlers, every other entry falls back to ..@flag0 (presumably indexed by a 4-bit selector at the dispatch site - confirm)
+ dd ..@cross0, ..@cross1, ..@cross2, ..@flag0 ; no dangling comma: each line declares exactly four entries
+ dd ..@cross4, ..@flag0, ..@flag0, ..@flag0
+ dd ..@cross8, ..@flag0, ..@flag0, ..@flag0
+ dd ..@flag0, ..@flag0, ..@flag0, ..@flag0
+
diff --git a/common/scaler/intern.h b/common/scaler/intern.h
index 2b92dfb647..c626e1ad79 100644
--- a/common/scaler/intern.h
+++ b/common/scaler/intern.h
@@ -153,7 +153,7 @@ static inline bool diffYUV(int yuv1, int yuv2) {
* 16bit RGB to YUV conversion table. This table is setup by InitLUT().
* Used by the hq scaler family.
*/
-extern int RGBtoYUV[65536];
+extern "C" uint RGBtoYUV[65536];
/** Auxiliary macro to simplify creating those template function wrappers. */
#define MAKE_WRAPPER(FUNC) \
diff --git a/configure b/configure
index 498814c4b0..68cbdf36cf 100755
--- a/configure
+++ b/configure
@@ -42,12 +42,16 @@ _build_kyra=no
_build_saga=no
_need_memalign=no
_build_plugins=no
+_nasm=auto
# more defaults
_backend=sdl
_ranlib=ranlib
_install=install
_sdlconfig=sdl-config
_sdlpath="$PATH"
+_nasmpath="$PATH"
+NASMFLAGS=""
+NASM=""
_prefix=/usr/local
_srcdir=`dirname $0`
@@ -177,6 +181,45 @@ rm -f tmp_find_type_with_size$EXEEXT tmp_find_type_with_size.cpp
echo $datatype
}
+CheckNASM()
+{
+ echocheck "nasm"
+ if test "$_nasm" = no ; then
+ echo "disabled"
+ return;
+ fi
+
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":"
+
+ for path_dir in $_nasmpath; do
+ if test -x "$path_dir/nasm" ; then
+ NASM="$path_dir/nasm"
+ echo $NASM
+ break
+ fi
+ done
+
+ IFS="$ac_save_ifs"
+
+ if test x$NASM = x -o x$NASM = x'"$NASM"'; then
+ echo "not found"
+ _nasm=no
+ else
+ case $_host_os in
+ mingw* | cygwin*)
+ NASMFLAGS="-f win32"
+ ;;
+ openbsd*)
+ NASMFLAGS="-f aoutb"
+ ;;
+ *)
+ NASMFLAGS="-f elf"
+ ;;
+ esac
+ _nasm=yes
+ fi
+}
+
#
# Greet user
#
@@ -245,7 +288,10 @@ Optional Libraries:
--with-mpeg2-prefix=DIR Prefix where libmpeg2 is installed (optional)
--disable-mpeg2 disable mpeg2 codec for cutscenes [autodetect]
- --with-sdl-prefix=DIR Prefix where the sdl-config script is installed
+ --with-sdl-prefix=DIR Prefix where the sdl-config script is installed (optional)
+
+ --with-nasm-prefix=DIR Prefix where nasm executable is installed (optional)
+ --disable-nasm disable assembly language optimizations [autodetect]
Some influential environment variables:
LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
@@ -282,6 +328,8 @@ for ac_option in $@; do
--disable-mad) _mad=no ;;
--enable-zlib) _zlib=yes ;;
--disable-zlib) _zlib=no ;;
+ --enable-nasm) _nasm=yes ;;
+ --disable-nasm) _nasm=no ;;
--disable-mpeg2) _mpeg2=no ;;
--enable-plugins) _build_plugins=yes ;;
--with-mpeg2-prefix=*)
@@ -332,6 +380,10 @@ for ac_option in $@; do
arg=`echo $ac_option | cut -d '=' -f 2`
_sdlpath="$arg:$arg/bin"
;;
+ --with-nasm-prefix=*)
+ arg=`echo $ac_option | cut -d '=' -f 2`
+ _nasmpath="$arg:$arg/bin"
+ ;;
--host=*)
_host=`echo $ac_option | cut -d '=' -f 2`
;;
@@ -509,7 +561,6 @@ else
_mak_saga='# DISABLE_SAGA = 1'
fi
-
if test -n "$_host"; then
# Cross-compiling mode - add your target here if needed
case "$_host" in
@@ -848,6 +899,19 @@ echo "$_mpeg2"
rm -f $TMPC $TMPO$EXEEXT
#
+# Check for nasm
+#
+CheckNASM
+
+if test "$_nasm" = yes ; then
+ _def_nasm='#define USE_NASM'
+ _make_def_HAVE_NASM='HAVE_NASM = 1'
+else
+ _def_nasm='#undef USE_NASM'
+ _make_def_HAVE_NASM='# HAVE_NASM = 1'
+fi
+
+#
# figure out installation directories
#
test -z "$_bindir" && _bindir="$_prefix/bin"
@@ -892,10 +956,17 @@ fi
if test "$_build_kyra" = yes ; then
echo " Legend of Kyrandia"
fi
+
echo
echo_n "Backend... "
-echo "$_backend"
+echo_n "$_backend"
+
+if test "$_nasm" = yes ; then
+ echo ", assembly routines"
+else
+ echo
+fi
#
# Backend related stuff
@@ -954,6 +1025,9 @@ $_def_alsa
$_def_zlib
$_def_mpeg2
+/* Should we use i386 assembly routines */
+$_def_nasm
+
#endif /* CONFIG_H */
EOF
@@ -970,6 +1044,8 @@ BACKEND := $_backend
MODULES += $MODULES
MODULE_DIRS += $MODULE_DIRS
EXEEXT := $EXEEXT
+NASM := $NASM
+NASMFLAGS := $NASMFLAGS
PREFIX := $_prefix
BINDIR := $_bindir
@@ -977,6 +1053,7 @@ MANDIR := $_mandir
$_mak_plugins
$_make_def_HAVE_GCC3
+$_make_def_HAVE_NASM
$_mak_scumm
$_mak_simon
$_mak_sky
diff --git a/doc/09.tex b/doc/09.tex
index ee0527bec0..40b29c21ad 100644
--- a/doc/09.tex
+++ b/doc/09.tex
@@ -13,6 +13,11 @@ Visual C++ are supported. If you wish to use MP3-compressed CD tracks or
USE\_MAD. Tools for compressing .SOU files to .SO3 files can be
found in the 'tools' CVS module, or in the 'scummvm-tools' package.
+Some parts of ScummVM, particularly the scalers, have highly optimized versions
+written in assembler. If you wish to use this option, you will need to install
+the nasm assembler (see \url{http://nasm.sf.net}). Note that currently we only have x86
+MMX optimized versions, and they will not compile on other processors.
+
On Win9x/NT/XP you can define USE\_WINDBG and attach WinDbg to browse debug
messages (see \url{http://www.sysinternals.com/ntw2k/freeware/debugview.shtml}).