From 816bd9a7ea4f99aaae67c6e82f686b88dd8c728f Mon Sep 17 00:00:00 2001
From: Robin Watts
Date: Mon, 27 Jul 2009 16:29:36 +0000
Subject: Add ARM code version of Normal2x scaler. Add ARM only aspect ratio
 correcting version of Normal2x scaler.

Make WinCE port use Normal2x by default if the screen is large enough.
Make WinCE port use aspect ratio correcting version if panel is hidden.

svn-id: r42843
---
 graphics/module.mk            |   3 +-
 graphics/scaler.cpp           |  56 ++++++++++++++
 graphics/scaler.h             |   3 +
 graphics/scaler/Normal2xARM.s | 171 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 232 insertions(+), 1 deletion(-)
 create mode 100755 graphics/scaler/Normal2xARM.s

(limited to 'graphics')

diff --git a/graphics/module.mk b/graphics/module.mk
index 90659ce977..46ed564e1e 100644
--- a/graphics/module.mk
+++ b/graphics/module.mk
@@ -37,7 +37,8 @@ MODULE_OBJS += \
 
 ifdef USE_ARM_SCALER_ASM
 MODULE_OBJS += \
-	scaler/scale2xARM.o
+	scaler/scale2xARM.o \
+	scaler/Normal2xARM.o
 endif
 
 ifndef DISABLE_HQ_SCALERS
diff --git a/graphics/scaler.cpp b/graphics/scaler.cpp
index 11767848ed..7620e5b107 100644
--- a/graphics/scaler.cpp
+++ b/graphics/scaler.cpp
@@ -187,6 +187,61 @@ void Normal1x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
 }
 
 #ifndef DISABLE_SCALERS
+#ifdef USE_ARM_SCALER_ASM
+/**
+ * Assembly core of the aspect ratio correcting 2x scaler; the code lives
+ * in graphics/scaler/Normal2xARM.s.
+ *
+ * @param mask  constant the assembly ANDs with a doubled 16bit pixel to
+ *              split it into colour channels before blending rows
+ */
+extern "C" void Normal2xAspectMask(const uint8 *srcPtr, uint32 srcPitch,
+                                   uint8 *dstPtr, uint32 dstPitch,
+                                   int width, int height, uint32 mask);
+
+/**
+ * 2x scaler with built-in aspect ratio correction (ARM builds only).
+ *
+ * Thin wrapper that selects the channel mask for the active 16bit pixel
+ * format and hands all the work to Normal2xAspectMask().
+ *
+ * @param srcPtr    source pixels
+ * @param srcPitch  source pitch in bytes
+ * @param dstPtr    destination pixels
+ * @param dstPitch  destination pitch in bytes
+ * @param width     forwarded unchanged to the assembly routine
+ * @param height    forwarded unchanged to the assembly routine
+ */
+void Normal2xAspect(const uint8 *srcPtr, uint32 srcPitch,
+                    uint8 *dstPtr, uint32 dstPitch,
+                    int width, int height) {
+	// Mask layouts: 565 -> 0x07e0F81F, every other format -> 0x03e07C1F.
+	uint32 channelMask = 0x03e07C1F;
+
+	if (gBitFormat == 565)
+		channelMask = 0x07e0F81F;
+
+	Normal2xAspectMask(srcPtr, srcPitch, dstPtr, dstPitch,
+	                   width, height, channelMask);
+}
+
+/**
+ * Assembly implementation of the plain 2x scaler, found in
+ * graphics/scaler/Normal2xARM.s.
+ */
+extern "C" void Normal2xARM(const uint8 *srcPtr, uint32 srcPitch,
+                            uint8 *dstPtr, uint32 dstPitch,
+                            int width, int height);
+
+/**
+ * Trivial nearest-neighbour 2x scaler (ARM assembly backed).
+ */
+void Normal2x(const uint8 *srcPtr, uint32 srcPitch,
+              uint8 *dstPtr, uint32 dstPitch,
+              int width, int height) {
+	Normal2xARM(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+}
+#else
 /**
  * Trivial nearest-neighbour 2x scaler.
  */
@@ -210,6 +265,7 @@ void Normal2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPit
 		dstPtr += dstPitch << 1;
 	}
 }
+#endif
 
 /**
  * Trivial nearest-neighbour 3x scaler.
diff --git a/graphics/scaler.h b/graphics/scaler.h
index 4cea9ee2fb..bdae161bd1 100644
--- a/graphics/scaler.h
+++ b/graphics/scaler.h
@@ -45,6 +45,9 @@ DECLARE_SCALER(AdvMame2x);
 DECLARE_SCALER(AdvMame3x);
 DECLARE_SCALER(Normal1x);
 DECLARE_SCALER(Normal2x);
+#ifdef USE_ARM_SCALER_ASM
+DECLARE_SCALER(Normal2xAspect);
+#endif
 DECLARE_SCALER(Normal3x);
 DECLARE_SCALER(Normal1o5x);
 DECLARE_SCALER(TV2x);
diff --git a/graphics/scaler/Normal2xARM.s b/graphics/scaler/Normal2xARM.s
new file mode 100755
index 0000000000..5de50d9c17
--- /dev/null
+++ b/graphics/scaler/Normal2xARM.s
@@ -0,0 +1,171 @@
+@ ScummVM Scumm Interpreter
+@ Copyright (C) 2009 The ScummVM project
+@
+@ This program is free software; you can redistribute it and/or
+@ modify it under the terms of the GNU General Public License
+@ as published by the Free Software Foundation; either version 2
+@ of the License, or (at your option) any later version.
+@
+@ This program is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with this program; if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+@
+@ $URL$
+@ $Id$
+@
+@ @author Robin Watts (robin@wss.co.uk)
+
+        .text
+
+        .global Normal2xARM
+        .global Normal2xAspectMask
+
+
+	@ Assumes dst is aligned (so did the C)
+	@ Assumes 16bit (so did the C)
+Normal2xARM:
+        @ Plain 2x scaler: each 16bit source pixel becomes a 2x2 block.
+        @ r0 = src
+        @ r1 = srcPitch
+        @ r2 = dst (word aligned, as STR/STMIA are used on it)
+        @ r3 = dstPitch
+        @ [sp,#0] = w, [sp,#4] = h (fetched into r4/r5 after the push)
+        STMFD   r13!,{r4-r11,r14}
+        LDR     r4, [r13,#4*9]		@ r4 = w
+        LDR     r5, [r13,#4*10]		@ r5 = h
+	CMP	r4, #0
+	CMPGT	r5, #0
+	BLE	exit_normal2x		@ w <= 0 or h <= 0: write nothing
+	ADD	r12,r2, r3		@ r12= lower row of each output pair
+	SUB	r1, r1, r4, LSL #1	@ r1 = srcPitch - w*2
+	SUB	r6, r3, r4, LSL #2
+	ADD	r3, r3, r6		@ r3 = dstPitch*2 - w*4
+yloop:
+	SUBS	r14,r4, #4		@ r14= w-4
+	BLT	leftover		@ fewer than 4 pixels in the row
+xloop:
+	LDRH	r6, [r0], #2
+	LDRH	r7, [r0], #2
+	LDRH	r8, [r0], #2
+	LDRH	r9, [r0], #2
+	ORR	r6, r6, r6, LSL #16
+	ORR	r7, r7, r7, LSL #16
+	ORR	r8, r8, r8, LSL #16
+	ORR	r9, r9, r9, LSL #16
+	STMIA	r2!, {r6-r9}
+	STMIA	r12!,{r6-r9}
+	SUBS	r14,r14,#4
+	BGE	xloop
+leftover:
+	@ r14 is (pixels left - 4) on both paths in, so it must be re-biased
+	@ before the per-pixel loop; rows with w < 4 arrive here directly.
+	ADDS	r14,r14,#4		@ r14= pixels left (0..3)
+	BEQ	next_row
+thin:
+	LDRH	r6, [r0], #2
+	ORR	r6, r6, r6, LSL #16
+	STR	r6, [r2], #4
+	STR	r6, [r12],#4
+	SUBS	r14,r14,#1
+	BGT	thin
+next_row:
+	ADD	r0, r0, r1
+	ADD	r2, r2, r3
+	ADD	r12,r12,r3
+	SUBS	r5, r5, #1
+	BGT	yloop
+exit_normal2x:
+        LDMFD   r13!,{r4-r11,PC}
+
+
+	@ Horrid filter calculations
+	@ AAAAAAAAAAAABBBBBBBBBBBBCCCCCCCCCCCCDDDDDDDDDDDDEEEEEEEEEEEE
+	@ <-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+->
+	@ Ideal:  A,A,(A*2+B*3)/5,B,(B*4+C)/5,C,C,(C+D*4)/5,D,(D*3+E*2)/5,E,E
+	@ Actual: A,A,(A*3+B*5)/8,B,(B*7+C)/8,C,C,(C+D*7)/8,D,(D*5+E*3)/8,E,E
+
+	@ Assumes dst is aligned (so did the C)
+	@ Assumes 16bit (so did the C)
+Normal2xAspectMask:
+        @ Turns each band of 5 src rows into 12 dst rows, doubling every
+        @ pixel horizontally; outputs 2, 4, 7 and 9 are blends of two rows.
+        @ r0 = src, r1 = srcPitch, r2 = dst, r3 = dstPitch
+        @ stacked args (loaded below): w -> r4, h -> r5, mask -> r12
+        @ The inner loop handles one column (5 src pixels, 12 dst words)
+        @ per pass; the outer loop steps h down by 5 per band.
+	@ NOTE(review): h is presumably a multiple of 5 - confirm with caller
+        STMFD   r13!,{r4-r11,r14}
+        LDR     r4, [r13,#4*9]		@ r4 = w
+        LDR     r5, [r13,#4*10]		@ r5 = h
+	LDR	r12,[r13,#4*11]		@ r12= mask
+	MOV	r11,#2
+	SUB	r11,r11,r1, LSL #2	@ r11= 2-srcPitch*4
+	MOV	r14,#4
+	SUB	r14,r14,r3, LSL #3
+	SUB	r14,r14,r3, LSL #1
+	SUB	r14,r14,r3		@ r14 = 4-dstPitch*11
+yloop_aspect:
+xloop_aspect:
+	LDRH	r6, [r0], r1		@ r6 = A
+	LDRH	r7, [r0], r1            @ r7 = B
+	LDRH	r8, [r0], r1		@ r8 = C
+	LDRH	r9, [r0], r1		@ r9 = D
+	LDRH	r10,[r0], r11		@ r10= E, r0 = top of next column
+	ORR	r6, r6, r6, LSL #16	@ r6 = output 0, 1
+	ORR	r7, r7, r7, LSL #16	@ r7 = output 3
+	ORR	r8, r8, r8, LSL #16	@ r8 = output 5,6
+	ORR	r9, r9, r9, LSL #16	@ r9 = output 8
+	ORR	r10,r10,r10,LSL #16	@ r10= output 10, 11
+	STR	r6, [r2], r3		@ output 0 (A)
+	STR	r6, [r2], r3		@ output 1 (A)
+	AND	r6, r6, r12		@ r6 = A split
+	ADD	r6, r6, r6, LSL #1	@ r6 = A*3
+	STR	r7, [r2, r3]		@ output 3 (B), r2 stays on row 2
+	AND	r7, r7, r12		@ r7 = B split
+	ADD	r6, r6, r7		@ r6 = A*3 + B
+	ADD	r6, r6, r7, LSL #2	@ r6 = A*3 + B*5
+	AND	r6, r12,r6, LSR #3	@ r6 = (A*3 + B*5)>>3
+	ORR	r6, r6, r6, ROR #16	@ r6 = output 2
+	STR	r6, [r2], r3, LSL #1	@ output 2 (A*3+B*5)>>3
+	RSB	r7, r7, r7, LSL #3	@ r7 = B*7
+	AND	r6, r8, r12		@ r6 = C split
+	ADD	r7, r7, r6		@ r7 = B*7+C
+	AND	r7, r12,r7, LSR #3	@ r7 = (B*7 + C)>>3
+	ORR	r7, r7, r7, ROR #16	@ r7 = output 4
+	STR	r7, [r2], r3		@ output 4 (B*7+C)>>3
+	STR	r8, [r2], r3		@ output 5 (C)
+	STR	r8, [r2], r3		@ output 6 (C)
+	STR	r9, [r2, r3]		@ output 8 (D), r2 stays on row 7
+	AND	r9, r9, r12		@ r9 = D split
+	RSB	r7, r9, r9, LSL #3	@ r7 = D*7
+	ADD	r6, r6, r7		@ r6 = C+D*7
+	AND	r6, r12,r6, LSR #3	@ r6 = (C + D*7)>>3
+	ORR	r6, r6, r6, ROR #16	@ r6 = output 7
+	STR	r6, [r2], r3, LSL #1	@ output 7 (C+D*7)>>3
+	ADD	r9, r9, r9, LSL #2	@ r9 = D*5
+	AND	r6, r10,r12		@ r6 = E split
+	ADD	r9, r9, r6		@ r9 = D*5+E
+	ADD	r9, r9, r6, LSL #1	@ r9 = D*5+E*3
+	AND	r9, r12,r9, LSR #3	@ r9 = (D*5 + E*3)>>3
+	ORR	r9, r9, r9, ROR #16	@ r9 = output 9
+	STR	r9, [r2], r3		@ output 9 (D*5+E*3)>>3
+	STR	r10,[r2], r3		@ output 10 (E)
+	STR	r10,[r2], r14		@ output 11 (E), r2 = top of next column
+	SUBS	r4, r4, #1		@ one column done
+	BGT	xloop_aspect
+        LDR     r4, [r13,#4*9]		@ r4 = w
+	ADD	r0, r0, r1, LSL #2
+	ADD	r0, r0, r1		@ r0 += srcPitch*5
+	SUB	r0, r0, r4, LSL #1	@ r0 -= w*2 (start of next src band)
+	ADD	r2, r2, r3, LSL #3
+	ADD	r2, r2, r3, LSL #2	@ r2 += dstPitch*12
+	SUB	r2, r2, r4, LSL #2	@ r2 -= w*4 (start of next dst band)
+	SUBS	r5, r5, #5		@ 5 source rows consumed per band
+	BGT	yloop_aspect
+
+        LDMFD   r13!,{r4-r11,PC}
-- 
cgit v1.2.3