Diffstat (limited to 'graphics/scaler')
-rwxr-xr-x  graphics/scaler/Normal2xARM.s   171
-rw-r--r--  graphics/scaler/scale2x.cpp      38
-rw-r--r--  graphics/scaler/scale2x.h        10
-rwxr-xr-x  graphics/scaler/scale2xARM.s    182
-rw-r--r--  graphics/scaler/scalebit.cpp     12
5 files changed, 389 insertions(+), 24 deletions(-)
diff --git a/graphics/scaler/Normal2xARM.s b/graphics/scaler/Normal2xARM.s
new file mode 100755
index 0000000000..5de50d9c17
--- /dev/null
+++ b/graphics/scaler/Normal2xARM.s
@@ -0,0 +1,171 @@
+@ ScummVM Scumm Interpreter
+@ Copyright (C) 2009 The ScummVM project
+@
+@ This program is free software@ you can redistribute it and/or
+@ modify it under the terms of the GNU General Public License
+@ as published by the Free Software Foundation@ either version 2
+@ of the License, or (at your option) any later version.
+@
+@ This program is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY@ without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with this program@ if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+@
+@ $URL$
+@ $Id$
+@
+@ @author Robin Watts (robin@wss.co.uk)
+
+ .text
+
+ .global Normal2xARM
+ .global Normal2xAspectMask
+
+
+ @ Assumes dst is aligned (so did the C)
+ @ Assumes 16bit (so did the C)
+Normal2xARM:
+ @ r0 = src
+ @ r1 = srcPitch
+ @ r2 = dst
+ @ r3 = dstPitch
+ @ r4 = w
+ @ r5 = h
+ STMFD r13!,{r4-r11,r14}
+ LDR r4, [r13,#4*9] @ r4 = w
+ LDR r5, [r13,#4*10] @ r5 = h
+	ADD	r12,r2, r3		@ r12 = second dst row
+	SUB	r1, r1, r4, LSL #1	@ r1 = srcPitch - w*2
+	SUB	r6, r3, r4, LSL #2	@ r6 = dstPitch - w*4
+	ADD	r3, r3, r6		@ r3 = dstPitch*2 - w*4
+yloop:
+	SUBS	r14,r4, #4
+	ADDLT	r14,r14,#4		@ w < 4: do the whole row in "thin"
+	BLT	thin
+xloop:
+ LDRH r6, [r0], #2
+ LDRH r7, [r0], #2
+ LDRH r8, [r0], #2
+ LDRH r9, [r0], #2
+ ORR r6, r6, r6, LSL #16
+ ORR r7, r7, r7, LSL #16
+ ORR r8, r8, r8, LSL #16
+ ORR r9, r9, r9, LSL #16
+ STMIA r2!, {r6-r9}
+ STMIA r12!,{r6-r9}
+ SUBS r14,r14,#4
+ BGE xloop
+ ADDS r14,r14,#4
+ BNE thin
+ ADD r0, r0, r1
+ ADD r2, r2, r3
+ ADD r12,r12,r3
+ SUBS r5, r5, #1
+ BGT yloop
+
+ LDMFD r13!,{r4-r11,PC}
+thin:
+ LDRH r6, [r0], #2
+ ORR r6, r6, r6, LSL #16
+ STR r6, [r2], #4
+ STR r6, [r12],#4
+ SUBS r14,r14,#1
+ BGT thin
+ ADD r0, r0, r1
+ ADD r2, r2, r3
+ ADD r12,r12,r3
+ SUBS r5, r5, #1
+ BGT yloop
+
+ LDMFD r13!,{r4-r11,PC}
+
+
+ @ Horrid filter calculations
+ @ AAAAAAAAAAAABBBBBBBBBBBBCCCCCCCCCCCCDDDDDDDDDDDDEEEEEEEEEEEE
+ @ <-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+-><-+->
+ @ Ideal: A,A,(A*2+B*3)/5,B,(B*4+C)/5,C,C,(C+D*4)/5,D,(D*3+E*2)/5,E,E
+ @ Actual: A,A,(A*3+B*5)/8,B,(B*7+C)/8,C,C,(C+D*7)/8,D,(D*5+E*3)/8,E,E
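+	@
+	@ Worked example of the "actual" tap, assuming RGB565 pixels and
+	@ the caller-supplied channel mask: blending A = pure red (0xF800)
+	@ into B = pure blue (0x001F) with (A*3+B*5)>>3 gives
+	@ R = (31*3)>>3 = 11 and B = (31*5)>>3 = 19, i.e. 0x5813, close to
+	@ the ideal (A*2+B*3)/5 result of R = 12, B = 18.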
+
+ @ Assumes dst is aligned (so did the C)
+ @ Assumes 16bit (so did the C)
+Normal2xAspectMask:
+ @ r0 = src
+ @ r1 = srcPitch
+ @ r2 = dst
+ @ r3 = dstPitch
+ @ r4 = w
+ @ r5 = h
+ @ r12= mask
+ STMFD r13!,{r4-r11,r14}
+ LDR r4, [r13,#4*9] @ r4 = w
+ LDR r5, [r13,#4*10] @ r5 = h
+ LDR r12,[r13,#4*11] @ r12= mask
+ MOV r11,#2
+ SUB r11,r11,r1, LSL #2 @ r11= 2-srcPitch*4
+ MOV r14,#4
+ SUB r14,r14,r3, LSL #3
+ SUB r14,r14,r3, LSL #1
+ SUB r14,r14,r3 @ r14 = 4-dstPitch*11
+yloop_aspect:
+xloop_aspect:
+ LDRH r6, [r0], r1 @ r6 = A
+ LDRH r7, [r0], r1 @ r7 = B
+ LDRH r8, [r0], r1 @ r8 = C
+ LDRH r9, [r0], r1 @ r9 = D
+ LDRH r10,[r0], r11 @ r10= E
+ ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
+ ORR r7, r7, r7, LSL #16 @ r7 = output 3
+ ORR r8, r8, r8, LSL #16 @ r8 = output 5,6
+ ORR r9, r9, r9, LSL #16 @ r9 = output 8
+ ORR r10,r10,r10,LSL #16 @ r10= output 10, 11
+ STR r6, [r2], r3 @ output 0 (A)
+ STR r6, [r2], r3 @ output 1 (A)
+ AND r6, r6, r12 @ r6 = A split
+ ADD r6, r6, r6, LSL #1 @ r6 = A*3
+ STR r7, [r2, r3] @ output 3 (B)
+ AND r7, r7, r12 @ r7 = B split
+ ADD r6, r6, r7 @ r6 = A*3 + B
+ ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5
+ AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3
+ ORR r6, r6, r6, ROR #16 @ r6 = output 2
+ STR r6, [r2], r3, LSL #1 @ output 2 (A*3+B*5)>>3
+ RSB r7, r7, r7, LSL #3 @ r7 = B*7
+ AND r6, r8, r12 @ r6 = C split
+ ADD r7, r7, r6 @ r7 = B*7+C
+ AND r7, r12,r7, LSR #3 @ r7 = (B*7 + C)>>3
+ ORR r7, r7, r7, ROR #16 @ r7 = output 4
+ STR r7, [r2], r3 @ output 4 (B*7+C)>>3
+ STR r8, [r2], r3 @ output 5 (C)
+ STR r8, [r2], r3 @ output 6 (C)
+ STR r9, [r2, r3] @ output 8 (D)
+ AND r9, r9, r12 @ r9 = D split
+ RSB r7, r9, r9, LSL #3 @ r7 = D*7
+ ADD r6, r6, r7 @ r6 = C+D*7
+ AND r6, r12,r6, LSR #3 @ r6 = (C + D*7)>>3
+ ORR r6, r6, r6, ROR #16 @ r6 = output 7
+ STR r6, [r2], r3, LSL #1 @ output 7 (C+D*7)>>3
+ ADD r9, r9, r9, LSL #2 @ r9 = D*5
+ AND r6, r10,r12 @ r6 = E split
+ ADD r9, r9, r6 @ r9 = D*5+E
+ ADD r9, r9, r6, LSL #1 @ r9 = D*5+E*3
+ AND r9, r12,r9, LSR #3 @ r9 = (D*5 + E*3)>>3
+ ORR r9, r9, r9, ROR #16 @ r9 = output 9
+ STR r9, [r2], r3 @ output 9 (D*5+E*3)>>3
+ STR r10,[r2], r3 @ output 10 (E)
+ STR r10,[r2], r14 @ output 11 (E)
+ SUBS r4, r4, #1
+ BGT xloop_aspect
+ LDR r4, [r13,#4*9] @ r4 = w
+ ADD r0, r0, r1, LSL #2
+ ADD r0, r0, r1
+ SUB r0, r0, r4, LSL #1
+ ADD r2, r2, r3, LSL #3
+ ADD r2, r2, r3, LSL #2
+ SUB r2, r2, r4, LSL #2
+ SUBS r5, r5, #5
+ BGT yloop_aspect
+
+ LDMFD r13!,{r4-r11,PC}
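For reference, Normal2xARM is a plain 2x pixel doubler: every 16-bit source pixel becomes a 2x2 block in the destination, written to two output rows per pass. A minimal C sketch of the same operation (pitches in bytes; the prototype is inferred from the register map above and is illustrative, not copied from the tree):

	#include <stdint.h>

	static void normal2x_ref(const uint8_t *src, uint32_t srcPitch,
	                         uint8_t *dst, uint32_t dstPitch,
	                         int w, int h) {
		for (int y = 0; y < h; y++) {
			const uint16_t *s = (const uint16_t *)(src + y * srcPitch);
			uint16_t *d0 = (uint16_t *)(dst + (2 * y) * dstPitch);
			uint16_t *d1 = (uint16_t *)(dst + (2 * y + 1) * dstPitch);
			for (int x = 0; x < w; x++) {
				// Each source pixel fills a 2x2 destination block.
				d0[2 * x] = d0[2 * x + 1] = s[x];
				d1[2 * x] = d1[2 * x + 1] = s[x];
			}
		}
	}

The assembly gets its speed from doing this four pixels at a time: each halfword is duplicated into a full word with ORR, and the four words are stored to both destination rows with a single STMIA each.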
diff --git a/graphics/scaler/scale2x.cpp b/graphics/scaler/scale2x.cpp
index 226d379c91..877bbb9509 100644
--- a/graphics/scaler/scale2x.cpp
+++ b/graphics/scaler/scale2x.cpp
@@ -158,7 +158,7 @@ void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_ui
/***************************************************************************/
/* Scale2x MMX implementation */
-#if defined(__GNUC__) && defined(__i386__)
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
/*
* Apply the Scale2x effect at a single row.
@@ -205,7 +205,7 @@ static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8*
__asm__ __volatile__(
/* central runs */
- "shrl $3, %4\n"
+ "shr $3, %4\n"
"jz 1f\n"
"0:\n"
@@ -261,12 +261,12 @@ static inline void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8*
"movq %%mm3, 8(%3)\n"
/* next */
- "addl $8, %0\n"
- "addl $8, %1\n"
- "addl $8, %2\n"
- "addl $16, %3\n"
+ "add $8, %0\n"
+ "add $8, %1\n"
+ "add $8, %2\n"
+ "add $16, %3\n"
- "decl %4\n"
+ "dec %4\n"
"jnz 0b\n"
"1:\n"
@@ -283,7 +283,7 @@ static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint
__asm__ __volatile__(
/* central runs */
- "shrl $2, %4\n"
+ "shr $2, %4\n"
"jz 1f\n"
"0:\n"
@@ -339,12 +339,12 @@ static inline void scale2x_16_mmx_single(scale2x_uint16* dst, const scale2x_uint
"movq %%mm3, 8(%3)\n"
/* next */
- "addl $8, %0\n"
- "addl $8, %1\n"
- "addl $8, %2\n"
- "addl $16, %3\n"
+ "add $8, %0\n"
+ "add $8, %1\n"
+ "add $8, %2\n"
+ "add $16, %3\n"
- "decl %4\n"
+ "dec %4\n"
"jnz 0b\n"
"1:\n"
@@ -361,7 +361,7 @@ static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint
__asm__ __volatile__(
/* central runs */
- "shrl $1, %4\n"
+ "shr $1, %4\n"
"jz 1f\n"
"0:\n"
@@ -417,12 +417,12 @@ static inline void scale2x_32_mmx_single(scale2x_uint32* dst, const scale2x_uint
"movq %%mm3, 8(%3)\n"
/* next */
- "addl $8, %0\n"
- "addl $8, %1\n"
- "addl $8, %2\n"
- "addl $16, %3\n"
+ "add $8, %0\n"
+ "add $8, %1\n"
+ "add $8, %2\n"
+ "add $16, %3\n"
- "decl %4\n"
+ "dec %4\n"
"jnz 0b\n"
"1:\n"
diff --git a/graphics/scaler/scale2x.h b/graphics/scaler/scale2x.h
index 2101790905..cefa14f22a 100644
--- a/graphics/scaler/scale2x.h
+++ b/graphics/scaler/scale2x.h
@@ -33,7 +33,7 @@ void scale2x_8_def(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8
void scale2x_16_def(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
void scale2x_32_def(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count);
-#if defined(__GNUC__) && defined(__i386__)
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
void scale2x_8_mmx(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count);
void scale2x_16_mmx(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
@@ -52,5 +52,13 @@ static inline void scale2x_mmx_emms(void)
#endif
+#if defined(USE_ARM_SCALER_ASM)
+
+extern "C" void scale2x_8_arm(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count);
+extern "C" void scale2x_16_arm(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint16* src0, const scale2x_uint16* src1, const scale2x_uint16* src2, unsigned count);
+extern "C" void scale2x_32_arm(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint32* src0, const scale2x_uint32* src1, const scale2x_uint32* src2, unsigned count);
+
+#endif
+
#endif
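The inline-assembly hunks in scale2x.cpp above drop the explicit 'l' operand-size suffixes (addl, decl, shrl) so the same code builds on x86_64, where the pointer and count operands live in 64-bit registers that an 'l'-suffixed mnemonic would reject; the suffix-less form lets the assembler infer the width from the register actually chosen. A standalone illustration of the idea (not part of the patch):

	#include <cstdio>

	int main() {
		// With an "r" constraint the compiler picks a 32-bit register on
		// i386 and a 64-bit one on x86_64; plain "shr" adapts to either,
		// where a hard-coded "shrl" would fail on the 64-bit register.
		unsigned long n = 16;
		__asm__ __volatile__("shr $3, %0" : "+r"(n));
		std::printf("%lu\n", n);  // prints 2
		return 0;
	}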
diff --git a/graphics/scaler/scale2xARM.s b/graphics/scaler/scale2xARM.s
new file mode 100755
index 0000000000..3ca237e02d
--- /dev/null
+++ b/graphics/scaler/scale2xARM.s
@@ -0,0 +1,182 @@
+@ ScummVM Scumm Interpreter
+@ Copyright (C) 2009 The ScummVM project
+@
+@ This program is free software@ you can redistribute it and/or
+@ modify it under the terms of the GNU General Public License
+@ as published by the Free Software Foundation@ either version 2
+@ of the License, or (at your option) any later version.
+@
+@ This program is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY@ without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with this program@ if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+@
+@ $URL$
+@ $Id$
+@
+@ @author Robin Watts (robin@wss.co.uk)
+
+ .text
+
+ .global scale2x_8_arm
+ .global scale2x_16_arm
+ .global scale2x_32_arm
+
+ @ r0 = dst0
+ @ r1 = dst1
+	@ r2 = src_prev
+	@ r3 = src
+ @ r4 = src_next
+ @ r5 = len
+
+
+ @ We hold: r10 B
+ @ r8 r14 r7 D E F
+ @ r12 H
+scale2x_8_arm:
+ STMFD r13!,{r4-r5,r7-r8,r10-r11,r14}
+ LDR r4, [r13,#4*7]
+ LDR r5, [r13,#4*8]
+
+	LDRB	r14,[r3], #1		@ r14 = E (first pixel of the middle row)
+	SUBS	r5, r5, #1		@ Set NE unless this is the only pixel
+	@ Stall on Xscale
+	MOV	r8, r14			@ r8 = D (left edge clamps to E)
+	MOV	r7, r14			@ r7 = F fallback for a one-pixel row
+	B	loop8
+same8:
+ STRB r14,[r0], #1
+ STRB r14,[r0], #1
+ STRB r14,[r1], #1
+ STRB r14,[r1], #1
+ SUBS r5, r5, #1
+ MOV r8, r14
+ MOV r14,r7
+ BLT end8
+loop8:
+ LDRNEB r7, [r3], #1 @ As long as we aren't on the last pixel
+ LDRB r10,[r2], #1
+ LDRB r12,[r4], #1
+ @ Stall on Xscale
+ CMP r7, r8
+ CMPNE r10,r12
+ BEQ same8
+ CMP r8, r10
+ STREQB r8, [r0], #1
+ STRNEB r14,[r0], #1
+ CMP r10,r7
+ STREQB r7, [r0], #1
+ STRNEB r14,[r0], #1
+ CMP r8, r12
+ STREQB r8, [r1], #1
+ STRNEB r14,[r1], #1
+ CMP r12,r7
+ STREQB r7, [r1], #1
+ STRNEB r14,[r1], #1
+
+ SUBS r5, r5, #1
+ MOV r8, r14
+ MOV r14,r7
+ BGE loop8
+end8:
+
+ LDMFD r13!,{r4-r5,r7-r8,r10-r11,PC}
+
+scale2x_16_arm:
+ STMFD r13!,{r4-r5,r7-r8,r10-r11,r14}
+ LDR r4, [r13,#4*7]
+ LDR r5, [r13,#4*8]
+
+	LDRH	r14,[r3], #2		@ r14 = E (first pixel of the middle row)
+	SUBS	r5, r5, #1		@ Set NE unless this is the only pixel
+	@ Stall on Xscale
+	MOV	r8, r14			@ r8 = D (left edge clamps to E)
+	MOV	r7, r14			@ r7 = F fallback for a one-pixel row
+	B	loop16
+same16:
+ STRH r14,[r0], #2
+ STRH r14,[r0], #2
+ STRH r14,[r1], #2
+ STRH r14,[r1], #2
+ SUBS r5, r5, #1
+ MOV r8, r14
+ MOV r14,r7
+ BLT end16
+loop16:
+ LDRNEH r7, [r3], #2 @ As long as we aren't on the last pixel
+ LDRH r10,[r2], #2
+ LDRH r12,[r4], #2
+ @ Stall on Xscale
+ CMP r7, r8
+ CMPNE r10,r12
+ BEQ same16
+ CMP r8, r10
+ STREQH r8, [r0], #2
+ STRNEH r14,[r0], #2
+ CMP r10,r7
+ STREQH r7, [r0], #2
+ STRNEH r14,[r0], #2
+ CMP r8, r12
+ STREQH r8, [r1], #2
+ STRNEH r14,[r1], #2
+ CMP r12,r7
+ STREQH r7, [r1], #2
+ STRNEH r14,[r1], #2
+
+ SUBS r5, r5, #1
+ MOV r8, r14
+ MOV r14,r7
+ BGE loop16
+end16:
+
+ LDMFD r13!,{r4-r5,r7-r8,r10-r11,PC}
+
+scale2x_32_arm:
+ STMFD r13!,{r4-r5,r7-r8,r10-r11,r14}
+ LDR r4, [r13,#4*7]
+ LDR r5, [r13,#4*8]
+
+	LDR	r14,[r3], #4		@ r14 = E (first pixel of the middle row)
+	SUBS	r5, r5, #1		@ Set NE unless this is the only pixel
+	@ Stall on Xscale
+	MOV	r8, r14			@ r8 = D (left edge clamps to E)
+	MOV	r7, r14			@ r7 = F fallback for a one-pixel row
+	B	loop32
+same32:
+ STR r14,[r0], #4
+ STR r14,[r0], #4
+ STR r14,[r1], #4
+ STR r14,[r1], #4
+ SUBS r5, r5, #1
+ MOV r8, r14
+ MOV r14,r7
+ BLT end32
+loop32:
+ LDRNE r7, [r3], #4 @ As long as we aren't on the last pixel
+ LDR r10,[r2], #4
+ LDR r12,[r4], #4
+ @ Stall on Xscale
+ CMP r7, r8
+ CMPNE r10,r12
+ BEQ same32
+ CMP r8, r10
+ STREQ r8, [r0], #4
+ STRNE r14,[r0], #4
+ CMP r10,r7
+ STREQ r7, [r0], #4
+ STRNE r14,[r0], #4
+ CMP r8, r12
+ STREQ r8, [r1], #4
+ STRNE r14,[r1], #4
+ CMP r12,r7
+ STREQ r7, [r1], #4
+ STRNE r14,[r1], #4
+
+ SUBS r5, r5, #1
+ MOV r8, r14
+ MOV r14,r7
+ BGE loop32
+end32:
+
+ LDMFD r13!,{r4-r5,r7-r8,r10-r11,PC}
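Each CMP/STREQ/STRNE triple in the loops above encodes one output pixel of the Scale2x kernel, and the same8/same16/same32 paths are its fast case. With B above E, H below, D to the left and F to the right, a C++ restatement of what one iteration computes (a sketch of the standard rule, which the conditional stores implement once the branch has established D != F and B != H):

	template<typename T>
	static inline void scale2x_pixel(T B, T D, T E, T F, T H, T *d0, T *d1) {
		if (D != F && B != H) {
			d0[0] = (D == B) ? D : E;  // top-left
			d0[1] = (B == F) ? F : E;  // top-right
			d1[0] = (D == H) ? D : E;  // bottom-left
			d1[1] = (H == F) ? F : E;  // bottom-right
		} else {
			d0[0] = d0[1] = d1[0] = d1[1] = E;  // the "same" fast path
		}
	}

The LDRNE guard on the F fetch is the right-edge clamp: on the final pixel of a row the flags are EQ, the load is skipped, and the stale r7 leaves F equal to E.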
diff --git a/graphics/scaler/scalebit.cpp b/graphics/scaler/scalebit.cpp
index a6fd8df58f..6a3b47b0f5 100644
--- a/graphics/scaler/scalebit.cpp
+++ b/graphics/scaler/scalebit.cpp
@@ -55,10 +55,14 @@
static inline void stage_scale2x(void* dst0, void* dst1, const void* src0, const void* src1, const void* src2, unsigned pixel, unsigned pixel_per_row)
{
switch (pixel) {
-#if defined(__GNUC__) && defined(__i386__)
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
case 1 : scale2x_8_mmx(DST(8,0), DST(8,1), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
case 2 : scale2x_16_mmx(DST(16,0), DST(16,1), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
case 4 : scale2x_32_mmx(DST(32,0), DST(32,1), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
+#elif defined(USE_ARM_SCALER_ASM)
+ case 1 : scale2x_8_arm(DST(8,0), DST(8,1), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
+ case 2 : scale2x_16_arm(DST(16,0), DST(16,1), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
+ case 4 : scale2x_32_arm(DST(32,0), DST(32,1), SRC(32,0), SRC(32,1), SRC(32,2), pixel_per_row); break;
#else
case 1 : scale2x_8_def(DST(8,0), DST(8,1), SRC(8,0), SRC(8,1), SRC(8,2), pixel_per_row); break;
case 2 : scale2x_16_def(DST(16,0), DST(16,1), SRC(16,0), SRC(16,1), SRC(16,2), pixel_per_row); break;
@@ -125,7 +129,7 @@ static void scale2x(void* void_dst, unsigned dst_slice, const void* void_src, un
--count;
}
-#if defined(__GNUC__) && defined(__i386__)
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
scale2x_mmx_emms();
#endif
}
@@ -225,7 +229,7 @@ static void scale4x_buf(void* void_dst, unsigned dst_slice, void* void_mid, unsi
--count;
}
-#if defined(__GNUC__) && defined(__i386__)
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
scale2x_mmx_emms();
#endif
}
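Both scale2x() and scale4x_buf() finish by calling scale2x_mmx_emms() under the same widened guard: the MMX registers alias the x87 floating-point stack, so the state must be cleared before any later floating-point code runs. Judging by its declaration in scale2x.h, the helper is essentially a one-instruction wrapper along these lines:

	static inline void scale2x_mmx_emms(void) {
		// Reset the FP tag word so x87 code can run after MMX use.
		__asm__ __volatile__("emms");
	}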
@@ -303,7 +307,7 @@ int scale_precondition(unsigned scale, unsigned pixel, unsigned width, unsigned
break;
}
-#if defined(__GNUC__) && defined(__i386__)
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
switch (scale) {
case 2 :
case 4 :