aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--graphics/scaler/Normal2xARM.s138
1 files changed, 138 insertions, 0 deletions
diff --git a/graphics/scaler/Normal2xARM.s b/graphics/scaler/Normal2xARM.s
index 2dfb9e703d..e75d917567 100644
--- a/graphics/scaler/Normal2xARM.s
+++ b/graphics/scaler/Normal2xARM.s
@@ -112,6 +112,8 @@ Normal2xAspectMask:
SUB r14,r14,r3, LSL #3
SUB r14,r14,r3, LSL #1
SUB r14,r14,r3 @ r14 = 4-dstPitch*11
+ SUBS r5, r5, #4
+ BLE last_few_lines
yloop_aspect:
xloop_aspect:
LDRH r6, [r0], r1 @ r6 = A
@@ -170,5 +172,141 @@ xloop_aspect:
SUB r2, r2, r4, LSL #2
SUBS r5, r5, #5
BGT yloop_aspect
+last_few_lines:
+ @ r5 = -4 => done, -3 => 1 left, -2 = 2 left, -1 = 3 left, 0 = 4 left
+ ADDS r5, r5, #3
+ @ r5 = -1 => done, 0 => 1 left, 1 = 2 left, 2 = 3 left, 3 = 4 left
+ LDMLTFD r13!,{r4-r11,PC}
+ BEQ last1lines
+ CMP r5, #2
+ BEQ last3lines
+ BGT last4lines
+last2lines:
+ ADD r11,r11,r1
+ ADD r14,r14,r3
+ ADD r11,r11,r1, LSL #1
+ ADD r14,r14,r3, LSL #3
+xloop_aspect2:
+ LDRH r6, [r0], r1 @ r6 = A
+ LDRH r7, [r0], r11 @ r7 = B
+ SUBS r4, r4, #1
+ ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
+ ORR r7, r7, r7, LSL #16 @ r7 = output 3
+ STR r6, [r2], r3 @ output 0 (A)
+ STR r6, [r2], r3 @ output 1 (A)
+ AND r6, r6, r12 @ r6 = A split
+ ADD r6, r6, r6, LSL #1 @ r6 = A*3
+ STR r7, [r2, r3] @ output 3 (B)
+ AND r7, r7, r12 @ r7 = B split
+ ADD r6, r6, r7 @ r6 = A*3 + B
+ ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5
+ AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3
+ ORR r6, r6, r6, ROR #16 @ r6 = output 2
+ STR r6, [r2], r14 @ output 2 (A*3+B*5)>>3
+ BGT xloop_aspect2
+
+ LDMFD r13!,{r4-r11,PC}
+
+last1lines:
+xloop_aspect1:
+ LDRH r6, [r0], #2 @ r6 = A
+ ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
+ STR r6, [r2, r3] @ output 0 (A)
+ STR r6, [r2], #4 @ output 1 (A)
+ SUBS r4, r4, #1
+ BGT xloop_aspect1
+
+ LDMFD r13!,{r4-r11,PC}
+
+last3lines:
+ ADD r14,r14,r3, LSL #2
+ ADD r11,r11,r1
+xloop_aspect3:
+ LDRH r6, [r0], r1 @ r6 = A
+ LDRH r7, [r0], r1 @ r7 = B
+ LDRH r8, [r0], r1 @ r8 = C
+ LDRH r9, [r0], r11 @ r9 = D
+ ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
+ ORR r7, r7, r7, LSL #16 @ r7 = output 3
+ ORR r8, r8, r8, LSL #16 @ r8 = output 5,6
+ ORR r9, r9, r9, LSL #16 @ r9 = output 8
+ STR r6, [r2], r3 @ output 0 (A)
+ STR r6, [r2], r3 @ output 1 (A)
+ AND r6, r6, r12 @ r6 = A split
+ ADD r6, r6, r6, LSL #1 @ r6 = A*3
+ STR r7, [r2, r3] @ output 3 (B)
+ AND r7, r7, r12 @ r7 = B split
+ ADD r6, r6, r7 @ r6 = A*3 + B
+ ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5
+ AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3
+ ORR r6, r6, r6, ROR #16 @ r6 = output 2
+ STR r6, [r2], r3, LSL #1 @ output 2 (A*3+B*5)>>3
+ RSB r7, r7, r7, LSL #3 @ r7 = B*7
+ AND r6, r8, r12 @ r6 = C split
+ ADD r7, r7, r6 @ r7 = B*7+C
+ AND r7, r12,r7, LSR #3 @ r7 = (B*7 + C)>>3
+ ORR r7, r7, r7, ROR #16 @ r7 = output 4
+ STR r7, [r2], r3 @ output 4 (B*7+C)>>3
+ STR r8, [r2], r3 @ output 5 (C)
+ STR r8, [r2], r3 @ output 6 (C)
+ AND r9, r9, r12 @ r9 = D split
+ RSB r7, r9, r9, LSL #3 @ r7 = D*7
+ ADD r6, r6, r7 @ r6 = C+D*7
+ AND r6, r12,r6, LSR #3 @ r6 = (C + D*7)>>3
+ ORR r6, r6, r6, ROR #16 @ r6 = output 7
+ STR r6, [r2], r14 @ output 7 (C+D*7)>>3
+ SUBS r4, r4, #1
+ BGT xloop_aspect3
+
+ LDMFD r13!,{r4-r11,PC}
+
+last4lines:
+ ADD r14,r14,r3, LSL #1
+xloop_aspect4:
+ LDRH r6, [r0], r1 @ r6 = A
+ LDRH r7, [r0], r1 @ r7 = B
+ LDRH r8, [r0], r1 @ r8 = C
+ LDRH r9, [r0], r1 @ r9 = D
+ LDRH r10,[r0], r11 @ r10= E
+ ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1
+ ORR r7, r7, r7, LSL #16 @ r7 = output 3
+ ORR r8, r8, r8, LSL #16 @ r8 = output 5,6
+ ORR r9, r9, r9, LSL #16 @ r9 = output 8
+ ORR r10,r10,r10,LSL #16 @ r10= output 10, 11
+ STR r6, [r2], r3 @ output 0 (A)
+ STR r6, [r2], r3 @ output 1 (A)
+ AND r6, r6, r12 @ r6 = A split
+ ADD r6, r6, r6, LSL #1 @ r6 = A*3
+ STR r7, [r2, r3] @ output 3 (B)
+ AND r7, r7, r12 @ r7 = B split
+ ADD r6, r6, r7 @ r6 = A*3 + B
+ ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5
+ AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3
+ ORR r6, r6, r6, ROR #16 @ r6 = output 2
+ STR r6, [r2], r3, LSL #1 @ output 2 (A*3+B*5)>>3
+ RSB r7, r7, r7, LSL #3 @ r7 = B*7
+ AND r6, r8, r12 @ r6 = C split
+ ADD r7, r7, r6 @ r7 = B*7+C
+ AND r7, r12,r7, LSR #3 @ r7 = (B*7 + C)>>3
+ ORR r7, r7, r7, ROR #16 @ r7 = output 4
+ STR r7, [r2], r3 @ output 4 (B*7+C)>>3
+ STR r8, [r2], r3 @ output 5 (C)
+ STR r8, [r2], r3 @ output 6 (C)
+ STR r9, [r2, r3] @ output 8 (D)
+ AND r9, r9, r12 @ r9 = D split
+ RSB r7, r9, r9, LSL #3 @ r7 = D*7
+ ADD r6, r6, r7 @ r6 = C+D*7
+ AND r6, r12,r6, LSR #3 @ r6 = (C + D*7)>>3
+ ORR r6, r6, r6, ROR #16 @ r6 = output 7
+ STR r6, [r2], r3, LSL #1 @ output 7 (C+D*7)>>3
+ ADD r9, r9, r9, LSL #2 @ r9 = D*5
+ AND r6, r10,r12 @ r6 = E split
+ ADD r9, r9, r6 @ r9 = D*5+E
+ ADD r9, r9, r6, LSL #1 @ r9 = D*5+E*3
+ AND r9, r12,r9, LSR #3 @ r9 = (D*5 + E*3)>>3
+ ORR r9, r9, r9, ROR #16 @ r9 = output 9
+ STR r9, [r2], r14 @ output 9 (D*5+E*3)>>3
+ SUBS r4, r4, #1
+ BGT xloop_aspect4
LDMFD r13!,{r4-r11,PC}