diff options
-rw-r--r-- | graphics/scaler/Normal2xARM.s | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/graphics/scaler/Normal2xARM.s b/graphics/scaler/Normal2xARM.s index 2dfb9e703d..e75d917567 100644 --- a/graphics/scaler/Normal2xARM.s +++ b/graphics/scaler/Normal2xARM.s @@ -112,6 +112,8 @@ Normal2xAspectMask: SUB r14,r14,r3, LSL #3 SUB r14,r14,r3, LSL #1 SUB r14,r14,r3 @ r14 = 4-dstPitch*11 + SUBS r5, r5, #4 + BLE last_few_lines yloop_aspect: xloop_aspect: LDRH r6, [r0], r1 @ r6 = A @@ -170,5 +172,141 @@ xloop_aspect: SUB r2, r2, r4, LSL #2 SUBS r5, r5, #5 BGT yloop_aspect +last_few_lines: + @ r5 = -4 => done, -3 => 1 left, -2 = 2 left, -1 = 3 left, 0 = 4 left + ADDS r5, r5, #3 + @ r5 = -1 => done, 0 => 1 left, 1 = 2 left, 2 = 3 left, 3 = 4 left + LDMLTFD r13!,{r4-r11,PC} + BEQ last1lines + CMP r5, #2 + BEQ last3lines + BGT last4lines +last2lines: + ADD r11,r11,r1 + ADD r14,r14,r3 + ADD r11,r11,r1, LSL #1 + ADD r14,r14,r3, LSL #3 +xloop_aspect2: + LDRH r6, [r0], r1 @ r6 = A + LDRH r7, [r0], r11 @ r7 = B + SUBS r4, r4, #1 + ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1 + ORR r7, r7, r7, LSL #16 @ r7 = output 3 + STR r6, [r2], r3 @ output 0 (A) + STR r6, [r2], r3 @ output 1 (A) + AND r6, r6, r12 @ r6 = A split + ADD r6, r6, r6, LSL #1 @ r6 = A*3 + STR r7, [r2, r3] @ output 3 (B) + AND r7, r7, r12 @ r7 = B split + ADD r6, r6, r7 @ r6 = A*3 + B + ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5 + AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3 + ORR r6, r6, r6, ROR #16 @ r6 = output 2 + STR r6, [r2], r14 @ output 2 (A*3+B*5)>>3 + BGT xloop_aspect2 + + LDMFD r13!,{r4-r11,PC} + +last1lines: +xloop_aspect1: + LDRH r6, [r0], #2 @ r6 = A + ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1 + STR r6, [r2, r3] @ output 0 (A) + STR r6, [r2], #4 @ output 1 (A) + SUBS r4, r4, #1 + BGT xloop_aspect1 + + LDMFD r13!,{r4-r11,PC} + +last3lines: + ADD r14,r14,r3, LSL #2 + ADD r11,r11,r1 +xloop_aspect3: + LDRH r6, [r0], r1 @ r6 = A + LDRH r7, [r0], r1 @ r7 = B + LDRH r8, [r0], r1 @ r8 = C + LDRH r9, [r0], r11 @ r9 = D + ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1 + ORR r7, r7, r7, LSL #16 @ r7 = output 3 + ORR r8, r8, r8, LSL #16 @ r8 = output 5,6 + ORR r9, r9, r9, LSL #16 @ r9 = output 8 + STR r6, [r2], r3 @ output 0 (A) + STR r6, [r2], r3 @ output 1 (A) + AND r6, r6, r12 @ r6 = A split + ADD r6, r6, r6, LSL #1 @ r6 = A*3 + STR r7, [r2, r3] @ output 3 (B) + AND r7, r7, r12 @ r7 = B split + ADD r6, r6, r7 @ r6 = A*3 + B + ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5 + AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3 + ORR r6, r6, r6, ROR #16 @ r6 = output 2 + STR r6, [r2], r3, LSL #1 @ output 2 (A*3+B*5)>>3 + RSB r7, r7, r7, LSL #3 @ r7 = B*7 + AND r6, r8, r12 @ r6 = C split + ADD r7, r7, r6 @ r7 = B*7+C + AND r7, r12,r7, LSR #3 @ r7 = (B*7 + C)>>3 + ORR r7, r7, r7, ROR #16 @ r7 = output 4 + STR r7, [r2], r3 @ output 4 (B*7+C)>>3 + STR r8, [r2], r3 @ output 5 (C) + STR r8, [r2], r3 @ output 6 (C) + AND r9, r9, r12 @ r9 = D split + RSB r7, r9, r9, LSL #3 @ r7 = D*7 + ADD r6, r6, r7 @ r6 = C+D*7 + AND r6, r12,r6, LSR #3 @ r6 = (C + D*7)>>3 + ORR r6, r6, r6, ROR #16 @ r6 = output 7 + STR r6, [r2], r14 @ output 7 (C+D*7)>>3 + SUBS r4, r4, #1 + BGT xloop_aspect3 + + LDMFD r13!,{r4-r11,PC} + +last4lines: + ADD r14,r14,r3, LSL #1 +xloop_aspect4: + LDRH r6, [r0], r1 @ r6 = A + LDRH r7, [r0], r1 @ r7 = B + LDRH r8, [r0], r1 @ r8 = C + LDRH r9, [r0], r1 @ r9 = D + LDRH r10,[r0], r11 @ r10= E + ORR r6, r6, r6, LSL #16 @ r6 = output 0, 1 + ORR r7, r7, r7, LSL #16 @ r7 = output 3 + ORR r8, r8, r8, LSL #16 @ r8 = output 5,6 + ORR r9, r9, r9, LSL #16 @ r9 = output 8 + ORR r10,r10,r10,LSL #16 @ r10= output 10, 11 + STR r6, [r2], r3 @ output 0 (A) + STR r6, [r2], r3 @ output 1 (A) + AND r6, r6, r12 @ r6 = A split + ADD r6, r6, r6, LSL #1 @ r6 = A*3 + STR r7, [r2, r3] @ output 3 (B) + AND r7, r7, r12 @ r7 = B split + ADD r6, r6, r7 @ r6 = A*3 + B + ADD r6, r6, r7, LSL #2 @ r6 = A*3 + B*5 + AND r6, r12,r6, LSR #3 @ r6 = (A*3 + B*5)>>3 + ORR r6, r6, r6, ROR #16 @ r6 = output 2 + STR r6, [r2], r3, LSL #1 @ output 2 (A*3+B*5)>>3 + RSB r7, r7, r7, LSL #3 @ r7 = B*7 + AND r6, r8, r12 @ r6 = C split + ADD r7, r7, r6 @ r7 = B*7+C + AND r7, r12,r7, LSR #3 @ r7 = (B*7 + C)>>3 + ORR r7, r7, r7, ROR #16 @ r7 = output 4 + STR r7, [r2], r3 @ output 4 (B*7+C)>>3 + STR r8, [r2], r3 @ output 5 (C) + STR r8, [r2], r3 @ output 6 (C) + STR r9, [r2, r3] @ output 8 (D) + AND r9, r9, r12 @ r9 = D split + RSB r7, r9, r9, LSL #3 @ r7 = D*7 + ADD r6, r6, r7 @ r6 = C+D*7 + AND r6, r12,r6, LSR #3 @ r6 = (C + D*7)>>3 + ORR r6, r6, r6, ROR #16 @ r6 = output 7 + STR r6, [r2], r3, LSL #1 @ output 7 (C+D*7)>>3 + ADD r9, r9, r9, LSL #2 @ r9 = D*5 + AND r6, r10,r12 @ r6 = E split + ADD r9, r9, r6 @ r9 = D*5+E + ADD r9, r9, r6, LSL #1 @ r9 = D*5+E*3 + AND r9, r12,r9, LSR #3 @ r9 = (D*5 + E*3)>>3 + ORR r9, r9, r9, ROR #16 @ r9 = output 9 + STR r9, [r2], r14 @ output 9 (D*5+E*3)>>3 + SUBS r4, r4, #1 + BGT xloop_aspect4 LDMFD r13!,{r4-r11,PC} |