diff options
| -rw-r--r-- | graphics/scaler/Normal2xARM.s | 138 | 
1 files changed, 138 insertions, 0 deletions
diff --git a/graphics/scaler/Normal2xARM.s b/graphics/scaler/Normal2xARM.s index 2dfb9e703d..e75d917567 100644 --- a/graphics/scaler/Normal2xARM.s +++ b/graphics/scaler/Normal2xARM.s @@ -112,6 +112,8 @@ Normal2xAspectMask:  	SUB	r14,r14,r3, LSL #3  	SUB	r14,r14,r3, LSL #1  	SUB	r14,r14,r3		@ r14 = 4-dstPitch*11 +	SUBS	r5, r5, #4 +	BLE	last_few_lines  yloop_aspect:  xloop_aspect:  	LDRH	r6, [r0], r1		@ r6 = A @@ -170,5 +172,141 @@ xloop_aspect:  	SUB	r2, r2, r4, LSL #2  	SUBS	r5, r5, #5  	BGT	yloop_aspect +last_few_lines: +	@ r5 = -4 => done, -3 => 1 left, -2 = 2 left, -1 = 3 left, 0 = 4 left +	ADDS	r5, r5, #3 +	@ r5 = -1 => done, 0 => 1 left, 1 = 2 left, 2 = 3 left, 3 = 4 left +	LDMLTFD	r13!,{r4-r11,PC} +	BEQ	last1lines +	CMP	r5, #2 +	BEQ	last3lines +	BGT	last4lines +last2lines: +	ADD	r11,r11,r1 +	ADD	r14,r14,r3 +	ADD	r11,r11,r1, LSL #1 +	ADD	r14,r14,r3, LSL #3 +xloop_aspect2: +	LDRH	r6, [r0], r1		@ r6 = A +	LDRH	r7, [r0], r11		@ r7 = B +	SUBS	r4, r4, #1 +	ORR	r6, r6, r6, LSL #16	@ r6 = output 0, 1 +	ORR	r7, r7, r7, LSL #16	@ r7 = output 3 +	STR	r6, [r2], r3		@ output 0 (A) +	STR	r6, [r2], r3		@ output 1 (A) +	AND	r6, r6, r12		@ r6 = A split +	ADD	r6, r6, r6, LSL #1	@ r6 = A*3 +	STR	r7, [r2, r3]		@ output 3 (B) +	AND	r7, r7, r12		@ r7 = B split +	ADD	r6, r6, r7		@ r6 = A*3 + B +	ADD	r6, r6, r7, LSL #2	@ r6 = A*3 + B*5 +	AND	r6, r12,r6, LSR #3	@ r6 = (A*3 + B*5)>>3 +	ORR	r6, r6, r6, ROR #16	@ r6 = output 2 +	STR	r6, [r2], r14		@ output 2 (A*3+B*5)>>3 +	BGT	xloop_aspect2 + +	LDMFD	r13!,{r4-r11,PC} + +last1lines: +xloop_aspect1: +	LDRH	r6, [r0], #2		@ r6 = A +	ORR	r6, r6, r6, LSL #16	@ r6 = output 0, 1 +	STR	r6, [r2, r3]		@ output 0 (A) +	STR	r6, [r2], #4		@ output 1 (A) +	SUBS	r4, r4, #1 +	BGT	xloop_aspect1 + +	LDMFD	r13!,{r4-r11,PC} + +last3lines: +	ADD	r14,r14,r3, LSL #2 +	ADD	r11,r11,r1 +xloop_aspect3: +	LDRH	r6, [r0], r1		@ r6 = A +	LDRH	r7, [r0], r1		@ r7 = B +	LDRH	r8, [r0], r1		@ r8 = C +	LDRH	r9, [r0], r11		@ r9 = D +	ORR	r6, r6, r6, LSL #16	@ r6 = output 0, 1 +	ORR	r7, r7, r7, LSL #16	@ r7 = output 3 +	ORR	r8, r8, r8, LSL #16	@ r8 = output 5,6 +	ORR	r9, r9, r9, LSL #16	@ r9 = output 8 +	STR	r6, [r2], r3		@ output 0 (A) +	STR	r6, [r2], r3		@ output 1 (A) +	AND	r6, r6, r12		@ r6 = A split +	ADD	r6, r6, r6, LSL #1	@ r6 = A*3 +	STR	r7, [r2, r3]		@ output 3 (B) +	AND	r7, r7, r12		@ r7 = B split +	ADD	r6, r6, r7		@ r6 = A*3 + B +	ADD	r6, r6, r7, LSL #2	@ r6 = A*3 + B*5 +	AND	r6, r12,r6, LSR #3	@ r6 = (A*3 + B*5)>>3 +	ORR	r6, r6, r6, ROR #16	@ r6 = output 2 +	STR	r6, [r2], r3, LSL #1	@ output 2 (A*3+B*5)>>3 +	RSB	r7, r7, r7, LSL #3	@ r7 = B*7 +	AND	r6, r8, r12		@ r6 = C split +	ADD	r7, r7, r6		@ r7 = B*7+C +	AND	r7, r12,r7, LSR #3	@ r7 = (B*7 + C)>>3 +	ORR	r7, r7, r7, ROR #16	@ r7 = output 4 +	STR	r7, [r2], r3		@ output 4 (B*7+C)>>3 +	STR	r8, [r2], r3		@ output 5 (C) +	STR	r8, [r2], r3		@ output 6 (C) +	AND	r9, r9, r12		@ r9 = D split +	RSB	r7, r9, r9, LSL #3	@ r7 = D*7 +	ADD	r6, r6, r7		@ r6 = C+D*7 +	AND	r6, r12,r6, LSR #3	@ r6 = (C + D*7)>>3 +	ORR	r6, r6, r6, ROR #16	@ r6 = output 7 +	STR	r6, [r2], r14		@ output 7 (C+D*7)>>3 +	SUBS	r4, r4, #1 +	BGT	xloop_aspect3 + +	LDMFD	r13!,{r4-r11,PC} + +last4lines: +	ADD	r14,r14,r3, LSL #1 +xloop_aspect4: +	LDRH	r6, [r0], r1		@ r6 = A +	LDRH	r7, [r0], r1		@ r7 = B +	LDRH	r8, [r0], r1		@ r8 = C +	LDRH	r9, [r0], r1		@ r9 = D +	LDRH	r10,[r0], r11		@ r10= E +	ORR	r6, r6, r6, LSL #16	@ r6 = output 0, 1 +	ORR	r7, r7, r7, LSL #16	@ r7 = output 3 +	ORR	r8, r8, r8, LSL #16	@ r8 = output 5,6 +	ORR	r9, r9, r9, LSL #16	@ r9 = output 8 +	ORR	r10,r10,r10,LSL #16	@ r10= output 10, 11 +	STR	r6, [r2], r3		@ output 0 (A) +	STR	r6, [r2], r3		@ output 1 (A) +	AND	r6, r6, r12		@ r6 = A split +	ADD	r6, r6, r6, LSL #1	@ r6 = A*3 +	STR	r7, [r2, r3]		@ output 3 (B) +	AND	r7, r7, r12		@ r7 = B split +	ADD	r6, r6, r7		@ r6 = A*3 + B +	ADD	r6, r6, r7, LSL #2	@ r6 = A*3 + B*5 +	AND	r6, r12,r6, LSR #3	@ r6 = (A*3 + B*5)>>3 +	ORR	r6, r6, r6, ROR #16	@ r6 = output 2 +	STR	r6, [r2], r3, LSL #1	@ output 2 (A*3+B*5)>>3 +	RSB	r7, r7, r7, LSL #3	@ r7 = B*7 +	AND	r6, r8, r12		@ r6 = C split +	ADD	r7, r7, r6		@ r7 = B*7+C +	AND	r7, r12,r7, LSR #3	@ r7 = (B*7 + C)>>3 +	ORR	r7, r7, r7, ROR #16	@ r7 = output 4 +	STR	r7, [r2], r3		@ output 4 (B*7+C)>>3 +	STR	r8, [r2], r3		@ output 5 (C) +	STR	r8, [r2], r3		@ output 6 (C) +	STR	r9, [r2, r3]		@ output 8 (D) +	AND	r9, r9, r12		@ r9 = D split +	RSB	r7, r9, r9, LSL #3	@ r7 = D*7 +	ADD	r6, r6, r7		@ r6 = C+D*7 +	AND	r6, r12,r6, LSR #3	@ r6 = (C + D*7)>>3 +	ORR	r6, r6, r6, ROR #16	@ r6 = output 7 +	STR	r6, [r2], r3, LSL #1	@ output 7 (C+D*7)>>3 +	ADD	r9, r9, r9, LSL #2	@ r9 = D*5 +	AND	r6, r10,r12		@ r6 = E split +	ADD	r9, r9, r6		@ r9 = D*5+E +	ADD	r9, r9, r6, LSL #1	@ r9 = D*5+E*3 +	AND	r9, r12,r9, LSR #3	@ r9 = (D*5 + E*3)>>3 +	ORR	r9, r9, r9, ROR #16	@ r9 = output 9 +	STR	r9, [r2], r14		@ output 9 (D*5+E*3)>>3 +	SUBS	r4, r4, #1 +	BGT	xloop_aspect4  	LDMFD	r13!,{r4-r11,PC}  | 
