-rw-r--r--  gp2x/upscale_aspect.s | 259
1 file changed, 259 insertions, 0 deletions
diff --git a/gp2x/upscale_aspect.s b/gp2x/upscale_aspect.s
index ed67c63..41bcf0e 100644
--- a/gp2x/upscale_aspect.s
+++ b/gp2x/upscale_aspect.s
@@ -316,5 +316,264 @@ loop2:
     ldmfd   sp!,{r4-r11,pc}
 
+
+.global upscale_aspect_row @ void *dst, void *linesx4, u32 row
+upscale_aspect_row:
+    stmfd   sp!,{r4-r11,lr}
+    mov     lr, #0x0000001f
+    orr     lr, lr, #0x0000f800     @ for "unpacked" form of
+    orr     lr, lr, #0x07e00000     @ 00000ggg'ggg00000'rrrrr000'000bbbbb
+    mov     r12,#0x00000001
+    orr     r12,r12,#0x00000800
+    orr     r12,r12,#0x00200000     @ rounding constant
+
+    mov     r8, #(240/6)            @ cols
+
+    add     r0, r0, #(240*320)*2
+    add     r0, r0, #12*2
+    add     r0, r0, r2, lsl #3
+
+uar_loop:
+    ldr     r10,[r1]
+    ldr     r11,[r1, #240*2*1]
+
+    unpack_lo r4, r10
+    unpack_hi r5, r10
+    unpack_lo r6, r11
+    unpack_hi r7, r11
+
+    ldr     r11,[r1, #240*2*2]
+
+    do_3_5  r4, r6
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r10, lsl #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #-240*2]!      @ 0,8
+
+    unpack_lo r10,r11
+    unpack_hi r9, r11
+
+    do_3_5  r10,r6
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r11, lsl #16
+    orr     r2, r3, r2, lsr #16
+    str     r2, [r0, #4]            @ 16,24
+
+    do_3_5  r4, r5
+    orr     r3, r2, r2, lsl #16
+
+    do_14_7_7_4 r7, r5, r6, r4
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #-240*2]!      @ 1,9
+
+    ldr     r11,[r1, #4]
+
+    do_14_7_7_4 r7, r6, r9, r10
+    orr     r3, r2, r2, lsl #16
+
+    do_3_5  r10,r9
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #4]            @ 17,25
+
+    ldr     r10,[r1, #240*2*1+4]
+
+    unpack_lo r4, r11
+    unpack_lo r6, r10
+
+    do_3_5  r4, r5
+    orr     r3, r2, r2, lsl #16
+
+    do_14_7_7_4 r7, r5, r6, r4
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #-240*2]!      @ 2,10
+
+    do_3_5  r4, r6
+
+    ldr     r4, [r1, #240*2*2+4]
+
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r11, lsl #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #-240*2]       @ 3,11
+
+    unpack_lo r5, r4
+
+    do_14_7_7_4 r7, r6, r9, r5
+    orr     r3, r2, r2, lsl #16
+
+    do_3_5  r5, r9
+    orr     r2, r2, r2, lsr #16
+    mov     r2, r2, lsl #16
+    orr     r2, r2, r3, lsr #16
+    str     r2, [r0, #4]            @ 18,26
+
+    do_3_5  r5, r6
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r4, lsl #16
+    orr     r2, r3, r2, lsr #16
+    str     r2, [r0, #-240*2+4]     @ 19,27
+
+    unpack_hi r5, r11
+    unpack_hi r6, r10
+    unpack_hi r7, r4
+
+    ldr     r10,[r1, #8]
+
+    do_3_5  r5, r6
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r11, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #-240*2*2]!    @ 4,12
+
+    ldr     r11,[r1, #240*2*1+8]
+
+    do_3_5  r7, r6
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r4, lsr #16
+    mov     r3, r3, lsl #16
+    orr     r2, r3, r2, lsr #16
+    str     r2, [r0, #4]            @ 20,28
+
+    unpack_lo r4, r10
+    unpack_lo r9, r11
+
+    ldr     r11,[r1, #240*2*2+8]
+
+    do_3_5  r5, r4
+    orr     r3, r2, r2, lsl #16
+
+    do_14_7_7_4 r9, r4, r6, r5
+    orr     r2, r2, r2, lsr #16
+    mov     r2, r2, lsl #16
+    orr     r2, r2, r3, lsr #16
+    str     r2, [r0, #-240*2]!      @ 5,13
+
+    unpack_lo r5, r11
+
+    do_14_7_7_4 r9, r5, r6, r7
+    orr     r3, r2, r2, lsl #16
+
+    do_3_5  r7, r5
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #4]            @ 21,29
+
+    ldr     r7, [r1, #240*2*1+8]
+
+    unpack_hi r6, r10
+    unpack_hi r7, r7
+
+    do_3_5  r6, r4
+    orr     r3, r2, r2, lsl #16
+
+    do_14_7_7_4 r9, r4, r7, r6
+    orr     r2, r2, r2, lsr #16
+    mov     r2, r2, lsl #16
+    orr     r2, r2, r3, lsr #16
+    str     r2, [r0, #-240*2]!      @ 6,14
+
+    unpack_hi r4, r11
+
+    do_14_7_7_4 r9, r5, r7, r4
+    orr     r3, r2, r2, lsl #16
+
+    do_3_5  r4, r5
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r3, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #4]            @ 22,30
+
+    do_3_5  r6, r7
+    orr     r2, r2, r2, lsr #16
+    mov     r3, r10, lsr #16
+    orr     r2, r3, r2, lsl #16
+    str     r2, [r0, #-240*2]!      @ 7,15
+
+    do_3_5  r4, r7
+    orr     r2, r2, r2, lsl #16
+    mov     r3, r11, lsr #16
+    mov     r3, r3, lsl #16
+    orr     r2, r3, r2, lsr #16
+    str     r2, [r0, #4]            @ 23,31
+
+    subs    r8, r8, #1
+    add     r1, r1, #12
+    bne     uar_loop
+
+    ldmfd   sp!,{r4-r11,pc}
+
+
+@ bonus function
+
+@ input: r2-r5
+@ output: r7,r8
+@ trash: r6
+.macro rb_line_low
+    mov     r6, r2, lsl #16
+    mov     r7, r3, lsl #16
+    orr     r7, r7, r6, lsr #16
+    mov     r6, r4, lsl #16
+    mov     r8, r5, lsl #16
+    orr     r8, r8, r6, lsr #16
+.endm
+
+.macro rb_line_hi
+    mov     r6, r2, lsr #16
+    mov     r7, r3, lsr #16
+    orr     r7, r6, r7, lsl #16
+    mov     r6, r4, lsr #16
+    mov     r8, r5, lsr #16
+    orr     r8, r6, r8, lsl #16
+.endm
+
+.global do_rotated_blit @ void *dst, void *linesx4, u32 y
+do_rotated_blit:
+    stmfd   sp!,{r4-r8,lr}
+
+    add     r0, r0, #(240*320)*2
+    sub     r0, r0, #(240*40)*2
+    sub     r0, r0, #(240-40+4)*2   @ y starts from 4
+    add     r0, r0, r2, lsl #1
+
+    mov     lr, #240/4
+
+rotated_blit_loop:
+    ldr     r2, [r1, #240*0*2]
+    ldr     r3, [r1, #240*1*2]
+    ldr     r4, [r1, #240*2*2]
+    ldr     r5, [r1, #240*3*2]
+    rb_line_low
+    stmia   r0, {r7,r8}
+    sub     r0, r0, #240*2
+    rb_line_hi
+    stmia   r0, {r7,r8}
+    sub     r0, r0, #240*2
+
+    ldr     r2, [r1, #240*0*2+4]
+    ldr     r3, [r1, #240*1*2+4]
+    ldr     r4, [r1, #240*2*2+4]
+    ldr     r5, [r1, #240*3*2+4]
+    rb_line_low
+    stmia   r0, {r7,r8}
+    sub     r0, r0, #240*2
+    rb_line_hi
+    stmia   r0, {r7,r8}
+    sub     r0, r0, #240*2
+
+    subs    lr, lr, #1
+    add     r1, r1, #8
+    bne     rotated_blit_loop
+
+    ldmfd   sp!,{r4-r8,pc}
+
 @ vim:filetype=armasm
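A note on the pixel math: unpack_lo/unpack_hi, do_3_5 and do_14_7_7_4 are macros defined earlier in upscale_aspect.s, outside this hunk, so their exact bodies are not shown here. The constants set up at the top of upscale_aspect_row do suggest what they compute: lr = 0x07E0F81F is the classic RGB565 "unpacked" mask (green moved into the top halfword so all three channels can be scaled and summed in one 32-bit register), and r12 = 0x00200801 is one least-significant bit of each channel field, used for rounding. The following C sketch models the blends under that reading; the names unpack, pack, blend_3_5 and blend_14_7_7_4 are illustrative, not from the source.

#include <stdint.h>

#define UNPACK_MASK 0x07E0F81Fu /* the mask built in lr */
#define ROUND_LSB   0x00200801u /* the constant built in r12: 1 LSB per channel */

/* Spread RGB565 to 00000ggg'ggg00000'rrrrr000'000bbbbb: with green in the
   top halfword, every channel has headroom above it, so weighted sums of
   several pixels fit in a single 32-bit register. */
static uint32_t unpack(uint16_t p)
{
    return (p | ((uint32_t)p << 16)) & UNPACK_MASK;
}

/* Fold the unpacked form back into a 16-bit RGB565 pixel. */
static uint16_t pack(uint32_t u)
{
    u &= UNPACK_MASK;               /* drop bits that spilled between fields */
    return (uint16_t)(u | (u >> 16));
}

/* What do_3_5 appears to compute: (3*a + 5*b + round)/8 per channel. */
static uint16_t blend_3_5(uint16_t a, uint16_t b)
{
    uint32_t s = unpack(a) * 3 + unpack(b) * 5 + ROUND_LSB * 4;
    return pack(s >> 3);
}

/* What do_14_7_7_4 appears to compute: weights sum to 32, so shift by 5. */
static uint16_t blend_14_7_7_4(uint16_t a, uint16_t b, uint16_t c, uint16_t d)
{
    uint32_t s = unpack(a) * 14 + unpack(b) * 7 + unpack(c) * 7
               + unpack(d) * 4 + ROUND_LSB * 16;
    return pack(s >> 5);
}

The headroom works out: the green sum is at most 32*63 + 16 < 2^11 in bits 21..31, the red sum at most 32*31 + 16 < 2^10 in bits 11..20, and the blue sum at most 32*31 + 16 in bits 0..10, so no weighted sum overflows into the neighboring field before the shift.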
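The "bonus function" is a 90-degree rotated blit. rb_line_low and rb_line_hi are halfword interleaves: from four words holding two pixels from each of four consecutive source lines, they build two words whose four pixels are vertically adjacent in the source and therefore horizontally adjacent in the rotated destination, so each stmia writes four output pixels at once. A rough C equivalent, with simplified addressing: the assembly folds the fixed 240x320 border offsets into r0 and walks it with post-indexed stores, while top_row below is a hypothetical stand-in for that arithmetic.

#include <stdint.h>

/* Rotated blit sketch: source pixel (x, line) lands at destination
   (top_row - x, y + line); destination rows walk upward as x grows,
   matching the sub r0, r0, #240*2 steps in the assembly. */
static void rotated_blit_c(uint16_t *dst /* 240-wide framebuffer */,
                           const uint16_t *linesx4 /* 4 x 240 pixels */,
                           uint32_t y, int top_row)
{
    for (int x = 0; x < 240; x++) {
        uint16_t *d = &dst[(top_row - x) * 240 + y];
        d[0] = linesx4[240 * 0 + x];  /* these four stores are what one */
        d[1] = linesx4[240 * 1 + x];  /* rb_line_* + stmia pair emits:  */
        d[2] = linesx4[240 * 2 + x];  /* two pixels per register, two   */
        d[3] = linesx4[240 * 3 + x];  /* registers per store            */
    }
}

Reading four source lines in parallel is what makes the word-at-a-time trick possible: a 16-bit-per-pixel rotation done naively would issue one halfword load and one halfword store per pixel, while here every ldr brings in two useful pixels and every stmia retires four.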