author		notaz	2009-06-05 17:36:30 +0300
committer	notaz	2009-06-05 17:36:30 +0300
commit		788343bb4cbc9ae133ac748f9ce59731bad67f22 (patch)
tree		b15bbf5f0c7c88d187124dd0096f2bd70023dd53 /gp2x
parent		4cdfc0bc7b3dd2051b8027da4e2f35bbb9897307 (diff)
added upscaler
Diffstat (limited to 'gp2x')
-rw-r--r--	gp2x/Makefile		5
-rw-r--r--	gp2x/upscale_aspect.s	320
2 files changed, 324 insertions(+), 1 deletion(-)
diff --git a/gp2x/Makefile b/gp2x/Makefile
index be06a86..cf0b3a5 100644
--- a/gp2x/Makefile
+++ b/gp2x/Makefile
@@ -16,7 +16,7 @@ endif
OBJS = main.o cpu.o memory.o video.o input.o sound.o gp2x.o gui.o \
	cheats.o zip.o cpu_threaded.o arm_stub.o video_blend.o \
- flush_cache.o warm.o sys_cacheflush.o
+ warm.o sys_cacheflush.o upscale_aspect.o
BIN = gpsp.gpe
# Platform specific definitions
@@ -57,6 +57,9 @@ LIBS += -ggdb
%.o: %.S
$(CC) $(ASFLAGS) $(INCLUDES) -c -o $@ $<
+%.o: %.s
+ $(CC) $(ASFLAGS) $(INCLUDES) -c -o $@ $<
+
all: $(OBJS)
$(CC) $(OBJS) $(LIBS) -o $(BIN)
# $(STRIP) $(BIN)
diff --git a/gp2x/upscale_aspect.s b/gp2x/upscale_aspect.s
new file mode 100644
index 0000000..b7694ca
--- /dev/null
+++ b/gp2x/upscale_aspect.s
@@ -0,0 +1,320 @@
+/*
+ * 240x160 -> 320x213 upscaler for ARM with interpolation
+ *
+ * Written by Gražvydas "notaz" Ignotas
+ * Prototyped by Rokas
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the organization nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * 0 1 2 : 3 4 5
+ * 6 7 8 : 9 10 11
+ * 12 13 14 : 15 16 17
+ * v
+ * 0 1 2 3 : 4 5 6 7
+ * 8 9 10 11 : 12 13 14 15
+ * 16 17 18 19 : 20 21 22 23
+ * 24 25 26 27 : 28 29 30 31
+ */
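+
+/*
+ * Rough C model of one 3x3 -> 4x4 block, for reference only (not part
+ * of the build; s/d use the indices of the diagram above, and wavg()
+ * is a hypothetical weighted-average helper):
+ *
+ *   d[0] = s[0];                                  // corners pass through
+ *   d[1] = wavg(s[0],3, s[1],5);                  // (3*a + 5*b + 4) / 8
+ *   d[8] = wavg(s[0],3, s[6],5);
+ *   d[9] = wavg(s[7],14, s[1],7, s[6],7, s[0],4); // (14*a+7*b+7*c+4*d+16)/32
+ *
+ * and symmetrically for the remaining positions.
+ */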
+
+@ unpack the high rgb565 pixel of \src into the spread-out
+@ 00000ggg'ggg00000'rrrrr000'000bbbbb form (channel mask in lr)
+.macro unpack_hi dst, src
+ mov \dst, \src, lsr #16
+ orr \dst, \dst, \dst, lsl #16
+ and \dst, \dst, lr
+.endm
+
+@ same, for the low rgb565 pixel of \src
+.macro unpack_lo dst, src
+ mov \dst, \src, lsl #16
+ orr \dst, \dst, \dst, lsr #16
+ and \dst, \dst, lr
+.endm
+
+@ do 3:5 summing: r2 = (s1*3 + s2*5 + 4) / 8
+@ s2 != r2
+.macro do_3_5 s1, s2
+ add r2,\s1,\s1, lsl #1 @ r2 = s1 * 3
+ add r2, r2,\s2, lsl #2
+ add r2, r2,\s2 @ r2 += s2 * 5
+ add r2, r2, r12,lsl #2 @ sum += round * 4
+ and r2, lr, r2, lsr #3 @ mask_to_unpacked(sum / 8)
+.endm
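+@ (the above in C, on unpacked values: r2 = ((3*s1 + 5*s2 + 4*round) >> 3)
+@ & mask, with round/mask being the r12/lr constants set up below)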
+
+@ do 14:7:7:4: r2 = (s1*14 + s2*7 + s3*7 + s4*4 + 16) / 32
+@ {s2,s3,s4} != r2
+.macro do_14_7_7_4 s1, s2, s3, s4
+ mov r2,\s1, lsl #4
+ sub r2, r2,\s1, lsl #1 @ r2 = s1 * 14
+ add r2, r2,\s2, lsl #3
+ sub r2, r2,\s2 @ r2 += s2 * 7
+ add r2, r2,\s3, lsl #3
+ sub r2, r2,\s3 @ r2 += s3 * 7
+ add r2, r2,\s4, lsl #2 @ r2 += s4 * 4
+ add r2, r2, r12,lsl #3 @ sum += round * 16
+ and r2, lr, r2, lsr #5 @ mask_to_unpacked(sum / 32)
+.endm
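+@ (in C: r2 = ((14*s1 + 7*s2 + 7*s3 + 4*s4 + 16*round) >> 5) & mask)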
+
+.global upscale_aspect @ u16 *dst, u16 *src
+upscale_aspect:
+ stmfd sp!,{r4-r11,lr}
+ mov lr, #0x0000001f
+ orr lr, lr, #0x0000f800 @ for "unpacked" form of
+ orr lr, lr, #0x07e00000 @ 00000ggg'ggg00000'rrrrr000'000bbbbb
+ mov r12, #0x00000001
+ orr r12,r12,#0x00000800
+ orr r12,r12,#0x00200000 @ rounding constant
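+ @ examples: pure green 0x07e0 unpacks to 0x07e00000,
+ @ magenta 0xf81f unpacks to 0x0000f81f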
+
+ mov r8, #((240/6)-1) << 24 @ cols
+ orr r8, r8, #160/3 @ rows
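+ @ r8 packs both counters: 6-pixel column groups in the top byte,
+ @ 3-line row groups in the low byte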
+
+ add r0, r0, #320*2*13 @ 13-line top border to center 213 rows in 240
+loop1:
+ ldr r10,[r1]
+ ldr r11,[r1, #320*2*1]
+
+ unpack_lo r4, r10
+ unpack_hi r5, r10
+ unpack_lo r6, r11
+ unpack_hi r7, r11
+
+ ldr r11,[r1, #4]
+
+ do_3_5 r4, r5
+ orr r2, r2, r2, lsr #16
+ mov r3, r10, lsl #16
+ mov r3, r3, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0] @ 0,1
+
+ ldr r10,[r1, #320*2*2]
+
+ do_3_5 r4, r6
+ orr r3, r2, r2, lsl #16
+ mov r3, r3, lsr #16 @ 8
+
+ do_14_7_7_4 r7, r5, r6, r4
+ orr r2, r2, r2, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0, #320*2*1] @ 8,9
+
+ unpack_lo r4, r10
+ unpack_hi r9, r10
+
+ do_3_5 r4, r6
+ orr r3, r2, r2, lsl #16
+ mov r3, r3, lsr #16
+
+ do_14_7_7_4 r7, r9, r6, r4
+ orr r2, r2, r2, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0, #320*2*2] @ 16,17
+
+ do_3_5 r4, r9
+ orr r2, r2, r2, lsr #16
+ mov r3, r10, lsl #16
+ mov r3, r3, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0, #320*2*3] @ 24,25
+
+ ldr r10,[r1, #320*2*1+4]
+
+ unpack_lo r6, r11
+ unpack_lo r4, r10
+
+ do_3_5 r6, r5
+ orr r2, r2, r2, lsl #16
+ mov r3, r11, lsl #16
+ orr r2, r3, r2, lsr #16
+ str r2, [r0, #4] @ 2,3
+
+ do_14_7_7_4 r7, r4, r5, r6
+ orr r2, r2, r2, lsl #16
+ mov r3, r2, lsr #16
+
+ ldr r5, [r1, #320*2*2+4]
+
+ do_3_5 r6, r4
+ orr r2, r2, r2, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0, #320*2*1+4] @ 10,11
+
+ unpack_lo r6, r5
+
+ do_14_7_7_4 r7, r4, r9, r6
+ orr r2, r2, r2, lsl #16
+ mov r3, r2, lsr #16
+
+ do_3_5 r6, r4
+ orr r2, r2, r2, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0, #320*2*2+4] @ 18,19
+
+ unpack_hi r4, r10
+
+ ldr r10,[r1, #8]
+
+ do_3_5 r6, r9
+ orr r2, r2, r2, lsl #16
+ mov r2, r2, lsr #16
+ orr r2, r2, r5, lsl #16
+ str r2, [r0, #320*2*3+4] @ 26,27
+
+ unpack_hi r6, r11
+ unpack_lo r7, r10
+
+ do_3_5 r6, r7
+ orr r2, r2, r2, lsr #16
+ mov r2, r2, lsl #16
+ orr r2, r2, r11,lsr #16
+ str r2, [r0, #8] @ 4,5
+
+ ldr r11,[r1, #320*2*1+8]
+
+ unpack_hi r9, r10
+
+ do_3_5 r9, r7
+ orr r2, r2, r2, lsr #16
+ mov r2, r2, lsl #16
+ orr r2, r2, r10,lsr #16
+ mov r2, r2, ror #16
+ str r2, [r0, #12] @ 6,7
+
+ unpack_lo r10,r11
+
+ do_3_5 r6, r4
+ orr r2, r2, r2, lsl #16
+ mov r3, r2, lsr #16
+
+ do_14_7_7_4 r10, r4, r7, r6
+ orr r2, r2, r2, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0, #320*2*1+8] @ 12,13
+
+ unpack_hi r6, r11
+
+ ldr r11,[r1, #320*2*2+8]
+
+ do_14_7_7_4 r10, r6, r7, r9
+ orr r2, r2, r2, lsl #16
+ mov r3, r2, lsr #16
+
+ do_3_5 r9, r6
+ orr r2, r2, r2, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0, #320*2*1+12] @ 14,15
+
+ unpack_hi r7, r5
+ unpack_lo r9, r11
+
+ do_3_5 r7, r4
+ orr r2, r2, r2, lsl #16
+ mov r3, r2, lsr #16
+
+ do_14_7_7_4 r10, r4, r9, r7
+ orr r2, r2, r2, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0, #320*2*2+8] @ 20,21
+
+ do_3_5 r7, r9
+ orr r2, r2, r2, lsr #16
+ mov r2, r2, lsl #16
+ orr r2, r2, r5, lsr #16
+ str r2, [r0, #320*2*3+8] @ 28,29
+
+ unpack_hi r5, r11
+
+ do_14_7_7_4 r10, r6, r9, r5
+ orr r2, r2, r2, lsl #16
+ mov r3, r2, lsr #16
+
+ do_3_5 r5, r6
+ orr r2, r2, r2, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0, #320*2*2+12] @ 22,23
+
+ do_3_5 r5, r9
+ orr r2, r2, r2, lsr #16
+ mov r3, r11, lsr #16
+ orr r2, r3, r2, lsl #16
+ mov r2, r2, ror #16
+ str r2, [r0, #320*2*3+12] @ 30,31
+
+ add r0, r0, #16 @ dst: 8 pixels done
+ add r1, r1, #12 @ src: 6 pixels done
+
+ subs r8, r8, #1<<24 @ next column group
+ bpl loop1
+
+ add r0, r0, #320*3*2 @ dst: skip 3 more lines (4 written per group)
+ add r1, r1, #(320*2+80)*2 @ src: to next 3-line group (320px pitch)
+ sub r8, r8, #1
+ tst r8, #0xff @ row groups left?
+ add r8, r8, #(240/6) << 24 @ reload cols
+ bne loop1
+
+ @@ last source line (160 = 53*3 + 1): one more output row,
+ @@ horizontal 3:5 blending only
+ mov r8, #240/6
+
+loop2:
+ ldmia r1!,{r9,r10,r11}
+
+ unpack_lo r4, r9
+ unpack_hi r5, r9
+
+ do_3_5 r4, r5
+ orr r2, r2, r2, lsr #16
+ mov r3, r9, lsl #16
+ mov r3, r3, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0], #4
+
+ unpack_lo r6, r10
+ unpack_hi r7, r10
+
+ do_3_5 r6, r5
+ orr r2, r2, r2, lsl #16
+ mov r2, r2, lsr #16
+ orr r2, r2, r10,lsl #16
+ str r2, [r0], #4
+
+ unpack_lo r4, r11
+ unpack_hi r5, r11
+
+ do_3_5 r7, r4
+ orr r2, r2, r2, lsr #16
+ mov r3, r10, lsr #16
+ orr r2, r3, r2, lsl #16
+ str r2, [r0], #4
+
+ do_3_5 r5, r4
+ orr r2, r2, r2, lsr #16
+ mov r3, r11, lsr #16
+ orr r2, r3, r2, lsl #16
+ mov r2, r2, ror #16
+ str r2, [r0], #4
+
+ subs r8, r8, #1
+ bne loop2
+
+ ldmfd sp!,{r4-r11,pc}
+
+@ vim:filetype=armasm
+