aboutsummaryrefslogtreecommitdiff
path: root/plugins/dfsound/arm_utils.S
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/dfsound/arm_utils.S')
-rw-r--r--plugins/dfsound/arm_utils.S164
1 files changed, 164 insertions, 0 deletions
diff --git a/plugins/dfsound/arm_utils.S b/plugins/dfsound/arm_utils.S
new file mode 100644
index 0000000..22e5844
--- /dev/null
+++ b/plugins/dfsound/arm_utils.S
@@ -0,0 +1,164 @@
+/*
+ * (C) GraÅžvydas "notaz" Ignotas, 2011
+ *
+ * This work is licensed under the terms of any of these licenses
+ * (at your option):
+ * - GNU GPL, version 2 or later.
+ * - GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+
+.text
+.align 2
+
+.macro load_varadr reg var
+#if defined(__ARM_ARCH_7A__) && !defined(__PIC__)
+ movw \reg, #:lower16:\var
+ movt \reg, #:upper16:\var
+#else
+ ldr \reg, =\var
+#endif
+.endm
+
+#ifdef __ARM_NEON__
+
+.global mix_chan @ (int start, int count, int lv, int rv)
+mix_chan:
+ vmov.32 d14[0], r2
+ vmov.32 d14[1], r3 @ multipliers
+ mov r12, r0
+ load_varadr r0, ChanBuf
+ load_varadr r2, SSumLR
+ add r0, r12, lsl #2
+ add r2, r12, lsl #3
+0:
+ vldmia r0!, {d0-d1}
+ vldmia r2, {d2-d5}
+ vmul.s32 d10, d14, d0[0]
+ vmul.s32 d11, d14, d0[1]
+ vmul.s32 d12, d14, d1[0]
+ vmul.s32 d13, d14, d1[1]
+ vsra.s32 q1, q5, #14
+ vsra.s32 q2, q6, #14
+ subs r1, #4
+ blt mc_finish
+ vstmia r2!, {d2-d5}
+ bgt 0b
+ nop
+ bxeq lr
+
+mc_finish:
+ vstmia r2!, {d2}
+ cmp r1, #-2
+ vstmiage r2!, {d3}
+ cmp r1, #-1
+ vstmiage r2!, {d4}
+ bx lr
+
+
+.global mix_chan_rvb @ (int start, int count, int lv, int rv)
+mix_chan_rvb:
+ vmov.32 d14[0], r2
+ vmov.32 d14[1], r3 @ multipliers
+ mov r12, r0
+ load_varadr r0, ChanBuf
+ load_varadr r3, sRVBStart
+ load_varadr r2, SSumLR
+ ldr r3, [r3]
+ add r0, r12, lsl #2
+ add r2, r12, lsl #3
+ add r3, r12, lsl #3
+0:
+ vldmia r0!, {d0-d1}
+ vldmia r2, {d2-d5}
+ vldmia r3, {d6-d9}
+ vmul.s32 d10, d14, d0[0]
+ vmul.s32 d11, d14, d0[1]
+ vmul.s32 d12, d14, d1[0]
+ vmul.s32 d13, d14, d1[1]
+ vsra.s32 q1, q5, #14
+ vsra.s32 q2, q6, #14
+ vsra.s32 q3, q5, #14
+ vsra.s32 q4, q6, #14
+ subs r1, #4
+ blt mcr_finish
+ vstmia r2!, {d2-d5}
+ vstmia r3!, {d6-d9}
+ bgt 0b
+ nop
+ bxeq lr
+
+mcr_finish:
+ vstmia r2!, {d2}
+ vstmia r3!, {d6}
+ cmp r1, #-2
+ vstmiage r2!, {d3}
+ vstmiage r3!, {d7}
+ cmp r1, #-1
+ vstmiage r2!, {d4}
+ vstmiage r3!, {d8}
+ bx lr
+
+#else
+
+.global mix_chan @ (int start, int count, int lv, int rv)
+mix_chan:
+ stmfd sp!, {r4-r8,lr}
+ orr r3, r2, r3, lsl #16
+ lsl r3, #1 @ packed multipliers << 1
+ mov r12, r0
+ load_varadr r0, ChanBuf
+ load_varadr r2, SSumLR
+ add r0, r12, lsl #2
+ add r2, r12, lsl #3
+0:
+ ldmia r0!, {r4,r5}
+ ldmia r2, {r6-r8,lr}
+ lsl r4, #1 @ adjust for mul
+ lsl r5, #1
+ smlawb r6, r4, r3, r6
+ smlawt r7, r4, r3, r7
+ smlawb r8, r5, r3, r8
+ smlawt lr, r5, r3, lr
+ subs r1, #2
+ blt mc_finish
+ stmia r2!, {r6-r8,lr}
+ bgt 0b
+ ldmeqfd sp!, {r4-r8,pc}
+
+mc_finish:
+ stmia r2!, {r6,r7}
+ ldmfd sp!, {r4-r8,pc}
+
+
+.global mix_chan_rvb @ (int start, int count, int lv, int rv)
+mix_chan_rvb:
+ stmfd sp!, {r4-r8,lr}
+ orr lr, r2, r3, lsl #16
+ lsl lr, #1
+ load_varadr r3, sRVBStart
+ load_varadr r2, SSumLR
+ load_varadr r4, ChanBuf
+ ldr r3, [r3]
+ add r2, r2, r0, lsl #3
+ add r3, r3, r0, lsl #3
+ add r0, r4, r0, lsl #2
+0:
+ ldr r4, [r0], #4
+ ldmia r2, {r6,r7}
+ ldmia r3, {r8,r12}
+ lsl r4, #1
+ smlawb r6, r4, lr, r6 @ supposedly takes single cycle?
+ smlawt r7, r4, lr, r7
+ smlawb r8, r4, lr, r8
+ smlawt r12,r4, lr, r12
+ subs r1, #1
+ stmia r2!, {r6,r7}
+ stmia r3!, {r8,r12}
+ bgt 0b
+ ldmfd sp!, {r4-r8,pc}
+
+#endif
+
+@ vim:filetype=armasm