1 files changed, 39 insertions, 25 deletions
diff --git a/apps/eq_arm.S b/apps/eq_arm.S
index 85617dc2fb..0c1961d2d3 100644
--- a/apps/eq_arm.S
+++ b/apps/eq_arm.S
@@ -7,7 +7,7 @@
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
- * Copyright (C) 2006 Thom Johansen
+ * Copyright (C) 2006-2007 Thom Johansen
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
@@ -17,6 +17,15 @@
 *
 ****************************************************************************/
+/* Uncomment this to mix the lower accumulator bits into the shifted result.
+ * Without this, the low "shift" bits of each output sample are lost.
+ */
+/* #define HIGH_PRECISION */
+/*
+ * void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
+ *                unsigned channels, unsigned shift)
+ */
    .text
    .global eq_filter
 eq_filter:
@@ -33,35 +42,40 @@ eq_filter:
    ldr r14, [sp, #8]       @ r14 = numsamples
    ldmia r10, { r0-r3 }    @ load history, r10 should be filter struct addr
    str r10, [sp, #4]       @ save it for loop end
-.loop:
    /* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator,
-       r12 = shift amount, r14 = number of samples.
+     * r12 = shift amount, r14 = number of samples.
-       See eq_cf.S for explanation of what this loop does. Primary difference
-       is the reordering of the equation we do here, which is done for register
-       reuse reasons, we're pretty short on regs.
     */
-    smull r10, r11, r6, r1  @ acc = b2*x[i - 2]
+.loop:
-    mov r1, r0              @ fix input history
+    /* Direct form 1 filtering code.
-    smlal r10, r11, r5, r0  @ acc += b1*x[i - 1]
+     * y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
-    ldr r0, [r9]            @ load input and fix history in same operation
+     * where y[] is output and x[] is input. This is performed out of order to
-    smlal r10, r11, r4, r0  @ acc += b0*x[i]
+     * reuse registers, we're pretty short on regs.
-    smlal r10, r11, r7, r2  @ acc += a1*y[i - 1]
+     */
-    smlal r10, r11, r8, r3  @ acc += a2*y[i - 2]
+    smull r10, r11, r6, r1     @ acc = b2*x[i - 2]
-    mov r3, r2              @ fix output history
+    mov r1, r0                 @ fix input history
-    mov r2, r11, lsl r12    @ get result
+    smlal r10, r11, r5, r0     @ acc += b1*x[i - 1]
-    @ TODO: arm makes it easy to mix in lower bits from r10 for extended
+    ldr r0, [r9]               @ load input and fix history in same operation
-    @ precision here, but we don't have enough regs to save the shift factor
+    smlal r10, r11, r4, r0     @ acc += b0*x[i]
-    @ we would need (32 - r12).
+    smlal r10, r11, r7, r2     @ acc += a1*y[i - 1]
-    str r2, [r9], #4        @ save result
+    smlal r10, r11, r8, r3     @ acc += a2*y[i - 2]
-    subs r14, r14, #1       @ are we done with this channel?
+    mov r3, r2                 @ fix output history
+    mov r2, r11, asl r12       @ get upper part of result and shift left
+#ifdef HIGH_PRECISION
+    rsb r11, r12, #32          @ get shift amount for lower part
+    orr r2, r2, r10, lsr r11   @ then mix in correctly shifted lower part
+#endif
+    str r2, [r9], #4           @ save result
+    subs r14, r14, #1          @ are we done with this channel?
    bne .loop
-    ldr r10, [sp, #4]       @ load filter struct pointer
+    ldr r10, [sp, #4]          @ load filter struct pointer
-    stmia r10!, { r0-r3 }   @ save back history
+    stmia r10!, { r0-r3 }      @ save back history
-    ldr r11, [sp, #12]      @ load number of channels
+    ldr r11, [sp, #12]         @ load number of channels
-    subs r11, r11, #1       @ all channels processed?
+    subs r11, r11, #1          @ all channels processed?
    strne r11, [sp, #12]
    bne .filterloop
-    add sp, sp, #16         @ compensate for temp storage
+    add sp, sp, #16            @ compensate for temp storage
    ldmia sp!, { r4-r11, pc }

diff --git a/apps/eq_arm.S b/apps/eq_arm.S index 85617dc2fb..0c1961d2d3 100644 --- a/apps/eq_arm.S +++ b/apps/eq_arm.S
@@ -7,7 +7,7 @@
7	* \/ \/ \/ \/ \/	7	* \/ \/ \/ \/ \/
8	* $Id$	8	* $Id$
9	*	9	*
10	* Copyright (C) 2006 Thom Johansen	10	* Copyright (C) 2006-2007 Thom Johansen
11	*	11	*
12	* All files in this archive are subject to the GNU General Public License.	12	* All files in this archive are subject to the GNU General Public License.
13	* See the file COPYING in the source tree root for full license agreement.	13	* See the file COPYING in the source tree root for full license agreement.
@@ -17,6 +17,15 @@
17	*	17	*
18	****************************************************************************/	18	****************************************************************************/
19		19
		20	/* Uncomment this to mix the lower accumulator bits into the shifted result.
		21	* Without this, the low "shift" bits of each output sample are lost.
		22	*/
		23	/* #define HIGH_PRECISION */
		24
		25	/*
		26	* void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
		27	* unsigned channels, unsigned shift)
		28	*/
20	.text	29	.text
21	.global eq_filter	30	.global eq_filter
22	eq_filter:	31	eq_filter:
@@ -33,35 +42,40 @@ eq_filter:
33	ldr r14, [sp, #8] @ r14 = numsamples	42	ldr r14, [sp, #8] @ r14 = numsamples
34	ldmia r10, { r0-r3 } @ load history, r10 should be filter struct addr	43	ldmia r10, { r0-r3 } @ load history, r10 should be filter struct addr
35	str r10, [sp, #4] @ save it for loop end	44	str r10, [sp, #4] @ save it for loop end
36	.loop:	45
37	/* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator,	46	/* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator,
38	r12 = shift amount, r14 = number of samples.	47	* r12 = shift amount, r14 = number of samples.
39	See eq_cf.S for explanation of what this loop does. Primary difference
40	is the reordering of the equation we do here, which is done for register
41	reuse reasons, we're pretty short on regs.
42	*/	48	*/
43	smull r10, r11, r6, r1 @ acc = b2*x[i - 2]	49	.loop:
44	mov r1, r0 @ fix input history	50	/* Direct form 1 filtering code.
45	smlal r10, r11, r5, r0 @ acc += b1*x[i - 1]	51	* y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
46	ldr r0, [r9] @ load input and fix history in same operation	52	* where y[] is output and x[] is input. This is performed out of order to
47	smlal r10, r11, r4, r0 @ acc += b0*x[i]	53	* reuse registers, we're pretty short on regs.
48	smlal r10, r11, r7, r2 @ acc += a1*y[i - 1]	54	*/
49	smlal r10, r11, r8, r3 @ acc += a2*y[i - 2]	55	smull r10, r11, r6, r1 @ acc = b2*x[i - 2]
50	mov r3, r2 @ fix output history	56	mov r1, r0 @ fix input history
51	mov r2, r11, lsl r12 @ get result	57	smlal r10, r11, r5, r0 @ acc += b1*x[i - 1]
52	@ TODO: arm makes it easy to mix in lower bits from r10 for extended	58	ldr r0, [r9] @ load input and fix history in same operation
53	@ precision here, but we don't have enough regs to save the shift factor	59	smlal r10, r11, r4, r0 @ acc += b0*x[i]
54	@ we would need (32 - r12).	60	smlal r10, r11, r7, r2 @ acc += a1*y[i - 1]
55	str r2, [r9], #4 @ save result	61	smlal r10, r11, r8, r3 @ acc += a2*y[i - 2]
56	subs r14, r14, #1 @ are we done with this channel?	62	mov r3, r2 @ fix output history
		63	mov r2, r11, asl r12 @ get upper part of result and shift left
		64	#ifdef HIGH_PRECISION
		65	rsb r11, r12, #32 @ get shift amount for lower part
		66	orr r2, r2, r10, lsr r11 @ then mix in correctly shifted lower part
		67	#endif
		68	str r2, [r9], #4 @ save result
		69	subs r14, r14, #1 @ are we done with this channel?
57	bne .loop	70	bne .loop
58		71
59	ldr r10, [sp, #4] @ load filter struct pointer	72	ldr r10, [sp, #4] @ load filter struct pointer
60	stmia r10!, { r0-r3 } @ save back history	73	stmia r10!, { r0-r3 } @ save back history
61	ldr r11, [sp, #12] @ load number of channels	74	ldr r11, [sp, #12] @ load number of channels
62	subs r11, r11, #1 @ all channels processed?	75	subs r11, r11, #1 @ all channels processed?
63	strne r11, [sp, #12]	76	strne r11, [sp, #12]
64	bne .filterloop	77	bne .filterloop
65		78
66	add sp, sp, #16 @ compensate for temp storage	79	add sp, sp, #16 @ compensate for temp storage
67	ldmia sp!, { r4-r11, pc }	80	ldmia sp!, { r4-r11, pc }
		81