summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2007-02-22 13:55:49 +0000
committerThom Johansen <thomj@rockbox.org>2007-02-22 13:55:49 +0000
commitc4ccd9ee860428dd127b9d5affff81a4f9420c6c (patch)
treeacb6f134d396e4c812c52b85ec89ca2101bf403c
parent6c3db6e65f60196b7377cbe8d4f74fcb7035a5cf (diff)
downloadrockbox-c4ccd9ee860428dd127b9d5affff81a4f9420c6c.tar.gz
rockbox-c4ccd9ee860428dd127b9d5affff81a4f9420c6c.zip
Commit optional code for high-precision EQ which will almost certainly not make a difference on 16 bit output targets.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12451 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/eq_arm.S64
-rw-r--r--apps/eq_cf.S40
2 files changed, 72 insertions, 32 deletions
diff --git a/apps/eq_arm.S b/apps/eq_arm.S
index 85617dc2fb..0c1961d2d3 100644
--- a/apps/eq_arm.S
+++ b/apps/eq_arm.S
@@ -7,7 +7,7 @@
7 * \/ \/ \/ \/ \/ 7 * \/ \/ \/ \/ \/
8 * $Id$ 8 * $Id$
9 * 9 *
10 * Copyright (C) 2006 Thom Johansen 10 * Copyright (C) 2006-2007 Thom Johansen
11 * 11 *
12 * All files in this archive are subject to the GNU General Public License. 12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement. 13 * See the file COPYING in the source tree root for full license agreement.
@@ -17,6 +17,15 @@
17 * 17 *
18 ****************************************************************************/ 18 ****************************************************************************/
19 19
20/* Uncomment this to also compute the low-order result bits after shifting.
21 * Without it, the lowest "shift" bits of each output sample are lost.
22 */
23/* #define HIGH_PRECISION */
24
25/*
26 * void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
27 * unsigned channels, unsigned shift)
28 */
20 .text 29 .text
21 .global eq_filter 30 .global eq_filter
22eq_filter: 31eq_filter:
@@ -33,35 +42,40 @@ eq_filter:
33 ldr r14, [sp, #8] @ r14 = numsamples 42 ldr r14, [sp, #8] @ r14 = numsamples
34 ldmia r10, { r0-r3 } @ load history, r10 should be filter struct addr 43 ldmia r10, { r0-r3 } @ load history, r10 should be filter struct addr
35 str r10, [sp, #4] @ save it for loop end 44 str r10, [sp, #4] @ save it for loop end
36.loop: 45
37 /* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator, 46 /* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator,
38 r12 = shift amount, r14 = number of samples. 47 * r12 = shift amount, r14 = number of samples.
39 See eq_cf.S for explanation of what this loop does. Primary difference
40 is the reordering of the equation we do here, which is done for register
41 reuse reasons, we're pretty short on regs.
42 */ 48 */
43 smull r10, r11, r6, r1 @ acc = b2*x[i - 2] 49.loop:
44 mov r1, r0 @ fix input history 50 /* Direct form 1 filtering code.
45 smlal r10, r11, r5, r0 @ acc += b1*x[i - 1] 51 * y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
46 ldr r0, [r9] @ load input and fix history in same operation 52 * where y[] is output and x[] is input. This is performed out of order to
47 smlal r10, r11, r4, r0 @ acc += b0*x[i] 53 * reuse registers, we're pretty short on regs.
48 smlal r10, r11, r7, r2 @ acc += a1*y[i - 1] 54 */
49 smlal r10, r11, r8, r3 @ acc += a2*y[i - 2] 55 smull r10, r11, r6, r1 @ acc = b2*x[i - 2]
50 mov r3, r2 @ fix output history 56 mov r1, r0 @ fix input history
51 mov r2, r11, lsl r12 @ get result 57 smlal r10, r11, r5, r0 @ acc += b1*x[i - 1]
52 @ TODO: arm makes it easy to mix in lower bits from r10 for extended 58 ldr r0, [r9] @ load input and fix history in same operation
53 @ precision here, but we don't have enough regs to save the shift factor 59 smlal r10, r11, r4, r0 @ acc += b0*x[i]
54 @ we would need (32 - r12). 60 smlal r10, r11, r7, r2 @ acc += a1*y[i - 1]
55 str r2, [r9], #4 @ save result 61 smlal r10, r11, r8, r3 @ acc += a2*y[i - 2]
56 subs r14, r14, #1 @ are we done with this channel? 62 mov r3, r2 @ fix output history
63 mov r2, r11, asl r12 @ get upper part of result and shift left
64#ifdef HIGH_PRECISION
65 rsb r11, r12, #32 @ get shift amount for lower part
66 orr r2, r2, r10, lsr r11 @ then mix in correctly shifted lower part
67#endif
68 str r2, [r9], #4 @ save result
69 subs r14, r14, #1 @ are we done with this channel?
57 bne .loop 70 bne .loop
58 71
59 ldr r10, [sp, #4] @ load filter struct pointer 72 ldr r10, [sp, #4] @ load filter struct pointer
60 stmia r10!, { r0-r3 } @ save back history 73 stmia r10!, { r0-r3 } @ save back history
61 ldr r11, [sp, #12] @ load number of channels 74 ldr r11, [sp, #12] @ load number of channels
62 subs r11, r11, #1 @ all channels processed? 75 subs r11, r11, #1 @ all channels processed?
63 strne r11, [sp, #12] 76 strne r11, [sp, #12]
64 bne .filterloop 77 bne .filterloop
65 78
66 add sp, sp, #16 @ compensate for temp storage 79 add sp, sp, #16 @ compensate for temp storage
67 ldmia sp!, { r4-r11, pc } 80 ldmia sp!, { r4-r11, pc }
81
diff --git a/apps/eq_cf.S b/apps/eq_cf.S
index c9458cdc77..75bfcafb3a 100644
--- a/apps/eq_cf.S
+++ b/apps/eq_cf.S
@@ -7,7 +7,7 @@
7 * \/ \/ \/ \/ \/ 7 * \/ \/ \/ \/ \/
8 * $Id$ 8 * $Id$
9 * 9 *
10 * Copyright (C) 2006 Thom Johansen 10 * Copyright (C) 2006-2007 Thom Johansen
11 * 11 *
12 * All files in this archive are subject to the GNU General Public License. 12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement. 13 * See the file COPYING in the source tree root for full license agreement.
@@ -17,14 +17,27 @@
17 * 17 *
18 ****************************************************************************/ 18 ****************************************************************************/
19 19
20/* Uncomment this to also compute the low-order result bits after shifting.
21 * Without it, the lowest "shift" - 1 bits of each output sample are lost.
22 */
23/* #define HIGH_PRECISION */
24
25/*
26 * void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
27 * unsigned channels, unsigned shift)
28 */
20 .text 29 .text
21 .global eq_filter 30 .global eq_filter
22eq_filter: 31eq_filter:
23 lea.l (-11*4, %sp), %sp 32 lea.l (-11*4, %sp), %sp
24 movem.l %d2-%d7/%a2-%a6, (%sp) | save clobbered regs 33 movem.l %d2-%d7/%a2-%a6, (%sp) | save clobbered regs
25 move.l (11*4+8, %sp), %a5 | fetch filter structure address 34 move.l (11*4+8, %sp), %a5 | fetch filter structure address
26 movem.l (11*4+16, %sp), %d6-%d7 | load num. channels and shift count 35 move.l (11*4+20, %sp), %d7 | load shift count
27 subq.l #1, %d7 | EMAC gives us one free shift 36 subq.l #1, %d7 | EMAC gives us one free shift
37#ifdef HIGH_PRECISION
38 moveq.l #8, %d6
39 sub.l %d7, %d6 | shift for lower part of accumulator
40#endif
28 movem.l (%a5), %a0-%a4 | load coefs 41 movem.l (%a5), %a0-%a4 | load coefs
29 lea.l (5*4, %a5), %a5 | point to filter history 42 lea.l (5*4, %a5), %a5 | point to filter history
30 43
@@ -34,11 +47,16 @@ eq_filter:
34 move.l (%a6), %a6 47 move.l (%a6), %a6
35 move.l (11*4+12, %sp), %d5 | number of samples 48 move.l (11*4+12, %sp), %d5 | number of samples
36 movem.l (%a5), %d0-%d3 | load filter history 49 movem.l (%a5), %d0-%d3 | load filter history
50
51 /* d0-d3 = history, d4 = number of channels, d5 = sample count,
52 * d6 = lower shift amount, d7 = upper shift amount, a0-a4 = coefs,
53 * a5 = history pointer, a6 = x[]
54 */
37.loop: 55.loop:
38 /* Direct form 1 filtering code. We assume DSP has put EMAC in frac mode. 56 /* Direct form 1 filtering code. We assume DSP has put EMAC in frac mode.
39 y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2], 57 * y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
40 where y[] is output and x[] is input. This is performed out of order 58 * where y[] is output and x[] is input. This is performed out of order
41 to do parallel load of input value. 59 * to do parallel load of input value.
42 */ 60 */
43 mac.l %a2, %d1, %acc0 | acc = b2*x[i - 2] 61 mac.l %a2, %d1, %acc0 | acc = b2*x[i - 2]
44 move.l %d0, %d1 | fix input history 62 move.l %d0, %d1 | fix input history
@@ -47,15 +65,23 @@ eq_filter:
47 mac.l %a3, %d2, %acc0 | acc += a1*y[i - 1] 65 mac.l %a3, %d2, %acc0 | acc += a1*y[i - 1]
48 mac.l %a4, %d3, %acc0 | acc += a2*y[i - 2] 66 mac.l %a4, %d3, %acc0 | acc += a2*y[i - 2]
49 move.l %d2, %d3 | fix output history 67 move.l %d2, %d3 | fix output history
50 movclr.l %acc0, %d2 | fetch and write result 68#ifdef HIGH_PRECISION
69 move.l %accext01, %d2 | fetch lower part of accumulator
70 move.b %d2, %d4 | clear upper three bytes
71 lsr.l %d6, %d4 | shift lower bits
72#endif
73 movclr.l %acc0, %d2 | fetch upper part of result
51 asl.l %d7, %d2 | restore fixed point format 74 asl.l %d7, %d2 | restore fixed point format
75#ifdef HIGH_PRECISION
76 or.l %d2, %d4 | combine lower and upper parts
77#endif
52 move.l %d2, (%a6)+ | save result 78 move.l %d2, (%a6)+ | save result
53 subq.l #1, %d5 | are we done with this channel? 79 subq.l #1, %d5 | are we done with this channel?
54 jne .loop 80 jne .loop
55 81
56 movem.l %d0-%d3, (%a5) | save history back to struct 82 movem.l %d0-%d3, (%a5) | save history back to struct
57 lea.l (4*4, %a5), %a5 | point to next channel's history 83 lea.l (4*4, %a5), %a5 | point to next channel's history
58 subq.l #1, %d6 | have we processed both channels? 84 subq.l #1, (11*4+16, %sp) | have we processed both channels?
59 jne .filterloop 85 jne .filterloop
60 86
61 movem.l (%sp), %d2-%d7/%a2-%a6 87 movem.l (%sp), %d2-%d7/%a2-%a6