diff options
author | Thom Johansen <thomj@rockbox.org> | 2007-10-22 23:44:19 +0000 |
---|---|---|
committer | Thom Johansen <thomj@rockbox.org> | 2007-10-22 23:44:19 +0000 |
commit | 9d9225ed1ddefab985ab3ffd7e77bccf979f1c5b (patch) | |
tree | 19e01cb71c3da504525eea6fd0d8836493b2ae12 | |
parent | 4b259e9553c644e6af393042a728dd4b74da20a9 (diff) | |
download | rockbox-9d9225ed1ddefab985ab3ffd7e77bccf979f1c5b.tar.gz rockbox-9d9225ed1ddefab985ab3ffd7e77bccf979f1c5b.zip |
iir_mem16() in assembler for Coldfire for a decent performance boost. Add EMAC init in nb_celp.c, since all modes need this as a base.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15274 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- | apps/codecs/libspeex/SOURCES | 3 | ||||
-rw-r--r-- | apps/codecs/libspeex/config-speex.h | 5 | ||||
-rw-r--r-- | apps/codecs/libspeex/filters.c | 2 | ||||
-rw-r--r-- | apps/codecs/libspeex/filters_cf.S | 165 | ||||
-rw-r--r-- | apps/codecs/libspeex/nb_celp.c | 3 |
5 files changed, 178 insertions, 0 deletions
diff --git a/apps/codecs/libspeex/SOURCES b/apps/codecs/libspeex/SOURCES index df73565734..a5d4318282 100644 --- a/apps/codecs/libspeex/SOURCES +++ b/apps/codecs/libspeex/SOURCES | |||
@@ -42,3 +42,6 @@ vbr.c | |||
42 | vorbis_psy.c | 42 | vorbis_psy.c |
43 | vq.c | 43 | vq.c |
44 | window.c | 44 | window.c |
45 | #ifdef CPU_COLDFIRE | ||
46 | filters_cf.S | ||
47 | #endif | ||
diff --git a/apps/codecs/libspeex/config-speex.h b/apps/codecs/libspeex/config-speex.h index b7385eee02..31ebe80741 100644 --- a/apps/codecs/libspeex/config-speex.h +++ b/apps/codecs/libspeex/config-speex.h | |||
@@ -11,6 +11,11 @@ | |||
11 | #define ARM4_ASM | 11 | #define ARM4_ASM |
12 | #endif | 12 | #endif |
13 | 13 | ||
14 | /* Make use of Coldfire assembly optimizations */ | ||
15 | #if defined(CPU_COLDFIRE) | ||
16 | #define COLDFIRE_ASM | ||
17 | #endif | ||
18 | |||
14 | /* Make use of Blackfin assembly optimizations */ | 19 | /* Make use of Blackfin assembly optimizations */ |
15 | /* #undef BFIN_ASM */ | 20 | /* #undef BFIN_ASM */ |
16 | 21 | ||
diff --git a/apps/codecs/libspeex/filters.c b/apps/codecs/libspeex/filters.c index a6a5f62d26..02f93a27b1 100644 --- a/apps/codecs/libspeex/filters.c +++ b/apps/codecs/libspeex/filters.c | |||
@@ -45,6 +45,8 @@ | |||
45 | #include "filters_sse.h" | 45 | #include "filters_sse.h" |
46 | #elif defined (ARM4_ASM) || defined(ARM5E_ASM) | 46 | #elif defined (ARM4_ASM) || defined(ARM5E_ASM) |
47 | #include "filters_arm4.h" | 47 | #include "filters_arm4.h" |
48 | #elif defined (COLDFIRE_ASM) | ||
49 | #define OVERRIDE_IIR_MEM16 | ||
48 | #elif defined (BFIN_ASM) | 50 | #elif defined (BFIN_ASM) |
49 | #include "filters_bfin.h" | 51 | #include "filters_bfin.h" |
50 | #endif | 52 | #endif |
diff --git a/apps/codecs/libspeex/filters_cf.S b/apps/codecs/libspeex/filters_cf.S new file mode 100644 index 0000000000..579af11581 --- /dev/null +++ b/apps/codecs/libspeex/filters_cf.S | |||
@@ -0,0 +1,165 @@ | |||
1 | /* Copyright (C) 2007 Thom Johansen */ | ||
2 | /** | ||
3 | @file filters_cf.S | ||
4 | @brief Various analysis/synthesis filters (Coldfire version) | ||
5 | */ | ||
6 | /* | ||
7 | Redistribution and use in source and binary forms, with or without | ||
8 | modification, are permitted provided that the following conditions | ||
9 | are met: | ||
10 | |||
11 | - Redistributions of source code must retain the above copyright | ||
12 | notice, this list of conditions and the following disclaimer. | ||
13 | |||
14 | - Redistributions in binary form must reproduce the above copyright | ||
15 | notice, this list of conditions and the following disclaimer in the | ||
16 | documentation and/or other materials provided with the distribution. | ||
17 | |||
18 | - Neither the name of the Xiph.org Foundation nor the names of its | ||
19 | contributors may be used to endorse or promote products derived from | ||
20 | this software without specific prior written permission. | ||
21 | |||
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
23 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR | ||
26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
29 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | */ | ||
34 | |||
35 | .text | ||
36 | /* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */ | ||
37 | .global iir_mem16 | |
38 | iir_mem16: | |
39 | lea.l (-44, %sp), %sp | Room for the 11 registers saved below | |
40 | movem.l %d2-%d7/%a2-%a6, (%sp) | |
41 | movem.l (44+4, %sp), %a3-%a5 | a3 = x, a4 = den, a5 = y | |
42 | movem.l (44+20, %sp), %d0/%a6 | d0 = ord, a6 = mem | |
43 | moveq.l #8, %d1 | Jump to correct routine based on 'ord' | |
44 | cmp.l %d1, %d0 | |
45 | jeq .order_8 | |
46 | moveq.l #10, %d1 | |
47 | cmp.l %d1, %d0 | |
48 | jeq .order_10 | |
49 | jra .exit | Any other order: return without touching y[] or mem[] | |
50 | |
51 | | d0 = y[i], d1-d7, a0 = mem[0] .. mem[7] | |
52 | | a3 = x, a4 = den, a5 = y, a6 = temp | |
53 | .order_8: | |
54 | movem.l (%a6), %d1-%d7/%a0 | Fetch mem[] array | |
55 | 0: | Body runs before the count is tested, so N must be >= 1 | |
56 | moveq.l #13, %d0 | |
57 | add.l #4096, %d1 | |
58 | asr.l %d0, %d1 | mem[0] >> 13 with rounding | |
59 | move.w (%a3)+, %d0 | |
60 | ext.l %d0 | |
61 | add.l %d1, %d0 | Add with x[i] | |
62 | add.l #32767, %d0 | Bias result so the legal range is [0..65534] | |
63 | cmp.l #65534, %d0 | Anything outside [0..65534] must be clipped | |
64 | jls 1f | Unsigned test, so negative values are caught too | |
65 | spl.b %d0 | Low byte = 0xff if too high, 0x00 if too low | |
66 | ext.w %d0 | Low word = 0xffff or 0x0000 | |
67 | lsl.l #1, %d0 | Low word = 65534 or 0, upper word is not used below | |
68 | 1: | |
69 | sub.l #32767, %d0 | Remove bias, low word now in [-32767..32767] | |
70 | move.w %d0, (%a5)+ | Write result to y[i] | |
71 | neg.l %d0 | msac.w is bugged in gas, so use mac.w with -y[i] | |
72 | move.l (%a4)+, %a6 | Fetch den[0] and den[1] | |
73 | mac.w %a6u, %d0l, %acc0 | |
74 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc1 | |
75 | mac.w %a6u, %d0l, %acc2 | |
76 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc3 | |
77 | movclr.l %acc0, %d1 | |
78 | add.l %d2, %d1 | mem[0] = mem[1] - den[0]*y[i] | |
79 | movclr.l %acc1, %d2 | |
80 | add.l %d3, %d2 | mem[1] = mem[2] - den[1]*y[i] | |
81 | movclr.l %acc2, %d3 | |
82 | add.l %d4, %d3 | mem[2] = mem[3] - den[2]*y[i] | |
83 | movclr.l %acc3, %d4 | |
84 | add.l %d5, %d4 | mem[3] = mem[4] - den[3]*y[i] | |
85 | mac.w %a6u, %d0l, %acc0 | |
86 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc1 | |
87 | mac.w %a6u, %d0l, %acc2 | |
88 | mac.w %a6l, %d0l, %acc3 | |
89 | lea.l (-16, %a4), %a4 | wrap den pointer back to den[0] | |
90 | movclr.l %acc0, %d5 | |
91 | add.l %d6, %d5 | mem[4] = mem[5] - den[4]*y[i] | |
92 | movclr.l %acc1, %d6 | |
93 | add.l %d7, %d6 | mem[5] = mem[6] - den[5]*y[i] | |
94 | movclr.l %acc2, %d7 | |
95 | add.l %a0, %d7 | mem[6] = mem[7] - den[6]*y[i] | |
96 | movclr.l %acc3, %a0 | mem[7] = -den[7]*y[i] | |
97 | subq.l #1, (44+16, %sp) | Count down N in its stack slot. All samples done? | |
98 | jne 0b | |
99 | move.l (44+24, %sp), %a6 | Fetch mem pointer | |
100 | movem.l %d1-%d7/%a0, (%a6) | Save back mem[] | |
101 | jra .exit | |
102 | |
103 | | d0 = y[i], d1-d7, a0-a2 = mem[0] .. mem[9] | |
104 | | a3 = x, a4 = den, a5 = y, a6 = temp | |
105 | .order_10: | |
106 | movem.l (%a6), %d1-%d7/%a0-%a2 | Fetch mem[] array | |
107 | 0: | Body runs before the count is tested, so N must be >= 1 | |
108 | moveq.l #13, %d0 | |
109 | add.l #4096, %d1 | |
110 | asr.l %d0, %d1 | mem[0] >> 13 with rounding | |
111 | move.w (%a3)+, %d0 | |
112 | ext.l %d0 | |
113 | add.l %d1, %d0 | Add with x[i] | |
114 | add.l #32767, %d0 | Bias result so the legal range is [0..65534] | |
115 | cmp.l #65534, %d0 | Anything outside [0..65534] must be clipped | |
116 | jls 1f | Unsigned test, so negative values are caught too | |
117 | spl.b %d0 | Low byte = 0xff if too high, 0x00 if too low | |
118 | ext.w %d0 | Low word = 0xffff or 0x0000 | |
119 | lsl.l #1, %d0 | Low word = 65534 or 0, upper word is not used below | |
120 | 1: | |
121 | sub.l #32767, %d0 | Remove bias, low word now in [-32767..32767] | |
122 | move.w %d0, (%a5)+ | Write result to y[i] | |
123 | neg.l %d0 | msac.w is bugged in gas, so use mac.w with -y[i] | |
124 | move.l (%a4)+, %a6 | Fetch den[0] and den[1] | |
125 | mac.w %a6u, %d0l, %acc0 | |
126 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc1 | |
127 | mac.w %a6u, %d0l, %acc2 | |
128 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc3 | |
129 | movclr.l %acc0, %d1 | |
130 | add.l %d2, %d1 | mem[0] = mem[1] - den[0]*y[i] | |
131 | movclr.l %acc1, %d2 | |
132 | add.l %d3, %d2 | mem[1] = mem[2] - den[1]*y[i] | |
133 | movclr.l %acc2, %d3 | |
134 | add.l %d4, %d3 | mem[2] = mem[3] - den[2]*y[i] | |
135 | movclr.l %acc3, %d4 | |
136 | add.l %d5, %d4 | mem[3] = mem[4] - den[3]*y[i] | |
137 | mac.w %a6u, %d0l, %acc0 | |
138 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc1 | |
139 | mac.w %a6u, %d0l, %acc2 | |
140 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc3 | |
141 | lea.l (-20, %a4), %a4 | wrap den pointer back to den[0] | |
142 | movclr.l %acc0, %d5 | |
143 | add.l %d6, %d5 | mem[4] = mem[5] - den[4]*y[i] | |
144 | movclr.l %acc1, %d6 | |
145 | add.l %d7, %d6 | mem[5] = mem[6] - den[5]*y[i] | |
146 | movclr.l %acc2, %d7 | |
147 | add.l %a0, %d7 | mem[6] = mem[7] - den[6]*y[i] | |
148 | movclr.l %acc3, %a0 | |
149 | add.l %a1, %a0 | mem[7] = mem[8] - den[7]*y[i] | |
150 | mac.w %a6u, %d0l, %acc0 | |
151 | mac.w %a6l, %d0l, %acc1 | |
152 | movclr.l %acc0, %a1 | |
153 | add.l %a2, %a1 | mem[8] = mem[9] - den[8]*y[i] | |
154 | movclr.l %acc1, %a2 | mem[9] = -den[9]*y[i] | |
155 | |
156 | subq.l #1, (44+16, %sp) | Count down N in its stack slot. All samples done? | |
157 | jne 0b | |
158 | move.l (44+24, %sp), %a6 | Fetch mem pointer | |
159 | movem.l %d1-%d7/%a0-%a2, (%a6) | Save back mem[] | |
160 | |
161 | .exit: | |
162 | movem.l (%sp), %d2-%d7/%a2-%a6 | |
163 | lea.l (44, %sp), %sp | |
164 | rts | |
165 | |||
diff --git a/apps/codecs/libspeex/nb_celp.c b/apps/codecs/libspeex/nb_celp.c index 0798ffbcf4..5d167a2aa3 100644 --- a/apps/codecs/libspeex/nb_celp.c +++ b/apps/codecs/libspeex/nb_celp.c | |||
@@ -1108,6 +1108,9 @@ void *nb_decoder_init(const SpeexMode *m) | |||
1108 | st->isWideband = 0; | 1108 | st->isWideband = 0; |
1109 | st->highpass_enabled = 1; | 1109 | st->highpass_enabled = 1; |
1110 | 1110 | ||
1111 | #ifdef CPU_COLDFIRE | ||
1112 | coldfire_set_macsr(0); // Integer mode | ||
1113 | #endif | ||
1111 | #ifdef ENABLE_VALGRIND | 1114 | #ifdef ENABLE_VALGRIND |
1112 | VALGRIND_MAKE_READABLE(st, NB_DEC_STACK); | 1115 | VALGRIND_MAKE_READABLE(st, NB_DEC_STACK); |
1113 | #endif | 1116 | #endif |