diff options
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/filters_cf.S')
-rw-r--r-- | lib/rbcodec/codecs/libspeex/filters_cf.S | 356 |
1 files changed, 356 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspeex/filters_cf.S b/lib/rbcodec/codecs/libspeex/filters_cf.S new file mode 100644 index 0000000000..a48af85095 --- /dev/null +++ b/lib/rbcodec/codecs/libspeex/filters_cf.S | |||
@@ -0,0 +1,356 @@ | |||
1 | /* Copyright (C) 2007 Thom Johansen */ | ||
2 | /** | ||
3 | @file filters_cf.S | ||
4 | @brief Various analysis/synthesis filters (Coldfire version) | ||
5 | */ | ||
6 | /* | ||
7 | Redistribution and use in source and binary forms, with or without | ||
8 | modification, are permitted provided that the following conditions | ||
9 | are met: | ||
10 | |||
11 | - Redistributions of source code must retain the above copyright | ||
12 | notice, this list of conditions and the following disclaimer. | ||
13 | |||
14 | - Redistributions in binary form must reproduce the above copyright | ||
15 | notice, this list of conditions and the following disclaimer in the | ||
16 | documentation and/or other materials provided with the distribution. | ||
17 | |||
18 | - Neither the name of the Xiph.org Foundation nor the names of its | ||
19 | contributors may be used to endorse or promote products derived from | ||
20 | this software without specific prior written permission. | ||
21 | |||
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
23 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR | ||
26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
29 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | */ | ||
34 | |||
35 | .text | ||
36 | /* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */ | ||
/*
 * iir_mem16: IIR filter with two hand-unrolled paths, one for ord == 8 and
 * one for ord == 10. Any other 'ord' branches straight to .exit, leaving
 * y[] and mem[] untouched.
 *
 * Stack arguments, as offsets from %sp after the 44-byte register save:
 *   44+4  x, 44+8 den, 44+12 y    loaded into a3, a4, a5
 *   44+16 N                       used in place as the sample counter
 *   44+20 ord, 44+24 mem          loaded into d0, a6
 * The 'stack' parameter of the C prototype is never read.
 *
 * Work done per sample:
 *   y[i]       = clip(x[i] + ((mem[0] + 4096) >> 13)) to [-32767..32767]
 *   mem[j]     = mem[j+1] - den[j]*y[i]      for j < ord-1
 *   mem[ord-1] = -den[ord-1]*y[i]
 * x, y and den are accessed as 16-bit values, mem as 32-bit values.
 *
 * Registers d2-d7/a2-a6 are saved and restored; d0, d1, a0 and a1 are
 * overwritten without being saved.
 *
 * NOTE(review): both loops test N only after a sample has been processed,
 * so N == 0 is not handled. Confirm that callers never pass it.
 * NOTE(review): MACSR is not written in this routine; the mac.w results
 * presumably rely on the MAC unit already being in integer mode. TODO confirm.
 */
37 | .global iir_mem16 | ||
38 | iir_mem16: | ||
/* Save area is 11 registers * 4 bytes = 44 bytes, hence the 44+n offsets. */
39 | lea.l (-44, %sp), %sp | ||
40 | movem.l %d2-%d7/%a2-%a6, (%sp) | ||
41 | movem.l (44+4, %sp), %a3-%a5 | a3 = x, a4 = den, a5 = y | ||
42 | movem.l (44+20, %sp), %d0/%a6 | d0 = ord, a6 = mem | ||
43 | moveq.l #8, %d1 | Jump to correct routine based on 'ord' | ||
44 | cmp.l %d1, %d0 | ||
45 | jeq .order_8 | ||
46 | moveq.l #10, %d1 | ||
47 | cmp.l %d1, %d0 | ||
48 | jeq .order_10 | ||
/* Orders other than 8 and 10 are not implemented: return without output. */
49 | jra .exit | ||
50 | |||
51 | | TODO: try using direct form 1 filtering | ||
52 | | d0 = y[i], d1-d7, a0 = mem[0] .. mem[7] | ||
53 | | a3 = x, a4 = den, a5 = y, a6 = temp | ||
54 | .order_8: | ||
/* The whole filter state lives in registers for the duration of the loop. */
55 | movem.l (%a6), %d1-%d7/%a0 | Fetch mem[] array | ||
56 | 0: | ||
57 | moveq.l #13, %d0 | ||
58 | add.l #4096, %d1 | ||
59 | asr.l %d0, %d1 | mem[0] >> 13 with rounding | ||
60 | move.w (%a3)+, %d0 | ||
61 | ext.l %d0 | ||
62 | add.l %d1, %d0 | Add with x[i] | ||
/*
 * Clip with a single unsigned compare: after adding 32767 the legal range
 * is [0..65534], so 'jls' (unsigned lower-or-same) accepts every in-range
 * value, because a negative value looks huge when treated as unsigned.
 * Whatever is left is split by the sign of (d0 - 65534): plus means too
 * high, minus means too low.
 */
63 | move.l #32767, %d1 | ||
64 | move.l #65534, %a6 | ||
65 | add.l %d1, %d0 | Bias result to [-1..65534] | ||
66 | cmp.l %a6, %d0 | Now do clip to [0..65534] range | ||
67 | jls 2f | ||
68 | jpl 1f | ||
69 | clr.l %d0 | Clip low | ||
/*
 * Hand-encoded opcode that swallows the following 16-bit word, so the
 * clip-low path falls through to label 2 without executing the move at
 * label 1.
 * NOTE(review): this presumably requires 'move.l %a6, %d0' to remain a
 * single-word instruction. Confirm before editing the line at label 1.
 */
70 | .word 0x51fa | trapf.w, shadow next insn | ||
71 | 1: | ||
72 | move.l %a6, %d0 | Clip high | ||
73 | 2: | ||
74 | sub.l %d1, %d0 | Bias clipped result back to [-32767..32767] | ||
75 | move.w %d0, (%a5)+ | Write result to y[i] | ||
/* d0 becomes -y[i], so the plain mac.w below accumulates -den[j]*y[i]. */
76 | neg.l %d0 | msac.w is bugged in gas, do this for now | ||
/*
 * a6 carries two 16-bit coefficients (upper and lower half). The mac.w
 * forms that end in '(%a4)+, %a6' also fetch the next coefficient pair
 * into a6 while multiplying.
 */
77 | move.l (%a4)+, %a6 | Fetch den[0] and den[1] | ||
78 | mac.w %a6u, %d0l, %acc0 | ||
79 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc1 | ||
80 | mac.w %a6u, %d0l, %acc2 | ||
81 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc3 | ||
/* Each movclr reads and clears an accumulator; the add shifts mem[] down. */
82 | movclr.l %acc0, %d1 | ||
83 | add.l %d2, %d1 | mem[0] = mem[1] - den[0]*y[i] | ||
84 | movclr.l %acc1, %d2 | ||
85 | add.l %d3, %d2 | mem[1] = mem[2] - den[1]*y[i] | ||
86 | movclr.l %acc2, %d3 | ||
87 | add.l %d4, %d3 | mem[2] = mem[3] - den[2]*y[i] | ||
88 | movclr.l %acc3, %d4 | ||
89 | add.l %d5, %d4 | mem[3] = mem[4] - den[3]*y[i] | ||
90 | mac.w %a6u, %d0l, %acc0 | ||
91 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc1 | ||
92 | mac.w %a6u, %d0l, %acc2 | ||
93 | mac.w %a6l, %d0l, %acc3 | ||
/* Four longword fetches were made from den this pass: 4 * 4 = 16 bytes. */
94 | lea.l (-16, %a4), %a4 | wrap den pointer back to den[0] | ||
95 | movclr.l %acc0, %d5 | ||
96 | add.l %d6, %d5 | mem[4] = mem[5] - den[4]*y[i] | ||
97 | movclr.l %acc1, %d6 | ||
98 | add.l %d7, %d6 | mem[5] = mem[6] - den[5]*y[i] | ||
99 | movclr.l %acc2, %d7 | ||
100 | add.l %a0, %d7 | mem[6] = mem[7] - den[6]*y[i] | ||
101 | movclr.l %acc3, %a0 | mem[7] = -den[7]*y[i] | ||
/* The sample counter is the N argument slot itself, decremented in place. */
102 | subq.l #1, (44+16, %sp) | Have we done all samples? | ||
103 | jne 0b | ||
/* a6 was reused as scratch in the loop, so reload mem from the arguments. */
104 | move.l (44+24, %sp), %a6 | Fetch mem pointer | ||
105 | movem.l %d1-%d7/%a0, (%a6) | Save back mem[] | ||
106 | jra .exit | ||
107 | |||
108 | | d0 = y[i], d1-d7, a0-a2 = mem[0] .. mem[9] | ||
109 | | a3 = x, a4 = den, a5 = y, a6 = temp | ||
/*
 * Same per-sample sequence as .order_8, extended by two state words:
 * mem[8] is held in a1 and mem[9] in a2.
 */
110 | .order_10: | ||
111 | movem.l (%a6), %d1-%d7/%a0-%a2 | Fetch mem[] array | ||
112 | 0: | ||
113 | moveq.l #13, %d0 | ||
114 | add.l #4096, %d1 | ||
115 | asr.l %d0, %d1 | mem[0] >> 13 with rounding | ||
116 | move.w (%a3)+, %d0 | ||
117 | ext.l %d0 | ||
118 | add.l %d1, %d0 | Add with x[i] | ||
/* Bias, unsigned range test, then split the out-of-range cases by sign. */
119 | move.l #32767, %d1 | ||
120 | move.l #65534, %a6 | ||
121 | add.l %d1, %d0 | Bias result to [-1..65534] | ||
122 | cmp.l %a6, %d0 | Now do clip to [0..65534] range | ||
123 | jls 2f | ||
124 | jpl 1f | ||
125 | clr.l %d0 | Clip low | ||
/* Hand-encoded opcode: skips the one instruction at label 1 (clip-low path). */
126 | .word 0x51fa | trapf.w, shadow next insn | ||
127 | 1: | ||
128 | move.l %a6, %d0 | Clip high | ||
129 | 2: | ||
130 | sub.l %d1, %d0 | Bias clipped result back to [-32767..32767] | ||
131 | move.w %d0, (%a5)+ | Write result to y[i] | ||
/* d0 becomes -y[i], so the plain mac.w below accumulates -den[j]*y[i]. */
132 | neg.l %d0 | msac.w is bugged in gas, do this for now | ||
133 | move.l (%a4)+, %a6 | Fetch den[0] and den[1] | ||
134 | mac.w %a6u, %d0l, %acc0 | ||
135 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc1 | ||
136 | mac.w %a6u, %d0l, %acc2 | ||
137 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc3 | ||
138 | movclr.l %acc0, %d1 | ||
139 | add.l %d2, %d1 | mem[0] = mem[1] - den[0]*y[i] | ||
140 | movclr.l %acc1, %d2 | ||
141 | add.l %d3, %d2 | mem[1] = mem[2] - den[1]*y[i] | ||
142 | movclr.l %acc2, %d3 | ||
143 | add.l %d4, %d3 | mem[2] = mem[3] - den[2]*y[i] | ||
144 | movclr.l %acc3, %d4 | ||
145 | add.l %d5, %d4 | mem[3] = mem[4] - den[3]*y[i] | ||
146 | mac.w %a6u, %d0l, %acc0 | ||
147 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc1 | ||
148 | mac.w %a6u, %d0l, %acc2 | ||
149 | mac.w %a6l, %d0l, (%a4)+, %a6, %acc3 | ||
/* Five longword fetches were made from den this pass: 5 * 4 = 20 bytes. */
150 | lea.l (-20, %a4), %a4 | wrap den pointer back to den[0] | ||
151 | movclr.l %acc0, %d5 | ||
152 | add.l %d6, %d5 | mem[4] = mem[5] - den[4]*y[i] | ||
153 | movclr.l %acc1, %d6 | ||
154 | add.l %d7, %d6 | mem[5] = mem[6] - den[5]*y[i] | ||
155 | movclr.l %acc2, %d7 | ||
156 | add.l %a0, %d7 | mem[6] = mem[7] - den[6]*y[i] | ||
157 | movclr.l %acc3, %a0 | ||
158 | add.l %a1, %a0 | mem[7] = mem[8] - den[7]*y[i] | ||
/* a6 still holds the last pair fetched above: den[8] and den[9]. */
159 | mac.w %a6u, %d0l, %acc0 | ||
160 | mac.w %a6l, %d0l, %acc1 | ||
161 | movclr.l %acc0, %a1 | ||
162 | add.l %a2, %a1 | mem[8] = mem[9] - den[8]*y[i] | ||
163 | movclr.l %acc1, %a2 | mem[9] = -den[9]*y[i] | ||
164 | |||
/* The sample counter is the N argument slot itself, decremented in place. */
165 | subq.l #1, (44+16, %sp) | Have we done all samples? | ||
166 | jne 0b | ||
/* a6 was reused as scratch in the loop, so reload mem from the arguments. */
167 | move.l (44+24, %sp), %a6 | Fetch mem pointer | ||
168 | movem.l %d1-%d7/%a0-%a2, (%a6) | Save back mem[] | ||
169 | |||
/* Shared epilogue: restore d2-d7/a2-a6 and release the 44-byte save area. */
170 | .exit: | ||
171 | movem.l (%sp), %d2-%d7/%a2-%a6 | ||
172 | lea.l (44, %sp), %sp | ||
173 | rts | ||
174 | |||
175 | |||
176 | /* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */ | ||
/*
 * qmf_synth: combines the two input bands x1 and x2 into y using the
 * coefficient array a, producing four 16-bit output samples per outer
 * loop pass.
 *
 * Stack arguments, as offsets from %sp after the 44-byte register save:
 *   44+4  x1, x2, a, y            loaded into a0, a1, a2, a3
 *   44+20 N, M, mem1, mem2        loaded into d0, d1, a4, a5
 * The 'stack' parameter of the C prototype is never read; the two work
 * buffers are carved out of the hardware stack instead.
 *
 * Working values: N2 = N >> 1 and M2 = M >> 1. Two temporary arrays xx1
 * and xx2 of N2 + M2 16-bit entries each are placed below the saved
 * registers. Each starts with its input array in reversed order and ends
 * with M2 history words taken from mem1 / mem2.
 *
 * Loop preconditions visible in the code (every loop tests at the bottom
 * with 'jne'):
 *   - the x1/x2 copy loop and the outer loop count N2 down by 2
 *   - the inner loop counts M2 down by 2
 *   - the two mem copy loops count M2 down by 1
 * so N2 and M2 must both be nonzero and even, otherwise a counter steps
 * past zero instead of stopping.
 *
 * mem1 and mem2 are accessed as 16-bit words at byte offsets 2, 6, 10, ...
 * from the pointers passed in.
 * NOTE(review): the prototype above declares them as spx_word32_t pointers.
 * Confirm that this 16-bit view matches what the caller expects.
 *
 * MACSR is set to 0x80 for the duration of the routine and written back as
 * 0 on exit; the value it had on entry is not preserved.
 * Registers d2-d7/a2-a6 are saved and restored.
 */
177 | .global qmf_synth | ||
178 | qmf_synth: | ||
179 | lea.l (-44, %sp), %sp | ||
180 | movem.l %d2-%d7/%a2-%a6, (%sp) | ||
181 | movem.l (44+4, %sp), %a0-%a3 | a0 = x1, a1 = x2, a2 = a, a3 = y | ||
182 | movem.l (44+20, %sp), %d0-%d1/%a4-%a5 | d0 = N, d1 = M, a4 = mem1,a5 = mem2 | ||
183 | move.l #0x80, %macsr | Enable saturation | ||
184 | |||
185 | | Comments make more sense when compared to the reference C version | ||
/* a2 is about to be reused for xx1, so the coefficient pointer goes to d6. */
186 | move.l %a2, %d6 | Backup a | ||
187 | lsr.l #1, %d0 | N2 = N >> 1 | ||
188 | lsr.l #1, %d1 | M2 = M >> 1 | ||
189 | move.l %d1, %d7 | Backup M2 | ||
190 | clr.l %d2 | ||
191 | sub.l %d0, %d2 | ||
192 | sub.l %d1, %d2 | d2 = -(N2 + M2) | ||
/*
 * d2 is negative, so both lea's step downwards: xx1 (a2) ends exactly at
 * the current sp and xx2 (a6) ends exactly where xx1 begins.
 */
193 | lea.l (%sp, %d2.l*2), %a2 | Alloc two buffers of N2 + M2 shorts | ||
194 | lea.l (%a2, %d2.l*2), %a6 | a2 = xx1, a6 = xx2 | ||
/*
 * sp is moved to the start of xx2 and the previous sp is pushed just below
 * it; 'move.l (%sp), %sp' at the end of the routine undoes both steps.
 */
195 | move.l %sp, %d3 | ||
196 | move.l %a6, %sp | Update sp | ||
197 | move.l %d3, -(%sp) | Stack old %sp | ||
198 | |||
199 | | Backwards copy x1 and x2 arrays to xx1 and xx2, assume N2 is power of two | ||
200 | | TODO: these copying loops probably have more potential for optimization | ||
201 | lea.l (%a0, %d0.l*2), %a0 | x1 += N2 | ||
202 | lea.l (%a1, %d0.l*2), %a1 | x2 += N2 | ||
203 | move.l %d0, %d2 | Loop counter is N2 | ||
/*
 * Each pass moves one longword, i.e. two 16-bit samples, per array. The
 * source is read back to front and swap.w exchanges the two halves, giving
 * xx1[i] = x1[N2-1-i] and likewise for xx2.
 */
204 | 0: | ||
205 | move.l -(%a0), %d3 | ||
206 | swap.w %d3 | ||
207 | move.l %d3, (%a2)+ | ||
208 | move.l -(%a1), %d3 | ||
209 | swap.w %d3 | ||
210 | move.l %d3, (%a6)+ | ||
211 | subq.l #2, %d2 | ||
212 | jne 0b | ||
213 | |||
214 | | Copy alternate members of mem1 and mem2 to last part of xx1 and xx2 | ||
215 | move.l %d1, %d2 | Loop counter is M2 | ||
/* Start 2 bytes into each mem array, then step 4 bytes per entry. */
216 | addq.l #2, %a4 | a4 = &mem1[1] | ||
217 | addq.l #2, %a5 | a5 = &mem2[1] | ||
218 | move.l %a4, %d3 | Backup mem1 and mem2 | ||
219 | move.l %a5, %d4 | ||
220 | 0: | ||
221 | move.w (%a4), (%a2)+ | ||
222 | move.w (%a5), (%a6)+ | ||
223 | addq.l #4, %a4 | ||
224 | addq.l #4, %a5 | ||
225 | subq.l #1, %d2 | ||
226 | jne 0b | ||
227 | move.l %d3, %a4 | a4 = &mem1[1] | ||
228 | move.l %d4, %a5 | a5 = &mem2[1] | ||
229 | |||
/*
 * a2 and a6 now point one past the end of xx1 and xx2. Stepping back M2
 * entries and a further 4 bytes lands on entry N2 - 2 of each buffer.
 */
230 | clr.l %d2 | ||
231 | sub.l %d1, %d2 | d2 = -M2 | ||
232 | lea.l (-4, %a2, %d2.l*2), %a0 | a0 = &xx1[N2 - 2] | ||
233 | lea.l (-4, %a6, %d2.l*2), %a1 | a1 = &xx2[N2 - 2] | ||
234 | move.l %d6, %a2 | a2 = a | ||
235 | |||
236 | | Main loop, register usage: | ||
237 | | d0 = N2 counter, d1 = M2 counter, d7 = M2 backup | ||
238 | | d2 = x10, d3 = x11, d4 = x20, d5 = x21, d6 = [a0, a1] | ||
239 | | a0 = xx1, a1 = xx2, a2 = a, a3 = y, a4 = mem1, a5 = mem2 | ||
/*
 * Each of the four accumulators builds one output sample. They are
 * preloaded with 32768 so that keeping only the upper 16 bits afterwards
 * rounds rather than truncates.
 */
240 | 0: | Outerloop | ||
241 | move.l #32768, %d2 | Rounding constant | ||
242 | move.l %d2, %acc0 | ||
243 | move.l %d2, %acc1 | ||
244 | move.l %d2, %acc2 | ||
245 | move.l %d2, %acc3 | ||
246 | move.w (%a0)+, %d2 | d2 = x10 | ||
247 | move.w (%a1)+, %d4 | d4 = x20 | ||
248 | move.l (%a2)+, %d6 | d6 = [a0, a1] | ||
/*
 * The inner loop is unrolled twice. The second half refills d2/d4 instead
 * of d3/d5, so the "x10/x20" and "x11/x21" roles swap between halves. The
 * last mac.w of each half also loads the next coefficient pair into d6.
 */
249 | 1: | Innerloop | ||
250 | move.w (%a0)+, %d3 | d3 = x11 | ||
251 | move.w (%a1)+, %d5 | d5 = x21 | ||
252 | mac.w %d6u, %d3l, <<, %acc0 | acc0 += a0*x11 | ||
253 | msac.w %d6u, %d5l, <<, %acc0 | acc0 -= a0*x21 | ||
254 | mac.w %d6l, %d3l, <<, %acc1 | acc1 += a1*x11 | ||
255 | mac.w %d6l, %d5l, <<, %acc1 | acc1 += a1*x21 | ||
256 | mac.w %d6u, %d2l, <<, %acc2 | acc2 += a0*x10 | ||
257 | msac.w %d6u, %d4l, <<, %acc2 | acc2 -= a0*x20 | ||
258 | mac.w %d6l, %d2l, <<, %acc3 | acc3 += a1*x10 | ||
259 | mac.w %d6l, %d4l, <<, (%a2)+, %d6, %acc3 | acc3 += a1*x20 | ||
260 | |||
261 | move.w (%a0)+, %d2 | d2 = x10 | ||
262 | move.w (%a1)+, %d4 | d4 = x20 | ||
263 | mac.w %d6u, %d2l, <<, %acc0 | acc0 += a0*x10 | ||
264 | msac.w %d6u, %d4l, <<, %acc0 | acc0 -= a0*x20 | ||
265 | mac.w %d6l, %d2l, <<, %acc1 | acc1 += a1*x10 | ||
266 | mac.w %d6l, %d4l, <<, %acc1 | acc1 += a1*x20 | ||
267 | mac.w %d6u, %d3l, <<, %acc2 | acc2 += a0*x11 | ||
268 | msac.w %d6u, %d5l, <<, %acc2 | acc2 -= a0*x21 | ||
269 | mac.w %d6l, %d3l, <<, %acc3 | acc3 += a1*x11 | ||
270 | mac.w %d6l, %d5l, <<, (%a2)+, %d6, %acc3 | acc3 += a1*x21 | ||
271 | subq.l #2, %d1 | ||
272 | jne 1b | ||
273 | |||
/*
 * d1 is 0 here, so subtracting the backup yields -M2. It is used as a
 * negative index to rewind a2 to the first coefficient and to leave a0/a1
 * 4 bytes (two samples) before where this outer pass started.
 */
274 | sub.l %d7, %d1 | d1 = -M2 | ||
275 | lea.l (-4, %a2, %d1.l*4), %a2 | a2 = &a[0] | ||
276 | lea.l (-6, %a0, %d1.l*2), %a0 | a0 = &xx1[N2 - 2 - i] | ||
277 | lea.l (-6, %a1, %d1.l*2), %a1 | a1 = &xx2[N2 - 2 - i] | ||
278 | neg.l %d1 | d1 = M2 | ||
279 | movclr.l %acc0, %d2 | ||
280 | movclr.l %acc1, %d3 | ||
281 | movclr.l %acc2, %d4 | ||
282 | movclr.l %acc3, %d5 | ||
/* swap.w brings the upper 16 bits down so move.w can store them. */
283 | swap.w %d2 | Shift 16 right | ||
284 | swap.w %d3 | ||
285 | swap.w %d4 | ||
286 | swap.w %d5 | ||
287 | | Thanks to the extra shift in the mac chain, we get clipping for free. | ||
288 | | The clipping will be [-32768..32767], not Speex standard [-32767..32767], | ||
289 | | but since qmf_synth() is called so late in the signal chain, it should | ||
290 | | work fine. | ||
291 | move.w %d2, (%a3)+ | Write results to y[] | ||
292 | move.w %d3, (%a3)+ | ||
293 | move.w %d4, (%a3)+ | ||
294 | move.w %d5, (%a3)+ | ||
295 | subq.l #2, %d0 | ||
296 | jne 0b | ||
297 | |||
298 | | Copy start of xx1 and xx2 back to alternate mem1 and mem2 entries | ||
/*
 * The outer loop leaves a0/a1 two entries before the start of each buffer.
 * d1 was restored to M2 above and serves as the loop counter; a4/a5 still
 * point 2 bytes into mem1/mem2.
 */
299 | addq.l #4, %a0 | a0 = &xx1[0] | ||
300 | addq.l #4, %a1 | a1 = &xx2[0] | ||
301 | 0: | ||
302 | move.w (%a0)+, (%a4) | ||
303 | move.w (%a1)+, (%a5) | ||
304 | addq.l #4, %a4 | ||
305 | addq.l #4, %a5 | ||
306 | subq.l #1, %d1 | ||
307 | jne 0b | ||
308 | |||
/*
 * Clear MACSR, reload the sp value that was pushed before the work buffers
 * were set up (which releases them), then restore the saved registers.
 */
309 | move.l #0, %macsr | ||
310 | move.l (%sp), %sp | ||
311 | movem.l (%sp), %d2-%d7/%a2-%a6 | ||
312 | lea.l (44, %sp), %sp | ||
313 | rts | ||
314 | |||
315 | |||
316 | /* void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) */ | ||
/*
 * signal_mul: multiplies every 32-bit element of x by 'scale' and stores
 * the result in y, four elements per loop pass.
 *
 * Stack arguments, as offsets from %sp after the 20-byte register save:
 *   20+4  x, y                    loaded into a0, a1
 *   20+12 scale, len              loaded into d0, d1
 *
 * Arithmetic per element: the input is shifted left by 9, multiplied with
 * (scale << 3) by mac.l while MACSR is 0x20, read back with movclr.l and
 * shifted left by 5.
 *
 * The loop counts len down by 4 and tests at the bottom with 'jne', so len
 * must be a nonzero multiple of 4; any other value steps past zero instead
 * of stopping.
 *
 * MACSR is written back as 0 on exit; the value it had on entry is not
 * preserved. Registers d2-d6 are saved and restored.
 */
317 | .global signal_mul | ||
318 | signal_mul: | ||
/* Save area is 5 registers * 4 bytes = 20 bytes, hence the 20+n offsets. */
319 | lea.l (-20, %sp), %sp | ||
320 | movem.l %d2-%d6, (%sp) | ||
321 | movem.l (20+4, %sp), %a0-%a1 | a0 = x, a1 = y | ||
322 | movem.l (20+12, %sp), %d0-%d1 | d0 = scale, d1 = len | ||
/* d6 first carries the MACSR value, then becomes the shift count 9. */
323 | moveq.l #0x20, %d6 | ||
324 | move.l %d6, %macsr | Set MAC unit to fractional mode | ||
325 | asl.l #3, %d0 | Pre-scale 'scale' | ||
326 | moveq.l #9, %d6 | ||
327 | 0: | ||
328 | movem.l (%a0), %d2-%d5 | Fetch input | ||
329 | asl.l %d6, %d2 | Shift each value 9 to the left | ||
330 | asl.l %d6, %d3 | ||
331 | asl.l %d6, %d4 | ||
332 | asl.l %d6, %d5 | ||
/* One accumulator per element; each is read and cleared by movclr below. */
333 | mac.l %d2, %d0, %acc0 | Do multiplies | ||
334 | mac.l %d3, %d0, %acc1 | ||
335 | mac.l %d4, %d0, %acc2 | ||
336 | mac.l %d5, %d0, %acc3 | ||
/* The movem above does not advance a0, so step it past the 4 longwords. */
337 | lea.l (16, %a0), %a0 | ||
338 | movclr.l %acc0, %d2 | ||
339 | movclr.l %acc1, %d3 | ||
340 | movclr.l %acc2, %d4 | ||
341 | movclr.l %acc3, %d5 | ||
342 | asl.l #5, %d2 | Adjust to proper format | ||
343 | asl.l #5, %d3 | ||
344 | asl.l #5, %d4 | ||
345 | asl.l #5, %d5 | ||
346 | movem.l %d2-%d5, (%a1) | Save output | ||
347 | lea.l (16, %a1), %a1 | ||
348 | subq.l #4, %d1 | ||
349 | jne 0b | ||
350 | |||
/* scale is no longer needed, so d0 is reused to write 0 into MACSR. */
351 | clr.l %d0 | ||
352 | move.l %d0, %macsr | Set MAC unit back to integer mode | ||
353 | movem.l (%sp), %d2-%d6 | ||
354 | lea.l (20, %sp), %sp | ||
355 | rts | ||
356 | |||