summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libspeex/filters_cf.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/filters_cf.S')
-rw-r--r--lib/rbcodec/codecs/libspeex/filters_cf.S356
1 files changed, 356 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspeex/filters_cf.S b/lib/rbcodec/codecs/libspeex/filters_cf.S
new file mode 100644
index 0000000000..a48af85095
--- /dev/null
+++ b/lib/rbcodec/codecs/libspeex/filters_cf.S
@@ -0,0 +1,356 @@
1/* Copyright (C) 2007 Thom Johansen */
2/**
3 @file filters_cf.S
4 @brief Various analysis/synthesis filters (Coldfire version)
5*/
6/*
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 - Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 - Neither the name of the Xiph.org Foundation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33*/
34
35 .text
36/* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */
37 .global iir_mem16
38iir_mem16:
39 lea.l (-44, %sp), %sp
40 movem.l %d2-%d7/%a2-%a6, (%sp)
41 movem.l (44+4, %sp), %a3-%a5 | a3 = x, a4 = den, a5 = y
42 movem.l (44+20, %sp), %d0/%a6 | d0 = ord, a6 = mem
43 moveq.l #8, %d1 | Jump to correct routine based on 'ord'
44 cmp.l %d1, %d0
45 jeq .order_8
46 moveq.l #10, %d1
47 cmp.l %d1, %d0
48 jeq .order_10
49 jra .exit
50
51 | TODO: try using direct form 1 filtering
52 | d0 = y[i], d1-d7, a0 = mem[0] .. mem[7]
53 | a3 = x, a4 = den, a5 = y, a6 = temp
54.order_8:
55 movem.l (%a6), %d1-%d7/%a0 | Fetch mem[] array
560:
57 moveq.l #13, %d0
58 add.l #4096, %d1
59 asr.l %d0, %d1 | mem[0] >> 13 with rounding
60 move.w (%a3)+, %d0
61 ext.l %d0
62 add.l %d1, %d0 | Add with x[i]
63 move.l #32767, %d1
64 move.l #65534, %a6
65 add.l %d1, %d0 | Bias result to [-1..65534]
66 cmp.l %a6, %d0 | Now do clip to [0..65534] range
67 jls 2f
68 jpl 1f
69 clr.l %d0 | Clip low
70 .word 0x51fa | trapf.w, shadow next insn
711:
72 move.l %a6, %d0 | Clip high
732:
74 sub.l %d1, %d0 | Bias clipped result back to [-32767..32767]
75 move.w %d0, (%a5)+ | Write result to y[i]
76 neg.l %d0 | msac.w is bugged in gas, do this for now
77 move.l (%a4)+, %a6 | Fetch den[0] and den[1]
78 mac.w %a6u, %d0l, %acc0
79 mac.w %a6l, %d0l, (%a4)+, %a6, %acc1
80 mac.w %a6u, %d0l, %acc2
81 mac.w %a6l, %d0l, (%a4)+, %a6, %acc3
82 movclr.l %acc0, %d1
83 add.l %d2, %d1 | mem[0] = mem[1] - den[0]*y[i]
84 movclr.l %acc1, %d2
85 add.l %d3, %d2 | mem[1] = mem[2] - den[1]*y[i]
86 movclr.l %acc2, %d3
87 add.l %d4, %d3 | mem[2] = mem[3] - den[2]*y[i]
88 movclr.l %acc3, %d4
89 add.l %d5, %d4 | mem[3] = mem[4] - den[3]*y[i]
90 mac.w %a6u, %d0l, %acc0
91 mac.w %a6l, %d0l, (%a4)+, %a6, %acc1
92 mac.w %a6u, %d0l, %acc2
93 mac.w %a6l, %d0l, %acc3
94 lea.l (-16, %a4), %a4 | wrap den pointer back to den[0]
95 movclr.l %acc0, %d5
96 add.l %d6, %d5 | mem[4] = mem[5] - den[4]*y[i]
97 movclr.l %acc1, %d6
98 add.l %d7, %d6 | mem[5] = mem[6] - den[5]*y[i]
99 movclr.l %acc2, %d7
100 add.l %a0, %d7 | mem[6] = mem[7] - den[6]*y[i]
101 movclr.l %acc3, %a0 | mem[7] = -den[7]*y[i]
102 subq.l #1, (44+16, %sp) | Have we done all samples?
103 jne 0b
104 move.l (44+24, %sp), %a6 | Fetch mem pointer
105 movem.l %d1-%d7/%a0, (%a6) | Save back mem[]
106 jra .exit
107
108 | d0 = y[i], d1-d7, a0-a2 = mem[0] .. mem[9]
109 | a3 = x, a4 = den, a5 = y, a6 = temp
110.order_10:
111 movem.l (%a6), %d1-%d7/%a0-%a2 | Fetch mem[] array
1120:
113 moveq.l #13, %d0
114 add.l #4096, %d1
115 asr.l %d0, %d1 | mem[0] >> 13 with rounding
116 move.w (%a3)+, %d0
117 ext.l %d0
118 add.l %d1, %d0 | Add with x[i]
119 move.l #32767, %d1
120 move.l #65534, %a6
121 add.l %d1, %d0 | Bias result to [-1..65534]
122 cmp.l %a6, %d0 | Now do clip to [0..65534] range
123 jls 2f
124 jpl 1f
125 clr.l %d0 | Clip low
126 .word 0x51fa | trapf.w, shadow next insn
1271:
128 move.l %a6, %d0 | Clip high
1292:
130 sub.l %d1, %d0 | Bias clipped result back to [-32767..32767]
131 move.w %d0, (%a5)+ | Write result to y[i]
132 neg.l %d0 | msac.w is bugged in gas, do this for now
133 move.l (%a4)+, %a6 | Fetch den[0] and den[1]
134 mac.w %a6u, %d0l, %acc0
135 mac.w %a6l, %d0l, (%a4)+, %a6, %acc1
136 mac.w %a6u, %d0l, %acc2
137 mac.w %a6l, %d0l, (%a4)+, %a6, %acc3
138 movclr.l %acc0, %d1
139 add.l %d2, %d1 | mem[0] = mem[1] - den[0]*y[i]
140 movclr.l %acc1, %d2
141 add.l %d3, %d2 | mem[1] = mem[2] - den[1]*y[i]
142 movclr.l %acc2, %d3
143 add.l %d4, %d3 | mem[2] = mem[3] - den[2]*y[i]
144 movclr.l %acc3, %d4
145 add.l %d5, %d4 | mem[3] = mem[4] - den[3]*y[i]
146 mac.w %a6u, %d0l, %acc0
147 mac.w %a6l, %d0l, (%a4)+, %a6, %acc1
148 mac.w %a6u, %d0l, %acc2
149 mac.w %a6l, %d0l, (%a4)+, %a6, %acc3
150 lea.l (-20, %a4), %a4 | wrap den pointer back to den[0]
151 movclr.l %acc0, %d5
152 add.l %d6, %d5 | mem[4] = mem[5] - den[4]*y[i]
153 movclr.l %acc1, %d6
154 add.l %d7, %d6 | mem[5] = mem[6] - den[5]*y[i]
155 movclr.l %acc2, %d7
156 add.l %a0, %d7 | mem[6] = mem[7] - den[6]*y[i]
157 movclr.l %acc3, %a0
158 add.l %a1, %a0 | mem[7] = mem[8] - den[7]*y[i]
159 mac.w %a6u, %d0l, %acc0
160 mac.w %a6l, %d0l, %acc1
161 movclr.l %acc0, %a1
162 add.l %a2, %a1 | mem[8] = mem[9] - den[8]*y[i]
163 movclr.l %acc1, %a2 | mem[9] = -den[9]*y[i]
164
165 subq.l #1, (44+16, %sp) | Have we done all samples?
166 jne 0b
167 move.l (44+24, %sp), %a6 | Fetch mem pointer
168 movem.l %d1-%d7/%a0-%a2, (%a6) | Save back mem[]
169
170.exit:
171 movem.l (%sp), %d2-%d7/%a2-%a6
172 lea.l (44, %sp), %sp
173 rts
174
175
176/* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */
/*
 | ColdFire EMAC version of the Speex QMF synthesis filter bank:
 | combines the two half-rate subband signals x1[] (N/2 samples) and
 | x2[] (N/2 samples) through the M-tap filter a[] into N full-rate
 | output samples in y[].
 | The inputs are copied, reversed in 16-bit units, into two scratch
 | buffers of N/2 + M/2 shorts carved out of the stack below the
 | register save area, followed by the saved filter history, so the
 | convolution can run over one contiguous array per subband.  On exit
 | the first M/2 shorts of each buffer are written back as new state.
 | Only the low halfword (big-endian offset +2) of each 32-bit
 | mem1[]/mem2[] entry is read and written.
 | MACSR saturation mode is enabled so the '<<' scaled MAC chain clips
 | accumulator overflow for free (see the comment before the y[]
 | writes).  The trailing 'stack' scratch argument is unused here.
 | Assumes N/2 is even (copy and outer loops step by 2; the code
 | comment below assumes a power of two) and M/2 is even (inner loop
 | is unrolled by two).
*/
177 .global qmf_synth
178qmf_synth:
| Prologue: save callee-saved registers, then load stack arguments.
179 lea.l (-44, %sp), %sp
180 movem.l %d2-%d7/%a2-%a6, (%sp)
181 movem.l (44+4, %sp), %a0-%a3 | a0 = x1, a1 = x2, a2 = a, a3 = y
182 movem.l (44+20, %sp), %d0-%d1/%a4-%a5 | d0 = N, d1 = M, a4 = mem1,a5 = mem2
183 move.l #0x80, %macsr | Enable saturation
184
185 | Comments make more sense when compared to the reference C version
186 move.l %a2, %d6 | Backup a
187 lsr.l #1, %d0 | N2 = N >> 1
188 lsr.l #1, %d1 | M2 = M >> 1
189 move.l %d1, %d7 | Backup M2
190 clr.l %d2
191 sub.l %d0, %d2
192 sub.l %d1, %d2 | d2 = -(N2 + M2)
193 lea.l (%sp, %d2.l*2), %a2 | Alloc two buffers of N2 + M2 shorts
194 lea.l (%a2, %d2.l*2), %a6 | a2 = xx1, a6 = xx2
195 move.l %sp, %d3
196 move.l %a6, %sp | Update sp
197 move.l %d3, -(%sp) | Stack old %sp
198
199 | Backwards copy x1 and x2 arrays to xx1 and xx2, assume N2 is power of two
200 | TODO: these copying loops probably have more potential for optimization
201 lea.l (%a0, %d0.l*2), %a0 | x1 += N2
202 lea.l (%a1, %d0.l*2), %a1 | x2 += N2
203 move.l %d0, %d2 | Loop counter is N2
2040:
| Read a pair of shorts with predecrement and swap the halves, so the
| destination receives the samples in reversed 16-bit order.
205 move.l -(%a0), %d3
206 swap.w %d3
207 move.l %d3, (%a2)+
208 move.l -(%a1), %d3
209 swap.w %d3
210 move.l %d3, (%a6)+
211 subq.l #2, %d2
212 jne 0b
213
214 | Copy alternate members of mem1 and mem2 to last part of xx1 and xx2
215 move.l %d1, %d2 | Loop counter is M2
216 addq.l #2, %a4 | a4 = &mem1[1]
217 addq.l #2, %a5 | a5 = &mem2[1]
218 move.l %a4, %d3 | Backup mem1 and mem2
219 move.l %a5, %d4
2200:
| +2 offset selects the low 16 bits of each big-endian 32-bit entry;
| the +4 step advances one spx_word32_t at a time.
221 move.w (%a4), (%a2)+
222 move.w (%a5), (%a6)+
223 addq.l #4, %a4
224 addq.l #4, %a5
225 subq.l #1, %d2
226 jne 0b
227 move.l %d3, %a4 | a4 = &mem1[1]
228 move.l %d4, %a5 | a5 = &mem2[1]
229
230 clr.l %d2
231 sub.l %d1, %d2 | d2 = -M2
232 lea.l (-4, %a2, %d2.l*2), %a0 | a0 = &xx1[N2 - 2]
233 lea.l (-4, %a6, %d2.l*2), %a1 | a1 = &xx2[N2 - 2]
234 move.l %d6, %a2 | a2 = a
235
236 | Main loop, register usage:
237 | d0 = N2 counter, d1 = M2 counter, d7 = M2 backup
238 | d2 = x10, d3 = x11, d4 = x20, d5 = x21, d6 = [a0, a1]
239 | a0 = xx1, a1 = xx2, a2 = a, a3 = y, a4 = mem1, a5 = mem2
2400: | Outerloop
| Four output samples are built at once in acc0-acc3, each seeded with
| a rounding constant before the MAC chain runs.
241 move.l #32768, %d2 | Rounding constant
242 move.l %d2, %acc0
243 move.l %d2, %acc1
244 move.l %d2, %acc2
245 move.l %d2, %acc3
246 move.w (%a0)+, %d2 | d2 = x10
247 move.w (%a1)+, %d4 | d4 = x20
248 move.l (%a2)+, %d6 | d6 = [a0, a1]
2491: | Innerloop
| Unrolled by two filter-tap pairs; '<<' scales each product left by
| one, and MACSR saturation catches any accumulator overflow.
250 move.w (%a0)+, %d3 | d3 = x11
251 move.w (%a1)+, %d5 | d5 = x21
252 mac.w %d6u, %d3l, <<, %acc0 | acc0 += a0*x11
253 msac.w %d6u, %d5l, <<, %acc0 | acc0 -= a0*x21
254 mac.w %d6l, %d3l, <<, %acc1 | acc1 += a1*x11
255 mac.w %d6l, %d5l, <<, %acc1 | acc1 += a1*x21
256 mac.w %d6u, %d2l, <<, %acc2 | acc2 += a0*x10
257 msac.w %d6u, %d4l, <<, %acc2 | acc2 -= a0*x20
258 mac.w %d6l, %d2l, <<, %acc3 | acc3 += a1*x10
259 mac.w %d6l, %d4l, <<, (%a2)+, %d6, %acc3 | acc3 += a1*x20
260
261 move.w (%a0)+, %d2 | d2 = x10
262 move.w (%a1)+, %d4 | d4 = x20
263 mac.w %d6u, %d2l, <<, %acc0 | acc0 += a0*x10
264 msac.w %d6u, %d4l, <<, %acc0 | acc0 -= a0*x20
265 mac.w %d6l, %d2l, <<, %acc1 | acc1 += a1*x10
266 mac.w %d6l, %d4l, <<, %acc1 | acc1 += a1*x20
267 mac.w %d6u, %d3l, <<, %acc2 | acc2 += a0*x11
268 msac.w %d6u, %d5l, <<, %acc2 | acc2 -= a0*x21
269 mac.w %d6l, %d3l, <<, %acc3 | acc3 += a1*x11
270 mac.w %d6l, %d5l, <<, (%a2)+, %d6, %acc3 | acc3 += a1*x21
271 subq.l #2, %d1
272 jne 1b
273
| Rewind the coefficient and buffer pointers for the next group of
| output samples, then extract and narrow the four results.
274 sub.l %d7, %d1 | d1 = -M2
275 lea.l (-4, %a2, %d1.l*4), %a2 | a2 = &a[0]
276 lea.l (-6, %a0, %d1.l*2), %a0 | a0 = &xx1[N2 - 2 - i]
277 lea.l (-6, %a1, %d1.l*2), %a1 | a1 = &xx2[N2 - 2 - i]
278 neg.l %d1 | d1 = M2
279 movclr.l %acc0, %d2
280 movclr.l %acc1, %d3
281 movclr.l %acc2, %d4
282 movclr.l %acc3, %d5
283 swap.w %d2 | Shift 16 right
284 swap.w %d3
285 swap.w %d4
286 swap.w %d5
287 | Thanks to the extra shift in the mac chain, we get clipping for free.
288 | The clipping will be [-32768..32767], not Speex standard [-32767..32767],
289 | but since qmf_synth() is called so late in the signal chain, it should
290 | work fine.
291 move.w %d2, (%a3)+ | Write results to y[]
292 move.w %d3, (%a3)+
293 move.w %d4, (%a3)+
294 move.w %d5, (%a3)+
295 subq.l #2, %d0
296 jne 0b
297
298 | Copy start of xx1 and xx2 back to alternate mem1 and mem2 entries
299 addq.l #4, %a0 | a0 = &xx1[0]
300 addq.l #4, %a1 | a1 = &xx2[0]
3010:
| Store back to the low halfword of each 32-bit state entry (a4/a5
| still point at offset +2 within each entry); d1 = M2 here.
302 move.w (%a0)+, (%a4)
303 move.w (%a1)+, (%a5)
304 addq.l #4, %a4
305 addq.l #4, %a5
306 subq.l #1, %d1
307 jne 0b
308
| Epilogue: restore MACSR, unwind the scratch allocation via the saved
| %sp, then restore callee-saved registers.
309 move.l #0, %macsr
310 move.l (%sp), %sp
311 movem.l (%sp), %d2-%d7/%a2-%a6
312 lea.l (44, %sp), %sp
313 rts
314
315
316/* void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) */
/*
 | ColdFire EMAC version of Speex signal_mul: scales each 32-bit sample
 | in x[] by 'scale' and stores the result in y[] (in-place operation is
 | fine since each group is read before it is written).
 | The MAC unit is switched to fractional multiply mode; 'scale' is
 | pre-shifted left by 3 and every input left by 9, and each product is
 | post-shifted left by 5 to land in the output format.
 | NOTE(review): the net effect appears to be y[i] = (x[i]*scale) >> 14
 | (Q14 scaling) given fractional-mode accumulation -- confirm against
 | the C signal_mul in filters.c and the ColdFire EMAC semantics.
 | Processes four samples per iteration; assumes len is a nonzero
 | multiple of 4.  Clobbers %d0-%d1/%a0-%a1 and acc0-acc3; restores
 | %d2-%d6 and leaves MACSR back in integer mode.
*/
317 .global signal_mul
318signal_mul:
| Prologue: save %d2-%d6, then load stack arguments.
319 lea.l (-20, %sp), %sp
320 movem.l %d2-%d6, (%sp)
321 movem.l (20+4, %sp), %a0-%a1 | a0 = x, a1 = y
322 movem.l (20+12, %sp), %d0-%d1 | d0 = scale, d1 = len
323 moveq.l #0x20, %d6
324 move.l %d6, %macsr | Set MAC unit to fractional mode
325 asl.l #3, %d0 | Pre-scale 'scale'
326 moveq.l #9, %d6
3270:
328 movem.l (%a0), %d2-%d5 | Fetch input
329 asl.l %d6, %d2 | Shift each value 9 to the left
330 asl.l %d6, %d3
331 asl.l %d6, %d4
332 asl.l %d6, %d5
333 mac.l %d2, %d0, %acc0 | Do multiplies
334 mac.l %d3, %d0, %acc1
335 mac.l %d4, %d0, %acc2
336 mac.l %d5, %d0, %acc3
337 lea.l (16, %a0), %a0
338 movclr.l %acc0, %d2
339 movclr.l %acc1, %d3
340 movclr.l %acc2, %d4
341 movclr.l %acc3, %d5
342 asl.l #5, %d2 | Adjust to proper format
343 asl.l #5, %d3
344 asl.l #5, %d4
345 asl.l #5, %d5
346 movem.l %d2-%d5, (%a1) | Save output
347 lea.l (16, %a1), %a1
348 subq.l #4, %d1
349 jne 0b
350
| Epilogue: back to integer MAC mode, restore registers, return.
351 clr.l %d0
352 move.l %d0, %macsr | Set MAC unit back to integer mode
353 movem.l (%sp), %d2-%d6
354 lea.l (20, %sp), %sp
355 rts
356