diff options
Diffstat (limited to 'apps/codecs/libspeex')
-rw-r--r-- | apps/codecs/libspeex/filters.c | 3 | ||||
-rw-r--r-- | apps/codecs/libspeex/filters_cf.S | 182 |
2 files changed, 168 insertions, 17 deletions
diff --git a/apps/codecs/libspeex/filters.c b/apps/codecs/libspeex/filters.c index 02f93a27b1..e64f087a5d 100644 --- a/apps/codecs/libspeex/filters.c +++ b/apps/codecs/libspeex/filters.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include "filters_arm4.h" | 47 | #include "filters_arm4.h" |
48 | #elif defined (COLDFIRE_ASM) | 48 | #elif defined (COLDFIRE_ASM) |
49 | #define OVERRIDE_IIR_MEM16 | 49 | #define OVERRIDE_IIR_MEM16 |
50 | #define OVERRIDE_QMF_SYNTH | ||
50 | #elif defined (BFIN_ASM) | 51 | #elif defined (BFIN_ASM) |
51 | #include "filters_bfin.h" | 52 | #include "filters_bfin.h" |
52 | #endif | 53 | #endif |
@@ -475,6 +476,7 @@ void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *y1 | |||
475 | } | 476 | } |
476 | } | 477 | } |
477 | 478 | ||
479 | #ifndef OVERRIDE_QMF_SYNTH | ||
478 | /* Re-synthesise a signal from the QMF low-band and high-band signals */ | 480 | /* Re-synthesise a signal from the QMF low-band and high-band signals */ |
479 | void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) | 481 | void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) |
480 | /* assumptions: | 482 | /* assumptions: |
@@ -566,6 +568,7 @@ void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_ | |||
566 | for (i = 0; i < M2; i++) | 568 | for (i = 0; i < M2; i++) |
567 | mem2[2*i+1] = xx2[i]; | 569 | mem2[2*i+1] = xx2[i]; |
568 | } | 570 | } |
571 | #endif | ||
569 | 572 | ||
570 | #ifdef FIXED_POINT | 573 | #ifdef FIXED_POINT |
571 | #if 0 | 574 | #if 0 |
diff --git a/apps/codecs/libspeex/filters_cf.S b/apps/codecs/libspeex/filters_cf.S index 579af11581..dd650844c8 100644 --- a/apps/codecs/libspeex/filters_cf.S +++ b/apps/codecs/libspeex/filters_cf.S | |||
@@ -31,7 +31,6 @@ | |||
31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
33 | */ | 33 | */ |
34 | |||
35 | .text | 34 | .text |
36 | /* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */ | 35 | /* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */ |
37 | .global iir_mem16 | 36 | .global iir_mem16 |
@@ -59,14 +58,18 @@ iir_mem16: | |||
59 | move.w (%a3)+, %d0 | 58 | move.w (%a3)+, %d0 |
60 | ext.l %d0 | 59 | ext.l %d0 |
61 | add.l %d1, %d0 | Add with x[i] | 60 | add.l %d1, %d0 | Add with x[i] |
62 | move.l #32768, %d1 | 61 | move.l #32767, %d1 |
63 | add.l %d1, %d0 | Bias result to [0..65535] | 62 | move.l #65534, %a6 |
64 | cmp.l #65535, %d0 | Clip to [0..65535] range | 63 | add.l %d1, %d0 | Bias result to [-1..65534] |
65 | jle 1f | 64 | cmp.l %a6, %d0 | Now do clip to [0..65534] range |
66 | spl.b %d0 | 65 | jls 2f |
67 | ext.w %d0 | 66 | jpl 1f |
67 | clr.l %d0 | Clip low | ||
68 | .word 0x51fa | trapf.w, shadow next insn | ||
68 | 1: | 69 | 1: |
69 | sub.l %d1, %d0 | Bias clipped result back to [-32768..32767] | 70 | move.l %a6, %d0 | Clip high |
71 | 2: | ||
72 | sub.l %d1, %d0 | Bias clipped result back to [-32767..32767] | ||
70 | neg.l %d0 | msac.w is bugged in gas, do this for now | 73 | neg.l %d0 | msac.w is bugged in gas, do this for now |
71 | move.w %d0, (%a5)+ | Write result to y[i] | 74 | move.w %d0, (%a5)+ | Write result to y[i] |
72 | move.l (%a4)+, %a6 | Fetch den[0] and den[1] | 75 | move.l (%a4)+, %a6 | Fetch den[0] and den[1] |
@@ -111,14 +114,18 @@ iir_mem16: | |||
111 | move.w (%a3)+, %d0 | 114 | move.w (%a3)+, %d0 |
112 | ext.l %d0 | 115 | ext.l %d0 |
113 | add.l %d1, %d0 | Add with x[i] | 116 | add.l %d1, %d0 | Add with x[i] |
114 | move.l #32768, %d1 | 117 | move.l #32767, %d1 |
115 | add.l %d1, %d0 | Bias result to [0..65535] | 118 | move.l #65534, %a6 |
116 | cmp.l #65535, %d0 | Clip to [0..65535] range | 119 | add.l %d1, %d0 | Bias result to [-1..65534] |
117 | jle 1f | 120 | cmp.l %a6, %d0 | Now do clip to [0..65534] range |
118 | spl.b %d0 | 121 | jls 2f |
119 | ext.w %d0 | 122 | jpl 1f |
123 | clr.l %d0 | Clip low | ||
124 | .word 0x51fa | trapf.w, shadow next insn | ||
120 | 1: | 125 | 1: |
121 | sub.l %d1, %d0 | Bias clipped result back to [-32768..32767] | 126 | move.l %a6, %d0 | Clip high |
127 | 2: | ||
128 | sub.l %d1, %d0 | Bias clipped result back to [-32767..32767] | ||
122 | neg.l %d0 | msac.w is bugged in gas, do this for now | 129 | neg.l %d0 | msac.w is bugged in gas, do this for now |
123 | move.w %d0, (%a5)+ | Write result to y[i] | 130 | move.w %d0, (%a5)+ | Write result to y[i] |
124 | move.l (%a4)+, %a6 | Fetch den[0] and den[1] | 131 | move.l (%a4)+, %a6 | Fetch den[0] and den[1] |
@@ -159,7 +166,148 @@ iir_mem16: | |||
159 | movem.l %d1-%d7/%a0-%a2, (%a6) | Save back mem[] | 166 | movem.l %d1-%d7/%a0-%a2, (%a6) | Save back mem[] |
160 | 167 | ||
161 | .exit: | 168 | .exit: |
162 | movem.l (%sp), %d2-%d7/%a2-%a6 | 169 | movem.l (%sp), %d2-%d7/%a2-%a6 |
163 | lea.l (44, %sp), %sp | 170 | lea.l (44, %sp), %sp |
171 | rts | ||
172 | |||
173 | /* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */ | ||
174 | .global qmf_synth | ||
175 | qmf_synth: | ||
176 | lea.l (-44, %sp), %sp | ||
177 | movem.l %d2-%d7/%a2-%a6, (%sp) | ||
178 | movem.l (44+4, %sp), %a0-%a3 | a0 = x1, a1 = x2, a2 = a, a3 = y | ||
179 | movem.l (44+20, %sp), %d0-%d1/%a4-%a5 | d0 = N, d1 = M, a4 = mem1, a5 = mem2 | |
180 | move.l #0x80, %macsr | Enable saturation | ||
181 | |||
182 | | Comments make more sense when compared to the reference C version | ||
183 | move.l %a2, %d6 | Backup a | ||
184 | lsr.l #1, %d0 | N2 = N >> 1 | ||
185 | lsr.l #1, %d1 | M2 = M >> 1 | ||
186 | move.l %d1, %d7 | Backup M2 | ||
187 | clr.l %d2 | ||
188 | sub.l %d0, %d2 | ||
189 | sub.l %d1, %d2 | d2 = -(N2 + M2) | ||
190 | lea.l (%sp, %d2.l*2), %a2 | Alloc two buffers of N2 + M2 shorts | ||
191 | lea.l (%a2, %d2.l*2), %a6 | a2 = xx1, a6 = xx2 | ||
192 | move.l %sp, %d3 | ||
193 | move.l %a6, %sp | Update sp | ||
194 | move.l %d3, -(%sp) | Stack old %sp | ||
195 | |||
196 | | Backwards copy x1 and x2 arrays to xx1 and xx2 | ||
197 | | TODO: these copying loops probably have more potential for optimization | ||
198 | lea.l (%a0, %d0.l*2), %a0 | x1 += N2 | ||
199 | lea.l (%a1, %d0.l*2), %a1 | x2 += N2 | ||
200 | move.l %d0, %d2 | Loop counter is N2 | ||
201 | 0: | ||
202 | move.w -(%a0), (%a2)+ | ||
203 | move.w -(%a1), (%a6)+ | ||
204 | subq.l #1, %d2 | ||
205 | jne 0b | ||
206 | |||
207 | | Copy alternate members of mem1 and mem2 to last part of xx1 and xx2 | ||
208 | move.l %d1, %d2 | Loop counter is M2 | ||
209 | addq.l #4, %a4 | a4 = &mem1[1] | ||
210 | addq.l #4, %a5 | a5 = &mem2[1] | ||
211 | move.l %a4, %d3 | Backup mem1 and mem2 | ||
212 | move.l %a5, %d4 | ||
213 | 0: | ||
214 | move.l (%a4), %d5 | ||
215 | move.w %d5, (%a2)+ | ||
216 | move.l (%a5), %d5 | ||
217 | move.w %d5, (%a6)+ | ||
218 | addq.l #8, %a4 | ||
219 | addq.l #8, %a5 | ||
220 | subq.l #1, %d2 | ||
221 | jne 0b | ||
222 | move.l %d3, %a4 | a4 = &mem1[1] | ||
223 | move.l %d4, %a5 | a5 = &mem2[1] | ||
224 | |||
225 | clr.l %d2 | ||
226 | sub.l %d1, %d2 | d2 = -M2 | ||
227 | lea.l (-4, %a2, %d2.l*2), %a0 | a0 = &xx1[N2 - 2] | ||
228 | lea.l (-4, %a6, %d2.l*2), %a1 | a1 = &xx2[N2 - 2] | ||
229 | move.l %d6, %a2 | a2 = a | ||
230 | |||
231 | | Main loop, register usage: | ||
232 | | d0 = N2 counter, d1 = M2 counter, d7 = M2 backup | ||
233 | | d2 = x10, d3 = x11, d4 = x20, d5 = x21, d6 = [a0, a1] (packed coefficient pair read via a2, not the address registers) | |
234 | | a0 = xx1, a1 = xx2, a2 = a, a3 = y, a4 = &mem1[1], a5 = &mem2[1] | |
235 | 0: | Outer loop | |
236 | move.l #32768, %d2 | Rounding constant | ||
237 | move.l %d2, %acc0 | ||
238 | move.l %d2, %acc1 | ||
239 | move.l %d2, %acc2 | ||
240 | move.l %d2, %acc3 | ||
241 | move.w (%a0)+, %d2 | d2 = x10 | ||
242 | move.w (%a1)+, %d4 | d4 = x20 | ||
243 | move.l (%a2)+, %d6 | d6 = [a0, a1] | ||
244 | 1: | Inner loop | |
245 | move.w (%a0)+, %d3 | d3 = x11 | ||
246 | move.w (%a1)+, %d5 | d5 = x21 | ||
247 | mac.w %d6u, %d3l, #1, %acc0 | acc0 += a0*x11 | ||
248 | msac.w %d6u, %d5l, #1, %acc0 | acc0 -= a0*x21 | ||
249 | mac.w %d6l, %d3l, #1, %acc1 | acc1 += a1*x11 | ||
250 | mac.w %d6l, %d5l, #1, %acc1 | acc1 += a1*x21 | ||
251 | mac.w %d6u, %d2l, #1, %acc2 | acc2 += a0*x10 | ||
252 | msac.w %d6u, %d4l, #1, %acc2 | acc2 -= a0*x20 | ||
253 | mac.w %d6l, %d2l, #1, %acc3 | acc3 += a1*x10 | ||
254 | mac.w %d6l, %d4l, #1, (%a2)+, %d6, %acc3 | acc3 += a1*x20 | ||
255 | |||
256 | move.w (%a0)+, %d2 | d2 = x10 | ||
257 | move.w (%a1)+, %d4 | d4 = x20 | ||
258 | mac.w %d6u, %d2l, #1, %acc0 | acc0 += a0*x10 | ||
259 | msac.w %d6u, %d4l, #1, %acc0 | acc0 -= a0*x20 | ||
260 | mac.w %d6l, %d2l, #1, %acc1 | acc1 += a1*x10 | ||
261 | mac.w %d6l, %d4l, #1, %acc1 | acc1 += a1*x20 | ||
262 | mac.w %d6u, %d3l, #1, %acc2 | acc2 += a0*x11 | ||
263 | msac.w %d6u, %d5l, #1, %acc2 | acc2 -= a0*x21 | ||
264 | mac.w %d6l, %d3l, #1, %acc3 | acc3 += a1*x11 | ||
265 | mac.w %d6l, %d5l, #1, (%a2)+, %d6, %acc3 | acc3 += a1*x21 | ||
266 | subq.l #2, %d1 | ||
267 | jne 1b | ||
268 | |||
269 | sub.l %d7, %d1 | d1 = -M2 | ||
270 | lea.l (-4, %a2, %d1.l*4), %a2 | a2 = &a[0] | ||
271 | lea.l (-6, %a0, %d1.l*2), %a0 | a0 = &xx1[N2 - 2 - i] | ||
272 | lea.l (-6, %a1, %d1.l*2), %a1 | a1 = &xx2[N2 - 2 - i] | ||
273 | neg.l %d1 | d1 = M2 | ||
274 | movclr.l %acc0, %d2 | ||
275 | movclr.l %acc1, %d3 | ||
276 | movclr.l %acc2, %d4 | ||
277 | movclr.l %acc3, %d5 | ||
278 | swap.w %d2 | Shift 16 right | ||
279 | swap.w %d3 | ||
280 | swap.w %d4 | ||
281 | swap.w %d5 | ||
282 | | Thanks to the extra shift in the mac chain, we get clipping for free. | ||
283 | | The clipping will be [-32768..32767], not Speex standard [-32767..32767], | ||
284 | | but since qmf_synth() is called so late in the signal chain, it should | ||
285 | | work fine. | ||
286 | move.w %d2, (%a3)+ | Write results to y[] | ||
287 | move.w %d3, (%a3)+ | ||
288 | move.w %d4, (%a3)+ | ||
289 | move.w %d5, (%a3)+ | ||
290 | subq.l #2, %d0 | ||
291 | jne 0b | ||
292 | |||
293 | | Copy start of xx1 and xx2 back to alternate mem1 and mem2 entries | ||
294 | addq.l #4, %a0 | a0 = &xx1[0] | ||
295 | addq.l #4, %a1 | a1 = &xx2[0] | ||
296 | 0: | ||
297 | move.w (%a0)+, %d2 | ||
298 | move.w (%a1)+, %d3 | ||
299 | ext.l %d2 | ||
300 | ext.l %d3 | ||
301 | move.l %d2, (%a4) | ||
302 | move.l %d3, (%a5) | ||
303 | addq.l #8, %a4 | ||
304 | addq.l #8, %a5 | ||
305 | subq.l #1, %d1 | ||
306 | jne 0b | ||
307 | |||
308 | move.l #0, %macsr | ||
309 | move.l (%sp), %sp | ||
310 | movem.l (%sp), %d2-%d7/%a2-%a6 | ||
311 | lea.l (44, %sp), %sp | ||
164 | rts | 312 | rts |
165 | 313 | ||