summary | refs | log | tree | commit | diff
path: root/apps
diff options
context:
space:
mode:
Diffstat (limited to 'apps')
-rw-r--r-- apps/codecs/libspeex/filters.c 3
-rw-r--r-- apps/codecs/libspeex/filters_cf.S 182
2 files changed, 168 insertions, 17 deletions
diff --git a/apps/codecs/libspeex/filters.c b/apps/codecs/libspeex/filters.c
index 02f93a27b1..e64f087a5d 100644
--- a/apps/codecs/libspeex/filters.c
+++ b/apps/codecs/libspeex/filters.c
@@ -47,6 +47,7 @@
47#include "filters_arm4.h" 47#include "filters_arm4.h"
48#elif defined (COLDFIRE_ASM) 48#elif defined (COLDFIRE_ASM)
49#define OVERRIDE_IIR_MEM16 49#define OVERRIDE_IIR_MEM16
50#define OVERRIDE_QMF_SYNTH
50#elif defined (BFIN_ASM) 51#elif defined (BFIN_ASM)
51#include "filters_bfin.h" 52#include "filters_bfin.h"
52#endif 53#endif
@@ -475,6 +476,7 @@ void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *y1
475 } 476 }
476} 477}
477 478
479#ifndef OVERRIDE_QMF_SYNTH
478/* Re-synthesised a signal from the QMF low-band and high-band signals */ 480/* Re-synthesised a signal from the QMF low-band and high-band signals */
479void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) 481void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack)
480 /* assumptions: 482 /* assumptions:
@@ -566,6 +568,7 @@ void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_
566 for (i = 0; i < M2; i++) 568 for (i = 0; i < M2; i++)
567 mem2[2*i+1] = xx2[i]; 569 mem2[2*i+1] = xx2[i];
568} 570}
571#endif
569 572
570#ifdef FIXED_POINT 573#ifdef FIXED_POINT
571#if 0 574#if 0
diff --git a/apps/codecs/libspeex/filters_cf.S b/apps/codecs/libspeex/filters_cf.S
index 579af11581..dd650844c8 100644
--- a/apps/codecs/libspeex/filters_cf.S
+++ b/apps/codecs/libspeex/filters_cf.S
@@ -31,7 +31,6 @@
31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33*/ 33*/
34
35 .text 34 .text
36/* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */ 35/* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */
37 .global iir_mem16 36 .global iir_mem16
@@ -59,14 +58,18 @@ iir_mem16:
59 move.w (%a3)+, %d0 58 move.w (%a3)+, %d0
60 ext.l %d0 59 ext.l %d0
61 add.l %d1, %d0 | Add with x[i] 60 add.l %d1, %d0 | Add with x[i]
62 move.l #32768, %d1 61 move.l #32767, %d1
63 add.l %d1, %d0 | Bias result to [0..65535] 62 move.l #65534, %a6
64 cmp.l #65535, %d0 | Clip to [0..65535] range 63 add.l %d1, %d0 | Bias result to [-1..65534]
65 jle 1f 64 cmp.l %a6, %d0 | Now do clip to [0..65534] range
66 spl.b %d0 65 jls 2f
67 ext.w %d0 66 jpl 1f
67 clr.l %d0 | Clip low
68 .word 0x51fa | trapf.w, shadow next insn
681: 691:
69 sub.l %d1, %d0 | Bias clipped result back to [-32768..32767] 70 move.l %a6, %d0 | Clip high
712:
72 sub.l %d1, %d0 | Bias clipped result back to [-32767..32767]
70 neg.l %d0 | msac.w is bugged in gas, do this for now 73 neg.l %d0 | msac.w is bugged in gas, do this for now
71 move.w %d0, (%a5)+ | Write result to y[i] 74 move.w %d0, (%a5)+ | Write result to y[i]
72 move.l (%a4)+, %a6 | Fetch den[0] and den[1] 75 move.l (%a4)+, %a6 | Fetch den[0] and den[1]
@@ -111,14 +114,18 @@ iir_mem16:
111 move.w (%a3)+, %d0 114 move.w (%a3)+, %d0
112 ext.l %d0 115 ext.l %d0
113 add.l %d1, %d0 | Add with x[i] 116 add.l %d1, %d0 | Add with x[i]
114 move.l #32768, %d1 117 move.l #32767, %d1
115 add.l %d1, %d0 | Bias result to [0..65535] 118 move.l #65534, %a6
116 cmp.l #65535, %d0 | Clip to [0..65535] range 119 add.l %d1, %d0 | Bias result to [-1..65534]
117 jle 1f 120 cmp.l %a6, %d0 | Now do clip to [0..65534] range
118 spl.b %d0 121 jls 2f
119 ext.w %d0 122 jpl 1f
123 clr.l %d0 | Clip low
124 .word 0x51fa | trapf.w, shadow next insn
1201: 1251:
121 sub.l %d1, %d0 | Bias clipped result back to [-32768..32767] 126 move.l %a6, %d0 | Clip high
1272:
128 sub.l %d1, %d0 | Bias clipped result back to [-32767..32767]
122 neg.l %d0 | msac.w is bugged in gas, do this for now 129 neg.l %d0 | msac.w is bugged in gas, do this for now
123 move.w %d0, (%a5)+ | Write result to y[i] 130 move.w %d0, (%a5)+ | Write result to y[i]
124 move.l (%a4)+, %a6 | Fetch den[0] and den[1] 131 move.l (%a4)+, %a6 | Fetch den[0] and den[1]
@@ -159,7 +166,148 @@ iir_mem16:
159 movem.l %d1-%d7/%a0-%a2, (%a6) | Save back mem[] 166 movem.l %d1-%d7/%a0-%a2, (%a6) | Save back mem[]
160 167
161.exit: 168.exit:
162 movem.l (%sp), %d2-%d7/%a2-%a6 169 movem.l (%sp), %d2-%d7/%a2-%a6
163 lea.l (44, %sp), %sp 170 lea.l (44, %sp), %sp
171 rts
172
/*
 * qmf_synth -- ColdFire assembly replacement for the C qmf_synth(), using
 * the MAC unit accumulators %acc0-%acc3.
 *
 * Stack arguments actually read (after the 44-byte register save):
 *   x1, x2, a, y  -> %a0-%a3
 *   N, M          -> %d0, %d1 (both immediately halved to N2 and M2)
 *   mem1, mem2    -> %a4, %a5
 * The ninth argument, `stack`, is never loaded; the two scratch buffers
 * (xx1, xx2) are carved out of the real stack below the saved registers.
 *
 * Preconditions implied by the loop structure below:
 *   - every loop is bottom-tested (subq/jne), so N2 and M2 must be non-zero;
 *   - the outer and inner main loops count down in steps of 2, so N2 and M2
 *     must be even for the counters to reach zero.
 *
 * Registers: %d2-%d7/%a2-%a6 are saved and restored; %d0, %d1, %a0 and %a1
 * are modified without being saved. %macsr is set to 0x80 on entry and
 * written with 0 on exit (the caller's previous value is not restored).
 */
173/* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */
174 .global qmf_synth
175qmf_synth:
/* Prologue: 11 registers * 4 bytes = 44 bytes, hence the 44+ argument offsets. */
176 lea.l (-44, %sp), %sp
177 movem.l %d2-%d7/%a2-%a6, (%sp)
178 movem.l (44+4, %sp), %a0-%a3 | a0 = x1, a1 = x2, a2 = a, a3 = y
179 movem.l (44+20, %sp), %d0-%d1/%a4-%a5 | d0 = N, d1 = M, a4 = mem1,a5 = mem2
180 move.l #0x80, %macsr | Enable saturation
181
182 | Comments make more sense when compared to the reference C version
183 move.l %a2, %d6 | Backup a
184 lsr.l #1, %d0 | N2 = N >> 1
185 lsr.l #1, %d1 | M2 = M >> 1
186 move.l %d1, %d7 | Backup M2
187 clr.l %d2
188 sub.l %d0, %d2
189 sub.l %d1, %d2 | d2 = -(N2 + M2)
/*
 * Scratch layout, low to high address:
 *   [saved old %sp][xx2: N2+M2 shorts][xx1: N2+M2 shorts][saved registers]
 * a2 (xx1) sits directly below the register save area and a6 (xx2) directly
 * below xx1. The pre-allocation %sp is pushed just under xx2 so the epilogue
 * can release both buffers with a single load.
 */
190 lea.l (%sp, %d2.l*2), %a2 | Alloc two buffers of N2 + M2 shorts
191 lea.l (%a2, %d2.l*2), %a6 | a2 = xx1, a6 = xx2
192 move.l %sp, %d3
193 move.l %a6, %sp | Update sp
194 move.l %d3, -(%sp) | Stack old %sp
195
196 | Backwards copy x1 and x2 arrays to xx1 and xx2
197 | TODO: these copying loops probably have more potential for optimization
198 lea.l (%a0, %d0.l*2), %a0 | x1 += N2
199 lea.l (%a1, %d0.l*2), %a1 | x2 += N2
200 move.l %d0, %d2 | Loop counter is N2
/* Runs N2 times: sources are read with pre-decrement, destinations written
   with post-increment, so xx1[k] = x1[N2-1-k] (likewise for xx2/x2). */
2010:
202 move.w -(%a0), (%a2)+
203 move.w -(%a1), (%a6)+
204 subq.l #1, %d2
205 jne 0b
206
207 | Copy alternate members of mem1 and mem2 to last part of xx1 and xx2
208 move.l %d1, %d2 | Loop counter is M2
209 addq.l #4, %a4 | a4 = &mem1[1]
210 addq.l #4, %a5 | a5 = &mem2[1]
211 move.l %a4, %d3 | Backup mem1 and mem2
212 move.l %a5, %d4
/* Runs M2 times: each 32-bit mem entry is loaded whole but only its low
   16 bits are stored; the 8-byte stride visits every other entry. */
2130:
214 move.l (%a4), %d5
215 move.w %d5, (%a2)+
216 move.l (%a5), %d5
217 move.w %d5, (%a6)+
218 addq.l #8, %a4
219 addq.l #8, %a5
220 subq.l #1, %d2
221 jne 0b
222 move.l %d3, %a4 | a4 = &mem1[1]
223 move.l %d4, %a5 | a5 = &mem2[1]
224
/* a2/a6 now point one past the end of xx1/xx2 (N2+M2 shorts in). Stepping
   back M2 shorts and a further 4 bytes lands on element N2-2. */
225 clr.l %d2
226 sub.l %d1, %d2 | d2 = -M2
227 lea.l (-4, %a2, %d2.l*2), %a0 | a0 = &xx1[N2 - 2]
228 lea.l (-4, %a6, %d2.l*2), %a1 | a1 = &xx2[N2 - 2]
229 move.l %d6, %a2 | a2 = a
230
231 | Main loop, register usage:
232 | d0 = N2 counter, d1 = M2 counter, d7 = M2 backup
233 | d2 = x10, d3 = x11, d4 = x20, d5 = x21, d6 = [a0, a1]
234 | a0 = xx1, a1 = xx2, a2 = a, a3 = y, a4 = mem1, a5 = mem2
/* Each outer iteration produces four output samples (one per accumulator).
   The accumulators are preloaded with 32768, i.e. half of the 65536 weight
   of the upper word that is extracted after the loop, so that extraction
   rounds rather than truncates. */
2350: | Outerloop
236 move.l #32768, %d2 | Rounding constant
237 move.l %d2, %acc0
238 move.l %d2, %acc1
239 move.l %d2, %acc2
240 move.l %d2, %acc3
241 move.w (%a0)+, %d2 | d2 = x10
242 move.w (%a1)+, %d4 | d4 = x20
243 move.l (%a2)+, %d6 | d6 = [a0, a1]
/* Each inner iteration handles two coefficient pairs. The last mac of each
   half also carries the operands "(%a2)+, %d6", which fetch the next
   [a0, a1] pair into d6 for the following half. */
2441: | Innerloop
245 move.w (%a0)+, %d3 | d3 = x11
246 move.w (%a1)+, %d5 | d5 = x21
247 mac.w %d6u, %d3l, #1, %acc0 | acc0 += a0*x11
248 msac.w %d6u, %d5l, #1, %acc0 | acc0 -= a0*x21
249 mac.w %d6l, %d3l, #1, %acc1 | acc1 += a1*x11
250 mac.w %d6l, %d5l, #1, %acc1 | acc1 += a1*x21
251 mac.w %d6u, %d2l, #1, %acc2 | acc2 += a0*x10
252 msac.w %d6u, %d4l, #1, %acc2 | acc2 -= a0*x20
253 mac.w %d6l, %d2l, #1, %acc3 | acc3 += a1*x10
254 mac.w %d6l, %d4l, #1, (%a2)+, %d6, %acc3 | acc3 += a1*x20
255
256 move.w (%a0)+, %d2 | d2 = x10
257 move.w (%a1)+, %d4 | d4 = x20
258 mac.w %d6u, %d2l, #1, %acc0 | acc0 += a0*x10
259 msac.w %d6u, %d4l, #1, %acc0 | acc0 -= a0*x20
260 mac.w %d6l, %d2l, #1, %acc1 | acc1 += a1*x10
261 mac.w %d6l, %d4l, #1, %acc1 | acc1 += a1*x20
262 mac.w %d6u, %d3l, #1, %acc2 | acc2 += a0*x11
263 msac.w %d6u, %d5l, #1, %acc2 | acc2 -= a0*x21
264 mac.w %d6l, %d3l, #1, %acc3 | acc3 += a1*x11
265 mac.w %d6l, %d5l, #1, (%a2)+, %d6, %acc3 | acc3 += a1*x21
266 subq.l #2, %d1
267 jne 1b
268
/* d1 is 0 here. During this outer iteration a2 advanced by 4*(M2+1) bytes
   and a0/a1 by 2*(M2+1) bytes (one initial fetch plus M2 in the inner
   loop). The lea's below undo that: a2 returns to the start of a[], and
   a0/a1 end up two shorts before where this outer iteration started. */
269 sub.l %d7, %d1 | d1 = -M2
270 lea.l (-4, %a2, %d1.l*4), %a2 | a2 = &a[0]
271 lea.l (-6, %a0, %d1.l*2), %a0 | a0 = &xx1[N2 - 2 - i]
272 lea.l (-6, %a1, %d1.l*2), %a1 | a1 = &xx2[N2 - 2 - i]
273 neg.l %d1 | d1 = M2
274 movclr.l %acc0, %d2
275 movclr.l %acc1, %d3
276 movclr.l %acc2, %d4
277 movclr.l %acc3, %d5
/* swap.w exchanges the register halves, so the move.w stores further down
   write what was the upper 16 bits of each accumulator value. */
278 swap.w %d2 | Shift 16 right
279 swap.w %d3
280 swap.w %d4
281 swap.w %d5
282 | Thanks to the extra shift in the mac chain, we get clipping for free.
283 | The clipping will be [-32768..32767], not Speex standard [-32767..32767],
284 | but since qmf_synth() is called so late in the signal chain, it should
285 | work fine.
286 move.w %d2, (%a3)+ | Write results to y[]
287 move.w %d3, (%a3)+
288 move.w %d4, (%a3)+
289 move.w %d5, (%a3)+
290 subq.l #2, %d0
291 jne 0b
292
/* After N2/2 outer iterations a0/a1 have stepped back N2 shorts in total
   from element N2-2, i.e. they point at element -2; adding 4 bytes gives
   element 0. d1 holds M2 again (from the neg.l above) and a4/a5 still
   point at mem1[1]/mem2[1]. */
293 | Copy start of xx1 and xx2 back to alternate mem1 and mem2 entries
294 addq.l #4, %a0 | a0 = &xx1[0]
295 addq.l #4, %a1 | a1 = &xx2[0]
/* Runs M2 times: each 16-bit sample is sign-extended to 32 bits before it
   is stored into every other mem entry (8-byte stride). */
2960:
297 move.w (%a0)+, %d2
298 move.w (%a1)+, %d3
299 ext.l %d2
300 ext.l %d3
301 move.l %d2, (%a4)
302 move.l %d3, (%a5)
303 addq.l #8, %a4
304 addq.l #8, %a5
305 subq.l #1, %d1
306 jne 0b
307
/* Epilogue: (%sp) holds the pre-allocation stack pointer pushed at entry;
   reloading it frees both scratch buffers and leaves %sp at the register
   save area. */
308 move.l #0, %macsr
309 move.l (%sp), %sp
310 movem.l (%sp), %d2-%d7/%a2-%a6
311 lea.l (44, %sp), %sp
164 rts 312 rts
165 313