summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libspeex/filters_arm4.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/filters_arm4.S')
-rw-r--r--lib/rbcodec/codecs/libspeex/filters_arm4.S328
1 files changed, 328 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspeex/filters_arm4.S b/lib/rbcodec/codecs/libspeex/filters_arm4.S
new file mode 100644
index 0000000000..e652ede554
--- /dev/null
+++ b/lib/rbcodec/codecs/libspeex/filters_arm4.S
@@ -0,0 +1,328 @@
1/* Copyright (C) 2007 Thom Johansen */
2/**
3 @file filters_arm4.S
4 @brief Various analysis/synthesis filters (ARMv4 version)
5*/
6/*
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 - Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 - Neither the name of the Xiph.org Foundation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33*/
34
35#include "config.h"
36#if CONFIG_CPU == PP5002 /* PP5002 builds: emit the routines below into the .icode section */
37 .section .icode,"ax",%progbits
38#else /* Every other CPU: ordinary text section */
39 .text
40#endif
41
42/* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */
/* Recursive (all-pole) filter whose state lives in mem[]. For each sample:
     y[i]       = clip(x[i] + ((mem[0] + 4096) >> 13)), range -32767..32767
     mem[j]     = mem[j+1] - den[j]*y[i]      for j = 0 .. ord-2
     mem[ord-1] = -den[ord-1]*y[i]
   Only ord == 8 and ord == 10 are implemented; any other order returns at
   once without writing y[] or mem[]. The stack argument is never read.
   Both sample loops test the counter only after the first pass, so N must
   be at least 1. r14 (lr) is saved on entry and then reused as the scratch
   register that holds the current output sample.
   NOTE(review): ldmpc is not defined in this file; presumably a macro from
   config.h that pops the listed registers and returns - confirm.
   NOTE(review): the order-10 path fetches den[] with word loads (ldmia),
   which presumably requires den to be 32-bit aligned - confirm with callers. */
43 .global iir_mem16
44iir_mem16:
45 stmdb sp!, { r4-r11, lr } @ 9 registers pushed = 36 bytes
46 ldr r5, [sp, #36] @ r5 = ord (first stack argument); r0 = x, r1 = den, r2 = y, r3 = N
47 ldr r4, [sp, #40] @ r4 = mem (second stack argument)
48 cmp r5, #10
49 beq .order_10
50 cmp r5, #8
51 beq .order_8
52 ldmpc regs=r4-r11 @ Non-supported order, return
53
54 @ TODO: try using direct form 1 filtering
55.order_8:
56 ldmia r4, { r5-r12 } @ r5-r12 = mem[0..7], kept in registers for the whole loop
570:
58 add r5, r5, #4096 @ Rounding constant
59 ldrsh r14, [r0], #2 @ r14 = x[i] (sign-extended), advance x
60 add r14, r14, r5, asr #13 @ r14 = x[i] + ((mem[0] + 4096) >> 13)
61 mov r5, #0x7f00 @ Old mem[0] is no longer needed, reuse r5
62 orr r5, r5, #0xff @ r5 = 32767
63 cmp r14, r5
64 movgt r14, r5 @ Clip positive
65 cmn r14, r5 @ Flags from r14 + 32767
66 rsblt r14, r5, #0 @ Clip negative (to -32767)
67 strh r14, [r2], #2 @ Write result to y[i]
68
69 ldrsh r4, [r1] @ r4 is scratch for den[j] from here on (mem pointer is reloaded after the loop)
70 mul r5, r4, r14
71 sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i]
72 ldrsh r4, [r1, #2]
73 mul r6, r4, r14
74 sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i]
75 ldrsh r4, [r1, #4]
76 mul r7, r4, r14
77 sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i]
78 ldrsh r4, [r1, #6]
79 mul r8, r4, r14
80 sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i]
81 ldrsh r4, [r1, #8]
82 mul r9, r4, r14
83 sub r9, r10, r9 @ mem[4] = mem[5] - den[4]*y[i]
84 ldrsh r4, [r1, #10]
85 mul r10, r4, r14
86 sub r10, r11, r10 @ mem[5] = mem[6] - den[5]*y[i]
87 ldrsh r4, [r1, #12]
88 mul r11, r4, r14
89 sub r11, r12, r11 @ mem[6] = mem[7] - den[6]*y[i]
90 ldrsh r4, [r1, #14]
91 mul r12, r4, r14
92 rsb r12, r12, #0 @ mem[7] = -den[7]*y[i]
93 subs r3, r3, #1 @ One sample done
94 bne 0b
95 ldr r4, [sp, #40] @ r4 = mem (reloaded, r4 was used as scratch)
96 stmia r4, { r5-r12 } @ Save back mem[]
97 ldmpc regs=r4-r11 @ Exit
98
99.order_10:
100 ldmia r4, { r5-r9 } @ r5-r9 = mem[0..4] (this label is also the loop head, so mem is reloaded per sample)
101 add r5, r5, #4096 @ Rounding constant
102 ldrsh r14, [r0], #2 @ r14 = x[i] (sign-extended), advance x
103 add r14, r14, r5, asr #13 @ r14 = x[i] + ((mem[0] + 4096) >> 13)
104 mov r5, #0x7f00
105 orr r5, r5, #0xff @ r5 = 32767
106 cmp r14, r5
107 movgt r14, r5 @ Clip positive
108 cmn r14, r5 @ Flags from r14 + 32767
109 rsblt r14, r5, #0 @ Clip negative (to -32767)
110 strh r14, [r2], #2 @ Write result to y[i]
111
112 ldmia r1!, { r10-r12 } @ r10-r12 = den[0..5], two 16-bit coefs per word, r1 += 12
113 mov r5, r10, lsl #16
114 mov r5, r5, asr #16 @ r5 = sign-extended low halfword of r10 (used as den[0])
115 mul r5, r14, r5
116 sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i]
117 mov r10, r10, asr #16 @ r10 = sign-extended high halfword (used as den[1])
118 mul r6, r14, r10
119 sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i]
120 mov r10, r11, lsl #16
121 mov r10, r10, asr #16
122 mul r7, r14, r10
123 sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i]
124 mov r10, r11, asr #16
125 mul r8, r14, r10
126 sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i]
127 stmia r4!, { r5-r8 } @ Write back mem[0..3], r4 = &mem[4]
128 mov r10, r12, lsl #16
129 mov r10, r10, asr #16
130 mul r5, r14, r10 @ r5 = den[4]*y[i]
131
132 ldmib r4, { r6-r10 } @ r6-r10 = mem[5..9] (increment-before skips mem[4])
133 sub r5, r6, r5 @ mem[4] = mem[5] - den[4]*y[i]
134 mov r12, r12, asr #16
135 mul r6, r14, r12
136 sub r6, r7, r6 @ mem[5] = mem[6] - den[5]*y[i]
137 ldmia r1!, { r11-r12 } @ r11-r12 = den[6..9], r1 += 8
138 mov r7, r11, lsl #16
139 mov r7, r7, asr #16
140 mul r7, r14, r7
141 sub r7, r8, r7 @ mem[6] = mem[7] - den[6]*y[i]
142 mov r11, r11, asr #16
143 mul r8, r14, r11
144 sub r8, r9, r8 @ mem[7] = mem[8] - den[7]*y[i]
145 mov r11, r12, lsl #16
146 mov r11, r11, asr #16
147 mul r9, r14, r11
148 sub r9, r10, r9 @ mem[8] = mem[9] - den[8]*y[i]
149 mov r12, r12, asr #16
150 mul r10, r14, r12
151 rsb r10, r10, #0 @ mem[9] = -den[9]*y[i]
152 stmia r4!, { r5-r10 } @ Write back mem[4..9]
153 sub r4, r4, #10*4 @ Rewind r4 to &mem[0] (16 + 24 bytes were stored)
154 sub r1, r1, #10*2 @ Rewind r1 to &den[0] (12 + 8 bytes were loaded)
155 subs r3, r3, #1 @ One sample done
156 bne .order_10
157 ldmpc regs=r4-r11 @ Exit
158
159
160/* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */
/* Synthesis filter: combines the two inputs x1 and x2 (N/2 halfwords each)
   with the M coefficients in a[] and writes N halfwords to y.
   Steps performed by the code below:
     1. Carve two scratch buffers xx1 and xx2 of N + M bytes each out of the
        real stack, directly below the saved registers; the old sp is stored
        just below xx1. The stack argument is never read.
     2. Copy x1 and x2 into the start of xx1 and xx2 in reversed sample order.
     3. Append M/2 halfwords taken from mem1 and mem2 at byte offsets
        2, 6, 10, ... to xx1 and xx2.
     4. Main loop: each outer pass runs M/4 inner passes and produces four
        output samples (rounded, shifted down by 15, clipped to 16 bits).
     5. Copy the first M/2 halfwords of xx1 and xx2 back to the same
        mem1 and mem2 slots.
   The loop counters are only compared against exactly zero after being
   decremented, so N must be a non-zero multiple of 8 and M a non-zero
   multiple of 4.
   NOTE(review): x1, x2 and the scratch buffers are accessed with word
   loads/stores (ldmdb/stmia/ldr), so they are presumably 32-bit aligned -
   confirm with callers.
   NOTE(review): the prototype above declares mem1/mem2 as spx_word32_t *,
   but the code reads and writes them as halfwords at byte offsets 2, 6,
   10, ... (odd-indexed 16-bit entries) - confirm the declared type.
   NOTE(review): ldmpc is not defined in this file; presumably a macro from
   config.h that pops the listed registers and returns - confirm. */
161 .global qmf_synth
162qmf_synth:
163 stmdb sp!, { r4-r11, lr } @ 9 registers pushed = 36 bytes
164 add r7, sp, #36 @ r7 -> stack arguments; r0 = x1, r1 = x2, r2 = a, r3 = y
165 ldmia r7, { r4-r7 } @ r4 = N, r5 = M, r6 = mem1, r7 = mem2
166
167 add r8, r4, r5 @ r8 = N + M = size of one scratch buffer in bytes
168 sub r9, sp, r8 @ r9 = sp - (N + M) bytes = xx2 ((N + M) >> 1 halfwords)
169 sub r8, r9, r8 @ r8 = r9 - (N + M) bytes = xx1
170 str sp, [r8, #-4] @ Stack old sp
171 sub sp, r8, #4 @ Update sp
172
173 add r0, r0, r4 @ x1 += N >> 1 (N bytes, i.e. one past the last input sample)
174 add r1, r1, r4 @ x2 += N >> 1
175 mov r14, r4 @ Loop counter is N
1760:
177 @ Backwards copy x1 and x2 arrays to xx1 and xx2. Each pass moves four
178 @ halfwords per array and drops the counter by 8, so N must be a non-zero multiple of 8
179 ldmdb r0!, { r10-r11 } @ r10 = lower-address word, r11 = higher-address word
180 mov r12, r10, ror #16 @ Swap the two halfwords of each word
181 mov r11, r11, ror #16
182 stmia r8!, { r11-r12 } @ Store words in swapped order: four halfwords reversed
183 ldmdb r1!, { r10-r11 }
184 mov r12, r10, ror #16
185 mov r11, r11, ror #16
186 stmia r9!, { r11-r12 }
187 subs r14, r14, #8
188 bne 0b
189
190 @ Copy alternate members of mem1 and mem2 to last part of xx1 and xx2
191 mov r14, r5 @ Loop counter is M
192 add r6, r6, #2 @ Skip the first halfword of mem1
193 add r7, r7, #2 @ Skip the first halfword of mem2
194 stmdb sp!, { r6-r7 } @ Stack &mem1[1], &mem2[1] (in halfword units) for the copy-back at the end
1950:
196 ldrh r10, [r6], #4 @ Every second halfword of mem1
197 ldrh r11, [r6], #4
198 ldrh r12, [r7], #4 @ Every second halfword of mem2
199 orr r10, r10, r11, lsl #16 @ Pack two halfwords, first one in the low half
200 ldrh r11, [r7], #4
201 orr r11, r12, r11, lsl #16
202 str r10, [r8], #4
203 str r11, [r9], #4
204 subs r14, r14, #4 @ M must be a non-zero multiple of 4
205 bne 0b
206
207 sub r0, r8, r5 @ r0 = &xx1[N2]
208 sub r1, r9, r5 @ r1 = &xx2[N2]
209 str r4, [sp, #-4]! @ Stack N
210 mov r4, r5
211 str r4, [sp, #-4]! @ Stack M
212 @ Main loop, register usage:
213 @ r0 = xx1, r1 = xx2, r2 = a, r3 = y, r4 = M, r5 = x10, r6 = x11, r7 = x20
214 @ r8 = x21, r9 = current coefficient, r10 = acc0, r11 = acc1, r12 = acc2, r14 = acc3
        @ Stack layout here: [sp] = M, [sp, #4] = remaining N, [sp, #8] = &mem1[1],
        @ [sp, #12] = &mem2[1], [sp, #16] = old sp
2150: @ Outerloop
216 mov r10, #16384 @ Init accumulators to rounding const
217 mov r11, #16384
218 mov r12, #16384
219 mov r14, #16384
220
221 ldrsh r5, [r0, #-4]! @ r5 = x10, r0 = &xx1[N2 - 2]
222 ldrsh r7, [r1, #-4]! @ r7 = x20, r1 = &xx2[N2 - 2]
2231: @ Innerloop
224 ldrsh r9, [r2], #2 @ r9 = a0
225 ldrsh r6, [r0, #2]! @ r6 = x11
226 ldrsh r8, [r1, #2]! @ r8 = x21
227 sub r5, r5, r7 @ r5 = x10 - x20
228 add r7, r5, r7, asl #1 @ r7 = x10 + x20
229 mla r12, r9, r5, r12 @ acc2 += a0*(x10 - x20)
230 sub r5, r6, r8 @ r5 = x11 - x21
231 mla r10, r9, r5, r10 @ acc0 += a0*(x11 - x21)
232 ldrsh r9, [r2], #2 @ r9 = a1
233 add r5, r6, r8 @ r5 = x11 + x21
234 mla r14, r9, r7, r14 @ acc3 += a1*(x10 + x20)
235 mla r11, r9, r5, r11 @ acc1 += a1*(x11 + x21)
236
237 ldrsh r9, [r2], #2 @ r9 = a2 (next even coefficient)
238 ldrsh r5, [r0, #2]! @ r5 = x10 (next sample pair)
239 ldrsh r7, [r1, #2]! @ r7 = x20
240 sub r6, r6, r8 @ r6 = x11 - x21
241 add r8, r6, r8, asl #1 @ r8 = x11 + x21
242 mla r12, r9, r6, r12 @ acc2 += a2*(x11 - x21)
243 sub r6, r5, r7 @ r6 = x10 - x20
244 mla r10, r9, r6, r10 @ acc0 += a2*(x10 - x20)
245 ldrsh r9, [r2], #2 @ r9 = a3 (next odd coefficient)
246 add r6, r5, r7 @ r6 = x10 + x20
247 mla r14, r9, r8, r14 @ acc3 += a3*(x11 + x21)
248 mla r11, r9, r6, r11 @ acc1 += a3*(x10 + x20)
249 subs r4, r4, #4 @ Four coefficients consumed per inner pass
250 bne 1b
251
252 ldr r4, [sp] @ r4 = M
253 sub r2, r2, r4, lsl #1 @ r2 = &a[0]
254 sub r0, r0, r4 @ r0 = &xx1[N2 - 2 - i]
255 sub r1, r1, r4 @ r1 = &xx2[N2 - 2 - i]
256
257 mov r10, r10, asr #15 @ Shift outputs down
258 mov r11, r11, asr #15
259 mov r12, r12, asr #15
260 mov r14, r14, asr #15
261
262 @ Clip output to -32768..32767 range, which works fine despite not being
263 @ the usual Speex clipping range. Only the low 16 bits are stored below.
264 mvn r9, #0x8000 @ r9 = 0xffff7fff, low halfword = 32767
265 mov r5, r10, asr #15 @ r5 = bits above the 16-bit range
266 teq r5, r5, asr #31 @ In range only if those bits all equal the sign
267 eorne r10, r9, r5, asr #31 @ Out of range: low halfword 0x7fff if positive, 0x8000 if negative
268 mov r5, r11, asr #15
269 teq r5, r5, asr #31
270 eorne r11, r9, r5, asr #31
271 mov r5, r12, asr #15
272 teq r5, r5, asr #31
273 eorne r12, r9, r5, asr #31
274 mov r5, r14, asr #15
275 teq r5, r5, asr #31
276 eorne r14, r9, r5, asr #31
277
278 strh r10, [r3], #2 @ Write outputs
279 strh r11, [r3], #2
280 strh r12, [r3], #2
281 strh r14, [r3], #2
282 ldr r10, [sp, #4] @ Load N
283 subs r10, r10, #4 @ Are we done?
284 strne r10, [sp, #4] @ no ->
285 bne 0b @ do outer loop
286
287 @ Copy start of xx1 and xx2 back to alternate mem1 and mem2 entries
288 @ r0 and r1 are &xx1[0] and &xx2[0] at this point
        @ r4 still holds M (reloaded after the last inner loop) and is the copy counter
289 add sp, sp, #8 @ Drop stacked M and N
290 ldmia sp, { r5-r6, sp } @ Fetch &mem1[1], &mem2[1], restore sp
2910:
292 ldr r7, [r0], #4 @ Two halfwords from xx1
293 ldr r8, [r1], #4 @ Two halfwords from xx2
294 strh r7, [r5], #4 @ Low halfwords first
295 strh r8, [r6], #4
296 mov r7, r7, lsr #16
297 mov r8, r8, lsr #16
298 strh r7, [r5], #4 @ Then the high halfwords
299 strh r8, [r6], #4
300 subs r4, r4, #4
301 bne 0b
302 ldmpc regs=r4-r11 @ Exit
303
304
305/* void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len) */
/* Scales a block of 32-bit samples: y[i] = low 32 bits of
   ((64-bit signed product scale * x[i]) >> 14).
   Four samples are handled per pass and the counter is only compared
   against exactly zero after being decremented, so len must be a non-zero
   multiple of four. r12 is used as scratch for the high product word and
   is not preserved.
   NOTE(review): ldmpc is not defined in this file; presumably a macro from
   config.h that pops the listed registers and returns - confirm. */
306 .global signal_mul
307signal_mul:
308 stmdb sp!, { r4-r8, lr }
3090:
310 ldmia r0!, { r5-r8 } @ Load four input samples
311 smull r5, r12, r2, r5 @ r12:r5 = scale * x[i] (64-bit signed product)
312 mov r12, r12, lsl #18 @ Recombine upper and lower parts
313 orr r5, r12, r5, lsr #14 @ r5 = (hi << 18) | (lo >> 14) = product >> 14
314 smull r6, r12, r2, r6
315 mov r12, r12, lsl #18
316 orr r6, r12, r6, lsr #14
317 smull r7, r12, r2, r7
318 mov r12, r12, lsl #18
319 orr r7, r12, r7, lsr #14
320 smull r8, r12, r2, r8
321 mov r12, r12, lsl #18
322 orr r8, r12, r8, lsr #14
323 stmia r1!, { r5-r8 } @ Store four output samples
324 subs r3, r3, #4 @ Are we done?
325 bne 0b
326
327 ldmpc regs=r4-r8 @ Exit
328