diff options
author | Thom Johansen <thomj@rockbox.org> | 2007-11-01 21:11:26 +0000 |
---|---|---|
committer | Thom Johansen <thomj@rockbox.org> | 2007-11-01 21:11:26 +0000 |
commit | 6d88717f6949587908ec08affa07d06239c3bae1 (patch) | |
tree | 55f93d99a48d0e571cee7eb8f024ca54cb58784d /apps/codecs/libspeex | |
parent | 9e23e9d43e6345bd3f8caa2176c1084251160a28 (diff) | |
download | rockbox-6d88717f6949587908ec08affa07d06239c3bae1.tar.gz rockbox-6d88717f6949587908ec08affa07d06239c3bae1.zip |
ARM assembler versions of iir_mem16() and qmf_synth(), yielding a very nice speedup. Touch some comments in filters_cf.S
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15393 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libspeex')
-rw-r--r-- | apps/codecs/libspeex/SOURCES | 2 | ||||
-rw-r--r-- | apps/codecs/libspeex/filters.c | 2 | ||||
-rw-r--r-- | apps/codecs/libspeex/filters_arm4.S | 302 | ||||
-rw-r--r-- | apps/codecs/libspeex/filters_cf.S | 28 |
4 files changed, 321 insertions, 13 deletions
diff --git a/apps/codecs/libspeex/SOURCES b/apps/codecs/libspeex/SOURCES index f5a6786fa1..e1f038160b 100644 --- a/apps/codecs/libspeex/SOURCES +++ b/apps/codecs/libspeex/SOURCES | |||
@@ -34,4 +34,6 @@ window.c | |||
34 | #ifdef CPU_COLDFIRE | 34 | #ifdef CPU_COLDFIRE |
35 | filters_cf.S | 35 | filters_cf.S |
36 | ltp_cf.S | 36 | ltp_cf.S |
37 | #elif defined(CPU_ARM) | ||
38 | filters_arm4.S | ||
37 | #endif | 39 | #endif |
diff --git a/apps/codecs/libspeex/filters.c b/apps/codecs/libspeex/filters.c index 0e76e27e84..36b110af30 100644 --- a/apps/codecs/libspeex/filters.c +++ b/apps/codecs/libspeex/filters.c | |||
@@ -45,6 +45,8 @@ | |||
45 | #include "filters_sse.h" | 45 | #include "filters_sse.h" |
46 | #elif defined (ARM4_ASM) || defined(ARM5E_ASM) | 46 | #elif defined (ARM4_ASM) || defined(ARM5E_ASM) |
47 | #include "filters_arm4.h" | 47 | #include "filters_arm4.h" |
48 | #define OVERRIDE_IIR_MEM16 | ||
49 | #define OVERRIDE_QMF_SYNTH | ||
48 | #elif defined (COLDFIRE_ASM) | 50 | #elif defined (COLDFIRE_ASM) |
49 | #define OVERRIDE_IIR_MEM16 | 51 | #define OVERRIDE_IIR_MEM16 |
50 | #define OVERRIDE_QMF_SYNTH | 52 | #define OVERRIDE_QMF_SYNTH |
diff --git a/apps/codecs/libspeex/filters_arm4.S b/apps/codecs/libspeex/filters_arm4.S new file mode 100644 index 0000000000..7924e7030f --- /dev/null +++ b/apps/codecs/libspeex/filters_arm4.S | |||
@@ -0,0 +1,302 @@ | |||
1 | /* Copyright (C) 2007 Thom Johansen */ | ||
2 | /** | ||
3 | @file filters_arm4.S | ||
4 | @brief Various analysis/synthesis filters (ARMv4 version) | ||
5 | */ | ||
6 | /* | ||
7 | Redistribution and use in source and binary forms, with or without | ||
8 | modification, are permitted provided that the following conditions | ||
9 | are met: | ||
10 | |||
11 | - Redistributions of source code must retain the above copyright | ||
12 | notice, this list of conditions and the following disclaimer. | ||
13 | |||
14 | - Redistributions in binary form must reproduce the above copyright | ||
15 | notice, this list of conditions and the following disclaimer in the | ||
16 | documentation and/or other materials provided with the distribution. | ||
17 | |||
18 | - Neither the name of the Xiph.org Foundation nor the names of its | ||
19 | contributors may be used to endorse or promote products derived from | ||
20 | this software without specific prior written permission. | ||
21 | |||
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
23 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR | ||
26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
29 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | */ | ||
34 | |||
35 | .text | ||
36 | /* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */ | ||
37 | .global iir_mem16 | ||
38 | iir_mem16: | ||
39 | stmdb sp!, { r4-r11, lr } | ||
40 | ldr r5, [sp, #36] @ r0 = x, r1 = den, r2 = y, r3 = N | ||
41 | ldr r4, [sp, #40] @ r4 = mem, r5 = ord | ||
42 | cmp r5, #10 | ||
43 | beq .order_10 | ||
44 | cmp r5, #8 | ||
45 | beq .order_8 | ||
46 | ldmia sp!, { r4-r11, pc } @ Non-supported order, return | ||
47 | |||
48 | @ TODO: try using direct form 1 filtering | ||
49 | .order_8: | ||
50 | ldmia r4, { r5-r12 } @ r5-r12 = mem[0..7] | ||
51 | 0: | ||
52 | add r5, r5, #4096 @ Rounding constant | ||
53 | ldrsh r14, [r0], #2 | ||
54 | add r14, r14, r5, asr #13 @ (mem[0] + 4096) >> 13 + x[i] | ||
55 | mov r5, #0x7f00 | ||
56 | orr r5, r5, #0xff @ r5 = 32767 | ||
57 | cmp r14, r5 | ||
58 | movgt r14, r5 @ Clip positive | ||
59 | cmn r14, r5 | ||
60 | rsblt r14, r5, #0 @ Clip negative | ||
61 | strh r14, [r2], #2 @ Write result to y[i] | ||
62 | |||
63 | ldrsh r4, [r1] | ||
64 | mul r5, r4, r14 | ||
65 | sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i] | ||
66 | ldrsh r4, [r1, #2] | ||
67 | mul r6, r4, r14 | ||
68 | sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i] | ||
69 | ldrsh r4, [r1, #4] | ||
70 | mul r7, r4, r14 | ||
71 | sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i] | ||
72 | ldrsh r4, [r1, #6] | ||
73 | mul r8, r4, r14 | ||
74 | sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i] | ||
75 | ldrsh r4, [r1, #8] | ||
76 | mul r9, r4, r14 | ||
77 | sub r9, r10, r9 @ mem[4] = mem[5] - den[4]*y[i] | ||
78 | ldrsh r4, [r1, #10] | ||
79 | mul r10, r4, r14 | ||
80 | sub r10, r11, r10 @ mem[5] = mem[6] - den[5]*y[i] | ||
81 | ldrsh r4, [r1, #12] | ||
82 | mul r11, r4, r14 | ||
83 | sub r11, r12, r11 @ mem[6] = mem[7] - den[6]*y[i] | ||
84 | ldrsh r4, [r1, #14] | ||
85 | mul r12, r4, r14 | ||
86 | rsb r12, r12, #0 @ mem[7] = -den[7]*y[i] | ||
87 | subs r3, r3, #1 | ||
88 | bne 0b | ||
89 | ldr r4, [sp, #40] @ r4 = mem | ||
90 | stmia r4, { r5-r12 } @ Save back mem[] | ||
91 | ldmia sp!, { r4-r11, pc } @ Exit | ||
92 | |||
93 | .order_10: | ||
94 | ldmia r4, { r5-r9 } @ r5-r9 = mem[0..4] | ||
95 | add r5, r5, #4096 @ Rounding constant | ||
96 | ldrsh r14, [r0], #2 | ||
97 | add r14, r14, r5, asr #13 @ (mem[0] + 4096) >> 13 + x[i] | ||
98 | mov r5, #0x7f00 | ||
99 | orr r5, r5, #0xff @ r5 = 32767 | ||
100 | cmp r14, r5 | ||
101 | movgt r14, r5 @ Clip positive | ||
102 | cmn r14, r5 | ||
103 | rsblt r14, r5, #0 @ Clip negative | ||
104 | strh r14, [r2], #2 @ Write result to y[i] | ||
105 | |||
106 | ldmia r1!, { r10-r12 } @ r10-r12 = den[0..5] | ||
107 | mov r5, r10, lsl #16 | ||
108 | mov r5, r5, asr #16 | ||
109 | mul r5, r14, r5 | ||
110 | sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i] | ||
111 | mov r10, r10, asr #16 | ||
112 | mul r6, r14, r10 | ||
113 | sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i] | ||
114 | mov r10, r11, lsl #16 | ||
115 | mov r10, r10, asr #16 | ||
116 | mul r7, r14, r10 | ||
117 | sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i] | ||
118 | mov r10, r11, asr #16 | ||
119 | mul r8, r14, r10 | ||
120 | sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i] | ||
121 | stmia r4!, { r5-r8 } @ Write back mem[0..3], r4 = &mem[4] | ||
122 | mov r10, r12, lsl #16 | ||
123 | mov r10, r10, asr #16 | ||
124 | mul r5, r14, r10 | ||
125 | |||
126 | ldmib r4, { r6-r10 } @ r6-r10 = mem[5..9] | ||
127 | sub r5, r6, r5 @ mem[4] = mem[5] - den[4]*y[i] | ||
128 | mov r12, r12, asr #16 | ||
129 | mul r6, r14, r12 | ||
130 | sub r6, r7, r6 @ mem[5] = mem[6] - den[5]*y[i] | ||
131 | ldmia r1!, { r11-r12 } @ r11-r12 = den[6..9] | ||
132 | mov r7, r11, lsl #16 | ||
133 | mov r7, r7, asr #16 | ||
134 | mul r7, r14, r7 | ||
135 | sub r7, r8, r7 @ mem[6] = mem[7] - den[6]*y[i] | ||
136 | mov r11, r11, asr #16 | ||
137 | mul r8, r14, r11 | ||
138 | sub r8, r9, r8 @ mem[7] = mem[8] - den[7]*y[i] | ||
139 | mov r11, r12, lsl #16 | ||
140 | mov r11, r11, asr #16 | ||
141 | mul r9, r14, r11 | ||
142 | sub r9, r10, r9 @ mem[8] = mem[9] - den[8]*y[i] | ||
143 | mov r12, r12, asr #16 | ||
144 | mul r10, r14, r12 | ||
145 | rsb r10, r10, #0 @ mem[9] = -den[9]*y[i] | ||
146 | stmia r4!, { r5-r10 } @ Write back mem[4..9] | ||
147 | sub r4, r4, #10*4 | ||
148 | sub r1, r1, #10*2 | ||
149 | subs r3, r3, #1 | ||
150 | bne .order_10 | ||
151 | ldmia sp!, { r4-r11, pc } @ Exit | ||
152 | |||
153 | |||
154 | /* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */ | ||
155 | .global qmf_synth | ||
156 | qmf_synth: | ||
157 | stmdb sp!, { r4-r11, lr } | ||
158 | add r7, sp, #36 @ r0 = x1, r1 = x2, r2 = a, r3 = y | ||
159 | ldmia r7, { r4-r7 } @ r4 = N, r5 = M, r6 = mem1, r7 = mem2 | ||
160 | |||
161 | add r8, r4, r5 | ||
162 | sub r9, sp, r8 @ r9 = sp - (N + M >> 1) = xx2 | ||
163 | sub r8, r9, r8 @ r8 = r9 - (N + M >> 1) = xx1 | ||
164 | str sp, [r8, #-4] @ Stack old sp | ||
165 | sub sp, r8, #4 @ Update sp | ||
166 | |||
167 | add r0, r0, r4 @ x1 += N >> 1 | ||
168 | add r1, r1, r4 @ x2 += N >> 1 | ||
169 | mov r14, r4 @ Loop counter is N | ||
170 | 0: | ||
171 | @ Backwards copy x1 and x2 arrays to xx1 and xx2, assume N2 is power of two | ||
172 | @ N should always be a multiple of four, so this should be OK | ||
173 | ldmdb r0!, { r10-r11 } | ||
174 | mov r12, r10, ror #16 | ||
175 | mov r11, r11, ror #16 | ||
176 | stmia r8!, { r11-r12 } | ||
177 | ldmdb r1!, { r10-r11 } | ||
178 | mov r12, r10, ror #16 | ||
179 | mov r11, r11, ror #16 | ||
180 | stmia r9!, { r11-r12 } | ||
181 | subs r14, r14, #8 | ||
182 | bne 0b | ||
183 | |||
184 | @ Copy alternate members of mem1 and mem2 to last part of xx1 and xx2 | ||
185 | mov r14, r5 @ Loop counter is M | ||
186 | add r6, r6, #2 | ||
187 | add r7, r7, #2 | ||
188 | stmdb sp!, { r6-r7 } @ Stack &mem1[1], &mem2[1] | ||
189 | 0: | ||
190 | ldrh r10, [r6], #4 | ||
191 | ldrh r11, [r6], #4 | ||
192 | ldrh r12, [r7], #4 | ||
193 | orr r10, r10, r11, lsl #16 | ||
194 | ldrh r11, [r7], #4 | ||
195 | orr r11, r12, r11, lsl #16 | ||
196 | str r10, [r8], #4 | ||
197 | str r11, [r9], #4 | ||
198 | subs r14, r14, #4 | ||
199 | bne 0b | ||
200 | |||
201 | sub r0, r8, r5 @ r0 = &xx1[N2] | ||
202 | sub r1, r9, r5 @ r1 = &xx2[N2] | ||
203 | str r4, [sp, #-4] @ Stack N | ||
204 | mov r4, r5 | ||
205 | str r4, [sp, #-8] @ Stack M | ||
206 | @ sp doesn't point to the end of the stack frame from here on, but we're not | ||
207 | @ calling anything so it shouldn't matter | ||
208 | @ Main loop, register usage: | ||
209 | @ r0 = xx1, r1 = xx2, r2 = a, r3 = y, r4 = M, r5 = x10, r6 = x11, r7 = x20 | ||
210 | @ r8 = x21, r9 = [a1, a0], r10 = acc0, r11 = acc1, r12 = acc2, r14 = acc3 | ||
211 | 0: @ Outerloop | ||
212 | mov r10, #16384 @ Init accumulators to rounding const | ||
213 | mov r11, #16384 | ||
214 | mov r12, #16384 | ||
215 | mov r14, #16384 | ||
216 | |||
217 | ldrsh r5, [r0, #-4]! @ r5 = x10, r0 = &xx1[N2 - 2] | ||
218 | ldrsh r7, [r1, #-4]! @ r7 = x20, r1 = &xx2[N2 - 2] | ||
219 | 1: @ Innerloop | ||
220 | ldrsh r9, [r2], #2 @ r9 = a0 | ||
221 | ldrsh r6, [r0, #2]! @ r6 = x11 | ||
222 | ldrsh r8, [r1, #2]! @ r8 = x21 | ||
223 | sub r5, r5, r7 @ r5 = x10 - x20 | ||
224 | add r7, r5, r7, asl #1 @ r7 = x10 + x20 | ||
225 | mla r12, r9, r5, r12 @ acc2 += a0*(x10 - x20) | ||
226 | sub r5, r6, r8 @ r5 = x11 - x21 | ||
227 | mla r10, r9, r5, r10 @ acc0 += a0*(x11 - x21) | ||
228 | ldrsh r9, [r2], #2 @ r9 = a1 | ||
229 | add r5, r6, r8 @ r5 = x11 + x21 | ||
230 | mla r14, r9, r7, r14 @ acc3 += a1*(x10 + x20) | ||
231 | mla r11, r9, r5, r11 @ acc1 += a1*(x11 + x21) | ||
232 | |||
233 | ldrsh r9, [r2], #2 @ r9 = a1 | ||
234 | ldrsh r5, [r0, #2]! @ r5 = x10 | ||
235 | ldrsh r7, [r1, #2]! @ r7 = x20 | ||
236 | sub r6, r6, r8 @ r6 = x11 - x21 | ||
237 | add r8, r6, r8, asl #1 @ r8 = x11 + x21 | ||
238 | mla r12, r9, r6, r12 @ acc2 += a0*(x11 - x21) | ||
239 | sub r6, r5, r7 @ r6 = x10 - x20 | ||
240 | mla r10, r9, r6, r10 @ acc0 += a0*(x10 - x20) | ||
241 | ldrsh r9, [r2], #2 @ r9 = a1 | ||
242 | add r6, r5, r7 @ r6 = x10 + x20 | ||
243 | mla r14, r9, r8, r14 @ acc3 += a1*(x11 + x21) | ||
244 | mla r11, r9, r6, r11 @ acc1 += a1*(x10 + x20) | ||
245 | subs r4, r4, #4 | ||
246 | bne 1b | ||
247 | |||
248 | ldr r4, [sp, #-8] @ r4 = M | ||
249 | sub r2, r2, r4, lsl #1 @ r2 = &a[0] | ||
250 | sub r0, r0, r4 @ r0 = &xx1[N2 - 2 - i] | ||
251 | sub r1, r1, r4 @ r1 = &xx2[N2 - 2 - i] | ||
252 | |||
253 | mov r10, r10, asr #15 @ Shift outputs down | ||
254 | mov r11, r11, asr #15 | ||
255 | mov r12, r12, asr #15 | ||
256 | mov r14, r14, asr #15 | ||
257 | |||
258 | @ TODO: this can be optimized further | ||
259 | mov r9, #0x7f00 @ Clip all four outputs | ||
260 | orr r9, r9, #0xff @ r9 = 32767 | ||
261 | cmp r10, r9 | ||
262 | movgt r10, r9 | ||
263 | cmn r10, r9 | ||
264 | rsblt r10, r9, #0 | ||
265 | cmp r11, r9 | ||
266 | movgt r11, r9 | ||
267 | cmn r11, r9 | ||
268 | rsblt r11, r9, #0 | ||
269 | cmp r12, r9 | ||
270 | movgt r12, r9 | ||
271 | cmn r12, r9 | ||
272 | rsblt r12, r9, #0 | ||
273 | cmp r14, r9 | ||
274 | movgt r14, r9 | ||
275 | cmn r14, r9 | ||
276 | rsblt r14, r9, #0 | ||
277 | |||
278 | strh r10, [r3], #2 @ Write outputs | ||
279 | strh r11, [r3], #2 | ||
280 | strh r12, [r3], #2 | ||
281 | strh r14, [r3], #2 | ||
282 | ldr r10, [sp, #-4] @ Load N | ||
283 | subs r10, r10, #4 @ Are we done? | ||
284 | strne r10, [sp, #-4] | ||
285 | bne 0b | ||
286 | |||
287 | @ Copy start of xx1 and xx2 back to alternate mem1 and mem2 entries | ||
288 | @ r0 and r1 are &xx1[0] and &xx2[0] at this point | ||
289 | ldmia sp, { r5-r6, sp } @ Fetch &mem1[1], &mem2[1], restore sp | ||
290 | 0: | ||
291 | ldr r7, [r0], #4 | ||
292 | ldr r8, [r1], #4 | ||
293 | strh r7, [r5], #4 | ||
294 | strh r8, [r6], #4 | ||
295 | mov r7, r7, lsr #16 | ||
296 | mov r8, r8, lsr #16 | ||
297 | strh r7, [r5], #4 | ||
298 | strh r8, [r6], #4 | ||
299 | subs r4, r4, #4 | ||
300 | bne 0b | ||
301 | ldmia sp!, { r4-r11, pc } @ Exit | ||
302 | |||
diff --git a/apps/codecs/libspeex/filters_cf.S b/apps/codecs/libspeex/filters_cf.S index b0367025e1..861d6c18f9 100644 --- a/apps/codecs/libspeex/filters_cf.S +++ b/apps/codecs/libspeex/filters_cf.S | |||
@@ -48,6 +48,7 @@ iir_mem16: | |||
48 | jeq .order_10 | 48 | jeq .order_10 |
49 | jra .exit | 49 | jra .exit |
50 | 50 | ||
51 | | TODO: try using direct form 1 filtering | ||
51 | | d0 = y[i], d1-d7, a0 = mem[0] .. mem[7] | 52 | | d0 = y[i], d1-d7, a0 = mem[0] .. mem[7] |
52 | | a3 = x, a4 = den, a5 = y, a6 = temp | 53 | | a3 = x, a4 = den, a5 = y, a6 = temp |
53 | .order_8: | 54 | .order_8: |
@@ -171,6 +172,7 @@ iir_mem16: | |||
171 | lea.l (44, %sp), %sp | 172 | lea.l (44, %sp), %sp |
172 | rts | 173 | rts |
173 | 174 | ||
175 | |||
174 | /* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */ | 176 | /* void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word32_t *mem1, spx_word32_t *mem2, char *stack) */ |
175 | .global qmf_synth | 177 | .global qmf_synth |
176 | qmf_synth: | 178 | qmf_synth: |
@@ -210,10 +212,10 @@ qmf_synth: | |||
210 | jne 0b | 212 | jne 0b |
211 | 213 | ||
212 | | Copy alternate members of mem1 and mem2 to last part of xx1 and xx2 | 214 | | Copy alternate members of mem1 and mem2 to last part of xx1 and xx2 |
213 | move.l %d1, %d2 | Loop counter is M2 | 215 | move.l %d1, %d2 | Loop counter is M2 |
214 | addq.l #2, %a4 | a4 = &mem1[1] | 216 | addq.l #2, %a4 | a4 = &mem1[1] |
215 | addq.l #2, %a5 | a5 = &mem2[1] | 217 | addq.l #2, %a5 | a5 = &mem2[1] |
216 | move.l %a4, %d3 | Backup mem1 and mem2 | 218 | move.l %a4, %d3 | Backup mem1 and mem2 |
217 | move.l %a5, %d4 | 219 | move.l %a5, %d4 |
218 | 0: | 220 | 0: |
219 | move.w (%a4), (%a2)+ | 221 | move.w (%a4), (%a2)+ |
@@ -222,14 +224,14 @@ qmf_synth: | |||
222 | addq.l #4, %a5 | 224 | addq.l #4, %a5 |
223 | subq.l #1, %d2 | 225 | subq.l #1, %d2 |
224 | jne 0b | 226 | jne 0b |
225 | move.l %d3, %a4 | a4 = &mem1[1] | 227 | move.l %d3, %a4 | a4 = &mem1[1] |
226 | move.l %d4, %a5 | a5 = &mem2[1] | 228 | move.l %d4, %a5 | a5 = &mem2[1] |
227 | 229 | ||
228 | clr.l %d2 | 230 | clr.l %d2 |
229 | sub.l %d1, %d2 | d2 = -M2 | 231 | sub.l %d1, %d2 | d2 = -M2 |
230 | lea.l (-4, %a2, %d2.l*2), %a0 | a0 = &xx1[N2 - 2] | 232 | lea.l (-4, %a2, %d2.l*2), %a0 | a0 = &xx1[N2 - 2] |
231 | lea.l (-4, %a6, %d2.l*2), %a1 | a1 = &xx2[N2 - 2] | 233 | lea.l (-4, %a6, %d2.l*2), %a1 | a1 = &xx2[N2 - 2] |
232 | move.l %d6, %a2 | a2 = a | 234 | move.l %d6, %a2 | a2 = a |
233 | 235 | ||
234 | | Main loop, register usage: | 236 | | Main loop, register usage: |
235 | | d0 = N2 counter, d1 = M2 counter, d7 = M2 backup | 237 | | d0 = N2 counter, d1 = M2 counter, d7 = M2 backup |
@@ -286,7 +288,7 @@ qmf_synth: | |||
286 | | The clipping will be [-32768..32767], not Speex standard [-32767..32767], | 288 | | The clipping will be [-32768..32767], not Speex standard [-32767..32767], |
287 | | but since qmf_synth() is called so late in the signal chain, it should | 289 | | but since qmf_synth() is called so late in the signal chain, it should |
288 | | work fine. | 290 | | work fine. |
289 | move.w %d2, (%a3)+ | Write results to y[] | 291 | move.w %d2, (%a3)+ | Write results to y[] |
290 | move.w %d3, (%a3)+ | 292 | move.w %d3, (%a3)+ |
291 | move.w %d4, (%a3)+ | 293 | move.w %d4, (%a3)+ |
292 | move.w %d5, (%a3)+ | 294 | move.w %d5, (%a3)+ |
@@ -294,8 +296,8 @@ qmf_synth: | |||
294 | jne 0b | 296 | jne 0b |
295 | 297 | ||
296 | | Copy start of xx1 and xx2 back to alternate mem1 and mem2 entries | 298 | | Copy start of xx1 and xx2 back to alternate mem1 and mem2 entries |
297 | addq.l #4, %a0 | a0 = &xx1[0] | 299 | addq.l #4, %a0 | a0 = &xx1[0] |
298 | addq.l #4, %a1 | a1 = &xx2[0] | 300 | addq.l #4, %a1 | a1 = &xx2[0] |
299 | 0: | 301 | 0: |
300 | move.w (%a0)+, (%a4) | 302 | move.w (%a0)+, (%a4) |
301 | move.w (%a1)+, (%a5) | 303 | move.w (%a1)+, (%a5) |