diff options
Diffstat (limited to 'lib/rbcodec/codecs/demac/libdemac/predictor-arm.S')
-rw-r--r-- | lib/rbcodec/codecs/demac/libdemac/predictor-arm.S | 702 |
1 files changed, 702 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S b/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S new file mode 100644 index 0000000000..92a78ed9b4 --- /dev/null +++ b/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S | |||
@@ -0,0 +1,702 @@ | |||
1 | /* | ||
2 | |||
3 | libdemac - A Monkey's Audio decoder | ||
4 | |||
5 | $Id$ | ||
6 | |||
7 | Copyright (C) Dave Chapman 2007 | ||
8 | |||
9 | This program is free software; you can redistribute it and/or modify | ||
10 | it under the terms of the GNU General Public License as published by | ||
11 | the Free Software Foundation; either version 2 of the License, or | ||
12 | (at your option) any later version. | ||
13 | |||
14 | This program is distributed in the hope that it will be useful, | ||
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | GNU General Public License for more details. | ||
18 | |||
19 | You should have received a copy of the GNU General Public License | ||
20 | along with this program; if not, write to the Free Software | ||
21 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | ||
22 | |||
23 | */ | ||
24 | #include "demac_config.h" | ||
25 | |||
26 | .section ICODE_SECTION_DEMAC_ARM,"ax",%progbits | ||
27 | |||
28 | .align 2 | ||
29 | |||
30 | /* NOTE: The following need to be kept in sync with parser.h */ | ||
31 | /* They are used below as byte offsets from the p->buf pointer (r14). */ | ||
32 | #define YDELAYA 200 | ||
33 | #define YDELAYB 168 | ||
34 | #define XDELAYA 136 | ||
35 | #define XDELAYB 104 | ||
36 | #define YADAPTCOEFFSA 72 | ||
37 | #define XADAPTCOEFFSA 56 | ||
38 | #define YADAPTCOEFFSB 40 | ||
39 | #define XADAPTCOEFFSB 20 | ||
40 | |||
41 | /* struct predictor_t members (byte offsets from p, held in r12): */ | ||
42 | #define buf 0 /* int32_t* buf */ | ||
43 | |||
44 | #define YlastA 4 /* int32_t YlastA; */ | ||
45 | #define XlastA 8 /* int32_t XlastA; */ | ||
46 | |||
47 | #define YfilterB 12 /* int32_t YfilterB; */ | ||
48 | #define XfilterA 16 /* int32_t XfilterA; */ | ||
49 | |||
50 | #define XfilterB 20 /* int32_t XfilterB; */ | ||
51 | #define YfilterA 24 /* int32_t YfilterA; */ | ||
52 | |||
53 | #define YcoeffsA 28 /* int32_t YcoeffsA[4]; */ | ||
54 | #define XcoeffsA 44 /* int32_t XcoeffsA[4]; */ | ||
55 | #define YcoeffsB 60 /* int32_t YcoeffsB[5]; */ | ||
56 | #define XcoeffsB 80 /* int32_t XcoeffsB[5]; */ | ||
57 | |||
58 | #define historybuffer 100 /* int32_t historybuffer[] */ | ||
59 | |||
60 | @ Macro for loading 2 consecutive words from [base + offset], for various ARM versions. | ||
61 | @ Registers must start with an even register, and must be consecutive | ||
62 | @ (the ldrd used when ARM_ARCH >= 6 takes an even/odd register pair). | ||
63 | .macro LDR2OFS reg1, reg2, base, offset | ||
64 | #if ARM_ARCH >= 6 | ||
65 | ldrd \reg1, [\base, \offset] | ||
66 | #else /* ARM_ARCH < 6 */ | ||
67 | #ifdef CPU_ARM7TDMI /* reg1 first holds the address, then is overwritten by the load */ | ||
68 | add \reg1, \base, \offset | ||
69 | ldmia \reg1, {\reg1, \reg2} | ||
70 | #else /* ARM9 (v4 and v5) is faster this way */ | ||
71 | ldr \reg1, [\base, \offset] | ||
72 | ldr \reg2, [\base, \offset+4] | ||
73 | #endif | ||
74 | #endif /* ARM_ARCH */ | ||
75 | .endm | ||
76 | |||
77 | @ Macro for storing 2 registers to consecutive words at [base + offset], for various ARM versions. | ||
78 | @ Registers must start with an even register, and must be consecutive | ||
79 | @ (the strd used when ARM_ARCH >= 6 takes an even/odd register pair). | ||
80 | .macro STR2OFS reg1, reg2, base, offset | ||
81 | #if ARM_ARCH >= 6 | ||
82 | strd \reg1, [\base, \offset] | ||
83 | #else /* ARM_ARCH < 6: two single-word stores, second word at offset+4 */ | ||
84 | str \reg1, [\base, \offset] | ||
85 | str \reg2, [\base, \offset+4] | ||
86 | #endif | ||
87 | .endm | ||
88 | |||
89 | .global predictor_decode_stereo | ||
90 | .type predictor_decode_stereo,%function | ||
91 | |||
92 | @ Register usage: | ||
93 | @ | ||
94 | @ r0-r11 - scratch | ||
95 | @ r12 - struct predictor_t* p | ||
96 | @ r14 - int32_t* p->buf | ||
97 | |||
98 | @ void predictor_decode_stereo(struct predictor_t* p, | ||
99 | @ int32_t* decoded0, | ||
100 | @ int32_t* decoded1, | ||
101 | @ int count) | ||
102 | @ NOTE: count is only tested after a sample pair has been decoded, so it must be >= 1. | ||
103 | predictor_decode_stereo: | ||
104 | stmdb sp!, {r1-r11, lr} | ||
105 | |||
106 | @ r1 (decoded0) is [sp] | ||
107 | @ r2 (decoded1) is [sp, #4] | ||
108 | @ r3 (count) is [sp, #8] | ||
109 | |||
110 | mov r12, r0 @ r12 := p | ||
111 | ldr r14, [r0] @ r14 := p->buf | ||
112 | |||
113 | loop: | ||
114 | |||
115 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y | ||
116 | |||
117 | @ Predictor Y, Filter A | ||
118 | |||
119 | ldr r11, [r12, #YlastA] @ r11 := p->YlastA | ||
120 | |||
121 | add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3] | ||
122 | ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3] | ||
123 | @ r3 := p->buf[YDELAYA-2] | ||
124 | @ r10 := p->buf[YDELAYA-1] | ||
125 | |||
126 | add r6, r12, #YcoeffsA | ||
127 | ldmia r6, {r6 - r9} @ r6 := p->YcoeffsA[0] | ||
128 | @ r7 := p->YcoeffsA[1] | ||
129 | @ r8 := p->YcoeffsA[2] | ||
130 | @ r9 := p->YcoeffsA[3] | ||
131 | |||
132 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
133 | |||
134 | STR2OFS r10, r11, r14, #YDELAYA-4 | ||
135 | @ p->buf[YDELAYA-1] = r10 | ||
136 | @ p->buf[YDELAYA] = r11 | ||
137 | |||
138 | mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0] | ||
139 | mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
140 | mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
141 | mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
142 | |||
143 | @ flags were set above, in the subs instruction | ||
144 | mvngt r10, #0 | ||
145 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
146 | |||
147 | cmp r11, #0 | ||
148 | mvngt r11, #0 | ||
149 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
150 | |||
151 | STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4 | ||
152 | @ p->buf[YADAPTCOEFFSA-1] := r10 | ||
153 | @ p->buf[YADAPTCOEFFSA] := r11 | ||
154 | |||
155 | @ NOTE: r0 now contains predictionA - don't overwrite. | ||
156 | |||
157 | @ Predictor Y, Filter B | ||
158 | |||
159 | LDR2OFS r6, r7, r12, #YfilterB @ r6 := p->YfilterB | ||
160 | @ r7 := p->XfilterA | ||
161 | |||
162 | add r2, r14, #YDELAYB-16 @ r2 := &p->buf[YDELAYB-4] | ||
163 | ldmia r2, {r2 - r4, r10} @ r2 := p->buf[YDELAYB-4] | ||
164 | @ r3 := p->buf[YDELAYB-3] | ||
165 | @ r4 := p->buf[YDELAYB-2] | ||
166 | @ r10 := p->buf[YDELAYB-1] | ||
167 | |||
168 | rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) | ||
169 | sub r11, r7, r6, asr #5 @ r11 (p->buf[YDELAYB]) := r7 - (r6 >> 5) | ||
170 | |||
171 | str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA) | ||
172 | |||
173 | add r5, r12, #YcoeffsB | ||
174 | ldmia r5, {r5 - r9} @ r5 := p->YcoeffsB[0] | ||
175 | @ r6 := p->YcoeffsB[1] | ||
176 | @ r7 := p->YcoeffsB[2] | ||
177 | @ r8 := p->YcoeffsB[3] | ||
178 | @ r9 := p->YcoeffsB[4] | ||
179 | |||
180 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
181 | |||
182 | STR2OFS r10, r11, r14, #YDELAYB-4 | ||
183 | @ p->buf[YDELAYB-1] = r10 | ||
184 | @ p->buf[YDELAYB] = r11 | ||
185 | |||
186 | mul r1, r11, r5 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0] | ||
187 | mla r1, r10, r6, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1] | ||
188 | mla r1, r4, r7, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2] | ||
189 | mla r1, r3, r8, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3] | ||
190 | mla r1, r2, r9, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4] | ||
191 | |||
192 | @ flags were set above, in the subs instruction | ||
193 | mvngt r10, #0 | ||
194 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
195 | |||
196 | cmp r11, #0 | ||
197 | mvngt r11, #0 | ||
198 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
199 | |||
200 | STR2OFS r10, r11, r14, #YADAPTCOEFFSB-4 | ||
201 | @ p->buf[YADAPTCOEFFSB-1] := r10 | ||
202 | @ p->buf[YADAPTCOEFFSB] := r11 | ||
203 | |||
204 | @ r0 still contains predictionA | ||
205 | @ r1 contains predictionB | ||
206 | |||
207 | @ Finish Predictor Y | ||
208 | |||
209 | ldr r2, [sp] @ r2 := decoded0 | ||
210 | add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1) | ||
211 | ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA | ||
212 | ldr r3, [r2] @ r3 := *decoded0 | ||
213 | rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31) | ||
214 | add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10) | ||
215 | str r1, [r12, #YlastA] @ p->YlastA := r1 | ||
216 | add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5) | ||
217 | str r1, [r12, #YfilterA] @ p->YfilterA := r1 | ||
218 | |||
219 | @ r1 contains p->YfilterA | ||
220 | @ r2 contains decoded0 | ||
221 | @ r3 contains *decoded0 | ||
222 | |||
223 | @ r5, r6, r7, r8, r9 contain p->YcoeffsB[0..4] | ||
224 | @ r10, r11 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB] | ||
225 | |||
226 | str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA) | ||
227 | str r2, [sp] @ save decoded0 | ||
228 | cmp r3, #0 | ||
229 | beq 3f | ||
230 | |||
231 | add r2, r14, #YADAPTCOEFFSB-16 | ||
232 | ldmia r2, {r2 - r4} @ r2 := p->buf[YADAPTCOEFFSB-4] | ||
233 | @ r3 := p->buf[YADAPTCOEFFSB-3] | ||
234 | @ r4 := p->buf[YADAPTCOEFFSB-2] | ||
235 | blt 1f | ||
236 | |||
237 | @ *decoded0 > 0 | ||
238 | |||
239 | sub r5, r5, r11 @ r5 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB] | ||
240 | sub r6, r6, r10 @ r6 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1] | ||
241 | sub r9, r9, r2 @ r9 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4] | ||
242 | sub r8, r8, r3 @ r8 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3] | ||
243 | sub r7, r7, r4 @ r7 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2] | ||
244 | |||
245 | add r0, r12, #YcoeffsB | ||
246 | stmia r0, {r5 - r9} @ Save p->YcoeffsB[] | ||
247 | |||
248 | add r1, r12, #YcoeffsA | ||
249 | ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0] | ||
250 | @ r3 := p->YcoeffsA[1] | ||
251 | @ r4 := p->YcoeffsA[2] | ||
252 | @ r5 := p->YcoeffsA[3] | ||
253 | |||
254 | add r6, r14, #YADAPTCOEFFSA-12 | ||
255 | ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3] | ||
256 | @ r7 := p->buf[YADAPTCOEFFSA-2] | ||
257 | @ r8 := p->buf[YADAPTCOEFFSA-1] | ||
258 | @ r9 := p->buf[YADAPTCOEFFSA] | ||
259 | |||
260 | sub r5, r5, r6 @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
261 | sub r4, r4, r7 @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
262 | sub r3, r3, r8 @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
263 | sub r2, r2, r9 @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
264 | |||
265 | b 2f | ||
266 | |||
267 | |||
268 | 1: @ *decoded0 < 0 | ||
269 | |||
270 | add r5, r5, r11 @ r5 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB] | ||
271 | add r6, r6, r10 @ r6 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1] | ||
272 | add r9, r9, r2 @ r9 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4] | ||
273 | add r8, r8, r3 @ r8 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3] | ||
274 | add r7, r7, r4 @ r7 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2] | ||
275 | |||
276 | add r0, r12, #YcoeffsB | ||
277 | stmia r0, {r5 - r9} @ Save p->YcoeffsB[] | ||
278 | |||
279 | add r1, r12, #YcoeffsA | ||
280 | ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0] | ||
281 | @ r3 := p->YcoeffsA[1] | ||
282 | @ r4 := p->YcoeffsA[2] | ||
283 | @ r5 := p->YcoeffsA[3] | ||
284 | |||
285 | add r6, r14, #YADAPTCOEFFSA-12 | ||
286 | ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3] | ||
287 | @ r7 := p->buf[YADAPTCOEFFSA-2] | ||
288 | @ r8 := p->buf[YADAPTCOEFFSA-1] | ||
289 | @ r9 := p->buf[YADAPTCOEFFSA] | ||
290 | |||
291 | add r5, r5, r6 @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
292 | add r4, r4, r7 @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
293 | add r3, r3, r8 @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
294 | add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
295 | |||
296 | 2: | ||
297 | stmia r1, {r2 - r5} @ Save p->YcoeffsA | ||
298 | |||
299 | 3: | ||
300 | |||
301 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X | ||
302 | |||
303 | @ Predictor X, Filter A | ||
304 | |||
305 | ldr r11, [r12, #XlastA] @ r11 := p->XlastA | ||
306 | |||
307 | add r2, r14, #XDELAYA-12 @ r2 := &p->buf[XDELAYA-3] | ||
308 | ldmia r2, {r2, r3, r10} @ r2 := p->buf[XDELAYA-3] | ||
309 | @ r3 := p->buf[XDELAYA-2] | ||
310 | @ r10 := p->buf[XDELAYA-1] | ||
311 | |||
312 | add r6, r12, #XcoeffsA | ||
313 | ldmia r6, {r6 - r9} @ r6 := p->XcoeffsA[0] | ||
314 | @ r7 := p->XcoeffsA[1] | ||
315 | @ r8 := p->XcoeffsA[2] | ||
316 | @ r9 := p->XcoeffsA[3] | ||
317 | |||
318 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
319 | |||
320 | STR2OFS r10, r11, r14, #XDELAYA-4 | ||
321 | @ p->buf[XDELAYA-1] = r10 | ||
322 | @ p->buf[XDELAYA] = r11 | ||
323 | |||
324 | mul r0, r11, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0] | ||
325 | mla r0, r10, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1] | ||
326 | mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2] | ||
327 | mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3] | ||
328 | |||
329 | @ flags were set above, in the subs instruction | ||
330 | mvngt r10, #0 | ||
331 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
332 | |||
333 | cmp r11, #0 | ||
334 | mvngt r11, #0 | ||
335 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
336 | |||
337 | STR2OFS r10, r11, r14, #XADAPTCOEFFSA-4 | ||
338 | @ p->buf[XADAPTCOEFFSA-1] := r10 | ||
339 | @ p->buf[XADAPTCOEFFSA] := r11 | ||
340 | |||
341 | @ NOTE: r0 now contains predictionA - don't overwrite. | ||
342 | |||
343 | @ Predictor X, Filter B | ||
344 | |||
345 | LDR2OFS r6, r7, r12, #XfilterB @ r6 := p->XfilterB | ||
346 | @ r7 := p->YfilterA | ||
347 | |||
348 | add r2, r14, #XDELAYB-16 @ r2 := &p->buf[XDELAYB-4] | ||
349 | ldmia r2, {r2 - r4, r10} @ r2 := p->buf[XDELAYB-4] | ||
350 | @ r3 := p->buf[XDELAYB-3] | ||
351 | @ r4 := p->buf[XDELAYB-2] | ||
352 | @ r10 := p->buf[XDELAYB-1] | ||
353 | |||
354 | rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31) | ||
355 | sub r11, r7, r6, asr #5 @ r11 (p->buf[XDELAYB]) := r7 - (r6 >> 5) | ||
356 | |||
357 | str r7, [r12, #XfilterB] @ p->XfilterB := r7 (p->YfilterA) | ||
358 | |||
359 | add r5, r12, #XcoeffsB | ||
360 | ldmia r5, {r5 - r9} @ r5 := p->XcoeffsB[0] | ||
361 | @ r6 := p->XcoeffsB[1] | ||
362 | @ r7 := p->XcoeffsB[2] | ||
363 | @ r8 := p->XcoeffsB[3] | ||
364 | @ r9 := p->XcoeffsB[4] | ||
365 | |||
366 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
367 | |||
368 | STR2OFS r10, r11, r14, #XDELAYB-4 | ||
369 | @ p->buf[XDELAYB-1] = r10 | ||
370 | @ p->buf[XDELAYB] = r11 | ||
371 | |||
372 | mul r1, r11, r5 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0] | ||
373 | mla r1, r10, r6, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1] | ||
374 | mla r1, r4, r7, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2] | ||
375 | mla r1, r3, r8, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3] | ||
376 | mla r1, r2, r9, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4] | ||
377 | |||
378 | @ flags were set above, in the subs instruction | ||
379 | mvngt r10, #0 | ||
380 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
381 | |||
382 | cmp r11, #0 | ||
383 | mvngt r11, #0 | ||
384 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
385 | |||
386 | STR2OFS r10, r11, r14, #XADAPTCOEFFSB-4 | ||
387 | @ p->buf[XADAPTCOEFFSB-1] := r10 | ||
388 | @ p->buf[XADAPTCOEFFSB] := r11 | ||
389 | |||
390 | @ r0 still contains predictionA | ||
391 | @ r1 contains predictionB | ||
392 | |||
393 | @ Finish Predictor X | ||
394 | |||
395 | ldr r2, [sp, #4] @ r2 := decoded1 | ||
396 | add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1) | ||
397 | ldr r4, [r12, #XfilterA] @ r4 := p->XfilterA | ||
398 | ldr r3, [r2] @ r3 := *decoded1 | ||
399 | rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31) | ||
400 | add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10) | ||
401 | str r1, [r12, #XlastA] @ p->XlastA := r1 | ||
402 | add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5) | ||
403 | str r1, [r12, #XfilterA] @ p->XfilterA := r1 | ||
404 | |||
405 | @ r1 contains p->XfilterA | ||
406 | @ r2 contains decoded1 | ||
407 | @ r3 contains *decoded1 | ||
408 | |||
409 | @ r5, r6, r7, r8, r9 contain p->XcoeffsB[0..4] | ||
410 | @ r10, r11 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB] | ||
411 | |||
412 | str r1, [r2], #4 @ *(decoded1++) := r1 (p->XfilterA) | ||
413 | str r2, [sp, #4] @ save decoded1 | ||
414 | cmp r3, #0 | ||
415 | beq 3f | ||
416 | |||
417 | add r2, r14, #XADAPTCOEFFSB-16 | ||
418 | ldmia r2, {r2 - r4} @ r2 := p->buf[XADAPTCOEFFSB-4] | ||
419 | @ r3 := p->buf[XADAPTCOEFFSB-3] | ||
420 | @ r4 := p->buf[XADAPTCOEFFSB-2] | ||
421 | blt 1f | ||
422 | |||
423 | @ *decoded1 > 0 | ||
424 | |||
425 | sub r5, r5, r11 @ r5 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB] | ||
426 | sub r6, r6, r10 @ r6 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1] | ||
427 | sub r9, r9, r2 @ r9 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4] | ||
428 | sub r8, r8, r3 @ r8 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3] | ||
429 | sub r7, r7, r4 @ r7 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2] | ||
430 | |||
431 | add r0, r12, #XcoeffsB | ||
432 | stmia r0, {r5 - r9} @ Save p->XcoeffsB[] | ||
433 | |||
434 | add r1, r12, #XcoeffsA | ||
435 | ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0] | ||
436 | @ r3 := p->XcoeffsA[1] | ||
437 | @ r4 := p->XcoeffsA[2] | ||
438 | @ r5 := p->XcoeffsA[3] | ||
439 | |||
440 | add r6, r14, #XADAPTCOEFFSA-12 | ||
441 | ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3] | ||
442 | @ r7 := p->buf[XADAPTCOEFFSA-2] | ||
443 | @ r8 := p->buf[XADAPTCOEFFSA-1] | ||
444 | @ r9 := p->buf[XADAPTCOEFFSA] | ||
445 | |||
446 | sub r5, r5, r6 @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3] | ||
447 | sub r4, r4, r7 @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2] | ||
448 | sub r3, r3, r8 @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1] | ||
449 | sub r2, r2, r9 @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA] | ||
450 | |||
451 | b 2f | ||
452 | |||
453 | |||
454 | 1: @ *decoded1 < 0 | ||
455 | |||
456 | add r5, r5, r11 @ r5 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB] | ||
457 | add r6, r6, r10 @ r6 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1] | ||
458 | add r9, r9, r2 @ r9 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4] | ||
459 | add r8, r8, r3 @ r8 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3] | ||
460 | add r7, r7, r4 @ r7 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2] | ||
461 | |||
462 | add r0, r12, #XcoeffsB | ||
463 | stmia r0, {r5 - r9} @ Save p->XcoeffsB[] | ||
464 | |||
465 | add r1, r12, #XcoeffsA | ||
466 | ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0] | ||
467 | @ r3 := p->XcoeffsA[1] | ||
468 | @ r4 := p->XcoeffsA[2] | ||
469 | @ r5 := p->XcoeffsA[3] | ||
470 | |||
471 | add r6, r14, #XADAPTCOEFFSA-12 | ||
472 | ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3] | ||
473 | @ r7 := p->buf[XADAPTCOEFFSA-2] | ||
474 | @ r8 := p->buf[XADAPTCOEFFSA-1] | ||
475 | @ r9 := p->buf[XADAPTCOEFFSA] | ||
476 | |||
477 | add r5, r5, r6 @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3] | ||
478 | add r4, r4, r7 @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2] | ||
479 | add r3, r3, r8 @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1] | ||
480 | add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA] | ||
481 | |||
482 | 2: | ||
483 | stmia r1, {r2 - r5} @ Save p->XcoeffsA | ||
484 | |||
485 | 3: | ||
486 | |||
487 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON | ||
488 | |||
489 | add r14, r14, #4 @ p->buf++ | ||
490 | |||
491 | add r11, r12, #historybuffer @ r11 := &p->historybuffer[0] | ||
492 | |||
493 | sub r10, r14, #PREDICTOR_HISTORY_SIZE*4 | ||
494 | @ r10 := p->buf - PREDICTOR_HISTORY_SIZE | ||
495 | |||
496 | ldr r0, [sp, #8] | ||
497 | cmp r10, r11 | ||
498 | beq move_hist @ The history buffer is full, we need to do a memmove | ||
499 | |||
500 | @ Check loop count | ||
501 | subs r0, r0, #1 | ||
502 | strne r0, [sp, #8] | ||
503 | bne loop | ||
504 | |||
505 | done: | ||
506 | str r14, [r12] @ Save value of p->buf | ||
507 | add sp, sp, #12 @ Don't bother restoring r1-r3 | ||
508 | #ifdef ROCKBOX | ||
509 | ldmpc regs=r4-r11 | ||
510 | #else | ||
511 | ldmia sp!, {r4 - r11, pc} | ||
512 | #endif | ||
513 | |||
514 | move_hist: | ||
515 | @ dest = r11 (p->historybuffer) | ||
516 | @ src = r14 (p->buf) | ||
517 | @ n = 200 bytes (5 blocks of 10 words) | ||
518 | |||
519 | ldmia r14!, {r0-r9} @ 40 bytes | ||
520 | stmia r11!, {r0-r9} | ||
521 | ldmia r14!, {r0-r9} @ 40 bytes | ||
522 | stmia r11!, {r0-r9} | ||
523 | ldmia r14!, {r0-r9} @ 40 bytes | ||
524 | stmia r11!, {r0-r9} | ||
525 | ldmia r14!, {r0-r9} @ 40 bytes | ||
526 | stmia r11!, {r0-r9} | ||
527 | ldmia r14!, {r0-r9} @ 40 bytes | ||
528 | stmia r11!, {r0-r9} | ||
529 | |||
530 | ldr r0, [sp, #8] | ||
531 | add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0] | ||
532 | |||
533 | @ Check loop count | ||
534 | subs r0, r0, #1 | ||
535 | strne r0, [sp, #8] | ||
536 | bne loop | ||
537 | |||
538 | b done | ||
539 | .size predictor_decode_stereo, .-predictor_decode_stereo | ||
540 | |||
541 | .global predictor_decode_mono | ||
542 | .type predictor_decode_mono,%function | ||
543 | |||
544 | @ Register usage: | ||
545 | @ | ||
546 | @ r0-r11 - scratch | ||
547 | @ r12 - struct predictor_t* p | ||
548 | @ r14 - int32_t* p->buf | ||
549 | |||
550 | @ void predictor_decode_mono(struct predictor_t* p, | ||
551 | @ int32_t* decoded0, | ||
552 | @ int count) | ||
553 | @ NOTE: count is only tested after a sample has been decoded, so it must be >= 1. | ||
554 | predictor_decode_mono: | ||
555 | stmdb sp!, {r1, r2, r4-r11, lr} | ||
556 | |||
557 | @ r1 (decoded0) is [sp] | ||
558 | @ r2 (count) is [sp, #4] | ||
559 | |||
560 | mov r12, r0 @ r12 := p | ||
561 | ldr r14, [r0] @ r14 := p->buf | ||
562 | |||
563 | loopm: | ||
564 | |||
565 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR | ||
566 | |||
567 | ldr r11, [r12, #YlastA] @ r11 := p->YlastA | ||
568 | |||
569 | add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3] | ||
570 | ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3] | ||
571 | @ r3 := p->buf[YDELAYA-2] | ||
572 | @ r10 := p->buf[YDELAYA-1] | ||
573 | |||
574 | add r5, r12, #YcoeffsA @ r5 := &p->YcoeffsA[0] | ||
575 | ldmia r5, {r6 - r9} @ r6 := p->YcoeffsA[0] | ||
576 | @ r7 := p->YcoeffsA[1] | ||
577 | @ r8 := p->YcoeffsA[2] | ||
578 | @ r9 := p->YcoeffsA[3] | ||
579 | |||
580 | subs r10, r11, r10 @ r10 := r11 - r10 | ||
581 | |||
582 | STR2OFS r10, r11, r14, #YDELAYA-4 | ||
583 | @ p->buf[YDELAYA-1] = r10 | ||
584 | @ p->buf[YDELAYA] = r11 | ||
585 | |||
586 | mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0] | ||
587 | mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1] | ||
588 | mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2] | ||
589 | mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3] | ||
590 | |||
591 | @ flags were set above, in the subs instruction | ||
592 | mvngt r10, #0 | ||
593 | movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro) | ||
594 | |||
595 | cmp r11, #0 | ||
596 | mvngt r11, #0 | ||
597 | movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro) | ||
598 | |||
599 | STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4 | ||
600 | @ p->buf[YADAPTCOEFFSA-1] := r10 | ||
601 | @ p->buf[YADAPTCOEFFSA] := r11 | ||
602 | |||
603 | ldr r2, [sp] @ r2 := decoded0 | ||
604 | ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA | ||
605 | ldr r3, [r2] @ r3 := *decoded0 | ||
606 | rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31) | ||
607 | add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10) | ||
608 | str r1, [r12, #YlastA] @ p->YlastA := r1 | ||
609 | add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5) | ||
610 | str r1, [r12, #YfilterA] @ p->YfilterA := r1 | ||
611 | |||
612 | @ r1 contains p->YfilterA | ||
613 | @ r2 contains decoded0 | ||
614 | @ r3 contains *decoded0 | ||
615 | |||
616 | @ r6, r7, r8, r9 contain p->YcoeffsA[0..3] | ||
617 | @ r10, r11 contain p->buf[YADAPTCOEFFSA-1] and p->buf[YADAPTCOEFFSA] | ||
618 | |||
619 | str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA) | ||
620 | str r2, [sp] @ save decoded0 | ||
621 | cmp r3, #0 | ||
622 | beq 3f | ||
623 | |||
624 | LDR2OFS r2, r3, r14, #YADAPTCOEFFSA-12 | ||
625 | @ r2 := p->buf[YADAPTCOEFFSA-3] | ||
626 | @ r3 := p->buf[YADAPTCOEFFSA-2] | ||
627 | blt 1f | ||
628 | |||
629 | @ *decoded0 > 0 | ||
630 | |||
631 | sub r6, r6, r11 @ r6 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA] | ||
632 | sub r7, r7, r10 @ r7 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1] | ||
633 | sub r9, r9, r2 @ r9 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3] | ||
634 | sub r8, r8, r3 @ r8 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2] | ||
635 | |||
636 | b 2f | ||
637 | |||
638 | 1: @ *decoded0 < 0 | ||
639 | |||
640 | add r6, r6, r11 @ r6 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA] | ||
641 | add r7, r7, r10 @ r7 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1] | ||
642 | add r9, r9, r2 @ r9 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3] | ||
643 | add r8, r8, r3 @ r8 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2] | ||
644 | |||
645 | 2: | ||
646 | stmia r5, {r6 - r9} @ Save p->YcoeffsA | ||
647 | |||
648 | 3: | ||
649 | |||
650 | @@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON | ||
651 | |||
652 | add r14, r14, #4 @ p->buf++ | ||
653 | |||
654 | add r11, r12, #historybuffer @ r11 := &p->historybuffer[0] | ||
655 | |||
656 | sub r10, r14, #PREDICTOR_HISTORY_SIZE*4 | ||
657 | @ r10 := p->buf - PREDICTOR_HISTORY_SIZE | ||
658 | |||
659 | ldr r0, [sp, #4] | ||
660 | cmp r10, r11 | ||
661 | beq move_histm @ The history buffer is full, we need to do a memmove | ||
662 | |||
663 | @ Check loop count | ||
664 | subs r0, r0, #1 | ||
665 | strne r0, [sp, #4] | ||
666 | bne loopm | ||
667 | |||
668 | donem: | ||
669 | str r14, [r12] @ Save value of p->buf | ||
670 | add sp, sp, #8 @ Don't bother restoring r1, r2 | ||
671 | #ifdef ROCKBOX | ||
672 | ldmpc regs=r4-r11 | ||
673 | #else | ||
674 | ldmia sp!, {r4 - r11, pc} | ||
675 | #endif | ||
676 | |||
677 | move_histm: | ||
678 | @ dest = r11 (p->historybuffer) | ||
679 | @ src = r14 (p->buf) | ||
680 | @ n = 200 bytes (5 blocks of 10 words) | ||
681 | |||
682 | ldmia r14!, {r0-r9} @ 40 bytes | ||
683 | stmia r11!, {r0-r9} | ||
684 | ldmia r14!, {r0-r9} @ 40 bytes | ||
685 | stmia r11!, {r0-r9} | ||
686 | ldmia r14!, {r0-r9} @ 40 bytes | ||
687 | stmia r11!, {r0-r9} | ||
688 | ldmia r14!, {r0-r9} @ 40 bytes | ||
689 | stmia r11!, {r0-r9} | ||
690 | ldmia r14!, {r0-r9} @ 40 bytes | ||
691 | stmia r11!, {r0-r9} | ||
692 | |||
693 | ldr r0, [sp, #4] | ||
694 | add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0] | ||
695 | |||
696 | @ Check loop count | ||
697 | subs r0, r0, #1 | ||
698 | strne r0, [sp, #4] | ||
699 | bne loopm | ||
700 | |||
701 | b donem | ||
702 | .size predictor_decode_mono, .-predictor_decode_mono | ||