diff options
Diffstat (limited to 'lib/rbcodec/codecs/libwavpack/arml.S')
-rw-r--r-- | lib/rbcodec/codecs/libwavpack/arml.S | 506 |
1 files changed, 506 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libwavpack/arml.S b/lib/rbcodec/codecs/libwavpack/arml.S new file mode 100644 index 0000000000..60818aa1e6 --- /dev/null +++ b/lib/rbcodec/codecs/libwavpack/arml.S | |||
@@ -0,0 +1,506 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 by David Bryant | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version 2 | ||
15 | * of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
18 | * KIND, either express or implied. | ||
19 | * | ||
20 | ****************************************************************************/ | ||
21 | |||
22 | /* This is an assembly optimized version of the following WavPack function: | ||
23 | * | ||
24 | * void decorr_stereo_pass_cont_arml (struct decorr_pass *dpp, | ||
25 | * long *buffer, long sample_count); | ||
26 | * | ||
27 | * It performs a single pass of stereo decorrelation on the provided buffer. | ||
28 | * Note that this version of the function requires that the 8 previous stereo | ||
29 | * samples are visible and correct. In other words, it ignores the "samples_*" | ||
30 | * fields in the decorr_pass structure and gets the history data directly | ||
31 | * from the buffer. It does, however, return the appropriate history samples | ||
32 | * to the decorr_pass structure before returning. | ||
33 | * | ||
34 | * This is written to work on an ARM7TDMI processor. This version uses the | ||
35 | * 64-bit multiply-accumulate instruction and so can be used with all | ||
36 | * WavPack files. However, for optimum performance with 16-bit WavPack | ||
37 | * files, there is a faster version that only uses the 32-bit MLA | ||
38 | * instruction. | ||
39 | */ | ||
40 | |||
41 | #include "config.h" | ||
42 | |||
43 | .text | ||
44 | .align | ||
45 | .global decorr_stereo_pass_cont_arml | ||
46 | |||
47 | /* | ||
48 | * on entry: | ||
49 | * | ||
50 | * r0 = struct decorr_pass *dpp | ||
51 | * r1 = long *buffer | ||
52 | * r2 = long sample_count | ||
53 | */ | ||
54 | |||
decorr_stereo_pass_cont_arml:

/*
 * Entry and dispatch.
 *
 * Saves the callee-saved registers used below, loads delta and both weights
 * from the decorr_pass structure, scales them for the 64-bit multiply and
 * then jumps to the loop that matches dpp->term.
 *
 * Structure offsets used in this file:
 *    0 = term, 2 = delta, 4 = weight_A, 6 = weight_B,
 *    8 = samples_A[], 40 = samples_B[]
 *
 * The weights and delta are scaled BEFORE the zero-count test.  common_exit
 * always shifts the weights right by 18 before storing them, so leaving
 * early with unscaled weights would write 0 (or -1) back into the structure
 * and destroy them.
 *
 * r11 is not initialised here: every loop loads it with 0x80000000
 * immediately before each smlal.
 */

        stmfd   sp!, {r4 - r8, r10, r11, lr}
        mov     r5, r0                  @ r5 = dpp (r0 is reused for weight_B)
        ldrsh   r6, [r0, #2]            @ r6 = dpp->delta
        ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A
        ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B

        mov     r0, r0, asl #18         @ for 64-bit math we use weights << 18
        mov     r4, r4, asl #18
        mov     r6, r6, asl #18         @ delta is scaled the same way

        cmp     r2, #0                  @ exit if no samples to process; the
        beq     common_exit             @ scaled weights are restored unchanged

        add     r7, r1, r2, asl #3      @ r7 = buffer ending position
                                        @ (8 bytes per stereo sample pair)
        ldrsh   r2, [r5, #0]            @ r2 = dpp->term
        cmp     r2, #0
        blt     minus_term

        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
        ldr     r10, [r1, #-12]         @ for terms 2, 17, and 18:
        ldr     r8, [r1, #-8]           @   lr = 2nd previous left,  r10 = 2nd previous right
        ldr     r3, [r1, #-4]           @   r8 = previous left,      r3  = previous right

        cmp     r2, #18
        beq     term_18_loop            @ term 18 keeps the history unshifted
        mov     lr, lr, asl #4          @ terms 2 and 17 keep the 2nd previous
        mov     r10, r10, asl #4        @ samples pre-shifted by 4
        cmp     r2, #2
        beq     term_2_loop
        cmp     r2, #17
        beq     term_17_loop
        b       term_default_loop       @ every other non-negative term

minus_term:
        mov     r10, #(1024 << 18)      @ r10 = -1024 << 18 for weight clipping
        rsb     r10, r10, #0            @ (only used for negative terms)
        cmn     r2, #1
        beq     term_minus_1
        cmn     r2, #2
        beq     term_minus_2
        cmn     r2, #3
        beq     term_minus_3
        b       common_exit             @ any other negative term: no processing
99 | |||
/*
 ******************************************************************************
 * Loop to handle term = 17 condition
 *
 * r0 = dpp->weight_B << 18      r8  = previous left sample
 * r1 = bptr                     r9  =
 * r2 = current sample           r10 = second previous right sample << 4
 * r3 = previous right sample    r11 = lo accumulator (for rounding)
 * r4 = dpp->weight_A << 18      ip  = current decorrelation value << 4
 * r5 = dpp                      sp  =
 * r6 = dpp->delta << 18         lr  = second previous left sample << 4
 * r7 = eptr                     pc  =
 *
 * smlal adds the 64-bit product (weight << 18) * (decorr << 4) to the pair
 * hi:lo = sample:0x80000000, so the hi register ends up holding
 * sample + ((weight * decorr + 512) >> 10).
 *******************************************************************************
 */

term_17_loop:
        rsbs    ip, lr, r8, lsl #5      @ decorr value = (2 * prev) - 2nd prev
        mov     lr, r8, lsl #4          @ previous becomes 2nd previous
        ldr     r2, [r1], #4            @ fetch left sample, advance pointer
        mov     r11, #0x80000000        @ rounding constant in lo accumulator
        mov     r8, r2                  @ hi accumulator starts as the sample
        smlalne r11, r8, r4, ip         @ skipped when decorr value is zero
        strne   r8, [r1, #-4]           @ write back only if it may have changed
        cmpne   r2, #0
        beq     .Lt17_right             @ no weight update if decorr or sample is 0
        teq     ip, r2                  @ N set when the signs differ
        submi   r4, r4, r6              @ signs differ: weight_A -= delta
        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

.Lt17_right:
        rsbs    ip, r10, r3, lsl #5     @ right channel, same steps with weight_B
        mov     r10, r3, lsl #4
        ldr     r2, [r1], #4
        mov     r11, #0x80000000
        mov     r3, r2
        smlalne r11, r3, r0, ip
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lt17_next
        teq     ip, r2
        submi   r0, r0, r6
        addpl   r0, r0, r6

.Lt17_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_17_loop
        mov     lr, lr, asr #4          @ undo the << 4 on the 2nd previous
        mov     r10, r10, asr #4        @ samples before they are stored
        b       store_1718              @ common exit for terms 17 & 18
147 | |||
/*
 ******************************************************************************
 * Loop to handle term = 18 condition
 *
 * r0 = dpp->weight_B << 18      r8  = previous left sample
 * r1 = bptr                     r9  =
 * r2 = current sample           r10 = second previous right sample
 * r3 = previous right sample    r11 = lo accumulator (for rounding)
 * r4 = dpp->weight_A << 18      ip  = decorrelation value << 4
 * r5 = dpp                      sp  =
 * r6 = dpp->delta << 18         lr  = second previous left sample
 * r7 = eptr                     pc  =
 *
 * Unlike term 17, the second previous samples are kept unshifted here; the
 * << 4 is applied to the finished decorrelation value instead.
 *******************************************************************************
 */

term_18_loop:
        rsb     ip, lr, r8              @ ip = prev - 2nd prev
        mov     lr, r8                  @ previous becomes 2nd previous
        add     ip, lr, ip, asr #1      @ ip = prev + ((prev - 2nd prev) >> 1)
        movs    ip, ip, lsl #4          @ scale for smlal and test for zero
        ldr     r2, [r1], #4            @ fetch left sample, advance pointer
        mov     r11, #0x80000000        @ rounding constant in lo accumulator
        mov     r8, r2                  @ hi accumulator starts as the sample
        smlalne r11, r8, r4, ip         @ skipped when decorr value is zero
        strne   r8, [r1, #-4]           @ write back only if it may have changed
        cmpne   r2, #0
        beq     .Lt18_right             @ no weight update if decorr or sample is 0
        teq     ip, r2                  @ N set when the signs differ
        submi   r4, r4, r6              @ signs differ: weight_A -= delta
        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

.Lt18_right:
        rsb     ip, r10, r3             @ right channel, same steps with weight_B
        mov     r10, r3
        add     ip, r10, ip, asr #1
        movs    ip, ip, lsl #4
        ldr     r2, [r1], #4
        mov     r11, #0x80000000
        mov     r3, r2
        smlalne r11, r3, r0, ip
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lt18_next
        teq     ip, r2
        submi   r0, r0, r6
        addpl   r0, r0, r6

.Lt18_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_18_loop            @ otherwise fall into store_1718

/* common exit for terms 17 & 18: save the two most recent samples per channel */

store_1718:
        str     r3, [r5, #40]           @ samples_B[0] = previous right
        str     r8, [r5, #8]            @ samples_A[0] = previous left
        str     r10, [r5, #44]          @ samples_B[1] = 2nd previous right
        str     lr, [r5, #12]           @ samples_A[1] = 2nd previous left
        b       common_exit             @ then store the weights and return
205 | |||
/*
 ******************************************************************************
 * Loop to handle term = 2 condition
 * (the default term handler (1-8) could also process this case, but this
 * version keeps the history in registers and so avoids a second memory read
 * per sample)
 *
 * r0 = dpp->weight_B << 18      r8  = previous left sample
 * r1 = bptr                     r9  =
 * r2 = current sample           r10 = second previous right sample << 4
 * r3 = previous right sample    r11 = lo accumulator (for rounding)
 * r4 = dpp->weight_A << 18      ip  = decorrelation value << 4
 * r5 = dpp                      sp  =
 * r6 = dpp->delta << 18         lr  = second previous left sample << 4
 * r7 = eptr                     pc  =
 *******************************************************************************
 */

term_2_loop:
        movs    ip, lr                  @ decorr value = 2nd previous left; test it
        ldr     r2, [r1], #4            @ fetch left sample, advance pointer
        mov     lr, r8, lsl #4          @ previous becomes 2nd previous
        mov     r11, #0x80000000        @ rounding constant in lo accumulator
        mov     r8, r2                  @ hi accumulator starts as the sample
        smlalne r11, r8, r4, ip         @ skipped when decorr value is zero
        strne   r8, [r1, #-4]           @ write back only if it may have changed
        cmpne   r2, #0
        beq     .Lt2_right              @ no weight update if decorr or sample is 0
        teq     ip, r2                  @ N set when the signs differ
        submi   r4, r4, r6              @ signs differ: weight_A -= delta
        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

.Lt2_right:
        movs    ip, r10                 @ right channel, same steps with weight_B
        ldr     r2, [r1], #4
        mov     r10, r3, lsl #4
        mov     r11, #0x80000000
        mov     r3, r2
        smlalne r11, r3, r0, ip
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lt2_next
        teq     ip, r2
        submi   r0, r0, r6
        addpl   r0, r0, r6

.Lt2_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_2_loop

        b       default_term_exit       @ this exit updates all dpp->samples
254 | |||
/*
 ******************************************************************************
 * Loop to handle default term condition
 *
 * r0 = dpp->weight_B << 18      r8  = result accumulator
 * r1 = bptr                     r9  =
 * r2 = dpp->term                r10 =
 * r3 = decorrelation value << 4 r11 = lo accumulator (for rounding)
 * r4 = dpp->weight_A << 18      ip  = current sample
 * r5 = dpp                      sp  =
 * r6 = dpp->delta << 18         lr  =
 * r7 = eptr                     pc  =
 *
 * The decorrelation value is read from the buffer at bptr - term * 8 bytes,
 * i.e. the sample of the same channel "term" stereo pairs back.
 *******************************************************************************
 */

term_default_loop:
        ldr     r3, [r1, -r2, lsl #3]   @ same-channel sample, term pairs back
        ldr     ip, [r1], #4            @ fetch left sample, advance pointer
        movs    r3, r3, lsl #4          @ scale for smlal and test for zero
        mov     r11, #0x80000000        @ rounding constant in lo accumulator
        mov     r8, ip                  @ hi accumulator starts as the sample
        smlalne r11, r8, r4, r3         @ skipped when decorr value is zero
        strne   r8, [r1, #-4]           @ write back only if it may have changed
        cmpne   ip, #0
        beq     .Ldef_right             @ no weight update if decorr or sample is 0
        teq     ip, r3                  @ N set when the signs differ
        submi   r4, r4, r6              @ signs differ: weight_A -= delta
        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

.Ldef_right:
        ldr     r3, [r1, -r2, lsl #3]   @ right channel, same steps with weight_B
        ldr     ip, [r1], #4
        movs    r3, r3, lsl #4
        mov     r11, #0x80000000
        mov     r8, ip
        smlalne r11, r8, r0, r3
        strne   r8, [r1, #-4]
        cmpne   ip, #0
        beq     .Ldef_next
        teq     ip, r3
        submi   r0, r0, r6
        addpl   r0, r0, r6

.Ldef_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_default_loop       @ otherwise fall into default_term_exit

/*
 * Exit used by terms 1-8 (and by the term 2 loop): copies the last 8 stereo
 * pairs from the buffer into dpp->samples_A / dpp->samples_B, whether or not
 * the given term uses all of them.  Walking backwards from bptr, each pair is
 * stored at index (k & 7) with k starting at term - 1 and counting down.
 */

default_term_exit:
        ldrsh   r3, [r5]                @ r3 = dpp->term
        sub     ip, r3, #1              @ ip = k = term - 1
        mov     lr, #7                  @ 8 passes: lr runs 7 down to 0

.Lhist_copy:
        and     r3, ip, #7
        add     r3, r5, r3, lsl #2      @ r3 = dpp + 4 * (k & 7)
        ldr     r2, [r1, #-4]           @ right sample of the pair
        str     r2, [r3, #40]           @ -> samples_B[k & 7]
        ldr     r2, [r1, #-8]!          @ left sample; r1 steps back one pair
        str     r2, [r3, #8]            @ -> samples_A[k & 7]
        sub     ip, ip, #1              @ k--
        subs    lr, lr, #1
        bpl     .Lhist_copy             @ repeat until the counter goes negative
        b       common_exit
321 | |||
/*
 ******************************************************************************
 * Loop to handle term = -1 condition
 *
 * r0 = dpp->weight_B << 18      r8  =
 * r1 = bptr                     r9  =
 * r2 = current right sample     r10 = -1024 << 18 (for clipping)
 * r3 = previous right sample    r11 = lo accumulator (for rounding)
 * r4 = dpp->weight_A << 18      ip  = current left sample
 * r5 = dpp                      sp  =
 * r6 = dpp->delta << 18         lr  = updated left sample
 * r7 = eptr                     pc  =
 *
 * Left is predicted from the previous right sample, right is predicted from
 * the just-updated left sample.  Both weights are clipped to +/-1024 (scaled
 * by << 18) after every update.
 *******************************************************************************
 */

term_minus_1:
        ldr     r3, [r1, #-4]           @ r3 = previous right sample

term_minus_1_loop:
        ldr     ip, [r1], #8            @ left sample; r1 now points past the pair
        movs    r3, r3, lsl #4          @ decorr value = previous right, test for 0
        mov     r11, #0x80000000        @ rounding constant in lo accumulator
        mov     lr, ip                  @ hi accumulator starts as the sample
        smlalne r11, lr, r4, r3         @ skipped when decorr value is zero
        strne   lr, [r1, #-8]           @ write back updated left
        cmpne   ip, #0
        beq     .Lm1_right              @ no weight update if decorr or sample is 0
        teq     ip, r3                  @ N set when the signs differ
        submi   r4, r4, r6              @ signs differ: weight_A -= delta
        addpl   r4, r4, r6              @ signs agree:  weight_A += delta
        cmp     r4, #(1024 << 18)       @ clip weight_A to +/-1024
        movgt   r4, #(1024 << 18)
        cmp     r4, r10
        movlt   r4, r10

.Lm1_right:
        ldr     r2, [r1, #-4]           @ right sample of the same pair
        movs    lr, lr, lsl #4          @ decorr value = updated left, test for 0
        mov     r11, #0x80000000
        mov     r3, r2                  @ r3 becomes the next "previous right"
        smlalne r11, r3, r0, lr
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lm1_next
        teq     r2, lr
        submi   r0, r0, r6
        addpl   r0, r0, r6
        cmp     r0, #(1024 << 18)       @ clip weight_B to +/-1024
        movgt   r0, #(1024 << 18)
        cmp     r0, r10
        movlt   r0, r10

.Lm1_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_minus_1_loop

        str     r3, [r5, #8]            @ samples_A[0] = last right sample
        b       common_exit
378 | |||
/*
 ******************************************************************************
 * Loop to handle term = -2 condition
 * (the right channel is processed before the left channel here)
 *
 * r0 = dpp->weight_B << 18      r8  =
 * r1 = bptr                     r9  =
 * r2 = current left sample      r10 = -1024 << 18 (for clipping)
 * r3 = previous left sample     r11 = lo accumulator (for rounding)
 * r4 = dpp->weight_A << 18      ip  = current right sample
 * r5 = dpp                      sp  =
 * r6 = dpp->delta << 18         lr  = updated right sample
 * r7 = eptr                     pc  =
 *
 * Right is predicted from the previous left sample, left is predicted from
 * the just-updated right sample.  Both weights are clipped to +/-1024 (scaled
 * by << 18) after every update.
 *******************************************************************************
 */

term_minus_2:
        ldr     r3, [r1, #-8]           @ r3 = previous left sample

term_minus_2_loop:
        ldr     ip, [r1, #4]            @ right sample of the current pair
        movs    r3, r3, lsl #4          @ decorr value = previous left, test for 0
        mov     r11, #0x80000000        @ rounding constant in lo accumulator
        mov     lr, ip                  @ hi accumulator starts as the sample
        smlalne r11, lr, r0, r3         @ skipped when decorr value is zero
        strne   lr, [r1, #4]            @ write back updated right
        cmpne   ip, #0
        beq     .Lm2_left               @ no weight update if decorr or sample is 0
        teq     ip, r3                  @ N set when the signs differ
        submi   r0, r0, r6              @ signs differ: weight_B -= delta
        addpl   r0, r0, r6              @ signs agree:  weight_B += delta
        cmp     r0, #(1024 << 18)       @ clip weight_B to +/-1024
        movgt   r0, #(1024 << 18)
        cmp     r0, r10
        movlt   r0, r10

.Lm2_left:
        ldr     r2, [r1], #8            @ left sample; r1 now points past the pair
        movs    lr, lr, lsl #4          @ decorr value = updated right, test for 0
        mov     r11, #0x80000000
        mov     r3, r2                  @ r3 becomes the next "previous left"
        smlalne r11, r3, r4, lr
        strne   r3, [r1, #-8]
        cmpne   r2, #0
        beq     .Lm2_next
        teq     r2, lr
        submi   r4, r4, r6
        addpl   r4, r4, r6
        cmp     r4, #(1024 << 18)       @ clip weight_A to +/-1024
        movgt   r4, #(1024 << 18)
        cmp     r4, r10
        movlt   r4, r10

.Lm2_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_minus_2_loop

        str     r3, [r5, #40]           @ samples_B[0] = last left sample
        b       common_exit
436 | |||
/*
 ******************************************************************************
 * Loop to handle term = -3 condition
 *
 * r0 = dpp->weight_B << 18      r8  = previous left sample
 * r1 = bptr                     r9  =
 * r2 = updated left sample,     r10 = -1024 << 18 (for clipping)
 *      then current right
 * r3 = previous right sample    r11 = lo accumulator (for rounding)
 * r4 = dpp->weight_A << 18      ip  = current left sample,
 *                                     then previous left << 4
 * r5 = dpp                      sp  =
 * r6 = dpp->delta << 18         lr  =
 * r7 = eptr                     pc  =
 *
 * Left is predicted from the previous right sample and right from the
 * previous left sample.  Both weights are clipped to +/-1024 (scaled by
 * << 18) after every update.
 *******************************************************************************
 */

term_minus_3:
        ldr     r3, [r1, #-4]           @ r3 = previous right sample
        ldr     r8, [r1, #-8]           @ r8 = previous left sample

term_minus_3_loop:
        ldr     ip, [r1], #4            @ fetch left sample, advance pointer
        movs    r3, r3, lsl #4          @ decorr value = previous right, test for 0
        mov     r11, #0x80000000        @ rounding constant in lo accumulator
        mov     r2, ip                  @ hi accumulator starts as the sample
        smlalne r11, r2, r4, r3         @ skipped when decorr value is zero
        strne   r2, [r1, #-4]           @ write back updated left
        cmpne   ip, #0
        beq     .Lm3_right              @ no weight update if decorr or sample is 0
        teq     ip, r3                  @ N set when the signs differ
        submi   r4, r4, r6              @ signs differ: weight_A -= delta
        addpl   r4, r4, r6              @ signs agree:  weight_A += delta
        cmp     r4, #(1024 << 18)       @ clip weight_A to +/-1024
        movgt   r4, #(1024 << 18)
        cmp     r4, r10
        movlt   r4, r10

.Lm3_right:
        movs    ip, r8, lsl #4          @ decorr value = previous left, test for 0
        mov     r8, r2                  @ updated left is the next "previous left"
        ldr     r2, [r1], #4            @ fetch right sample, advance pointer
        mov     r11, #0x80000000
        mov     r3, r2                  @ r3 becomes the next "previous right"
        smlalne r11, r3, r0, ip
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lm3_next
        teq     ip, r2
        submi   r0, r0, r6
        addpl   r0, r0, r6
        cmp     r0, #(1024 << 18)       @ clip weight_B to +/-1024
        movgt   r0, #(1024 << 18)
        cmp     r0, r10
        movlt   r0, r10

.Lm3_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_minus_3_loop

        str     r3, [r5, #8]            @ samples_A[0] = last right sample
        str     r8, [r5, #40]           @ samples_B[0] = last left sample
                                        @ execution continues into common_exit
495 | |||
/*
 * Shared return path: the weights held in r4 / r0 are scaled by << 18, so
 * they are shifted back to their real magnitude and written to
 * dpp->weight_A / dpp->weight_B before returning.
 *
 * NOTE(review): ldmpc is a macro that is not defined in this file
 * (presumably supplied through config.h); it is expected to restore the
 * listed registers and return to the caller - confirm against its
 * definition.
 */

common_exit:
        mov     r4, r4, asr #18         @ weight_A back to real magnitude
        strh    r4, [r5, #4]            @ dpp->weight_A
        mov     r0, r0, asr #18         @ weight_B back to real magnitude
        strh    r0, [r5, #6]            @ dpp->weight_B
        ldmpc   regs="r4-r8, r10-r11"
506 | |||