diff options
author | Sean Bartell <wingedtachikoma@gmail.com> | 2011-06-25 21:32:25 -0400 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2012-04-25 22:13:20 +0200 |
commit | f40bfc9267b13b54e6379dfe7539447662879d24 (patch) | |
tree | 9b20069d5e62809ff434061ad730096836f916f2 /lib/rbcodec/codecs/libwavpack/arm.S | |
parent | a0009907de7a0107d49040d8a180f140e2eff299 (diff) | |
download | rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.tar.gz rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.zip |
Add codecs to librbcodec.
Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97
Reviewed-on: http://gerrit.rockbox.org/137
Reviewed-by: Nils Wallménius <nils@rockbox.org>
Tested-by: Nils Wallménius <nils@rockbox.org>
Diffstat (limited to 'lib/rbcodec/codecs/libwavpack/arm.S')
-rw-r--r-- | lib/rbcodec/codecs/libwavpack/arm.S | 477 |
1 files changed, 477 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libwavpack/arm.S b/lib/rbcodec/codecs/libwavpack/arm.S new file mode 100644 index 0000000000..32de1df7de --- /dev/null +++ b/lib/rbcodec/codecs/libwavpack/arm.S | |||
@@ -0,0 +1,477 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 by David Bryant | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version 2 | ||
15 | * of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
18 | * KIND, either express or implied. | ||
19 | * | ||
20 | ****************************************************************************/ | ||
21 | |||
22 | /* This is an assembly optimized version of the following WavPack function: | ||
23 | * | ||
24 | * void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp, | ||
25 | * long *buffer, long sample_count); | ||
26 | * | ||
27 | * It performs a single pass of stereo decorrelation on the provided buffer. | ||
28 | * Note that this version of the function requires that the 8 previous stereo | ||
29 | * samples are visible and correct. In other words, it ignores the "samples_*" | ||
30 | * fields in the decorr_pass structure and gets the history data directly | ||
31 | * from the buffer. It does, however, return the appropriate history samples | ||
32 | * to the decorr_pass structure before returning. | ||
33 | * | ||
34 | * This is written to work on an ARM7TDMI processor. This version only uses the | ||
35 | * 32-bit multiply-accumulate instruction and so will overflow with 24-bit | ||
36 | * WavPack files. | ||
37 | */ | ||
38 | |||
39 | #include "config.h" | ||
40 | |||
41 | .text | ||
42 | .align | ||
43 | .global decorr_stereo_pass_cont_arm | ||
44 | |||
45 | /* | ||
46 | * on entry: | ||
47 | * | ||
48 | * r0 = struct decorr_pass *dpp | ||
49 | * r1 = long *buffer | ||
50 | * r2 = long sample_count (stereo pairs; a count <= 0 processes nothing) | ||
51 | */ | ||
52 | |||
53 | decorr_stereo_pass_cont_arm: | ||
54 | |||
55 | stmfd sp!, {r4 - r8, r10, r11, lr} @ save working registers and return address | ||
56 | mov r5, r0 @ r5 = dpp | ||
57 | mov r11, #512 @ r11 = 512 for rounding | ||
58 | ldrsh r6, [r0, #2] @ r6 = dpp->delta | ||
59 | ldrsh r4, [r0, #4] @ r4 = dpp->weight_A | ||
60 | ldrsh r0, [r0, #6] @ r0 = dpp->weight_B | ||
61 | cmp r2, #0 @ exit if no samples to process (the loops below test at the bottom) | ||
62 | ble common_exit @ signed test, so a negative count also exits instead of running one pass | ||
63 | |||
64 | add r7, r1, r2, asl #3 @ r7 = buffer ending position (8 bytes per stereo pair) | ||
65 | ldrsh r2, [r5, #0] @ r2 = dpp->term | ||
66 | cmp r2, #0 | ||
67 | bmi minus_term @ negative terms are dispatched separately | ||
68 | |||
69 | ldr lr, [r1, #-16] @ load 2 sample history from buffer | ||
70 | ldr r10, [r1, #-12] @ for terms 2, 17, and 18 | ||
71 | ldr r8, [r1, #-8] @ r8 = previous left sample | ||
72 | ldr r3, [r1, #-4] @ r3 = previous right sample | ||
73 | cmp r2, #17 | ||
74 | beq term_17_loop | ||
75 | cmp r2, #18 | ||
76 | beq term_18_loop | ||
77 | cmp r2, #2 | ||
78 | beq term_2_loop | ||
79 | b term_default_loop @ else handle default (1-8, except 2) | ||
80 | |||
81 | minus_term: | ||
82 | mov r10, #1024 @ r10 = -1024 for weight clipping | ||
83 | rsb r10, r10, #0 @ (only used for negative terms) | ||
84 | cmn r2, #1 @ term == -1 ? | ||
85 | beq term_minus_1 | ||
86 | cmn r2, #2 @ term == -2 ? | ||
87 | beq term_minus_2 | ||
88 | cmn r2, #3 @ term == -3 ? | ||
89 | beq term_minus_3 | ||
90 | b common_exit @ any other negative term: no samples are touched | ||
91 | |||
92 | /* | ||
93 | ****************************************************************************** | ||
94 | * Loop to handle term = 17 condition | ||
95 | * | ||
96 | * r0 = dpp->weight_B r8 = previous left sample | ||
97 | * r1 = bptr r9 = | ||
98 | * r2 = current sample r10 = second previous right sample | ||
99 | * r3 = previous right sample r11 = 512 (for rounding) | ||
100 | * r4 = dpp->weight_A ip = current decorrelation value | ||
101 | * r5 = dpp sp = | ||
102 | * r6 = dpp->delta lr = second previous left sample | ||
103 | * r7 = eptr pc = | ||
104 | ******************************************************************************* | ||
105 | */ | ||
106 | |||
107 | term_17_loop: | ||
108 | rsbs ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev; Z set if it is 0 | ||
109 | mov lr, r8 @ previous becomes 2nd previous | ||
110 | ldr r2, [r1], #4 @ get sample & update pointer | ||
111 | mla r8, ip, r4, r11 @ mult decorr value by weight, round, | ||
112 | add r8, r2, r8, asr #10 @ shift, and add to new sample | ||
113 | strne r8, [r1, #-4] @ if change possible, store sample back | ||
114 | cmpne r2, #0 @ (flags still from rsbs) also test original sample for 0 | ||
115 | beq .L325 @ no weight update if decorr value or sample is 0 | ||
116 | teq ip, r2 @ update weight based on signs | ||
117 | submi r4, r4, r6 @ signs differ: weight_A -= delta | ||
118 | addpl r4, r4, r6 @ signs match: weight_A += delta | ||
119 | |||
120 | .L325: rsbs ip, r10, r3, asl #1 @ do same thing for right channel | ||
121 | mov r10, r3 @ previous right becomes 2nd previous right | ||
122 | ldr r2, [r1], #4 @ get right sample & update pointer | ||
123 | mla r3, ip, r0, r11 @ decorr value * weight_B + 512 | ||
124 | add r3, r2, r3, asr #10 @ r3 = new right sample | ||
125 | strne r3, [r1, #-4] @ store back only if decorr value was non-zero | ||
126 | cmpne r2, #0 | ||
127 | beq .L329 @ no weight update if decorr value or sample is 0 | ||
128 | teq ip, r2 | ||
129 | submi r0, r0, r6 @ signs differ: weight_B -= delta | ||
130 | addpl r0, r0, r6 @ signs match: weight_B += delta | ||
131 | |||
132 | .L329: cmp r7, r1 @ loop back if more samples to do | ||
133 | bhi term_17_loop | ||
134 | b store_1718 @ common exit for terms 17 & 18 | ||
135 | |||
136 | /* | ||
137 | ****************************************************************************** | ||
138 | * Loop to handle term = 18 condition | ||
139 | * | ||
140 | * r0 = dpp->weight_B r8 = previous left sample | ||
141 | * r1 = bptr r9 = | ||
142 | * r2 = current sample r10 = second previous right sample | ||
143 | * r3 = previous right sample r11 = 512 (for rounding) | ||
144 | * r4 = dpp->weight_A ip = decorrelation value | ||
145 | * r5 = dpp sp = | ||
146 | * r6 = dpp->delta lr = second previous left sample | ||
147 | * r7 = eptr pc = | ||
148 | ******************************************************************************* | ||
149 | */ | ||
150 | |||
151 | term_18_loop: | ||
152 | sub ip, r8, lr @ decorr value = | ||
153 | mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 | ||
154 | adds ip, r8, ip, asr #1 @ computed as prev + ((prev - 2nd prev) >> 1); Z set if it is 0 | ||
155 | ldr r2, [r1], #4 @ get sample & update pointer | ||
156 | mla r8, ip, r4, r11 @ mult decorr value by weight, round, | ||
157 | add r8, r2, r8, asr #10 @ shift, and add to new sample | ||
158 | strne r8, [r1, #-4] @ if change possible, store sample back | ||
159 | cmpne r2, #0 @ (flags still from adds) also test original sample for 0 | ||
160 | beq .L337 @ no weight update if decorr value or sample is 0 | ||
161 | teq ip, r2 @ update weight based on signs | ||
162 | submi r4, r4, r6 @ signs differ: weight_A -= delta | ||
163 | addpl r4, r4, r6 @ signs match: weight_A += delta | ||
164 | |||
165 | .L337: sub ip, r3, r10 @ do same thing for right channel | ||
166 | mov r10, r3 @ previous right becomes 2nd previous right | ||
167 | adds ip, r3, ip, asr #1 @ right decorr value; Z set if it is 0 | ||
168 | ldr r2, [r1], #4 @ get right sample & update pointer | ||
169 | mla r3, ip, r0, r11 @ decorr value * weight_B + 512 | ||
170 | add r3, r2, r3, asr #10 @ r3 = new right sample | ||
171 | strne r3, [r1, #-4] @ store back only if decorr value was non-zero | ||
172 | cmpne r2, #0 | ||
173 | beq .L341 @ no weight update if decorr value or sample is 0 | ||
174 | teq ip, r2 | ||
175 | submi r0, r0, r6 @ signs differ: weight_B -= delta | ||
176 | addpl r0, r0, r6 @ signs match: weight_B += delta | ||
177 | |||
178 | .L341: cmp r7, r1 @ loop back if more samples to do | ||
179 | bhi term_18_loop @ otherwise fall through into store_1718 | ||
180 | |||
181 | /* common exit for terms 17 & 18 */ | ||
182 | |||
183 | store_1718: | ||
184 | str r3, [r5, #40] @ store sample history into struct | ||
185 | str r8, [r5, #8] @ previous left -> dpp+8 (r3 above is previous right -> dpp+40) | ||
186 | str r10, [r5, #44] @ 2nd previous right -> dpp+44 | ||
187 | str lr, [r5, #12] @ 2nd previous left -> dpp+12 | ||
188 | b common_exit @ and return | ||
189 | |||
190 | /* | ||
191 | ****************************************************************************** | ||
192 | * Loop to handle term = 2 condition | ||
193 | * (note that this case can be handled by the default term handler (1-8), but | ||
194 | * this special case is faster because it doesn't have to read memory twice) | ||
195 | * | ||
196 | * r0 = dpp->weight_B r8 = previous left sample | ||
197 | * r1 = bptr r9 = | ||
198 | * r2 = current sample r10 = second previous right sample | ||
199 | * r3 = previous right sample r11 = 512 (for rounding) | ||
200 | * r4 = dpp->weight_A ip = decorrelation value | ||
201 | * r5 = dpp sp = | ||
202 | * r6 = dpp->delta lr = second previous left sample | ||
203 | * r7 = eptr pc = | ||
204 | ******************************************************************************* | ||
205 | */ | ||
206 | |||
207 | term_2_loop: | ||
208 | movs ip, lr @ get decorrelation value & test | ||
209 | mov lr, r8 @ previous becomes 2nd previous | ||
210 | ldr r2, [r1], #4 @ get sample & update pointer | ||
211 | mla r8, ip, r4, r11 @ mult decorr value by weight, round, | ||
212 | add r8, r2, r8, asr #10 @ shift, and add to new sample | ||
213 | strne r8, [r1, #-4] @ if change possible, store sample back | ||
214 | cmpne r2, #0 @ (flags still from movs) also test original sample for 0 | ||
215 | beq .L225 @ no weight update if decorr value or sample is 0 | ||
216 | teq ip, r2 @ update weight based on signs | ||
217 | submi r4, r4, r6 @ signs differ: weight_A -= delta | ||
218 | addpl r4, r4, r6 @ signs match: weight_A += delta | ||
219 | |||
220 | .L225: movs ip, r10 @ do same thing for right channel | ||
221 | mov r10, r3 @ previous right becomes 2nd previous right | ||
222 | ldr r2, [r1], #4 @ get right sample & update pointer | ||
223 | mla r3, ip, r0, r11 @ decorr value * weight_B + 512 | ||
224 | add r3, r2, r3, asr #10 @ r3 = new right sample | ||
225 | strne r3, [r1, #-4] @ store back only if decorr value was non-zero | ||
226 | cmpne r2, #0 | ||
227 | beq .L229 @ no weight update if decorr value or sample is 0 | ||
228 | teq ip, r2 | ||
229 | submi r0, r0, r6 @ signs differ: weight_B -= delta | ||
230 | addpl r0, r0, r6 @ signs match: weight_B += delta | ||
231 | |||
232 | .L229: cmp r7, r1 @ loop back if more samples to do | ||
233 | bhi term_2_loop | ||
234 | b default_term_exit @ this exit updates all dpp->samples | ||
235 | |||
236 | /* | ||
237 | ****************************************************************************** | ||
238 | * Loop to handle default term condition | ||
239 | * | ||
240 | * r0 = dpp->weight_B r8 = result accumulator | ||
241 | * r1 = bptr r9 = | ||
242 | * r2 = dpp->term r10 = | ||
243 | * r3 = decorrelation value r11 = 512 (for rounding) | ||
244 | * r4 = dpp->weight_A ip = current sample | ||
245 | * r5 = dpp sp = | ||
246 | * r6 = dpp->delta lr = | ||
247 | * r7 = eptr pc = | ||
248 | ******************************************************************************* | ||
249 | */ | ||
250 | |||
251 | term_default_loop: | ||
252 | ldr ip, [r1] @ get original sample | ||
253 | ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term | ||
254 | mla r8, r3, r4, r11 @ mult decorr value by weight, round, | ||
255 | add r8, ip, r8, asr #10 @ shift and add to new sample | ||
256 | str r8, [r1], #4 @ store update sample | ||
257 | cmp r3, #0 @ skip weight update if decorr value is 0 | ||
258 | cmpne ip, #0 @ ... or if the original sample is 0 | ||
259 | beq .L350 | ||
260 | teq ip, r3 @ update weight based on signs | ||
261 | submi r4, r4, r6 @ signs differ: weight_A -= delta | ||
262 | addpl r4, r4, r6 @ signs match: weight_A += delta | ||
263 | |||
264 | .L350: ldr ip, [r1] @ do the same thing for right channel | ||
265 | ldr r3, [r1, -r2, asl #3] @ right sample from 'term' stereo pairs back | ||
266 | mla r8, r3, r0, r11 @ decorr value * weight_B + 512 | ||
267 | add r8, ip, r8, asr #10 @ r8 = new right sample | ||
268 | str r8, [r1], #4 @ store it and advance pointer | ||
269 | cmp r3, #0 | ||
270 | cmpne ip, #0 | ||
271 | beq .L354 @ no weight update if decorr value or sample is 0 | ||
272 | teq ip, r3 | ||
273 | submi r0, r0, r6 @ signs differ: weight_B -= delta | ||
274 | addpl r0, r0, r6 @ signs match: weight_B += delta | ||
275 | |||
276 | .L354: cmp r7, r1 @ loop back if more samples to do | ||
277 | bhi term_default_loop @ otherwise fall through into default_term_exit | ||
278 | |||
279 | /* | ||
280 | * This exit is used by terms 1-8 to store the previous 8 samples into the decorr | ||
281 | * structure (even if they are not all used for the given term) | ||
282 | */ | ||
283 | |||
284 | default_term_exit: | ||
285 | ldrsh r3, [r5, #0] @ r3 = dpp->term | ||
286 | sub ip, r3, #1 @ ip = term - 1 = history slot for the newest pair | ||
287 | mov lr, #7 @ lr counts 7 down to 0: eight pairs are copied | ||
288 | |||
289 | .L358: and r3, ip, #7 @ slot index wraps modulo 8 | ||
290 | add r3, r5, r3, asl #2 @ r3 = dpp + slot * 4 | ||
291 | ldr r2, [r1, #-4] @ right sample of the pair just below r1 | ||
292 | str r2, [r3, #40] @ -> history array at dpp+40 (presumably samples_B; confirm) | ||
293 | ldr r2, [r1, #-8]! @ left sample of that pair; r1 steps back one pair | ||
294 | str r2, [r3, #8] @ -> history array at dpp+8 (presumably samples_A; confirm) | ||
295 | sub ip, ip, #1 @ next older pair goes in the previous slot | ||
296 | sub lr, lr, #1 | ||
297 | cmn lr, #1 @ done once the counter reaches -1 | ||
298 | bne .L358 | ||
299 | b common_exit | ||
300 | |||
301 | /* | ||
302 | ****************************************************************************** | ||
303 | * Loop to handle term = -1 condition | ||
304 | * | ||
305 | * r0 = dpp->weight_B r8 = | ||
306 | * r1 = bptr r9 = | ||
307 | * r2 = intermediate result r10 = -1024 (for clipping) | ||
308 | * r3 = previous right sample r11 = 512 (for rounding) | ||
309 | * r4 = dpp->weight_A ip = current sample | ||
310 | * r5 = dpp sp = | ||
311 | * r6 = dpp->delta lr = updated left sample | ||
312 | * r7 = eptr pc = | ||
313 | ******************************************************************************* | ||
314 | */ | ||
315 | |||
316 | term_minus_1: | ||
317 | ldr r3, [r1, #-4] @ r3 = previous right sample | ||
318 | |||
319 | term_minus_1_loop: | ||
320 | ldr ip, [r1] @ for left channel the decorrelation value | ||
321 | mla r2, r3, r4, r11 @ is the previous right sample (in r3) | ||
322 | add lr, ip, r2, asr #10 @ lr = updated left sample | ||
323 | str lr, [r1], #8 @ store it and advance to the next stereo pair | ||
324 | cmp r3, #0 @ skip weight update if decorr value is 0 | ||
325 | cmpne ip, #0 @ ... or if the original sample is 0 | ||
326 | beq .L361 | ||
327 | teq ip, r3 @ update weight based on signs | ||
328 | submi r4, r4, r6 @ signs differ: weight_A -= delta | ||
329 | addpl r4, r4, r6 @ signs match: weight_A += delta | ||
330 | cmp r4, #1024 @ then clip weight to +/-1024 | ||
331 | movgt r4, #1024 | ||
332 | cmp r4, r10 | ||
333 | movlt r4, r10 | ||
334 | |||
335 | .L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value | ||
336 | mla r3, lr, r0, r11 @ is the just updated left sample (in lr) | ||
337 | add r3, r2, r3, asr #10 @ r3 = updated right sample (next pass's decorr value) | ||
338 | str r3, [r1, #-4] | ||
339 | cmp lr, #0 @ skip weight update if decorr value is 0 | ||
340 | cmpne r2, #0 @ ... or if the original sample is 0 | ||
341 | beq .L369 | ||
342 | teq r2, lr | ||
343 | submi r0, r0, r6 @ signs differ: weight_B -= delta | ||
344 | addpl r0, r0, r6 @ signs match: weight_B += delta | ||
345 | cmp r0, #1024 @ then clip weight to +/-1024 | ||
346 | movgt r0, #1024 | ||
347 | cmp r0, r10 | ||
348 | movlt r0, r10 | ||
349 | |||
350 | .L369: cmp r7, r1 @ loop back if more samples to do | ||
351 | bhi term_minus_1_loop | ||
352 | |||
353 | str r3, [r5, #8] @ else store right sample and exit | ||
354 | b common_exit | ||
355 | |||
356 | /* | ||
357 | ****************************************************************************** | ||
358 | * Loop to handle term = -2 condition | ||
359 | * (note that the channels are processed in the reverse order here) | ||
360 | * | ||
361 | * r0 = dpp->weight_B r8 = | ||
362 | * r1 = bptr r9 = | ||
363 | * r2 = intermediate result r10 = -1024 (for clipping) | ||
364 | * r3 = previous left sample r11 = 512 (for rounding) | ||
365 | * r4 = dpp->weight_A ip = current sample | ||
366 | * r5 = dpp sp = | ||
367 | * r6 = dpp->delta lr = updated right sample | ||
368 | * r7 = eptr pc = | ||
369 | ******************************************************************************* | ||
370 | */ | ||
371 | |||
372 | term_minus_2: | ||
373 | ldr r3, [r1, #-8] @ r3 = previous left sample | ||
374 | |||
375 | term_minus_2_loop: | ||
376 | ldr ip, [r1, #4] @ for right channel the decorrelation value | ||
377 | mla r2, r3, r0, r11 @ is the previous left sample (in r3) | ||
378 | add lr, ip, r2, asr #10 @ lr = updated right sample | ||
379 | str lr, [r1, #4] | ||
380 | cmp r3, #0 @ skip weight update if decorr value is 0 | ||
381 | cmpne ip, #0 @ ... or if the original sample is 0 | ||
382 | beq .L380 | ||
383 | teq ip, r3 @ update weight based on signs | ||
384 | submi r0, r0, r6 @ signs differ: weight_B -= delta | ||
385 | addpl r0, r0, r6 @ signs match: weight_B += delta | ||
386 | cmp r0, #1024 @ then clip weight to +/-1024 | ||
387 | movgt r0, #1024 | ||
388 | cmp r0, r10 | ||
389 | movlt r0, r10 | ||
390 | |||
391 | .L380: ldr r2, [r1, #0] @ for left channel the decorrelation value | ||
392 | mla r3, lr, r4, r11 @ is the just updated right sample (in lr) | ||
393 | add r3, r2, r3, asr #10 @ r3 = updated left sample (next pass's decorr value) | ||
394 | str r3, [r1], #8 @ store it and advance to the next stereo pair | ||
395 | cmp lr, #0 @ skip weight update if decorr value is 0 | ||
396 | cmpne r2, #0 @ ... or if the original sample is 0 | ||
397 | beq .L388 | ||
398 | teq r2, lr | ||
399 | submi r4, r4, r6 @ signs differ: weight_A -= delta | ||
400 | addpl r4, r4, r6 @ signs match: weight_A += delta | ||
401 | cmp r4, #1024 @ then clip weight to +/-1024 | ||
402 | movgt r4, #1024 | ||
403 | cmp r4, r10 | ||
404 | movlt r4, r10 | ||
405 | |||
406 | .L388: cmp r7, r1 @ loop back if more samples to do | ||
407 | bhi term_minus_2_loop | ||
408 | |||
409 | str r3, [r5, #40] @ else store left channel and exit | ||
410 | b common_exit | ||
411 | |||
412 | /* | ||
413 | ****************************************************************************** | ||
414 | * Loop to handle term = -3 condition | ||
415 | * | ||
416 | * r0 = dpp->weight_B r8 = previous left sample | ||
417 | * r1 = bptr r9 = | ||
418 | * r2 = current left sample r10 = -1024 (for clipping) | ||
419 | * r3 = previous right sample r11 = 512 (for rounding) | ||
420 | * r4 = dpp->weight_A ip = intermediate result | ||
421 | * r5 = dpp sp = | ||
422 | * r6 = dpp->delta lr = | ||
423 | * r7 = eptr pc = | ||
424 | ******************************************************************************* | ||
425 | */ | ||
426 | |||
427 | term_minus_3: | ||
428 | ldr r3, [r1, #-4] @ load previous samples | ||
429 | ldr r8, [r1, #-8] @ (r3 = previous right, r8 = previous left) | ||
430 | |||
431 | term_minus_3_loop: | ||
432 | ldr ip, [r1] @ ip = original left sample | ||
433 | mla r2, r3, r4, r11 @ left decorr value is the previous right sample (r3) | ||
434 | add r2, ip, r2, asr #10 @ r2 = updated left sample | ||
435 | str r2, [r1], #4 | ||
436 | cmp r3, #0 @ skip weight update if decorr value is 0 | ||
437 | cmpne ip, #0 @ ... or if the original sample is 0 | ||
438 | beq .L399 | ||
439 | teq ip, r3 @ update weight based on signs | ||
440 | submi r4, r4, r6 @ signs differ: weight_A -= delta | ||
441 | addpl r4, r4, r6 @ signs match: weight_A += delta | ||
442 | cmp r4, #1024 @ then clip weight to +/-1024 | ||
443 | movgt r4, #1024 | ||
444 | cmp r4, r10 | ||
445 | movlt r4, r10 | ||
446 | |||
447 | .L399: movs ip, r8 @ ip = previous left we use now | ||
448 | mov r8, r2 @ r8 = current left we use next time | ||
449 | ldr r2, [r1], #4 @ r2 = original right sample | ||
450 | mla r3, ip, r0, r11 @ right decorr value is the previous left sample (ip) | ||
451 | add r3, r2, r3, asr #10 @ r3 = updated right sample | ||
452 | strne r3, [r1, #-4] @ store back only if decorr value was non-zero | ||
453 | cmpne r2, #0 @ (flags still from movs) also test original sample for 0 | ||
454 | beq .L407 | ||
455 | teq ip, r2 | ||
456 | submi r0, r0, r6 @ signs differ: weight_B -= delta | ||
457 | addpl r0, r0, r6 @ signs match: weight_B += delta | ||
458 | cmp r0, #1024 @ then clip weight to +/-1024 | ||
459 | movgt r0, #1024 | ||
460 | cmp r0, r10 | ||
461 | movlt r0, r10 | ||
462 | |||
463 | .L407: cmp r7, r1 @ loop back if more samples to do | ||
464 | bhi term_minus_3_loop | ||
465 | |||
466 | str r3, [r5, #8] @ else store previous samples & exit | ||
467 | str r8, [r5, #40] @ (falls through into common_exit) | ||
468 | |||
469 | /* | ||
470 | * Before finally exiting we must store weights back for next time | ||
471 | */ | ||
472 | |||
473 | common_exit: @ NOTE(review): ldmpc below is not defined in this file; presumably a macro from config.h that restores the listed registers and returns - confirm | ||
474 | strh r4, [r5, #4] @ dpp->weight_A = r4 | ||
475 | strh r0, [r5, #6] @ dpp->weight_B = r0 | ||
476 | ldmpc regs="r4-r8, r10-r11" | ||
477 | |||