summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libwavpack/arm.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libwavpack/arm.S')
-rw-r--r--lib/rbcodec/codecs/libwavpack/arm.S477
1 files changed, 477 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libwavpack/arm.S b/lib/rbcodec/codecs/libwavpack/arm.S
new file mode 100644
index 0000000000..32de1df7de
--- /dev/null
+++ b/lib/rbcodec/codecs/libwavpack/arm.S
@@ -0,0 +1,477 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by David Bryant
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21
22/* This is an assembly optimized version of the following WavPack function:
23 *
24 * void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
25 * long *buffer, long sample_count);
26 *
27 * It performs a single pass of stereo decorrelation on the provided buffer.
28 * Note that this version of the function requires that the 8 previous stereo
29 * samples are visible and correct. In other words, it ignores the "samples_*"
30 * fields in the decorr_pass structure and gets the history data directly
31 * from the buffer. It does, however, return the appropriate history samples
32 * to the decorr_pass structure before returning.
33 *
34 * This is written to work on an ARM7TDMI processor. This version only uses the
35 * 32-bit multiply-accumulate instruction and so will overflow with 24-bit
36 * WavPack files.
37 */
38
39#include "config.h"
40
41 .text
42 .align
43 .global decorr_stereo_pass_cont_arm
44
45/*
46 * on entry:
47 *
48 * r0 = struct decorr_pass *dpp
49 * r1 = long *buffer
50 * r2 = long sample_count
51 */
52
53decorr_stereo_pass_cont_arm:
54
55 stmfd sp!, {r4 - r8, r10, r11, lr} @ push r4-r8, r10, r11 and the return address
56 mov r5, r0 @ r5 = dpp
57 mov r11, #512 @ r11 = 512 for rounding
58 ldrsh r6, [r0, #2] @ r6 = dpp->delta
59 ldrsh r4, [r0, #4] @ r4 = dpp->weight_A
60 ldrsh r0, [r0, #6] @ r0 = dpp->weight_B
61 cmp r2, #0 @ exit if no samples to process
62 beq common_exit @ (weights are still written back there)
63
64 add r7, r1, r2, asl #3 @ r7 = buffer ending position (buffer + sample_count * 8 bytes)
65 ldrsh r2, [r5, #0] @ r2 = dpp->term
66 cmp r2, #0
67 bmi minus_term @ negative terms are dispatched separately
68
69 ldr lr, [r1, #-16] @ load 2 sample history from buffer
70 ldr r10, [r1, #-12] @ for terms 2, 17, and 18
71 ldr r8, [r1, #-8] @ (lr, r10 = words at -16, -12: left, right two samples back)
72 ldr r3, [r1, #-4] @ (r8, r3 = words at -8, -4: previous left, right)
73 cmp r2, #17
74 beq term_17_loop
75 cmp r2, #18
76 beq term_18_loop
77 cmp r2, #2
78 beq term_2_loop
79 b term_default_loop @ else handle default (1-8, except 2)
80
81minus_term:
82 mov r10, #1024 @ r10 = -1024 for weight clipping
83 rsb r10, r10, #0 @ (only used for negative terms)
84 cmn r2, #1 @ term == -1 ?
85 beq term_minus_1
86 cmn r2, #2 @ term == -2 ?
87 beq term_minus_2
88 cmn r2, #3 @ term == -3 ?
89 beq term_minus_3
90 b common_exit @ any other negative term: no samples are processed
91
92/*
93 ******************************************************************************
94 * Loop to handle term = 17 condition
95 *
96 * r0 = dpp->weight_B r8 = previous left sample
97 * r1 = bptr r9 =
98 * r2 = current sample r10 = second previous right sample
99 * r3 = previous right sample r11 = 512 (for rounding)
100 * r4 = dpp->weight_A ip = current decorrelation value
101 * r5 = dpp sp =
102 * r6 = dpp->delta lr = second previous left sample
103 * r7 = eptr pc =
104 *******************************************************************************
105 */
106
107term_17_loop:
108 rsbs ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev
109 mov lr, r8 @ previous becomes 2nd previous
110 ldr r2, [r1], #4 @ get sample & update pointer
111 mla r8, ip, r4, r11 @ mult decorr value by weight, round,
112 add r8, r2, r8, asr #10 @ shift, and add to new sample
113 strne r8, [r1, #-4] @ if change possible, store sample back
114 cmpne r2, #0 @ flags are still those of rsbs: test sample only if decorr value != 0
115 beq .L325 @ skip weight update if decorr value or sample is zero
116 teq ip, r2 @ update weight based on signs
117 submi r4, r4, r6 @ signs differ: weight_A -= delta
118 addpl r4, r4, r6 @ signs match: weight_A += delta
119
120.L325: rsbs ip, r10, r3, asl #1 @ do same thing for right channel
121 mov r10, r3 @ previous right becomes 2nd previous right
122 ldr r2, [r1], #4 @ get right sample & update pointer
123 mla r3, ip, r0, r11 @ r3 = decorr value * weight_B + 512
124 add r3, r2, r3, asr #10 @ r3 = sample + (r3 >> 10)
125 strne r3, [r1, #-4] @ store back only if decorr value != 0
126 cmpne r2, #0
127 beq .L329 @ skip weight update if decorr value or sample is zero
128 teq ip, r2
129 submi r0, r0, r6 @ signs differ: weight_B -= delta
130 addpl r0, r0, r6 @ signs match: weight_B += delta
131
132.L329: cmp r7, r1 @ loop back if more samples to do
133 bhi term_17_loop
134 b store_1718 @ common exit for terms 17 & 18
135
136/*
137 ******************************************************************************
138 * Loop to handle term = 18 condition
139 *
140 * r0 = dpp->weight_B r8 = previous left sample
141 * r1 = bptr r9 =
142 * r2 = current sample r10 = second previous right sample
143 * r3 = previous right sample r11 = 512 (for rounding)
144 * r4 = dpp->weight_A ip = decorrelation value
145 * r5 = dpp sp =
146 * r6 = dpp->delta lr = second previous left sample
147 * r7 = eptr pc =
148 *******************************************************************************
149 */
150
151term_18_loop:
152 sub ip, r8, lr @ decorr value =
153 mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1
154 adds ip, r8, ip, asr #1 @ computed as prev + ((prev - 2nd prev) >> 1), sets flags for tests below
155 ldr r2, [r1], #4 @ get sample & update pointer
156 mla r8, ip, r4, r11 @ mult decorr value by weight, round,
157 add r8, r2, r8, asr #10 @ shift, and add to new sample
158 strne r8, [r1, #-4] @ if change possible, store sample back
159 cmpne r2, #0 @ flags are still those of adds: test sample only if decorr value != 0
160 beq .L337 @ skip weight update if decorr value or sample is zero
161 teq ip, r2 @ update weight based on signs
162 submi r4, r4, r6 @ signs differ: weight_A -= delta
163 addpl r4, r4, r6 @ signs match: weight_A += delta
164
165.L337: sub ip, r3, r10 @ do same thing for right channel
166 mov r10, r3 @ previous right becomes 2nd previous right
167 adds ip, r3, ip, asr #1 @ ip = prev + ((prev - 2nd prev) >> 1), sets flags
168 ldr r2, [r1], #4 @ get right sample & update pointer
169 mla r3, ip, r0, r11 @ r3 = decorr value * weight_B + 512
170 add r3, r2, r3, asr #10 @ r3 = sample + (r3 >> 10)
171 strne r3, [r1, #-4] @ store back only if decorr value != 0
172 cmpne r2, #0
173 beq .L341 @ skip weight update if decorr value or sample is zero
174 teq ip, r2
175 submi r0, r0, r6 @ signs differ: weight_B -= delta
176 addpl r0, r0, r6 @ signs match: weight_B += delta
177
178.L341: cmp r7, r1 @ loop back if more samples to do
179 bhi term_18_loop @ otherwise fall through to store_1718
180
181/* common exit for terms 17 & 18 */
182
183store_1718:
184 str r3, [r5, #40] @ store sample history into struct: previous right at offset 40
185 str r8, [r5, #8] @ previous left at offset 8
186 str r10, [r5, #44] @ second previous right at offset 44
187 str lr, [r5, #12] @ second previous left at offset 12
188 b common_exit @ and return
189
190/*
191 ******************************************************************************
192 * Loop to handle term = 2 condition
193 * (note that this case can be handled by the default term handler (1-8), but
194 * this special case is faster because it doesn't have to read memory twice)
195 *
196 * r0 = dpp->weight_B r8 = previous left sample
197 * r1 = bptr r9 =
198 * r2 = current sample r10 = second previous right sample
199 * r3 = previous right sample r11 = 512 (for rounding)
200 * r4 = dpp->weight_A ip = decorrelation value
201 * r5 = dpp sp =
202 * r6 = dpp->delta lr = second previous left sample
203 * r7 = eptr pc =
204 *******************************************************************************
205 */
206
207term_2_loop:
208 movs ip, lr @ get decorrelation value & test
209 mov lr, r8 @ previous becomes 2nd previous
210 ldr r2, [r1], #4 @ get sample & update pointer
211 mla r8, ip, r4, r11 @ mult decorr value by weight, round,
212 add r8, r2, r8, asr #10 @ shift, and add to new sample
213 strne r8, [r1, #-4] @ if change possible, store sample back
214 cmpne r2, #0 @ flags are still those of movs: test sample only if decorr value != 0
215 beq .L225 @ skip weight update if decorr value or sample is zero
216 teq ip, r2 @ update weight based on signs
217 submi r4, r4, r6 @ signs differ: weight_A -= delta
218 addpl r4, r4, r6 @ signs match: weight_A += delta
219
220.L225: movs ip, r10 @ do same thing for right channel
221 mov r10, r3 @ previous right becomes 2nd previous right
222 ldr r2, [r1], #4 @ get right sample & update pointer
223 mla r3, ip, r0, r11 @ r3 = decorr value * weight_B + 512
224 add r3, r2, r3, asr #10 @ r3 = sample + (r3 >> 10)
225 strne r3, [r1, #-4] @ store back only if decorr value != 0
226 cmpne r2, #0
227 beq .L229 @ skip weight update if decorr value or sample is zero
228 teq ip, r2
229 submi r0, r0, r6 @ signs differ: weight_B -= delta
230 addpl r0, r0, r6 @ signs match: weight_B += delta
231
232.L229: cmp r7, r1 @ loop back if more samples to do
233 bhi term_2_loop
234 b default_term_exit @ this exit updates all dpp->samples
235
236/*
237 ******************************************************************************
238 * Loop to handle default term condition
239 *
240 * r0 = dpp->weight_B r8 = result accumulator
241 * r1 = bptr r9 =
242 * r2 = dpp->term r10 =
243 * r3 = decorrelation value r11 = 512 (for rounding)
244 * r4 = dpp->weight_A ip = current sample
245 * r5 = dpp sp =
246 * r6 = dpp->delta lr =
247 * r7 = eptr pc =
248 *******************************************************************************
249 */
250
251term_default_loop:
252 ldr ip, [r1] @ get original sample
253 ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term (word at bptr - term * 8)
254 mla r8, r3, r4, r11 @ mult decorr value by weight, round,
255 add r8, ip, r8, asr #10 @ shift and add to new sample
256 str r8, [r1], #4 @ store update sample
257 cmp r3, #0 @ skip weight update if decorr value
258 cmpne ip, #0 @ or original sample is zero
259 beq .L350
260 teq ip, r3 @ update weight based on signs
261 submi r4, r4, r6 @ signs differ: weight_A -= delta
262 addpl r4, r4, r6 @ signs match: weight_A += delta
263
264.L350: ldr ip, [r1] @ do the same thing for right channel
265 ldr r3, [r1, -r2, asl #3] @ right decorr value, again term * 8 bytes back
266 mla r8, r3, r0, r11 @ r8 = decorr value * weight_B + 512
267 add r8, ip, r8, asr #10 @ r8 = sample + (r8 >> 10)
268 str r8, [r1], #4 @ store updated right sample & advance pointer
269 cmp r3, #0 @ skip weight update if decorr value
270 cmpne ip, #0 @ or original sample is zero
271 beq .L354
272 teq ip, r3
273 submi r0, r0, r6 @ signs differ: weight_B -= delta
274 addpl r0, r0, r6 @ signs match: weight_B += delta
275
276.L354: cmp r7, r1 @ loop back if more samples to do
277 bhi term_default_loop @ otherwise fall through to default_term_exit
278
279/*
280 * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
281 * structure (even if they are not all used for the given term)
282 */
283
284default_term_exit:
285 ldrsh r3, [r5, #0] @ r3 = dpp->term
286 sub ip, r3, #1 @ ip = term - 1, index of the first history slot to fill
287 mov lr, #7 @ lr counts 7 down to 0, so the loop below runs 8 times
288
289.L358: and r3, ip, #7 @ wrap slot index into 0-7
290 add r3, r5, r3, asl #2 @ r3 = dpp + slot * 4
291 ldr r2, [r1, #-4] @ word just below bptr (right channel slot of that sample pair)
292 str r2, [r3, #40] @ store at offset 40 + slot * 4 (presumably samples_B, layout not visible here)
293 ldr r2, [r1, #-8]! @ step bptr back one stereo sample and fetch the word there (left channel)
294 str r2, [r3, #8] @ store at offset 8 + slot * 4 (presumably samples_A, layout not visible here)
295 sub ip, ip, #1 @ next lower slot index
296 sub lr, lr, #1
297 cmn lr, #1 @ finished once lr has reached -1
298 bne .L358
299 b common_exit
300
301/*
302 ******************************************************************************
303 * Loop to handle term = -1 condition
304 *
305 * r0 = dpp->weight_B r8 =
306 * r1 = bptr r9 =
307 * r2 = intermediate result r10 = -1024 (for clipping)
308 * r3 = previous right sample r11 = 512 (for rounding)
309 * r4 = dpp->weight_A ip = current sample
310 * r5 = dpp sp =
311 * r6 = dpp->delta lr = updated left sample
312 * r7 = eptr pc =
313 *******************************************************************************
314 */
315
316term_minus_1:
317 ldr r3, [r1, #-4] @ r3 = previous right sample
318
319term_minus_1_loop:
320 ldr ip, [r1] @ for left channel the decorrelation value
321 mla r2, r3, r4, r11 @ is the previous right sample (in r3)
322 add lr, ip, r2, asr #10 @ lr = updated left sample
323 str lr, [r1], #8 @ store it and advance past both channels
324 cmp r3, #0 @ skip weight update if decorr value
325 cmpne ip, #0 @ or original sample is zero
326 beq .L361
327 teq ip, r3 @ update weight based on signs
328 submi r4, r4, r6 @ signs differ: weight_A -= delta
329 addpl r4, r4, r6 @ signs match: weight_A += delta
330 cmp r4, #1024 @ then clip weight to +/-1024
331 movgt r4, #1024
332 cmp r4, r10
333 movlt r4, r10
334
335.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value
336 mla r3, lr, r0, r11 @ is the just updated left sample (in lr)
337 add r3, r2, r3, asr #10 @ r3 = updated right sample (decorr value for next left)
338 str r3, [r1, #-4]
339 cmp lr, #0 @ skip weight update if decorr value
340 cmpne r2, #0 @ or original sample is zero
341 beq .L369
342 teq r2, lr
343 submi r0, r0, r6 @ signs differ: weight_B -= delta
344 addpl r0, r0, r6 @ signs match: weight_B += delta
345 cmp r0, #1024 @ then clip weight to +/-1024
346 movgt r0, #1024
347 cmp r0, r10
348 movlt r0, r10
349
350.L369: cmp r7, r1 @ loop back if more samples to do
351 bhi term_minus_1_loop
352
353 str r3, [r5, #8] @ else store right sample and exit
354 b common_exit
355
356/*
357 ******************************************************************************
358 * Loop to handle term = -2 condition
359 * (note that the channels are processed in the reverse order here)
360 *
361 * r0 = dpp->weight_B r8 =
362 * r1 = bptr r9 =
363 * r2 = intermediate result r10 = -1024 (for clipping)
364 * r3 = previous left sample r11 = 512 (for rounding)
365 * r4 = dpp->weight_A ip = current sample
366 * r5 = dpp sp =
367 * r6 = dpp->delta lr = updated right sample
368 * r7 = eptr pc =
369 *******************************************************************************
370 */
371
372term_minus_2:
373 ldr r3, [r1, #-8] @ r3 = previous left sample
374
375term_minus_2_loop:
376 ldr ip, [r1, #4] @ for right channel the decorrelation value
377 mla r2, r3, r0, r11 @ is the previous left sample (in r3)
378 add lr, ip, r2, asr #10 @ lr = updated right sample
379 str lr, [r1, #4] @ store it, pointer stays on the left sample
380 cmp r3, #0 @ skip weight update if decorr value
381 cmpne ip, #0 @ or original sample is zero
382 beq .L380
383 teq ip, r3 @ update weight based on signs
384 submi r0, r0, r6 @ signs differ: weight_B -= delta
385 addpl r0, r0, r6 @ signs match: weight_B += delta
386 cmp r0, #1024 @ then clip weight to +/-1024
387 movgt r0, #1024
388 cmp r0, r10
389 movlt r0, r10
390
391.L380: ldr r2, [r1, #0] @ for left channel the decorrelation value
392 mla r3, lr, r4, r11 @ is the just updated right sample (in lr)
393 add r3, r2, r3, asr #10 @ r3 = updated left sample (decorr value for next right)
394 str r3, [r1], #8 @ store it and advance past both channels
395 cmp lr, #0 @ skip weight update if decorr value
396 cmpne r2, #0 @ or original sample is zero
397 beq .L388
398 teq r2, lr
399 submi r4, r4, r6 @ signs differ: weight_A -= delta
400 addpl r4, r4, r6 @ signs match: weight_A += delta
401 cmp r4, #1024 @ then clip weight to +/-1024
402 movgt r4, #1024
403 cmp r4, r10
404 movlt r4, r10
405
406.L388: cmp r7, r1 @ loop back if more samples to do
407 bhi term_minus_2_loop
408
409 str r3, [r5, #40] @ else store left channel and exit
410 b common_exit
411
412/*
413 ******************************************************************************
414 * Loop to handle term = -3 condition
415 *
416 * r0 = dpp->weight_B r8 = previous left sample
417 * r1 = bptr r9 =
418 * r2 = current left sample r10 = -1024 (for clipping)
419 * r3 = previous right sample r11 = 512 (for rounding)
420 * r4 = dpp->weight_A ip = intermediate result
421 * r5 = dpp sp =
422 * r6 = dpp->delta lr =
423 * r7 = eptr pc =
424 *******************************************************************************
425 */
426
427term_minus_3:
428 ldr r3, [r1, #-4] @ load previous samples
429 ldr r8, [r1, #-8] @ (r3 = previous right, r8 = previous left)
430
431term_minus_3_loop:
432 ldr ip, [r1] @ ip = original left sample
433 mla r2, r3, r4, r11 @ decorr value for left is the previous right sample (in r3)
434 add r2, ip, r2, asr #10 @ r2 = updated left sample
435 str r2, [r1], #4 @ store it & advance to the right sample
436 cmp r3, #0 @ skip weight update if decorr value
437 cmpne ip, #0 @ or original sample is zero
438 beq .L399
439 teq ip, r3 @ update weight based on signs
440 submi r4, r4, r6 @ signs differ: weight_A -= delta
441 addpl r4, r4, r6 @ signs match: weight_A += delta
442 cmp r4, #1024 @ then clip weight to +/-1024
443 movgt r4, #1024
444 cmp r4, r10
445 movlt r4, r10
446
447.L399: movs ip, r8 @ ip = previous left we use now
448 mov r8, r2 @ r8 = current left we use next time
449 ldr r2, [r1], #4 @ get right sample & update pointer
450 mla r3, ip, r0, r11 @ r3 = decorr value * weight_B + 512
451 add r3, r2, r3, asr #10 @ r3 = updated right sample (decorr value for next left)
452 strne r3, [r1, #-4] @ store back only if decorr value != 0 (flags from movs)
453 cmpne r2, #0
454 beq .L407 @ skip weight update if decorr value or sample is zero
455 teq ip, r2
456 submi r0, r0, r6 @ signs differ: weight_B -= delta
457 addpl r0, r0, r6 @ signs match: weight_B += delta
458 cmp r0, #1024 @ then clip weight to +/-1024
459 movgt r0, #1024
460 cmp r0, r10
461 movlt r0, r10
462
463.L407: cmp r7, r1 @ loop back if more samples to do
464 bhi term_minus_3_loop
465
466 str r3, [r5, #8] @ else store previous samples & exit
467 str r8, [r5, #40] @ (r3 = last right, r8 = last left), then fall through to common_exit
468
469/*
470 * Before finally exiting we must store weights back for next time
471 */
472
473common_exit:
474 strh r4, [r5, #4] @ dpp->weight_A = r4
475 strh r0, [r5, #6] @ dpp->weight_B = r0
476 ldmpc regs="r4-r8, r10-r11" @ ldmpc is not defined in this file, presumably a config.h macro that pops these registers and returns
477
477