summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apps/codecs/libwavpack/SOURCES3
-rw-r--r--apps/codecs/libwavpack/arm.S474
-rw-r--r--apps/codecs/libwavpack/unpack.c6
3 files changed, 482 insertions, 1 deletions
diff --git a/apps/codecs/libwavpack/SOURCES b/apps/codecs/libwavpack/SOURCES
index f63c55a87a..8e38767ec6 100644
--- a/apps/codecs/libwavpack/SOURCES
+++ b/apps/codecs/libwavpack/SOURCES
@@ -8,4 +8,7 @@ wputils.c
8#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) 8#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
9coldfire.S 9coldfire.S
10#endif 10#endif
11#if defined(CPU_ARM) && !defined(SIMULATOR)
12arm.S
13#endif
11 14
diff --git a/apps/codecs/libwavpack/arm.S b/apps/codecs/libwavpack/arm.S
new file mode 100644
index 0000000000..0b92bfccd7
--- /dev/null
+++ b/apps/codecs/libwavpack/arm.S
@@ -0,0 +1,474 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by David Bryant
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20/* This is an assembly optimized version of the following WavPack function:
21 *
22 * void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
23 * long *buffer, long sample_count);
24 *
25 * It performs a single pass of stereo decorrelation on the provided buffer.
26 * Note that this version of the function requires that the 8 previous stereo
27 * samples are visible and correct. In other words, it ignores the "samples_*"
28 * fields in the decorr_pass structure and gets the history data directly
29 * from the buffer. It does, however, return the appropriate history samples
30 * to the decorr_pass structure before returning.
31 *
32 * This is written to work on an ARM7TDMI processor. This version only uses the
33 * 32-bit multiply-accumulate instruction and so will overflow with 24-bit
34 * WavPack files. The advanced 64-bit multiply instructions in the ARM will
35 * provide full resolution for this, but are somewhat slower and have not
36 * been included yet.
37 */
38 .text
39 .align
40 .global decorr_stereo_pass_cont_arm
41
42/*
43 * on entry:
44 *
45 * r0 = struct decorr_pass *dpp
46 * r1 = long *buffer
47 * r2 = long sample_count
48 */
49
50decorr_stereo_pass_cont_arm:
51
52 stmfd sp!, {r4 - r8, r10, r11, lr} @ save working registers (restored at common_exit)
53 mov r5, r0 @ r5 = dpp
54 mov r11, #512 @ r11 = 512 for rounding
55 ldrsh r6, [r0, #2] @ r6 = dpp->delta
56 ldrsh r4, [r0, #4] @ r4 = dpp->weight_A
57 ldrsh r0, [r0, #6] @ r0 = dpp->weight_B
58 cmp r2, #0 @ exit if no samples to process
59 beq common_exit @ (weights are written back unchanged)
60
61 add r7, r1, r2, asl #3 @ r7 = buffer ending position (8 bytes per stereo sample)
62 ldrsh r2, [r5, #0] @ r2 = dpp->term
63 cmp r2, #0
64 bmi minus_term @ negative terms are dispatched separately
65
66 ldr lr, [r1, #-16] @ lr = 2nd previous left (history for terms 2, 17, and 18)
67 ldr r10, [r1, #-12] @ r10 = 2nd previous right
68 ldr r8, [r1, #-8] @ r8 = previous left
69 ldr r3, [r1, #-4] @ r3 = previous right
70 cmp r2, #17
71 beq term_17_loop
72 cmp r2, #18
73 beq term_18_loop
74 cmp r2, #2
75 beq term_2_loop
76 b term_default_loop @ else handle default (1-8, except 2)
77
78minus_term:
79 mov r10, #1024 @ r10 = -1024 for weight clipping
80 rsb r10, r10, #0 @ (only used for negative terms)
81 cmn r2, #1 @ term == -1 ?
82 beq term_minus_1
83 cmn r2, #2 @ term == -2 ?
84 beq term_minus_2
85 cmn r2, #3 @ term == -3 ?
86 beq term_minus_3
87 b common_exit @ any other negative term: process nothing, just return
88
89/*
90 ******************************************************************************
91 * Loop to handle term = 17 condition
92 *
93 * r0 = dpp->weight_B r8 = previous left sample
94 * r1 = bptr r9 =
95 * r2 = current sample r10 = second previous right sample
96 * r3 = previous right sample r11 = 512 (for rounding)
97 * r4 = dpp->weight_A ip = current decorrelation value
98 * r5 = dpp sp =
99 * r6 = dpp->delta lr = second previous left sample
100 * r7 = eptr pc =
101 *******************************************************************************
102 */
103
104term_17_loop:
105 rsbs ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev (flags kept for strne)
106 mov lr, r8 @ previous becomes 2nd previous
107 ldr r2, [r1], #4 @ get sample & update pointer
108 mla r8, ip, r4, r11 @ mult decorr value by weight, round,
109 add r8, r2, r8, asr #10 @ shift, and add to new sample
110 strne r8, [r1, #-4] @ store back unless decorr value is 0 (sample is then unchanged)
111 cmpne r2, #0 @ no weight update if decorr value or sample is 0
112 beq .L325
113 teq ip, r2 @ update weight based on signs
114 submi r4, r4, r6 @ signs differ: weight_A -= delta
115 addpl r4, r4, r6 @ signs match: weight_A += delta
116
117.L325: rsbs ip, r10, r3, asl #1 @ do same thing for right channel
118 mov r10, r3 @ previous right becomes 2nd previous right
119 ldr r2, [r1], #4 @ get right sample & update pointer
120 mla r3, ip, r0, r11 @ decorr value * weight_B + 512
121 add r3, r2, r3, asr #10 @ shift, and add to new sample
122 strne r3, [r1, #-4] @ store back unless decorr value is 0
123 cmpne r2, #0 @ no weight update if decorr value or sample is 0
124 beq .L329
125 teq ip, r2 @ update weight based on signs
126 submi r0, r0, r6 @ signs differ: weight_B -= delta
127 addpl r0, r0, r6 @ signs match: weight_B += delta
128
129.L329: cmp r7, r1 @ loop back if more samples to do
130 bhi term_17_loop
131 b store_1718 @ common exit for terms 17 & 18
132
133/*
134 ******************************************************************************
135 * Loop to handle term = 18 condition
136 *
137 * r0 = dpp->weight_B r8 = previous left sample
138 * r1 = bptr r9 =
139 * r2 = current sample r10 = second previous right sample
140 * r3 = previous right sample r11 = 512 (for rounding)
141 * r4 = dpp->weight_A ip = decorrelation value
142 * r5 = dpp sp =
143 * r6 = dpp->delta lr = second previous left sample
144 * r7 = eptr pc =
145 *******************************************************************************
146 */
147
148term_18_loop:
149 sub ip, r8, lr @ decorr value =
150 mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1
151 adds ip, r8, ip, asr #1 @ computed as prev + ((prev - 2nd prev) >> 1); flags kept for strne
152 ldr r2, [r1], #4 @ get sample & update pointer
153 mla r8, ip, r4, r11 @ mult decorr value by weight, round,
154 add r8, r2, r8, asr #10 @ shift, and add to new sample
155 strne r8, [r1, #-4] @ store back unless decorr value is 0 (sample is then unchanged)
156 cmpne r2, #0 @ no weight update if decorr value or sample is 0
157 beq .L337
158 teq ip, r2 @ update weight based on signs
159 submi r4, r4, r6 @ signs differ: weight_A -= delta
160 addpl r4, r4, r6 @ signs match: weight_A += delta
161
162.L337: sub ip, r3, r10 @ do same thing for right channel
163 mov r10, r3 @ previous right becomes 2nd previous right
164 adds ip, r3, ip, asr #1 @ right decorr value; flags kept for strne
165 ldr r2, [r1], #4 @ get right sample & update pointer
166 mla r3, ip, r0, r11 @ decorr value * weight_B + 512
167 add r3, r2, r3, asr #10 @ shift, and add to new sample
168 strne r3, [r1, #-4] @ store back unless decorr value is 0
169 cmpne r2, #0 @ no weight update if decorr value or sample is 0
170 beq .L341
171 teq ip, r2 @ update weight based on signs
172 submi r0, r0, r6 @ signs differ: weight_B -= delta
173 addpl r0, r0, r6 @ signs match: weight_B += delta
174
175.L341: cmp r7, r1 @ loop back if more samples to do
176 bhi term_18_loop @ (otherwise falls through into store_1718)
177
178/* common exit for terms 17 & 18 */
179
180store_1718:
181 str r3, [r5, #40] @ store sample history into struct: previous right at offset 40
182 str r8, [r5, #8] @ previous left at offset 8
183 str r10, [r5, #44] @ 2nd previous right at offset 44
184 str lr, [r5, #12] @ 2nd previous left at offset 12
185 b common_exit @ and return
186
187/*
188 ******************************************************************************
189 * Loop to handle term = 2 condition
190 * (note that this case can be handled by the default term handler (1-8), but
191 * this special case is faster because it doesn't have to read memory twice)
192 *
193 * r0 = dpp->weight_B r8 = previous left sample
194 * r1 = bptr r9 =
195 * r2 = current sample r10 = second previous right sample
196 * r3 = previous right sample r11 = 512 (for rounding)
197 * r4 = dpp->weight_A ip = decorrelation value
198 * r5 = dpp sp =
199 * r6 = dpp->delta lr = second previous left sample
200 * r7 = eptr pc =
201 *******************************************************************************
202 */
203
204term_2_loop:
205 movs ip, lr @ get decorrelation value (2nd previous left) & test
206 mov lr, r8 @ previous becomes 2nd previous
207 ldr r2, [r1], #4 @ get sample & update pointer
208 mla r8, ip, r4, r11 @ mult decorr value by weight, round,
209 add r8, r2, r8, asr #10 @ shift, and add to new sample
210 strne r8, [r1, #-4] @ store back unless decorr value is 0 (sample is then unchanged)
211 cmpne r2, #0 @ no weight update if decorr value or sample is 0
212 beq .L225
213 teq ip, r2 @ update weight based on signs
214 submi r4, r4, r6 @ signs differ: weight_A -= delta
215 addpl r4, r4, r6 @ signs match: weight_A += delta
216
217.L225: movs ip, r10 @ do same thing for right channel
218 mov r10, r3 @ previous right becomes 2nd previous right
219 ldr r2, [r1], #4 @ get right sample & update pointer
220 mla r3, ip, r0, r11 @ decorr value * weight_B + 512
221 add r3, r2, r3, asr #10 @ shift, and add to new sample
222 strne r3, [r1, #-4] @ store back unless decorr value is 0
223 cmpne r2, #0 @ no weight update if decorr value or sample is 0
224 beq .L229
225 teq ip, r2 @ update weight based on signs
226 submi r0, r0, r6 @ signs differ: weight_B -= delta
227 addpl r0, r0, r6 @ signs match: weight_B += delta
228
229.L229: cmp r7, r1 @ loop back if more samples to do
230 bhi term_2_loop
231 b default_term_exit @ this exit updates all dpp->samples
232
233/*
234 ******************************************************************************
235 * Loop to handle default term condition
236 *
237 * r0 = dpp->weight_B r8 = result accumulator
238 * r1 = bptr r9 =
239 * r2 = dpp->term r10 =
240 * r3 = decorrelation value r11 = 512 (for rounding)
241 * r4 = dpp->weight_A ip = current sample
242 * r5 = dpp sp =
243 * r6 = dpp->delta lr =
244 * r7 = eptr pc =
245 *******************************************************************************
246 */
247
248term_default_loop:
249 ldr ip, [r1] @ get original sample
250 ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term (term * 8 bytes back)
251 mla r8, r4, r3, r11 @ mult decorr value by weight, round,
252 add r8, ip, r8, asr #10 @ shift and add to new sample
253 str r8, [r1], #4 @ store updated sample & advance pointer
254 cmp r3, #0 @ no weight update if decorr value
255 cmpne ip, #0 @ or original sample is 0
256 beq .L350
257 teq ip, r3 @ update weight based on signs
258 submi r4, r4, r6 @ signs differ: weight_A -= delta
259 addpl r4, r4, r6 @ signs match: weight_A += delta
260
261.L350: ldr ip, [r1] @ do the same thing for right channel
262 ldr r3, [r1, -r2, asl #3] @ right decorr value, term * 8 bytes back
263 mla r8, r0, r3, r11 @ decorr value * weight_B + 512
264 add r8, ip, r8, asr #10 @ shift and add to new sample
265 str r8, [r1], #4 @ store updated sample & advance pointer
266 cmp r3, #0 @ no weight update if decorr value
267 cmpne ip, #0 @ or original sample is 0
268 beq .L354
269 teq ip, r3 @ update weight based on signs
270 submi r0, r0, r6 @ signs differ: weight_B -= delta
271 addpl r0, r0, r6 @ signs match: weight_B += delta
272
273.L354: cmp r7, r1 @ loop back if more samples to do
274 bhi term_default_loop @ (otherwise falls through into default_term_exit)
275
276/*
277 * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
278 * structure (even if they are not all used for the given term)
279 */
280
281default_term_exit:
282 ldrsh r3, [r5, #0] @ r3 = dpp->term
283 sub ip, r3, #1 @ ip = term - 1 = history slot for the newest sample
284 mov lr, #7 @ lr = loop counter (8 passes: 7 down to 0)
285
286.L358: and r3, ip, #7 @ wrap slot index into 0..7
287 add r3, r5, r3, asl #2 @ r3 = dpp + slot * 4
288 ldr r2, [r1, #-4] @ right sample just below the pointer
289 str r2, [r3, #40] @ -> right-channel history (base offset 40)
290 ldr r2, [r1, #-8]! @ step back one stereo sample, load its left sample
291 str r2, [r3, #8] @ -> left-channel history (base offset 8)
292 sub ip, ip, #1 @ next older sample goes into the previous slot
293 sub lr, lr, #1
294 cmn lr, #1 @ done once the counter reaches -1
295 bne .L358
296 b common_exit
297
298/*
299 ******************************************************************************
300 * Loop to handle term = -1 condition
301 *
302 * r0 = dpp->weight_B r8 =
303 * r1 = bptr r9 =
304 * r2 = intermediate result r10 = -1024 (for clipping)
305 * r3 = previous right sample r11 = 512 (for rounding)
306 * r4 = dpp->weight_A ip = current sample
307 * r5 = dpp sp =
308 * r6 = dpp->delta lr = updated left sample
309 * r7 = eptr pc =
310 *******************************************************************************
311 */
312
313term_minus_1:
314 ldr r3, [r1, #-4] @ r3 = previous right sample
315
316term_minus_1_loop:
317 ldr ip, [r1] @ for left channel the decorrelation value
318 mla r2, r3, r4, r11 @ is the previous right sample (in r3)
319 add lr, ip, r2, asr #10 @ lr = updated left sample
320 str lr, [r1], #8 @ store it & advance pointer past this stereo sample
321 cmp r3, #0 @ no weight update if decorr value
322 cmpne ip, #0 @ or original sample is 0
323 beq .L361
324 teq ip, r3 @ update weight based on signs
325 submi r4, r4, r6 @ signs differ: weight_A -= delta
326 addpl r4, r4, r6 @ signs match: weight_A += delta
327 cmp r4, #1024 @ then clip weight to +/-1024
328 movgt r4, #1024
329 cmp r4, r10
330 movlt r4, r10
331
332.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value
333 mla r3, r0, lr, r11 @ is the just updated left sample (in lr)
334 add r3, r2, r3, asr #10 @ r3 = updated right sample (previous right for next pass)
335 str r3, [r1, #-4]
336 cmp lr, #0 @ no weight update if decorr value
337 cmpne r2, #0 @ or original sample is 0
338 beq .L369
339 teq r2, lr @ update weight based on signs
340 submi r0, r0, r6 @ signs differ: weight_B -= delta
341 addpl r0, r0, r6 @ signs match: weight_B += delta
342 cmp r0, #1024 @ then clip weight to +/-1024
343 movgt r0, #1024
344 cmp r0, r10
345 movlt r0, r10
346
347.L369: cmp r7, r1 @ loop back if more samples to do
348 bhi term_minus_1_loop
349
350 str r3, [r5, #8] @ else store right sample and exit
351 b common_exit
352
353/*
354 ******************************************************************************
355 * Loop to handle term = -2 condition
356 * (note that the channels are processed in the reverse order here)
357 *
358 * r0 = dpp->weight_B r8 =
359 * r1 = bptr r9 =
360 * r2 = intermediate result r10 = -1024 (for clipping)
361 * r3 = previous left sample r11 = 512 (for rounding)
362 * r4 = dpp->weight_A ip = current sample
363 * r5 = dpp sp =
364 * r6 = dpp->delta lr = updated right sample
365 * r7 = eptr pc =
366 *******************************************************************************
367 */
368
369term_minus_2:
370 ldr r3, [r1, #-8] @ r3 = previous left sample
371
372term_minus_2_loop:
373 ldr ip, [r1, #4] @ for right channel the decorrelation value
374 mla r2, r3, r0, r11 @ is the previous left sample (in r3)
375 add lr, ip, r2, asr #10 @ lr = updated right sample
376 str lr, [r1, #4]
377 cmp r3, #0 @ no weight update if decorr value
378 cmpne ip, #0 @ or original sample is 0
379 beq .L380
380 teq ip, r3 @ update weight based on signs
381 submi r0, r0, r6 @ signs differ: weight_B -= delta
382 addpl r0, r0, r6 @ signs match: weight_B += delta
383 cmp r0, #1024 @ then clip weight to +/-1024
384 movgt r0, #1024
385 cmp r0, r10
386 movlt r0, r10
387
388.L380: ldr r2, [r1, #0] @ for left channel the decorrelation value
389 mla r3, r4, lr, r11 @ is the just updated right sample (in lr)
390 add r3, r2, r3, asr #10 @ r3 = updated left sample (previous left for next pass)
391 str r3, [r1], #8 @ store it & advance pointer past this stereo sample
392 cmp lr, #0 @ no weight update if decorr value
393 cmpne r2, #0 @ or original sample is 0
394 beq .L388
395 teq r2, lr @ update weight based on signs
396 submi r4, r4, r6 @ signs differ: weight_A -= delta
397 addpl r4, r4, r6 @ signs match: weight_A += delta
398 cmp r4, #1024 @ then clip weight to +/-1024
399 movgt r4, #1024
400 cmp r4, r10
401 movlt r4, r10
402
403.L388: cmp r7, r1 @ loop back if more samples to do
404 bhi term_minus_2_loop
405
406 str r3, [r5, #40] @ else store left channel and exit
407 b common_exit
408
409/*
410 ******************************************************************************
411 * Loop to handle term = -3 condition
412 *
413 * r0 = dpp->weight_B r8 = previous left sample
414 * r1 = bptr r9 =
415 * r2 = current left sample r10 = -1024 (for clipping)
416 * r3 = previous right sample r11 = 512 (for rounding)
417 * r4 = dpp->weight_A ip = intermediate result
418 * r5 = dpp sp =
419 * r6 = dpp->delta lr =
420 * r7 = eptr pc =
421 *******************************************************************************
422 */
423
424term_minus_3:
425 ldr r3, [r1, #-4] @ load previous samples: r3 = previous right
426 ldr r8, [r1, #-8] @ r8 = previous left
427
428term_minus_3_loop:
429 ldr ip, [r1] @ ip = current left sample
430 mla r2, r3, r4, r11 @ left decorr value is the previous right sample (in r3)
431 add r2, ip, r2, asr #10 @ r2 = updated left sample
432 str r2, [r1], #4 @ store it & advance pointer to right sample
433 cmp r3, #0 @ no weight update if decorr value
434 cmpne ip, #0 @ or original sample is 0
435 beq .L399
436 teq ip, r3 @ update weight based on signs
437 submi r4, r4, r6 @ signs differ: weight_A -= delta
438 addpl r4, r4, r6 @ signs match: weight_A += delta
439 cmp r4, #1024 @ then clip weight to +/-1024
440 movgt r4, #1024
441 cmp r4, r10
442 movlt r4, r10
443
444.L399: movs ip, r8 @ ip = previous left we use now (flags kept for strne)
445 mov r8, r2 @ r8 = current left we use next time
446 ldr r2, [r1], #4 @ get right sample & update pointer
447 mla r3, ip, r0, r11 @ decorr value * weight_B + 512
448 add r3, r2, r3, asr #10 @ r3 = updated right sample (previous right for next pass)
449 strne r3, [r1, #-4] @ store back unless decorr value is 0 (sample is then unchanged)
450 cmpne r2, #0 @ no weight update if decorr value or sample is 0
451 beq .L407
452 teq ip, r2 @ update weight based on signs
453 submi r0, r0, r6 @ signs differ: weight_B -= delta
454 addpl r0, r0, r6 @ signs match: weight_B += delta
455 cmp r0, #1024 @ then clip weight to +/-1024
456 movgt r0, #1024
457 cmp r0, r10
458 movlt r0, r10
459
460.L407: cmp r7, r1 @ loop back if more samples to do
461 bhi term_minus_3_loop
462
463 str r3, [r5, #8] @ else store previous samples & exit
464 str r8, [r5, #40] @ (then falls through into common_exit)
465
466/*
467 * Before finally exiting we must store weights back for next time
468 */
469
470common_exit:
471 strh r4, [r5, #4] @ dpp->weight_A = r4
472 strh r0, [r5, #6] @ dpp->weight_B = r0
473 ldmfd sp!, {r4 - r8, r10, r11, pc} @ restore saved registers and return (saved lr -> pc)
474
diff --git a/apps/codecs/libwavpack/unpack.c b/apps/codecs/libwavpack/unpack.c
index 8f5c1ee46f..0c61e0e38a 100644
--- a/apps/codecs/libwavpack/unpack.c
+++ b/apps/codecs/libwavpack/unpack.c
@@ -288,6 +288,8 @@ int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd)
288 288
289#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) 289#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
290extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, long *buffer, long sample_count); 290extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, long *buffer, long sample_count);
291#elif defined(CPU_ARM) && !defined(SIMULATOR)
292extern void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp, long *buffer, long sample_count);
291#else 293#else
292static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count); 294static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count);
293#endif 295#endif
@@ -350,6 +352,8 @@ long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count)
350 decorr_stereo_pass (dpp, buffer, 8); 352 decorr_stereo_pass (dpp, buffer, 8);
351#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) 353#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
352 decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8); 354 decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8);
355#elif defined(CPU_ARM) && !defined(SIMULATOR)
356 decorr_stereo_pass_cont_arm (dpp, buffer + 16, sample_count - 8);
353#else 357#else
354 decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); 358 decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8);
355#endif 359#endif
@@ -510,7 +514,7 @@ static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long samp
510 dpp->weight_B = weight_B; 514 dpp->weight_B = weight_B;
511} 515}
512 516
513#if !defined(CPU_COLDFIRE) || defined(SIMULATOR) 517#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM)) || defined(SIMULATOR)
514 518
515static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count) 519static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count)
516{ 520{