summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Bryant <bryant@rockbox.org>2006-02-23 20:53:59 +0000
committerDave Bryant <bryant@rockbox.org>2006-02-23 20:53:59 +0000
commitf0d1c96ee435e03af0c92aa5ac5260499ae589ed (patch)
tree48ad35f7a5bff47eae27c7488bc32be0e889bd86
parenteeec278d21ae258da9108bbbccf04d977c3d3bfa (diff)
downloadrockbox-f0d1c96ee435e03af0c92aa5ac5260499ae589ed.tar.gz
rockbox-f0d1c96ee435e03af0c92aa5ac5260499ae589ed.zip
Optimization of WavPack decoding in ARM assembler (for iPods). This allows WavPack files encoded in "high" mode to
play without skipping, although it's still rather marginal (i.e. can't play with other DSP effects enabled). For now this will not work with 24-bit files either, although that is coming along. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8814 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libwavpack/SOURCES3
-rw-r--r--apps/codecs/libwavpack/arm.S474
-rw-r--r--apps/codecs/libwavpack/unpack.c6
3 files changed, 482 insertions, 1 deletions
diff --git a/apps/codecs/libwavpack/SOURCES b/apps/codecs/libwavpack/SOURCES
index f63c55a87a..8e38767ec6 100644
--- a/apps/codecs/libwavpack/SOURCES
+++ b/apps/codecs/libwavpack/SOURCES
@@ -8,4 +8,7 @@ wputils.c
8#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) 8#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
9coldfire.S 9coldfire.S
10#endif 10#endif
11#if defined(CPU_ARM) && !defined(SIMULATOR)
12arm.S
13#endif
11 14
diff --git a/apps/codecs/libwavpack/arm.S b/apps/codecs/libwavpack/arm.S
new file mode 100644
index 0000000000..0b92bfccd7
--- /dev/null
+++ b/apps/codecs/libwavpack/arm.S
@@ -0,0 +1,474 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 by David Bryant
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20/* This is an assembly optimized version of the following WavPack function:
21 *
22 * void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
23 * long *buffer, long sample_count);
24 *
25 * It performs a single pass of stereo decorrelation on the provided buffer.
26 * Note that this version of the function requires that the 8 previous stereo
27 * samples are visible and correct. In other words, it ignores the "samples_*"
28 * fields in the decorr_pass structure and gets the history data directly
29 * from the buffer. It does, however, return the appropriate history samples
30 * to the decorr_pass structure before returning.
31 *
32 * This is written to work on an ARM7TDMI processor. This version only uses the
33 * 32-bit multiply-accumulate instruction and so will overflow with 24-bit
34 * WavPack files. The advanced 64-bit multiply instructions in the ARM will
35 * provide full resolution for this, but are somewhat slower and have not
36 * been included yet.
37 */
38 .text
39 .align
40 .global decorr_stereo_pass_cont_arm
41
42/*
43 * on entry:
44 *
45 * r0 = struct decorr_pass *dpp
46 * r1 = long *buffer
47 * r2 = long sample_count
48 */
49
50decorr_stereo_pass_cont_arm:
51
52 stmfd sp!, {r4 - r8, r10, r11, lr} @ save the registers used below plus return address
53 mov r5, r0 @ r5 = dpp
54 mov r11, #512 @ r11 = 512 for rounding
55 ldrsh r6, [r0, #2] @ r6 = dpp->delta
56 ldrsh r4, [r0, #4] @ r4 = dpp->weight_A
57 ldrsh r0, [r0, #6] @ r0 = dpp->weight_B
58 cmp r2, #0 @ exit if no samples to process
59 beq common_exit @ (common_exit still writes the weights back)
60
61 add r7, r1, r2, asl #3 @ r7 = buffer ending position (8 bytes per stereo sample)
62 ldrsh r2, [r5, #0] @ r2 = dpp->term
63 cmp r2, #0 @ negative terms are dispatched separately
64 bmi minus_term
65
66 ldr lr, [r1, #-16] @ load 2 sample history from buffer
67 ldr r10, [r1, #-12] @ for terms 2, 17, and 18
68 ldr r8, [r1, #-8] @ (lr/r8 = older/newer first-channel words,
69 ldr r3, [r1, #-4] @ r10/r3 = older/newer second-channel words)
70 cmp r2, #17
71 beq term_17_loop
72 cmp r2, #18
73 beq term_18_loop
74 cmp r2, #2
75 beq term_2_loop
76 b term_default_loop @ else handle default (1-8, except 2)
77
78minus_term:
79 mov r10, #1024 @ r10 = -1024 for weight clipping
80 rsb r10, r10, #0 @ (only used for negative terms)
81 cmn r2, #1 @ term == -1 ?
82 beq term_minus_1
83 cmn r2, #2 @ term == -2 ?
84 beq term_minus_2
85 cmn r2, #3 @ term == -3 ?
86 beq term_minus_3
87 b common_exit @ any other negative term: no samples are processed
88
89/*
90 ******************************************************************************
91 * Loop to handle term = 17 condition
92 *
93 * r0 = dpp->weight_B r8 = previous left sample
94 * r1 = bptr r9 =
95 * r2 = current sample r10 = second previous right sample
96 * r3 = previous right sample r11 = 512 (for rounding)
97 * r4 = dpp->weight_A ip = current decorrelation value
98 * r5 = dpp sp =
99 * r6 = dpp->delta lr = second previous left sample
100 * r7 = eptr pc =
101 *******************************************************************************
102 */
103
104term_17_loop:
105 rsbs ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev (sets Z if zero)
106 mov lr, r8 @ previous becomes 2nd previous
107 ldr r2, [r1], #4 @ get sample & update pointer
108 mla r8, ip, r4, r11 @ mult decorr value by weight, round,
109 add r8, r2, r8, asr #10 @ shift, and add to new sample
110 strne r8, [r1, #-4] @ if change possible, store sample back
111 cmpne r2, #0 @ flags still from rsbs: Z ends up set if decorr value
112 beq .L325 @ or sample is zero, and the weight update is skipped
113 teq ip, r2 @ update weight based on signs
114 submi r4, r4, r6 @ signs differ: weight_A -= delta
115 addpl r4, r4, r6 @ signs match: weight_A += delta
116
117.L325: rsbs ip, r10, r3, asl #1 @ do same thing for right channel
118 mov r10, r3 @ previous right becomes 2nd previous right
119 ldr r2, [r1], #4 @ get right sample & update pointer
120 mla r3, ip, r0, r11 @ decorr value * weight_B + 512
121 add r3, r2, r3, asr #10 @ shift, and add to new sample
122 strne r3, [r1, #-4] @ store back only if decorr value was non-zero
123 cmpne r2, #0
124 beq .L329 @ skip weight update if either value is zero
125 teq ip, r2
126 submi r0, r0, r6 @ signs differ: weight_B -= delta
127 addpl r0, r0, r6 @ signs match: weight_B += delta
128
129.L329: cmp r7, r1 @ loop back if more samples to do
130 bhi term_17_loop
131 b store_1718 @ common exit for terms 17 & 18
132
133/*
134 ******************************************************************************
135 * Loop to handle term = 18 condition
136 *
137 * r0 = dpp->weight_B r8 = previous left sample
138 * r1 = bptr r9 =
139 * r2 = current sample r10 = second previous right sample
140 * r3 = previous right sample r11 = 512 (for rounding)
141 * r4 = dpp->weight_A ip = decorrelation value
142 * r5 = dpp sp =
143 * r6 = dpp->delta lr = second previous left sample
144 * r7 = eptr pc =
145 *******************************************************************************
146 */
147
148term_18_loop:
149 sub ip, r8, lr @ decorr value =
150 mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1
151 adds ip, r8, ip, asr #1 @ computed as prev + ((prev - 2nd prev) >> 1); sets Z if zero
152 ldr r2, [r1], #4 @ get sample & update pointer
153 mla r8, ip, r4, r11 @ mult decorr value by weight, round,
154 add r8, r2, r8, asr #10 @ shift, and add to new sample
155 strne r8, [r1, #-4] @ if change possible, store sample back
156 cmpne r2, #0 @ flags still from adds: Z ends up set if decorr value
157 beq .L337 @ or sample is zero, and the weight update is skipped
158 teq ip, r2 @ update weight based on signs
159 submi r4, r4, r6 @ signs differ: weight_A -= delta
160 addpl r4, r4, r6 @ signs match: weight_A += delta
161
162.L337: sub ip, r3, r10 @ do same thing for right channel
163 mov r10, r3 @ previous right becomes 2nd previous right
164 adds ip, r3, ip, asr #1 @ right decorr value; sets Z if zero
165 ldr r2, [r1], #4 @ get right sample & update pointer
166 mla r3, ip, r0, r11 @ decorr value * weight_B + 512
167 add r3, r2, r3, asr #10 @ shift, and add to new sample
168 strne r3, [r1, #-4] @ store back only if decorr value was non-zero
169 cmpne r2, #0
170 beq .L341 @ skip weight update if either value is zero
171 teq ip, r2
172 submi r0, r0, r6 @ signs differ: weight_B -= delta
173 addpl r0, r0, r6 @ signs match: weight_B += delta
174
175.L341: cmp r7, r1 @ loop back if more samples to do
176 bhi term_18_loop @ (otherwise fall through to store_1718)
177
178/* common exit for terms 17 & 18 */
179
180store_1718:
181 str r3, [r5, #40] @ store sample history into struct
182 str r8, [r5, #8] @ r3/r8 = last right/left samples -> offsets 40 and 8
183 str r10, [r5, #44] @ r10/lr = the right/left samples before those -> 44 and 12
184 str lr, [r5, #12] @ (presumably samples_B[0..1] / samples_A[0..1]; confirm against struct decorr_pass)
185 b common_exit @ and return
186
187/*
188 ******************************************************************************
189 * Loop to handle term = 2 condition
190 * (note that this case can be handled by the default term handler (1-8), but
191 * this special case is faster because it doesn't have to read memory twice)
192 *
193 * r0 = dpp->weight_B r8 = previous left sample
194 * r1 = bptr r9 =
195 * r2 = current sample r10 = second previous right sample
196 * r3 = previous right sample r11 = 512 (for rounding)
197 * r4 = dpp->weight_A ip = decorrelation value
198 * r5 = dpp sp =
199 * r6 = dpp->delta lr = second previous left sample
200 * r7 = eptr pc =
201 *******************************************************************************
202 */
203
204term_2_loop:
205 movs ip, lr @ get decorrelation value & test
206 mov lr, r8 @ previous becomes 2nd previous
207 ldr r2, [r1], #4 @ get sample & update pointer
208 mla r8, ip, r4, r11 @ mult decorr value by weight, round,
209 add r8, r2, r8, asr #10 @ shift, and add to new sample
210 strne r8, [r1, #-4] @ if change possible, store sample back
211 cmpne r2, #0 @ flags still from movs: Z ends up set if decorr value
212 beq .L225 @ or sample is zero, and the weight update is skipped
213 teq ip, r2 @ update weight based on signs
214 submi r4, r4, r6 @ signs differ: weight_A -= delta
215 addpl r4, r4, r6 @ signs match: weight_A += delta
216
217.L225: movs ip, r10 @ do same thing for right channel
218 mov r10, r3 @ previous right becomes 2nd previous right
219 ldr r2, [r1], #4 @ get right sample & update pointer
220 mla r3, ip, r0, r11 @ decorr value * weight_B + 512
221 add r3, r2, r3, asr #10 @ shift, and add to new sample
222 strne r3, [r1, #-4] @ store back only if decorr value was non-zero
223 cmpne r2, #0
224 beq .L229 @ skip weight update if either value is zero
225 teq ip, r2
226 submi r0, r0, r6 @ signs differ: weight_B -= delta
227 addpl r0, r0, r6 @ signs match: weight_B += delta
228
229.L229: cmp r7, r1 @ loop back if more samples to do
230 bhi term_2_loop
231 b default_term_exit @ this exit updates all dpp->samples
232
233/*
234 ******************************************************************************
235 * Loop to handle default term condition
236 *
237 * r0 = dpp->weight_B r8 = result accumulator
238 * r1 = bptr r9 =
239 * r2 = dpp->term r10 =
240 * r3 = decorrelation value r11 = 512 (for rounding)
241 * r4 = dpp->weight_A ip = current sample
242 * r5 = dpp sp =
243 * r6 = dpp->delta lr =
244 * r7 = eptr pc =
245 *******************************************************************************
246 */
247
248term_default_loop:
249 ldr ip, [r1] @ get original sample
250 ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term (term * 8 bytes back)
251 mla r8, r4, r3, r11 @ mult decorr value by weight, round,
252 add r8, ip, r8, asr #10 @ shift and add to new sample
253 str r8, [r1], #4 @ store updated sample & advance pointer
254 cmp r3, #0 @ skip the weight update if the decorr value
255 cmpne ip, #0 @ or the original sample is zero
256 beq .L350
257 teq ip, r3 @ update weight based on signs
258 submi r4, r4, r6 @ signs differ: weight_A -= delta
259 addpl r4, r4, r6 @ signs match: weight_A += delta
260
261.L350: ldr ip, [r1] @ do the same thing for right channel
262 ldr r3, [r1, -r2, asl #3] @ right decorr value, term * 8 bytes back
263 mla r8, r0, r3, r11 @ decorr value * weight_B + 512
264 add r8, ip, r8, asr #10 @ shift and add to new sample
265 str r8, [r1], #4 @ store updated sample & advance pointer
266 cmp r3, #0 @ skip the weight update if either value is zero
267 cmpne ip, #0
268 beq .L354
269 teq ip, r3
270 submi r0, r0, r6 @ signs differ: weight_B -= delta
271 addpl r0, r0, r6 @ signs match: weight_B += delta
272
273.L354: cmp r7, r1 @ loop back if more samples to do
274 bhi term_default_loop @ (otherwise fall through to default_term_exit)
275
276/*
277 * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
278 * structure (even if they are not all used for the given term)
279 */
280
281default_term_exit:
282 ldrsh r3, [r5, #0] @ r3 = dpp->term (reloaded; r2 may have been reused)
283 sub ip, r3, #1 @ ip = term - 1 = history slot for the newest sample pair
284 mov lr, #7 @ lr counts 7 down to -1, i.e. 8 iterations
285
286.L358: and r3, ip, #7 @ wrap the slot index to 0-7
287 add r3, r5, r3, asl #2 @ r3 = dpp + slot * 4
288 ldr r2, [r1, #-4] @ right sample of the pair just below r1
289 str r2, [r3, #40] @ -> 32-bit slot in the array at dpp + 40
290 ldr r2, [r1, #-8]! @ left sample of that pair; writeback steps r1 back one pair
291 str r2, [r3, #8] @ -> 32-bit slot in the array at dpp + 8
292 sub ip, ip, #1 @ next older pair goes into the next lower slot
293 sub lr, lr, #1
294 cmn lr, #1 @ done once lr reaches -1
295 bne .L358
296 b common_exit
297
298/*
299 ******************************************************************************
300 * Loop to handle term = -1 condition
301 *
302 * r0 = dpp->weight_B r8 =
303 * r1 = bptr r9 =
304 * r2 = intermediate result r10 = -1024 (for clipping)
305 * r3 = previous right sample r11 = 512 (for rounding)
306 * r4 = dpp->weight_A ip = current sample
307 * r5 = dpp sp =
308 * r6 = dpp->delta lr = updated left sample
309 * r7 = eptr pc =
310 *******************************************************************************
311 */
312
313term_minus_1:
314 ldr r3, [r1, #-4] @ r3 = previous right sample
315
316term_minus_1_loop:
317 ldr ip, [r1] @ for left channel the decorrelation value
318 mla r2, r3, r4, r11 @ is the previous right sample (in r3)
319 add lr, ip, r2, asr #10 @ lr = updated left sample
320 str lr, [r1], #8 @ store it & advance pointer past the whole stereo pair
321 cmp r3, #0 @ skip the weight update if the decorr value
322 cmpne ip, #0 @ or the original sample is zero
323 beq .L361
324 teq ip, r3 @ update weight based on signs
325 submi r4, r4, r6 @ signs differ: weight_A -= delta
326 addpl r4, r4, r6 @ signs match: weight_A += delta
327 cmp r4, #1024 @ then clip weight to +/-1024
328 movgt r4, #1024
329 cmp r4, r10
330 movlt r4, r10
331
332.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value
333 mla r3, r0, lr, r11 @ is the just updated left sample (in lr)
334 add r3, r2, r3, asr #10 @ r3 = updated right sample (next pass's decorr value)
335 str r3, [r1, #-4]
336 cmp lr, #0 @ skip the weight update if the decorr value
337 cmpne r2, #0 @ or the original sample is zero
338 beq .L369
339 teq r2, lr
340 submi r0, r0, r6 @ signs differ: weight_B -= delta
341 addpl r0, r0, r6 @ signs match: weight_B += delta
342 cmp r0, #1024 @ then clip weight to +/-1024
343 movgt r0, #1024
344 cmp r0, r10
345 movlt r0, r10
346
347.L369: cmp r7, r1 @ loop back if more samples to do
348 bhi term_minus_1_loop
349
350 str r3, [r5, #8] @ else store right sample and exit
351 b common_exit
352
353/*
354 ******************************************************************************
355 * Loop to handle term = -2 condition
356 * (note that the channels are processed in the reverse order here)
357 *
358 * r0 = dpp->weight_B r8 =
359 * r1 = bptr r9 =
360 * r2 = intermediate result r10 = -1024 (for clipping)
361 * r3 = previous left sample r11 = 512 (for rounding)
362 * r4 = dpp->weight_A ip = current sample
363 * r5 = dpp sp =
364 * r6 = dpp->delta lr = updated right sample
365 * r7 = eptr pc =
366 *******************************************************************************
367 */
368
369term_minus_2:
370 ldr r3, [r1, #-8] @ r3 = previous left sample
371
372term_minus_2_loop:
373 ldr ip, [r1, #4] @ for right channel the decorrelation value
374 mla r2, r3, r0, r11 @ is the previous left sample (in r3)
375 add lr, ip, r2, asr #10 @ lr = updated right sample
376 str lr, [r1, #4] @ store it (pointer is advanced after the left channel)
377 cmp r3, #0 @ skip the weight update if the decorr value
378 cmpne ip, #0 @ or the original sample is zero
379 beq .L380
380 teq ip, r3 @ update weight based on signs
381 submi r0, r0, r6 @ signs differ: weight_B -= delta
382 addpl r0, r0, r6 @ signs match: weight_B += delta
383 cmp r0, #1024 @ then clip weight to +/-1024
384 movgt r0, #1024
385 cmp r0, r10
386 movlt r0, r10
387
388.L380: ldr r2, [r1, #0] @ for left channel the decorrelation value
389 mla r3, r4, lr, r11 @ is the just updated right sample (in lr)
390 add r3, r2, r3, asr #10 @ r3 = updated left sample (next pass's decorr value)
391 str r3, [r1], #8 @ store it & advance pointer past the whole stereo pair
392 cmp lr, #0 @ skip the weight update if the decorr value
393 cmpne r2, #0 @ or the original sample is zero
394 beq .L388
395 teq r2, lr
396 submi r4, r4, r6 @ signs differ: weight_A -= delta
397 addpl r4, r4, r6 @ signs match: weight_A += delta
398 cmp r4, #1024 @ then clip weight to +/-1024
399 movgt r4, #1024
400 cmp r4, r10
401 movlt r4, r10
402
403.L388: cmp r7, r1 @ loop back if more samples to do
404 bhi term_minus_2_loop
405
406 str r3, [r5, #40] @ else store left channel and exit
407 b common_exit
408
409/*
410 ******************************************************************************
411 * Loop to handle term = -3 condition
412 *
413 * r0 = dpp->weight_B r8 = previous left sample
414 * r1 = bptr r9 =
415 * r2 = current left sample r10 = -1024 (for clipping)
416 * r3 = previous right sample r11 = 512 (for rounding)
417 * r4 = dpp->weight_A ip = intermediate result
418 * r5 = dpp sp =
419 * r6 = dpp->delta lr =
420 * r7 = eptr pc =
421 *******************************************************************************
422 */
423
424term_minus_3:
425 ldr r3, [r1, #-4] @ load previous samples
426 ldr r8, [r1, #-8] @ (r3 = previous right, r8 = previous left)
427
428term_minus_3_loop:
429 ldr ip, [r1] @ ip = original left sample
430 mla r2, r3, r4, r11 @ left decorr value is the previous right sample (r3)
431 add r2, ip, r2, asr #10 @ r2 = updated left sample
432 str r2, [r1], #4 @ store it & advance pointer
433 cmp r3, #0 @ skip the weight update if the decorr value
434 cmpne ip, #0 @ or the original sample is zero
435 beq .L399
436 teq ip, r3 @ update weight based on signs
437 submi r4, r4, r6 @ signs differ: weight_A -= delta
438 addpl r4, r4, r6 @ signs match: weight_A += delta
439 cmp r4, #1024 @ then clip weight to +/-1024
440 movgt r4, #1024
441 cmp r4, r10
442 movlt r4, r10
443
444.L399: movs ip, r8 @ ip = previous left we use now
445 mov r8, r2 @ r8 = current left we use next time
446 ldr r2, [r1], #4 @ r2 = original right sample; advance pointer
447 mla r3, ip, r0, r11 @ right decorr value is the previous left sample (ip)
448 add r3, r2, r3, asr #10 @ r3 = updated right sample
449 strne r3, [r1, #-4] @ store back only if decorr value was non-zero
450 cmpne r2, #0 @ flags still from movs: skip the weight update
451 beq .L407 @ if decorr value or sample is zero
452 teq ip, r2
453 submi r0, r0, r6 @ signs differ: weight_B -= delta
454 addpl r0, r0, r6 @ signs match: weight_B += delta
455 cmp r0, #1024 @ then clip weight to +/-1024
456 movgt r0, #1024
457 cmp r0, r10
458 movlt r0, r10
459
460.L407: cmp r7, r1 @ loop back if more samples to do
461 bhi term_minus_3_loop
462
463 str r3, [r5, #8] @ else store previous samples & exit
464 str r8, [r5, #40] @ (last right -> offset 8, last left -> offset 40; falls through to common_exit)
465
466/*
467 * Before finally exiting we must store weights back for next time
468 */
469
470common_exit:
471 strh r4, [r5, #4] @ dpp->weight_A = r4
472 strh r0, [r5, #6] @ dpp->weight_B = r0
473 ldmfd sp!, {r4 - r8, r10, r11, pc} @ restore saved registers; loading pc returns to caller
474
diff --git a/apps/codecs/libwavpack/unpack.c b/apps/codecs/libwavpack/unpack.c
index 8f5c1ee46f..0c61e0e38a 100644
--- a/apps/codecs/libwavpack/unpack.c
+++ b/apps/codecs/libwavpack/unpack.c
@@ -288,6 +288,8 @@ int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd)
288 288
289#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) 289#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
290extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, long *buffer, long sample_count); 290extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, long *buffer, long sample_count);
291#elif defined(CPU_ARM) && !defined(SIMULATOR)
292extern void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp, long *buffer, long sample_count);
291#else 293#else
292static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count); 294static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count);
293#endif 295#endif
@@ -350,6 +352,8 @@ long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count)
350 decorr_stereo_pass (dpp, buffer, 8); 352 decorr_stereo_pass (dpp, buffer, 8);
351#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) 353#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
352 decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8); 354 decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8);
355#elif defined(CPU_ARM) && !defined(SIMULATOR)
356 decorr_stereo_pass_cont_arm (dpp, buffer + 16, sample_count - 8);
353#else 357#else
354 decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); 358 decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8);
355#endif 359#endif
@@ -510,7 +514,7 @@ static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long samp
510 dpp->weight_B = weight_B; 514 dpp->weight_B = weight_B;
511} 515}
512 516
513#if !defined(CPU_COLDFIRE) || defined(SIMULATOR) 517#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM)) || defined(SIMULATOR)
514 518
515static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count) 519static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count)
516{ 520{