diff options
author | Dave Bryant <bryant@rockbox.org> | 2006-02-26 08:22:34 +0000 |
---|---|---|
committer | Dave Bryant <bryant@rockbox.org> | 2006-02-26 08:22:34 +0000 |
commit | 503116240a4c827bda6091207097c7b7e3772467 (patch) | |
tree | cb481f159ad6a90b291e49793e1f97c265e91dd6 /apps/codecs | |
parent | 1c181468107af2565ff6337abf15d8a09d730b45 (diff) | |
download | rockbox-503116240a4c827bda6091207097c7b7e3772467.tar.gz rockbox-503116240a4c827bda6091207097c7b7e3772467.zip |
More WavPack optimizations. Restored 24-bit file playback.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8842 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/libwavpack/SOURCES | 1 | ||||
-rw-r--r-- | apps/codecs/libwavpack/arm.S | 12 | ||||
-rw-r--r-- | apps/codecs/libwavpack/arml.S | 502 | ||||
-rw-r--r-- | apps/codecs/libwavpack/unpack.c | 6 |
4 files changed, 513 insertions, 8 deletions
diff --git a/apps/codecs/libwavpack/SOURCES b/apps/codecs/libwavpack/SOURCES index 8e38767ec6..5dcded9112 100644 --- a/apps/codecs/libwavpack/SOURCES +++ b/apps/codecs/libwavpack/SOURCES | |||
@@ -10,5 +10,6 @@ coldfire.S | |||
10 | #endif | 10 | #endif |
11 | #if defined(CPU_ARM) && !defined(SIMULATOR) | 11 | #if defined(CPU_ARM) && !defined(SIMULATOR) |
12 | arm.S | 12 | arm.S |
13 | arml.S | ||
13 | #endif | 14 | #endif |
14 | 15 | ||
diff --git a/apps/codecs/libwavpack/arm.S b/apps/codecs/libwavpack/arm.S index 0b92bfccd7..233bfd3a52 100644 --- a/apps/codecs/libwavpack/arm.S +++ b/apps/codecs/libwavpack/arm.S | |||
@@ -31,9 +31,7 @@ | |||
31 | * | 31 | * |
32 | * This is written to work on a ARM7TDMI processor. This version only uses the | 32 | * This is written to work on a ARM7TDMI processor. This version only uses the |
33 | * 32-bit multiply-accumulate instruction and so will overflow with 24-bit | 33 | * 32-bit multiply-accumulate instruction and so will overflow with 24-bit |
34 | * WavPack files. The advanced 64-bit multiply instructions in the ARM will | 34 | * WavPack files. |
35 | * provide full resolution for this, but are somewhat slower and have not | ||
36 | * been included yet. | ||
37 | */ | 35 | */ |
38 | .text | 36 | .text |
39 | .align | 37 | .align |
@@ -248,7 +246,7 @@ term_2_loop: | |||
248 | term_default_loop: | 246 | term_default_loop: |
249 | ldr ip, [r1] @ get original sample | 247 | ldr ip, [r1] @ get original sample |
250 | ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term | 248 | ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term |
251 | mla r8, r4, r3, r11 @ mult decorr value by weight, round, | 249 | mla r8, r3, r4, r11 @ mult decorr value by weight, round, |
252 | add r8, ip, r8, asr #10 @ shift and add to new sample | 250 | add r8, ip, r8, asr #10 @ shift and add to new sample |
253 | str r8, [r1], #4 @ store update sample | 251 | str r8, [r1], #4 @ store update sample |
254 | cmp r3, #0 | 252 | cmp r3, #0 |
@@ -260,7 +258,7 @@ term_default_loop: | |||
260 | 258 | ||
261 | .L350: ldr ip, [r1] @ do the same thing for right channel | 259 | .L350: ldr ip, [r1] @ do the same thing for right channel |
262 | ldr r3, [r1, -r2, asl #3] | 260 | ldr r3, [r1, -r2, asl #3] |
263 | mla r8, r0, r3, r11 | 261 | mla r8, r3, r0, r11 |
264 | add r8, ip, r8, asr #10 | 262 | add r8, ip, r8, asr #10 |
265 | str r8, [r1], #4 | 263 | str r8, [r1], #4 |
266 | cmp r3, #0 | 264 | cmp r3, #0 |
@@ -330,7 +328,7 @@ term_minus_1_loop: | |||
330 | movlt r4, r10 | 328 | movlt r4, r10 |
331 | 329 | ||
332 | .L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value | 330 | .L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value |
333 | mla r3, r0, lr, r11 @ is the just updated right sample (in lr) | 331 | mla r3, lr, r0, r11 @ is the just updated right sample (in lr) |
334 | add r3, r2, r3, asr #10 | 332 | add r3, r2, r3, asr #10 |
335 | str r3, [r1, #-4] | 333 | str r3, [r1, #-4] |
336 | cmp lr, #0 | 334 | cmp lr, #0 |
@@ -386,7 +384,7 @@ term_minus_2_loop: | |||
386 | movlt r0, r10 | 384 | movlt r0, r10 |
387 | 385 | ||
388 | .L380: ldr r2, [r1, #0] @ for left channel the decorrelation value | 386 | .L380: ldr r2, [r1, #0] @ for left channel the decorrelation value |
389 | mla r3, r4, lr, r11 @ is the just updated left sample (in lr) | 387 | mla r3, lr, r4, r11 @ is the just updated left sample (in lr) |
390 | add r3, r2, r3, asr #10 | 388 | add r3, r2, r3, asr #10 |
391 | str r3, [r1], #8 | 389 | str r3, [r1], #8 |
392 | cmp lr, #0 | 390 | cmp lr, #0 |
diff --git a/apps/codecs/libwavpack/arml.S b/apps/codecs/libwavpack/arml.S new file mode 100644 index 0000000000..97474f93b9 --- /dev/null +++ b/apps/codecs/libwavpack/arml.S | |||
@@ -0,0 +1,502 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 by David Bryant | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | /* This is an assembly optimized version of the following WavPack function: | ||
21 | * | ||
22 | * void decorr_stereo_pass_cont_arml (struct decorr_pass *dpp, | ||
23 | * long *buffer, long sample_count); | ||
24 | * | ||
25 | * It performs a single pass of stereo decorrelation on the provided buffer. | ||
26 | * Note that this version of the function requires that the 8 previous stereo | ||
27 | * samples are visible and correct. In other words, it ignores the "samples_*" | ||
28 | * fields in the decorr_pass structure and gets the history data directly | ||
29 | * from the buffer. It does, however, return the appropriate history samples | ||
30 | * to the decorr_pass structure before returning. | ||
31 | * | ||
32 | * This is written to work on an ARM7TDMI processor. This version uses the | ||
33 | * 64-bit multiply-accumulate instruction and so can be used with all | ||
34 | * WavPack files. However, for optimum performance with 16-bit WavPack | ||
35 | * files, there is a faster version that only uses the 32-bit MLA | ||
36 | * instruction. | ||
37 | */ | ||
38 | |||
39 | .text | ||
40 | .align | ||
41 | .global decorr_stereo_pass_cont_arml | ||
42 | |||
43 | /* | ||
44 | * on entry: | ||
45 | * | ||
46 | * r0 = struct decorr_pass *dpp | ||
47 | * r1 = long *buffer | ||
48 | * r2 = long sample_count | ||
49 | */ | ||
50 | |||
51 | decorr_stereo_pass_cont_arml: | ||
52 | |||
53 | stmfd sp!, {r4 - r8, r10, r11, lr} | ||
54 | mov r5, r0 @ r5 = dpp | ||
55 | mov r11, #512 @ (value never used: each loop reloads r11) | ||
56 | ldrsh r6, [r0, #2] @ r6 = dpp->delta | ||
57 | ldrsh r4, [r0, #4] @ r4 = dpp->weight_A | ||
58 | ldrsh r0, [r0, #6] @ r0 = dpp->weight_B | ||
59 | cmp r2, #0 @ return at once if no samples to process | ||
60 | ldmeqfd sp!, {r4 - r8, r10, r11, pc} @ (weights not yet << 18, so skip common_exit) | ||
61 | |||
62 | mov r0, r0, asl #18 @ for 64-bit math we use weights << 18 | ||
63 | mov r4, r4, asl #18 | ||
64 | mov r6, r6, asl #18 @ delta is scaled the same way as the weights | ||
65 | add r7, r1, r2, asl #3 @ r7 = buffer ending position | ||
66 | ldrsh r2, [r5, #0] @ r2 = dpp->term | ||
67 | cmp r2, #0 | ||
68 | blt minus_term | ||
69 | |||
70 | ldr lr, [r1, #-16] @ load 2 sample history from buffer | ||
71 | ldr r10, [r1, #-12] @ for terms 2, 17, and 18 | ||
72 | ldr r8, [r1, #-8] | ||
73 | ldr r3, [r1, #-4] | ||
74 | |||
75 | cmp r2, #18 | ||
76 | beq term_18_loop | ||
77 | mov lr, lr, asl #4 @ terms 2 and 17 keep the 2nd previous | ||
78 | mov r10, r10, asl #4 @ samples pre-scaled << 4 | ||
79 | cmp r2, #2 | ||
80 | beq term_2_loop | ||
81 | cmp r2, #17 | ||
82 | beq term_17_loop | ||
83 | b term_default_loop | ||
84 | |||
85 | minus_term: | ||
86 | mov r10, #(1024 << 18) @ r10 = -1024 << 18 for weight clipping | ||
87 | rsb r10, r10, #0 @ (only used for negative terms) | ||
88 | cmn r2, #1 | ||
89 | beq term_minus_1 | ||
90 | cmn r2, #2 | ||
91 | beq term_minus_2 | ||
92 | cmn r2, #3 | ||
93 | beq term_minus_3 | ||
94 | b common_exit @ any other negative term: nothing to do | ||
95 | |||
96 | /* | ||
97 | ****************************************************************************** | ||
98 | * Loop to handle term = 17 condition | ||
99 | * | ||
100 | * r0 = dpp->weight_B r8 = previous left sample | ||
101 | * r1 = bptr r9 = | ||
102 | * r2 = current sample r10 = second previous right sample << 4 | ||
103 | * r3 = previous right sample r11 = lo accumulator (for rounding) | ||
104 | * r4 = dpp->weight_A ip = current decorrelation value | ||
105 | * r5 = dpp sp = | ||
106 | * r6 = dpp->delta lr = second previous left sample << 4 | ||
107 | * r7 = eptr pc = | ||
108 | ******************************************************************************* | ||
109 | */ | ||
110 | |||
111 | term_17_loop: | ||
112 | rsbs ip, lr, r8, asl #5 @ decorr value = (2 * prev) - 2nd prev | ||
113 | mov lr, r8, asl #4 @ previous becomes 2nd previous | ||
114 | ldr r2, [r1], #4 @ get sample & update pointer | ||
115 | mov r11, #0x80000000 @ lo word = half an LSB of the hi word (rounding) | ||
116 | mov r8, r2 @ hi word starts as the sample just read | ||
117 | smlalne r11, r8, r4, ip @ r8:r11 += weight * decorr value (if value != 0) | ||
118 | strne r8, [r1, #-4] @ if change possible, store sample back | ||
119 | cmpne r2, #0 @ no weight update if value or sample is zero | ||
120 | beq .L325 | ||
121 | teq ip, r2 @ update weight based on signs | ||
122 | submi r4, r4, r6 @ signs differ: weight -= delta | ||
123 | addpl r4, r4, r6 @ signs match: weight += delta | ||
124 | |||
125 | .L325: rsbs ip, r10, r3, asl #5 @ do same thing for right channel | ||
126 | mov r10, r3, asl #4 | ||
127 | ldr r2, [r1], #4 | ||
128 | mov r11, #0x80000000 | ||
129 | mov r3, r2 | ||
130 | smlalne r11, r3, r0, ip | ||
131 | strne r3, [r1, #-4] | ||
132 | cmpne r2, #0 | ||
133 | beq .L329 | ||
134 | teq ip, r2 | ||
135 | submi r0, r0, r6 | ||
136 | addpl r0, r0, r6 | ||
137 | |||
138 | .L329: cmp r7, r1 @ loop back if more samples to do | ||
139 | bhi term_17_loop | ||
140 | mov lr, lr, asr #4 @ undo the << 4 scaling of the 2nd previous | ||
141 | mov r10, r10, asr #4 @ samples before they are stored | ||
142 | b store_1718 @ common exit for terms 17 & 18 | ||
143 | |||
144 | /* | ||
145 | ****************************************************************************** | ||
146 | * Loop to handle term = 18 condition | ||
147 | * | ||
148 | * r0 = dpp->weight_B r8 = previous left sample | ||
149 | * r1 = bptr r9 = | ||
150 | * r2 = current sample r10 = second previous right sample | ||
151 | * r3 = previous right sample r11 = lo accumulator (for rounding) | ||
152 | * r4 = dpp->weight_A ip = decorrelation value | ||
153 | * r5 = dpp sp = | ||
154 | * r6 = dpp->delta lr = second previous left sample | ||
155 | * r7 = eptr pc = | ||
156 | ******************************************************************************* | ||
157 | */ | ||
158 | |||
159 | term_18_loop: | ||
160 | rsb ip, lr, r8 @ decorr value = | ||
161 | mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 | ||
162 | add ip, lr, ip, asr #1 @ computed as prev + ((prev - 2nd prev) >> 1) | ||
163 | movs ip, ip, asl #4 @ scale value << 4 and test it for zero | ||
164 | ldr r2, [r1], #4 @ get sample & update pointer | ||
165 | mov r11, #0x80000000 @ lo word = half an LSB of the hi word (rounding) | ||
166 | mov r8, r2 @ hi word starts as the sample just read | ||
167 | smlalne r11, r8, r4, ip @ r8:r11 += weight * decorr value (if value != 0) | ||
168 | strne r8, [r1, #-4] @ if change possible, store sample back | ||
169 | cmpne r2, #0 @ no weight update if value or sample is zero | ||
170 | beq .L337 | ||
171 | teq ip, r2 @ update weight based on signs | ||
172 | submi r4, r4, r6 @ signs differ: weight -= delta | ||
173 | addpl r4, r4, r6 @ signs match: weight += delta | ||
174 | |||
175 | .L337: rsb ip, r10, r3 @ do same thing for right channel | ||
176 | mov r10, r3 | ||
177 | add ip, r10, ip, asr #1 | ||
178 | movs ip, ip, asl #4 | ||
179 | ldr r2, [r1], #4 | ||
180 | mov r11, #0x80000000 | ||
181 | mov r3, r2 | ||
182 | smlalne r11, r3, r0, ip | ||
183 | strne r3, [r1, #-4] | ||
184 | cmpne r2, #0 | ||
185 | beq .L341 | ||
186 | teq ip, r2 | ||
187 | submi r0, r0, r6 | ||
188 | addpl r0, r0, r6 | ||
189 | |||
190 | .L341: cmp r7, r1 @ loop back if more samples to do | ||
191 | bhi term_18_loop | ||
192 | |||
193 | /* common exit for terms 17 & 18 */ | ||
194 | |||
195 | store_1718: | ||
196 | str r3, [r5, #40] @ store sample history into struct | ||
197 | str r8, [r5, #8] @ (r3/r8 = previous right/left samples, | ||
198 | str r10, [r5, #44] @ r10/lr = second previous right/left) | ||
199 | str lr, [r5, #12] | ||
200 | b common_exit @ and return | ||
201 | |||
202 | /* | ||
203 | ****************************************************************************** | ||
204 | * Loop to handle term = 2 condition | ||
205 | * (note that this case can be handled by the default term handler (1-8), but | ||
206 | * this special case is faster because it doesn't have to read memory twice) | ||
207 | * | ||
208 | * r0 = dpp->weight_B r8 = previous left sample | ||
209 | * r1 = bptr r9 = | ||
210 | * r2 = current sample r10 = second previous right sample << 4 | ||
211 | * r3 = previous right sample r11 = lo accumulator (for rounding) | ||
212 | * r4 = dpp->weight_A ip = decorrelation value | ||
213 | * r5 = dpp sp = | ||
214 | * r6 = dpp->delta lr = second previous left sample << 4 | ||
215 | * r7 = eptr pc = | ||
216 | ******************************************************************************* | ||
217 | */ | ||
218 | |||
219 | term_2_loop: | ||
220 | movs ip, lr @ get decorrelation value & test | ||
221 | ldr r2, [r1], #4 @ get sample & update pointer | ||
222 | mov lr, r8, asl #4 @ previous becomes 2nd previous | ||
223 | mov r11, #0x80000000 @ lo word = half an LSB of the hi word (rounding) | ||
224 | mov r8, r2 @ hi word starts as the sample just read | ||
225 | smlalne r11, r8, r4, ip @ r8:r11 += weight * decorr value (if value != 0) | ||
226 | strne r8, [r1, #-4] @ if change possible, store sample back | ||
227 | cmpne r2, #0 @ no weight update if value or sample is zero | ||
228 | beq .L225 | ||
229 | teq ip, r2 @ update weight based on signs | ||
230 | submi r4, r4, r6 @ signs differ: weight -= delta | ||
231 | addpl r4, r4, r6 @ signs match: weight += delta | ||
232 | |||
233 | .L225: movs ip, r10 @ do same thing for right channel | ||
234 | ldr r2, [r1], #4 | ||
235 | mov r10, r3, asl #4 | ||
236 | mov r11, #0x80000000 | ||
237 | mov r3, r2 | ||
238 | smlalne r11, r3, r0, ip | ||
239 | strne r3, [r1, #-4] | ||
240 | cmpne r2, #0 | ||
241 | beq .L229 | ||
242 | teq ip, r2 | ||
243 | submi r0, r0, r6 | ||
244 | addpl r0, r0, r6 | ||
245 | |||
246 | .L229: cmp r7, r1 @ loop back if more samples to do | ||
247 | bhi term_2_loop | ||
248 | |||
249 | b default_term_exit @ this exit updates all dpp->samples | ||
250 | |||
251 | /* | ||
252 | ****************************************************************************** | ||
253 | * Loop to handle default term condition | ||
254 | * | ||
255 | * r0 = dpp->weight_B r8 = result accumulator | ||
256 | * r1 = bptr r9 = | ||
257 | * r2 = dpp->term r10 = | ||
258 | * r3 = decorrelation value r11 = lo accumulator (for rounding) | ||
259 | * r4 = dpp->weight_A ip = current sample | ||
260 | * r5 = dpp sp = | ||
261 | * r6 = dpp->delta lr = | ||
262 | * r7 = eptr pc = | ||
263 | ******************************************************************************* | ||
264 | */ | ||
265 | |||
266 | term_default_loop: | ||
267 | ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term | ||
268 | ldr ip, [r1], #4 @ get original sample and bump ptr | ||
269 | movs r3, r3, asl #4 @ scale value << 4 and test it for zero | ||
270 | mov r11, #0x80000000 @ lo word = half an LSB of the hi word (rounding) | ||
271 | mov r8, ip @ hi word starts as the original sample | ||
272 | smlalne r11, r8, r4, r3 @ r8:r11 += weight * decorr value (if value != 0) | ||
273 | strne r8, [r1, #-4] @ if possibly changed, store updated sample | ||
274 | cmpne ip, #0 @ no weight update if value or sample is zero | ||
275 | beq .L350 | ||
276 | teq ip, r3 @ update weight based on signs | ||
277 | submi r4, r4, r6 @ signs differ: weight -= delta | ||
278 | addpl r4, r4, r6 @ signs match: weight += delta | ||
279 | |||
280 | .L350: ldr r3, [r1, -r2, asl #3] @ do the same thing for right channel | ||
281 | ldr ip, [r1], #4 | ||
282 | movs r3, r3, asl #4 | ||
283 | mov r11, #0x80000000 | ||
284 | mov r8, ip | ||
285 | smlalne r11, r8, r0, r3 | ||
286 | strne r8, [r1, #-4] | ||
287 | cmpne ip, #0 | ||
288 | beq .L354 | ||
289 | teq ip, r3 | ||
290 | submi r0, r0, r6 | ||
291 | addpl r0, r0, r6 | ||
292 | |||
293 | .L354: cmp r7, r1 @ loop back if more samples to do | ||
294 | bhi term_default_loop | ||
295 | |||
296 | /* | ||
297 | * This exit is used by terms 1-8 to store the previous 8 samples into the decorr | ||
298 | * structure (even if they are not all used for the given term) | ||
299 | */ | ||
300 | |||
301 | default_term_exit: | ||
302 | ldrsh r3, [r5, #0] @ reload dpp->term | ||
303 | sub ip, r3, #1 @ ip = history index, starting at term - 1 | ||
304 | mov lr, #7 @ lr counts 7 down to 0: eight stereo pairs | ||
305 | |||
306 | .L358: and r3, ip, #7 @ wrap index into the 8-entry history | ||
307 | add r3, r5, r3, asl #2 @ r3 = dpp + index * 4 | ||
308 | ldr r2, [r1, #-4] @ copy right sample into the array at dpp + 40 | ||
309 | str r2, [r3, #40] | ||
310 | ldr r2, [r1, #-8]! @ copy left sample to dpp + 8, step back a pair | ||
311 | str r2, [r3, #8] | ||
312 | sub ip, ip, #1 | ||
313 | sub lr, lr, #1 | ||
314 | cmn lr, #1 @ done once the counter reaches -1 | ||
315 | bne .L358 | ||
316 | b common_exit | ||
317 | |||
318 | /* | ||
319 | ****************************************************************************** | ||
320 | * Loop to handle term = -1 condition | ||
321 | * | ||
322 | * r0 = dpp->weight_B r8 = | ||
323 | * r1 = bptr r9 = | ||
324 | * r2 = intermediate result r10 = -1024 << 18 (for clipping) | ||
325 | * r3 = previous right sample r11 = lo accumulator (for rounding) | ||
326 | * r4 = dpp->weight_A ip = current sample | ||
327 | * r5 = dpp sp = | ||
328 | * r6 = dpp->delta lr = updated left sample | ||
329 | * r7 = eptr pc = | ||
330 | ******************************************************************************* | ||
331 | */ | ||
332 | |||
333 | term_minus_1: | ||
334 | ldr r3, [r1, #-4] @ r3 = previous right sample | ||
335 | |||
336 | term_minus_1_loop: | ||
337 | ldr ip, [r1], #8 @ for left channel the decorrelation value | ||
338 | movs r3, r3, asl #4 @ is the previous right sample (in r3) | ||
339 | mov r11, #0x80000000 @ lo word = half an LSB of the hi word (rounding) | ||
340 | mov lr, ip @ hi word starts as the left sample just read | ||
341 | smlalne r11, lr, r4, r3 @ lr:r11 += weight_A * decorr value (if != 0) | ||
342 | strne lr, [r1, #-8] @ if change possible, store left sample back | ||
343 | cmpne ip, #0 @ no weight update if value or sample is zero | ||
344 | beq .L361 | ||
345 | teq ip, r3 @ update weight based on signs | ||
346 | submi r4, r4, r6 | ||
347 | addpl r4, r4, r6 | ||
348 | cmp r4, #(1024 << 18) @ then clip weight to +/-1024 | ||
349 | movgt r4, #(1024 << 18) | ||
350 | cmp r4, r10 | ||
351 | movlt r4, r10 | ||
352 | |||
353 | .L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value | ||
354 | movs lr, lr, asl #4 @ is the just updated left sample (in lr) | ||
355 | mov r11, #0x80000000 | ||
356 | mov r3, r2 | ||
357 | smlalne r11, r3, r0, lr | ||
358 | strne r3, [r1, #-4] | ||
359 | cmpne r2, #0 | ||
360 | beq .L369 | ||
361 | teq r2, lr | ||
362 | submi r0, r0, r6 | ||
363 | addpl r0, r0, r6 | ||
364 | cmp r0, #(1024 << 18) @ then clip weight to +/-1024 | ||
365 | movgt r0, #(1024 << 18) | ||
366 | cmp r0, r10 | ||
367 | movlt r0, r10 | ||
368 | |||
369 | .L369: cmp r7, r1 @ loop back if more samples to do | ||
370 | bhi term_minus_1_loop | ||
371 | |||
372 | str r3, [r5, #8] @ else store right sample and exit | ||
373 | b common_exit | ||
374 | |||
375 | /* | ||
376 | ****************************************************************************** | ||
377 | * Loop to handle term = -2 condition | ||
378 | * (note that the channels are processed in the reverse order here) | ||
379 | * | ||
380 | * r0 = dpp->weight_B r8 = | ||
381 | * r1 = bptr r9 = | ||
382 | * r2 = intermediate result r10 = -1024 << 18 (for clipping) | ||
383 | * r3 = previous left sample r11 = lo accumulator (for rounding) | ||
384 | * r4 = dpp->weight_A ip = current sample | ||
385 | * r5 = dpp sp = | ||
386 | * r6 = dpp->delta lr = updated right sample | ||
387 | * r7 = eptr pc = | ||
388 | ******************************************************************************* | ||
389 | */ | ||
390 | |||
391 | term_minus_2: | ||
392 | ldr r3, [r1, #-8] @ r3 = previous left sample | ||
393 | |||
394 | term_minus_2_loop: | ||
395 | ldr ip, [r1, #4] @ for right channel the decorrelation value | ||
396 | movs r3, r3, asl #4 @ is the previous left sample (in r3) | ||
397 | mov r11, #0x80000000 @ lo word = half an LSB of the hi word (rounding) | ||
398 | mov lr, ip @ hi word starts as the right sample just read | ||
399 | smlalne r11, lr, r0, r3 @ lr:r11 += weight_B * decorr value (if != 0) | ||
400 | strne lr, [r1, #4] @ if change possible, store right sample back | ||
401 | cmpne ip, #0 @ no weight update if value or sample is zero | ||
402 | beq .L380 | ||
403 | teq ip, r3 @ update weight based on signs | ||
404 | submi r0, r0, r6 | ||
405 | addpl r0, r0, r6 | ||
406 | cmp r0, #(1024 << 18) @ then clip weight to +/-1024 | ||
407 | movgt r0, #(1024 << 18) | ||
408 | cmp r0, r10 | ||
409 | movlt r0, r10 | ||
410 | |||
411 | .L380: ldr r2, [r1], #8 @ for left channel the decorrelation value | ||
412 | movs lr, lr, asl #4 @ is the just updated right sample (in lr) | ||
413 | mov r11, #0x80000000 | ||
414 | mov r3, r2 | ||
415 | smlalne r11, r3, r4, lr | ||
416 | strne r3, [r1, #-8] | ||
417 | cmpne r2, #0 | ||
418 | beq .L388 | ||
419 | teq r2, lr | ||
420 | submi r4, r4, r6 | ||
421 | addpl r4, r4, r6 | ||
422 | cmp r4, #(1024 << 18) @ then clip weight to +/-1024 | ||
423 | movgt r4, #(1024 << 18) | ||
424 | cmp r4, r10 | ||
425 | movlt r4, r10 | ||
426 | |||
427 | .L388: cmp r7, r1 @ loop back if more samples to do | ||
428 | bhi term_minus_2_loop | ||
429 | |||
430 | str r3, [r5, #40] @ else store left channel and exit | ||
431 | b common_exit | ||
432 | |||
433 | /* | ||
434 | ****************************************************************************** | ||
435 | * Loop to handle term = -3 condition | ||
436 | * | ||
437 | * r0 = dpp->weight_B r8 = previous left sample | ||
438 | * r1 = bptr r9 = | ||
439 | * r2 = current left sample r10 = -1024 << 18 (for clipping) | ||
440 | * r3 = previous right sample r11 = lo accumulator (for rounding) | ||
441 | * r4 = dpp->weight_A ip = intermediate result | ||
442 | * r5 = dpp sp = | ||
443 | * r6 = dpp->delta lr = | ||
444 | * r7 = eptr pc = | ||
445 | ******************************************************************************* | ||
446 | */ | ||
447 | |||
448 | term_minus_3: | ||
449 | ldr r3, [r1, #-4] @ load previous samples | ||
450 | ldr r8, [r1, #-8] @ (r3 = previous right, r8 = previous left) | ||
451 | |||
452 | term_minus_3_loop: | ||
453 | ldr ip, [r1], #4 @ get left sample & update pointer | ||
454 | movs r3, r3, asl #4 @ decorr value = previous right sample << 4 | ||
455 | mov r11, #0x80000000 @ lo word = half an LSB of the hi word (rounding) | ||
456 | mov r2, ip @ hi word starts as the left sample just read | ||
457 | smlalne r11, r2, r4, r3 @ r2:r11 += weight_A * decorr value (if != 0) | ||
458 | strne r2, [r1, #-4] @ if change possible, store left sample back | ||
459 | cmpne ip, #0 @ no weight update if value or sample is zero | ||
460 | beq .L399 | ||
461 | teq ip, r3 @ update weight based on signs | ||
462 | submi r4, r4, r6 | ||
463 | addpl r4, r4, r6 | ||
464 | cmp r4, #(1024 << 18) @ then clip weight to +/-1024 | ||
465 | movgt r4, #(1024 << 18) | ||
466 | cmp r4, r10 | ||
467 | movlt r4, r10 | ||
468 | |||
469 | .L399: movs ip, r8, asl #4 @ ip = previous left we use now | ||
470 | mov r8, r2 @ r8 = current left we use next time | ||
471 | ldr r2, [r1], #4 @ get right sample & update pointer | ||
472 | mov r11, #0x80000000 | ||
473 | mov r3, r2 | ||
474 | smlalne r11, r3, r0, ip | ||
475 | strne r3, [r1, #-4] | ||
476 | cmpne r2, #0 | ||
477 | beq .L407 | ||
478 | teq ip, r2 | ||
479 | submi r0, r0, r6 | ||
480 | addpl r0, r0, r6 | ||
481 | cmp r0, #(1024 << 18) @ then clip weight to +/-1024 | ||
482 | movgt r0, #(1024 << 18) | ||
483 | cmp r0, r10 | ||
484 | movlt r0, r10 | ||
485 | |||
486 | .L407: cmp r7, r1 @ loop back if more samples to do | ||
487 | bhi term_minus_3_loop | ||
488 | |||
489 | str r3, [r5, #8] @ else store previous samples & exit | ||
490 | str r8, [r5, #40] @ (falls through into common_exit) | ||
491 | |||
492 | /* | ||
493 | * Before finally exiting we must store weights back for next time | ||
494 | */ | ||
495 | |||
496 | common_exit: | ||
497 | mov r0, r0, asr #18 @ restore weights to real magnitude | ||
498 | mov r4, r4, asr #18 | ||
499 | strh r4, [r5, #4] @ dpp->weight_A | ||
500 | strh r0, [r5, #6] @ dpp->weight_B | ||
501 | ldmfd sp!, {r4 - r8, r10, r11, pc} @ restore registers and return | ||
502 | |||
diff --git a/apps/codecs/libwavpack/unpack.c b/apps/codecs/libwavpack/unpack.c index 0c61e0e38a..af5d71585e 100644 --- a/apps/codecs/libwavpack/unpack.c +++ b/apps/codecs/libwavpack/unpack.c | |||
@@ -290,6 +290,7 @@ int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) | |||
290 | extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, long *buffer, long sample_count); | 290 | extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, long *buffer, long sample_count); |
291 | #elif defined(CPU_ARM) && !defined(SIMULATOR) | 291 | #elif defined(CPU_ARM) && !defined(SIMULATOR) |
292 | extern void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp, long *buffer, long sample_count); | 292 | extern void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp, long *buffer, long sample_count); |
293 | extern void decorr_stereo_pass_cont_arml (struct decorr_pass *dpp, long *buffer, long sample_count); | ||
293 | #else | 294 | #else |
294 | static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count); | 295 | static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count); |
295 | #endif | 296 | #endif |
@@ -353,7 +354,10 @@ long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count) | |||
353 | #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) | 354 | #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) |
354 | decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8); | 355 | decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8); |
355 | #elif defined(CPU_ARM) && !defined(SIMULATOR) | 356 | #elif defined(CPU_ARM) && !defined(SIMULATOR) |
356 | decorr_stereo_pass_cont_arm (dpp, buffer + 16, sample_count - 8); | 357 | if (((flags & MAG_MASK) >> MAG_LSB) > 15) |
358 | decorr_stereo_pass_cont_arml (dpp, buffer + 16, sample_count - 8); | ||
359 | else | ||
360 | decorr_stereo_pass_cont_arm (dpp, buffer + 16, sample_count - 8); | ||
357 | #else | 361 | #else |
358 | decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); | 362 | decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); |
359 | #endif | 363 | #endif |