diff options
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/libwavpack/Makefile | 2 | ||||
-rw-r--r-- | apps/codecs/libwavpack/SOURCES | 3 | ||||
-rw-r--r-- | apps/codecs/libwavpack/coldfire.S | 535 | ||||
-rw-r--r-- | apps/codecs/libwavpack/unpack.c | 45 | ||||
-rw-r--r-- | apps/codecs/libwavpack/wputils.c | 2 | ||||
-rw-r--r-- | apps/plugins/wv2wav.c | 2 |
6 files changed, 566 insertions, 23 deletions
diff --git a/apps/codecs/libwavpack/Makefile b/apps/codecs/libwavpack/Makefile index df26559f59..75b9060534 100644 --- a/apps/codecs/libwavpack/Makefile +++ b/apps/codecs/libwavpack/Makefile | |||
@@ -15,7 +15,7 @@ INCLUDES += -I$(APPSDIR)/$(APPEXTRA) | |||
15 | endif | 15 | endif |
16 | 16 | ||
17 | CFLAGS = $(GCCOPTS) \ | 17 | CFLAGS = $(GCCOPTS) \ |
18 | $(INCLUDES) $(TARGET) $(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} | 18 | $(INCLUDES) $(TARGET) $(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} -O2 \ |
19 | 19 | ||
20 | # This sets up 'SRC' based on the files mentioned in SOURCES | 20 | # This sets up 'SRC' based on the files mentioned in SOURCES |
21 | include $(TOOLSDIR)/makesrc.inc | 21 | include $(TOOLSDIR)/makesrc.inc |
diff --git a/apps/codecs/libwavpack/SOURCES b/apps/codecs/libwavpack/SOURCES index def57b703c..a4f0f2f7a9 100644 --- a/apps/codecs/libwavpack/SOURCES +++ b/apps/codecs/libwavpack/SOURCES | |||
@@ -4,4 +4,7 @@ metadata.c | |||
4 | unpack.c | 4 | unpack.c |
5 | words.c | 5 | words.c |
6 | wputils.c | 6 | wputils.c |
7 | #if CONFIG_CPU==MCF5249 && !defined(SIMULATOR) | ||
8 | coldfire.S | ||
9 | #endif | ||
7 | 10 | ||
diff --git a/apps/codecs/libwavpack/coldfire.S b/apps/codecs/libwavpack/coldfire.S new file mode 100644 index 0000000000..9c7e098e88 --- /dev/null +++ b/apps/codecs/libwavpack/coldfire.S | |||
@@ -0,0 +1,535 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2005 by David Bryant | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | /* This is an assembly optimized version of the following WavPack function: | ||
21 | * | ||
22 | * void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, | ||
23 | * long *buffer, long sample_count); | ||
24 | * | ||
25 | * It performs a single pass of stereo decorrelation on the provided buffer. | ||
26 | * Note that this version of the function requires that the 8 previous stereo | ||
27 | * samples are visible and correct. In other words, it ignores the "samples_*" | ||
28 | * fields in the decorr_pass structure and gets the history data directly | ||
29 | * from the buffer. It does, however, return the appropriate history samples | ||
30 | * to the decorr_pass structure before returning. | ||
31 | * | ||
32 | * This is written to work on a MCF5249 processor, or any processor based on | ||
33 | * the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for | ||
34 | * the "apply_weight" function of WavPack decorrelation because it provides | ||
35 | * the required 40-bit product. The fractional rounding mode of the EMAC is not | ||
36 | * configurable and uses "round to even" while WavPack uses "round to larger", | ||
37 | * so the rounding has to be done manually. | ||
38 | */ | ||
39 | |||
40 | .text | ||
41 | .align 2 | ||
42 | .global decorr_stereo_pass_cont_mcf5249 | ||
43 | |||
44 | decorr_stereo_pass_cont_mcf5249: | ||
45 | |||
46 | lea (-44, %sp), %sp | reserve 44 bytes for 11 saved registers | ||
47 | movem.l %d2-%d7/%a2-%a6, (%sp) | save d2-d7 and a2-a6 | ||
48 | move.l 44+4(%sp), %a2 | a2 = dpp (first argument, above save area) | ||
49 | move.l 44+8(%sp), %a1 | a1 = bptr (second argument, buffer) | ||
50 | move.w 2(%a2), %a3 | a3 = dpp->delta | ||
51 | move.w 4(%a2), %d3 | d3 = dpp->weight_A (sign extended) | ||
52 | ext.l %d3 | ||
53 | move.w 6(%a2), %d4 | d4 = dpp->weight_B (sign extended) | ||
54 | ext.l %d4 | ||
55 | move.l 44+12(%sp), %d0 | d0 = sample_count (third argument) | ||
56 | jbeq return_only | if zero, nothing to do (flags set by the move) | ||
57 | |||
58 | lsl.l #3, %d0 | d5 = eptr = bptr + (sample_count * 8) | ||
59 | move.l %d0, %d5 | ||
60 | add.l %a1, %d5 | ||
61 | |||
62 | moveq.l #17, %d0 | left shift weights & delta 17 places | ||
63 | asl.l %d0, %d3 | ||
64 | asl.l %d0, %d4 | ||
65 | move.l %a3, %d1 | delta is shifted in d1, then moved back to a3 | ||
66 | asl.l %d0, %d1 | ||
67 | move.l %d1, %a3 | ||
68 | |||
69 | move.l #0x20, %macsr | set fractional mode for MAC | ||
70 | move.l #0, %acc1 | acc1 = 0x00 0000 80 (for rounding) | ||
71 | move.l #0x800000, %accext01 | ||
72 | |||
73 | move.l #1024<<17, %d6 | d6 & d7 are weight clipping limits | ||
74 | move.l #-1024<<17, %d7 | (only used by negative terms) | ||
75 | |||
76 | move.w (%a2), %d0 | d0 = term (sign extended below) | ||
77 | ext.l %d0 | ||
78 | cmp.l #17, %d0 | ||
79 | jbeq term_17 | term = 17 | ||
80 | cmp.l #18, %d0 | ||
81 | jbeq term_18 | term = 18 | ||
82 | addq.l #1, %d0 | add 1 until zero to detect terms -1, -2, -3 | ||
83 | jbeq term_minus_1 | term = -1 | ||
84 | addq.l #1, %d0 | ||
85 | jbeq term_minus_2 | term = -2 | ||
86 | addq.l #1, %d0 | ||
87 | jbeq term_minus_3 | term = -3 | ||
88 | jbra term_default | default term = 1 - 8 | ||
89 | |||
90 | |------------------------------------------------------------------------------ | ||
91 | | Loop to handle term = 17 condition | ||
92 | | | ||
93 | | a0 = d0 = (2 * bptr [-1]) - bptr [-2] | ||
94 | | a1 = bptr d1 = initial bptr [0] | ||
95 | | a2 = dpp-> d2 = updated bptr [0] | ||
96 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
97 | | a4 = d4 = dpp->weight_B << 17 | ||
98 | | a5 = d5 = eptr | ||
99 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
100 | |------------------------------------------------------------------------------ | ||
101 | |||
102 | term_17: | ||
103 | move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2] | ||
104 | add.l %d0, %d0 | ||
105 | sub.l -16(%a1), %d0 | ||
106 | beq .L251 | if zero, skip calculation | ||
107 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
108 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A | ||
109 | mac.l %d0, %d3, %acc0 | ||
110 | move.l (%a1), %d1 | d1 = bptr [0] | ||
111 | beq .L255 | if zero, leave weight unchanged | ||
112 | eor.l %d1, %d0 | else compare signs | ||
113 | bge .L256 | if same, add delta to weight | ||
114 | sub.l %a3, %d3 | else subtract delta from weight | ||
115 | sub.l %a3, %d3 | subtract again instead of branch | ||
116 | .L256: add.l %a3, %d3 | add delta to weight | ||
117 | |||
118 | .L255: move.l %acc0, %d2 | d2 = rounded product | ||
119 | add.l %d1, %d2 | update bptr [0] and store | ||
120 | move.l %d2, (%a1)+ | ||
121 | |||
122 | .L253: move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2] | ||
123 | add.l %d0, %d0 | ||
124 | sub.l -16(%a1), %d0 | ||
125 | beq .L257 | if zero, skip calculations | ||
126 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
127 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B | ||
128 | mac.l %d0, %d4, %acc0 | ||
129 | move.l (%a1), %d1 | d1 = bptr [0] | ||
130 | beq .L254 | if zero, leave weight unchanged | ||
131 | eor.l %d1, %d0 | else compare signs | ||
132 | bge .L259 | if same, add delta to weight | ||
133 | sub.l %a3, %d4 | else subtract delta from weight | ||
134 | sub.l %a3, %d4 | subtract again instead of branch | ||
135 | .L259: add.l %a3, %d4 | add delta to weight | ||
136 | |||
137 | .L254: move.l %acc0, %d2 | d2 = rounded product | ||
138 | add.l %d1, %d2 | update bptr [0] and store | ||
139 | move.l %d2, (%a1)+ | ||
140 | |||
141 | .L252: cmp.l %a1, %d5 | loop if bptr < eptr | ||
142 | jbhi term_17 | ||
143 | bra term_17_18_finish | exit through common path | ||
144 | |||
145 | .L251: addq.l #4, %a1 | bump pointer and jump back into loop | ||
146 | bra .L253 | ||
147 | |||
148 | .L257: addq.l #4, %a1 | bump pointer and jump back into loop | ||
149 | bra .L252 | ||
150 | |||
151 | |------------------------------------------------------------------------------ | ||
152 | | Loop to handle term = 18 condition | ||
153 | | | ||
154 | | a0 = d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1 | ||
155 | | a1 = bptr d1 = initial bptr [0] | ||
156 | | a2 = dpp-> d2 = updated bptr [0] | ||
157 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
158 | | a4 = d4 = dpp->weight_B << 17 | ||
159 | | a5 = d5 = eptr | ||
160 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
161 | |------------------------------------------------------------------------------ | ||
162 | |||
163 | term_18: | ||
164 | move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1 | ||
165 | lea (%a0,%a0.l*2), %a0 | a0 = 3 * bptr [-1] | ||
166 | move.l %a0, %d0 | ||
167 | sub.l -16(%a1), %d0 | ||
168 | asr.l #1, %d0 | ||
169 | beq .L260 | if zero, skip calculation | ||
170 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
171 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A | ||
172 | mac.l %d0, %d3, %acc0 | ||
173 | move.l (%a1), %d1 | d1 = bptr [0] | ||
174 | beq .L266 | if zero, leave weight unchanged | ||
175 | eor.l %d1, %d0 | else compare signs | ||
176 | bge .L267 | if same, add delta to weight | ||
177 | sub.l %a3, %d3 | else subtract delta from weight | ||
178 | sub.l %a3, %d3 | subtract again instead of branch | ||
179 | .L267: add.l %a3, %d3 | add delta to weight | ||
180 | |||
181 | .L266: move.l %acc0, %d2 | d2 = rounded product | ||
182 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
183 | move.l %d2, (%a1)+ | ||
184 | |||
185 | .L268: move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1 | ||
186 | lea (%a0,%a0.l*2), %a0 | a0 = 3 * bptr [-1] | ||
187 | move.l %a0, %d0 | ||
188 | sub.l -16(%a1), %d0 | ||
189 | asr.l #1, %d0 | ||
190 | beq .L261 | if zero, skip calculation | ||
191 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
192 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B | ||
193 | mac.l %d0, %d4, %acc0 | ||
194 | move.l (%a1), %d1 | d1 = bptr [0] | ||
195 | beq .L265 | if zero, leave weight unchanged | ||
196 | eor.l %d1, %d0 | else compare signs | ||
197 | bge .L270 | if same, add delta to weight | ||
198 | sub.l %a3, %d4 | else subtract delta from weight | ||
199 | sub.l %a3, %d4 | subtract again instead of branch | ||
200 | .L270: add.l %a3, %d4 | add delta to weight | ||
201 | |||
202 | .L265: move.l %acc0, %d2 | d2 = rounded product | ||
203 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
204 | move.l %d2, (%a1)+ | ||
205 | |||
206 | .L269: cmp.l %a1, %d5 | loop if bptr < eptr | ||
207 | jbhi term_18 | ||
208 | bra term_17_18_finish | exit through common path | ||
209 | |||
210 | .L260: addq.l #4, %a1 | bump pointer and jump back into loop | ||
211 | bra .L268 | ||
212 | |||
213 | .L261: addq.l #4, %a1 | bump pointer and jump back into loop | ||
214 | bra .L269 | ||
215 | |||
216 | term_17_18_finish: | ||
217 | move.l -4(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-1] | ||
218 | move.l -8(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-2] | ||
219 | move.l -12(%a1), 44(%a2) | dpp->samples_B [1] = bptr [-3] | ||
220 | move.l -16(%a1), 12(%a2) | dpp->samples_A [1] = bptr [-4] | ||
221 | jbra finish_up | ||
222 | |||
223 | |------------------------------------------------------------------------------ | ||
224 | | Loop to handle default terms (i.e. 1 - 8) | ||
225 | | | ||
226 | | a0 = tptr d0 = tptr [0] | ||
227 | | a1 = bptr d1 = initial bptr [0] | ||
228 | | a2 = dpp-> d2 = updated bptr [0] | ||
229 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
230 | | a4 = d4 = dpp->weight_B << 17 | ||
231 | | a5 = d5 = eptr | ||
232 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
233 | |------------------------------------------------------------------------------ | ||
234 | |||
235 | term_default: | ||
236 | move.w (%a2), %d0 | a0 = a1 - (dpp->term * 8) | ||
237 | ext.l %d0 | ||
238 | lsl.l #3, %d0 | ||
239 | move.l %a1, %a0 | ||
240 | sub.l %d0, %a0 | ||
241 | |||
242 | term_default_loop: | ||
243 | move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero | ||
244 | beq .L271 | ||
245 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
246 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A | ||
247 | mac.l %d0, %d3, %acc0 | ||
248 | move.l (%a1), %d1 | d1 = bptr [0] | ||
249 | beq .L277 | if zero, leave weight unchanged | ||
250 | eor.l %d1, %d0 | else compare signs | ||
251 | bge .L278 | if same, add delta to weight | ||
252 | sub.l %a3, %d3 | else subtract delta from weight | ||
253 | sub.l %a3, %d3 | subtract again instead of branch | ||
254 | .L278: add.l %a3, %d3 | add delta to weight | ||
255 | |||
256 | .L277: move.l %acc0, %d2 | d2 = rounded product | ||
257 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
258 | move.l %d2, (%a1)+ | ||
259 | |||
260 | .L275: move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero | ||
261 | beq .L272 | ||
262 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
263 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B | ||
264 | mac.l %d0, %d4, %acc0 | ||
265 | move.l (%a1), %d1 | d1 = bptr [0] | ||
266 | beq .L276 | if zero, leave weight unchanged | ||
267 | eor.l %d1, %d0 | else compare signs | ||
268 | bge .L281 | if same, add delta to weight | ||
269 | sub.l %a3, %d4 | else subtract delta from weight | ||
270 | sub.l %a3, %d4 | subtract again instead of branch | ||
271 | .L281: add.l %a3, %d4 | add delta to weight | ||
272 | |||
273 | .L276: move.l %acc0, %d2 | d2 = rounded product | ||
274 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
275 | move.l %d2, (%a1)+ | ||
276 | |||
277 | .L274: cmp.l %a1, %d5 | loop back if bptr < eptr | ||
278 | jbhi term_default_loop | ||
279 | move.w (%a2), %d0 | d0 = term (decremented and masked in loop) | ||
280 | moveq.l #8, %d1 | d1 = loop counter | ||
281 | |||
282 | .L323: subq.l #1, %d0 | back up & mask index | ||
283 | and.l #7, %d0 | ||
284 | move.l -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0] | ||
285 | move.l -(%a1), 8(%a2,%d0.l*4) | store dpp->samples_A [d0] | ||
286 | subq.l #1, %d1 | loop on count | ||
287 | jbne .L323 | ||
288 | jbra finish_up | ||
289 | |||
290 | .L271: addq.l #4, %a1 | bump pointer and jump back into loop | ||
291 | bra .L275 | ||
292 | |||
293 | .L272: addq.l #4, %a1 | bump pointer and jump back into loop | ||
294 | bra .L274 | ||
295 | |||
296 | |||
297 | |------------------------------------------------------------------------------ | ||
298 | | Loop to handle term = -1 condition | ||
299 | | | ||
300 | | a0 = d0 = decorrelation sample | ||
301 | | a1 = bptr d1 = initial bptr [0] | ||
302 | | a2 = dpp-> d2 = updated bptr [0] | ||
303 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
304 | | a4 = d4 = dpp->weight_B << 17 | ||
305 | | a5 = d5 = eptr | ||
306 | | a6 = d6 = 1024 << 17 | ||
307 | | a7 = d7 = -1024 << 17 | ||
308 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
309 | |------------------------------------------------------------------------------ | ||
310 | |||
311 | term_minus_1: | ||
312 | move.l -4(%a1), %d0 | d0 = bptr [-1] | ||
313 | beq .L402 | if zero, skip first calculation | ||
314 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
315 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) | ||
316 | mac.l %d0, %d3, %acc0 | ||
317 | move.l (%a1), %d1 | d1 = bptr [0] | ||
318 | beq .L405 | if zero, leave weight unchanged | ||
319 | eor.l %d1, %d0 | else compare signs | ||
320 | bge .L404 | if same, add delta to weight | ||
321 | sub.l %a3, %d3 | else subtract delta from weight | ||
322 | cmp.l %d7, %d3 | check for negative clip limit | ||
323 | bge .L405 | ||
324 | move.l %d7, %d3 | ||
325 | bra .L405 | ||
326 | |||
327 | .L404: add.l %a3, %d3 | add delta to weight | ||
328 | cmp.l %d6, %d3 | check for positive clip limit | ||
329 | ble .L405 | ||
330 | move.l %d6, %d3 | ||
331 | |||
332 | .L405: move.l %acc0, %d0 | d0 = rounded product | ||
333 | add.l %d1, %d0 | add applied weight to bptr [0], store | ||
334 | move.l %d0, (%a1)+ | ||
335 | beq .L401 | if new bptr [0] is zero, skip second calculation | ||
336 | |||
337 | .L410: move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
338 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) | ||
339 | mac.l %d0, %d4, %acc0 | ||
340 | move.l (%a1), %d1 | d1 = bptr [1] | ||
341 | beq .L403 | if zero, leave weight unchanged | ||
342 | eor.l %d1, %d0 | else compare signs | ||
343 | bge .L407 | if same, add delta to weight | ||
344 | sub.l %a3, %d4 | else subtract delta from weight | ||
345 | cmp.l %d7, %d4 | check for negative clip limit | ||
346 | bge .L403 | ||
347 | move.l %d7, %d4 | ||
348 | bra .L403 | ||
349 | |||
350 | .L407: add.l %a3, %d4 | add delta to weight | ||
351 | cmp.l %d6, %d4 | check for positive clip limit | ||
352 | ble .L403 | ||
353 | move.l %d6, %d4 | ||
354 | |||
355 | .L403: move.l %acc0, %d2 | d2 = rounded product | ||
356 | add.l %d1, %d2 | add applied weight to bptr [1], store | ||
357 | move.l %d2, (%a1)+ | ||
358 | |||
359 | .L411: cmp.l %a1, %d5 | loop back if bptr < eptr | ||
360 | jbhi term_minus_1 | ||
361 | move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] | ||
362 | jbra finish_up | ||
363 | |||
364 | .L402: move.l (%a1)+, %d0 | d0 = unchanged bptr [0], bump pointer | ||
365 | bne .L410 | if non-zero, do second calculation | ||
366 | |||
367 | .L401: addq.l #4, %a1 | bump pointer past bptr [1], rejoin loop | ||
368 | bra .L411 | ||
369 | |||
370 | |||
371 | |------------------------------------------------------------------------------ | ||
372 | | Loop to handle term = -2 condition | ||
373 | | | ||
374 | | a0 = d0 = decorrelation sample | ||
375 | | a1 = bptr d1 = initial bptr [0] | ||
376 | | a2 = dpp-> d2 = updated bptr [0] | ||
377 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
378 | | a4 = d4 = dpp->weight_B << 17 | ||
379 | | a5 = d5 = eptr | ||
380 | | a6 = d6 = 1024 << 17 | ||
381 | | a7 = d7 = -1024 << 17 | ||
382 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
383 | |------------------------------------------------------------------------------ | ||
384 | |||
385 | term_minus_2: | ||
386 | move.l -8(%a1), %d0 | d0 = bptr [-2] | ||
387 | beq .L511 | if zero, skip first calculation | ||
388 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
389 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) | ||
390 | mac.l %d0, %d4, %acc0 | ||
391 | move.l 4(%a1), %d1 | d1 = bptr [1] | ||
392 | beq .L505 | if zero, leave weight unchanged | ||
393 | eor.l %d1, %d0 | else compare signs | ||
394 | bge .L504 | if same, add delta to weight | ||
395 | sub.l %a3, %d4 | else subtract delta from weight | ||
396 | cmp.l %d7, %d4 | check for negative clip limit | ||
397 | bge .L505 | ||
398 | move.l %d7, %d4 | ||
399 | bra .L505 | ||
400 | |||
401 | .L504: add.l %a3, %d4 | add delta to weight | ||
402 | cmp.l %d6, %d4 | check for positive clip limit | ||
403 | ble .L505 | ||
404 | move.l %d6, %d4 | ||
405 | |||
406 | .L505: move.l %acc0, %d0 | d0 = rounded product | ||
407 | add.l %d1, %d0 | add applied weight to bptr [1], store | ||
408 | move.l %d0, 4(%a1) | ||
409 | beq .L512 | if new bptr [1] is zero, skip second calculation | ||
410 | |||
411 | .L510: move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
412 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) | ||
413 | mac.l %d0, %d3, %acc0 | ||
414 | move.l (%a1), %d1 | d1 = bptr [0] | ||
415 | beq .L503 | if zero, leave weight unchanged | ||
416 | eor.l %d1, %d0 | else compare signs | ||
417 | bge .L507 | if same, add delta to weight | ||
418 | sub.l %a3, %d3 | else subtract delta from weight | ||
419 | cmp.l %d7, %d3 | check for negative clip limit | ||
420 | bge .L503 | ||
421 | move.l %d7, %d3 | ||
422 | bra .L503 | ||
423 | |||
424 | .L507: add.l %a3, %d3 | add delta to weight | ||
425 | cmp.l %d6, %d3 | check for positive clip limit | ||
426 | ble .L503 | ||
427 | move.l %d6, %d3 | ||
428 | |||
429 | .L503: move.l %acc0, %d2 | d2 = rounded product | ||
430 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
431 | move.l %d2, (%a1) | ||
432 | |||
433 | .L512: addq.l #8, %a1 | bump pointer past both samples | ||
434 | cmp.l %a1, %d5 | loop if bptr < eptr | ||
435 | jbhi term_minus_2 | ||
436 | move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2] | ||
437 | jbra finish_up | ||
438 | |||
439 | .L511: move.l 4(%a1), %d0 | d0 = unchanged bptr [1] | ||
440 | beq .L512 | if zero too, nothing to do for this pair | ||
441 | bra .L510 | ||
442 | |||
443 | |||
444 | |------------------------------------------------------------------------------ | ||
445 | | Loop to handle term = -3 condition | ||
446 | | | ||
447 | | a0 = d0 = decorrelation sample | ||
448 | | a1 = bptr d1 = initial bptr [0] | ||
449 | | a2 = dpp-> d2 = updated bptr [0] | ||
450 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
451 | | a4 = d4 = dpp->weight_B << 17 | ||
452 | | a5 = d5 = eptr | ||
453 | | a6 = d6 = 1024 << 17 | ||
454 | | a7 = d7 = -1024 << 17 | ||
455 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
456 | |------------------------------------------------------------------------------ | ||
457 | |||
458 | term_minus_3: | ||
459 | move.l -4(%a1), %d0 | d0 = bptr [-1] | ||
460 | beq .L301 | if zero, skip first calculation | ||
461 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
462 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) | ||
463 | mac.l %d0, %d3, %acc0 | ||
464 | move.l (%a1), %d1 | d1 = bptr [0] | ||
465 | beq .L320 | if zero, leave weight unchanged | ||
466 | eor.l %d1, %d0 | else compare signs | ||
467 | bge .L319 | if same, add delta to weight | ||
468 | sub.l %a3, %d3 | else subtract delta from weight | ||
469 | cmp.l %d7, %d3 | check for negative clip limit | ||
470 | bge .L320 | ||
471 | move.l %d7, %d3 | ||
472 | bra .L320 | ||
473 | |||
474 | .L319: add.l %a3, %d3 | add delta to weight | ||
475 | cmp.l %d6, %d3 | check for positive clip limit | ||
476 | ble .L320 | ||
477 | move.l %d6, %d3 | ||
478 | |||
479 | .L320: move.l %acc0, %d2 | d2 = rounded product | ||
480 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
481 | move.l %d2, (%a1)+ | ||
482 | |||
483 | .L330: move.l -12(%a1), %d0 | d0 = bptr [-2] | ||
484 | beq .L302 | if zero, skip second calculation | ||
485 | move.l %acc1, %acc0 | preload acc0 with rounding constant | ||
486 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) | ||
487 | mac.l %d0, %d4, %acc0 | ||
488 | move.l (%a1), %d1 | d1 = bptr [1] | ||
489 | beq .L318 | if zero, leave weight unchanged | ||
490 | eor.l %d1, %d0 | else compare signs | ||
491 | bge .L322 | if same, add delta to weight | ||
492 | sub.l %a3, %d4 | else subtract delta from weight | ||
493 | cmp.l %d7, %d4 | check for negative clip limit | ||
494 | bge .L318 | ||
495 | move.l %d7, %d4 | ||
496 | bra .L318 | ||
497 | |||
498 | .L322: add.l %a3, %d4 | add delta to weight | ||
499 | cmp.l %d6, %d4 | check for positive clip limit | ||
500 | ble .L318 | ||
501 | move.l %d6, %d4 | ||
502 | |||
503 | .L318: move.l %acc0, %d2 | d2 = rounded product | ||
504 | add.l %d1, %d2 | add applied weight to bptr [1], store | ||
505 | move.l %d2, (%a1)+ | ||
506 | |||
507 | .L331: cmp.l %a1, %d5 | loop back if bptr < eptr | ||
508 | jbhi term_minus_3 | ||
509 | move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] | ||
510 | move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2] | ||
511 | jbra finish_up | ||
512 | |||
513 | .L301: addq.l #4, %a1 | bump pointer and jump back into loop | ||
514 | bra .L330 | ||
515 | |||
516 | .L302: addq.l #4, %a1 | bump pointer and jump back into loop | ||
517 | bra .L331 | ||
518 | |||
519 | | finish and return | ||
520 | |||
521 | finish_up: | ||
522 | moveq.l #17, %d0 | shift weights back down 17 places | ||
523 | asr.l %d0, %d3 | ||
524 | asr.l %d0, %d4 | ||
525 | move.w %d3, 4(%a2) | store weight_A in dpp->weight_A | ||
526 | move.w %d4, 6(%a2) | store weight_B in dpp->weight_B | ||
527 | |||
528 | clr.l %d0 | clear up EMAC (macsr is not restored here) | ||
529 | move.l %d0, %acc0 | ||
530 | move.l %d0, %acc1 | ||
531 | |||
532 | return_only: | ||
533 | movem.l (%sp), %d2-%d7/%a2-%a6 | restore saved registers | ||
534 | lea (44,%sp), %sp | release the 44-byte save area | ||
535 | rts | ||
diff --git a/apps/codecs/libwavpack/unpack.c b/apps/codecs/libwavpack/unpack.c index ae473787a7..5afaac3659 100644 --- a/apps/codecs/libwavpack/unpack.c +++ b/apps/codecs/libwavpack/unpack.c | |||
@@ -27,7 +27,11 @@ static void strcpy_loc (char *dst, char *src) { while (*src) *dst++ = *src++; *d | |||
27 | // these macros implement the weight application and update operations | 27 | // these macros implement the weight application and update operations |
28 | // that are at the heart of the decorrelation loops | 28 | // that are at the heart of the decorrelation loops |
29 | 29 | ||
30 | #if 0 // PERFCOND | ||
30 | #define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10) | 31 | #define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10) |
32 | #else | ||
33 | #define apply_weight_i(weight, sample) ((((weight * sample) >> 8) + 2) >> 2) | ||
34 | #endif | ||
31 | 35 | ||
32 | #define apply_weight_f(weight, sample) (((((sample & 0xffff) * weight) >> 9) + \ | 36 | #define apply_weight_f(weight, sample) (((((sample & 0xffff) * weight) >> 9) + \ |
33 | (((sample & ~0xffff) >> 9) * weight) + 1) >> 1) | 37 | (((sample & ~0xffff) >> 9) * weight) + 1) >> 1) |
@@ -39,7 +43,7 @@ static void strcpy_loc (char *dst, char *src) { while (*src) *dst++ = *src++; *d | |||
39 | #define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10)) | 43 | #define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10)) |
40 | #endif | 44 | #endif |
41 | 45 | ||
42 | #if 1 // PERFCOND | 46 | #if 0 // PERFCOND |
43 | #define update_weight(weight, delta, source, result) \ | 47 | #define update_weight(weight, delta, source, result) \ |
44 | if (source && result) weight -= ((((source ^ result) >> 30) & 2) - 1) * delta; | 48 | if (source && result) weight -= ((((source ^ result) >> 30) & 2) - 1) * delta; |
45 | #else | 49 | #else |
@@ -315,9 +319,14 @@ int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) | |||
315 | // samples unpacked, which can be less than the number requested if an error | 319 | // samples unpacked, which can be less than the number requested if an error |
316 | // occurs or the end of the block is reached. | 320 | // occurs or the end of the block is reached. |
317 | 321 | ||
322 | #if CONFIG_CPU==MCF5249 && !defined(SIMULATOR) | ||
323 | extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, long *buffer, long sample_count); | ||
324 | #else | ||
325 | static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count); | ||
326 | #endif | ||
327 | |||
318 | static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count); | 328 | static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count); |
319 | static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long sample_count); | 329 | static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long sample_count); |
320 | static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count); | ||
321 | static void fixup_samples (WavpackStream *wps, long *buffer, ulong sample_count); | 330 | static void fixup_samples (WavpackStream *wps, long *buffer, ulong sample_count); |
322 | 331 | ||
323 | long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count) | 332 | long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count) |
@@ -372,7 +381,11 @@ long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count) | |||
372 | else | 381 | else |
373 | for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { | 382 | for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { |
374 | decorr_stereo_pass (dpp, buffer, 8); | 383 | decorr_stereo_pass (dpp, buffer, 8); |
384 | #if CONFIG_CPU==MCF5249 && !defined(SIMULATOR) | ||
385 | decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8); | ||
386 | #else | ||
375 | decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); | 387 | decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); |
388 | #endif | ||
376 | } | 389 | } |
377 | 390 | ||
378 | if (flags & JOINT_STEREO) | 391 | if (flags & JOINT_STEREO) |
@@ -530,11 +543,13 @@ static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long samp | |||
530 | dpp->weight_B = weight_B; | 543 | dpp->weight_B = weight_B; |
531 | } | 544 | } |
532 | 545 | ||
546 | #if CONFIG_CPU != MCF5249 || defined(SIMULATOR) | ||
547 | |||
533 | static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count) | 548 | static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count) |
534 | { | 549 | { |
535 | long delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B; | 550 | long delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B; |
536 | long *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B; | 551 | long *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B; |
537 | int k; | 552 | int k, i; |
538 | 553 | ||
539 | switch (dpp->term) { | 554 | switch (dpp->term) { |
540 | 555 | ||
@@ -581,23 +596,11 @@ static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long | |||
581 | update_weight (weight_B, delta, tptr [1], sam_A); | 596 | update_weight (weight_B, delta, tptr [1], sam_A); |
582 | } | 597 | } |
583 | 598 | ||
584 | k = dpp->term; | 599 | for (k = dpp->term - 1, i = 8; i--; k--) { |
585 | dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-1]; | 600 | dpp->samples_B [k & (MAX_TERM - 1)] = *--bptr; |
586 | dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-2]; | 601 | dpp->samples_A [k & (MAX_TERM - 1)] = *--bptr; |
587 | dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-3]; | 602 | } |
588 | dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-4]; | 603 | |
589 | dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-5]; | ||
590 | dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-6]; | ||
591 | dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-7]; | ||
592 | dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-8]; | ||
593 | dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-9]; | ||
594 | dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-10]; | ||
595 | dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-11]; | ||
596 | dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-12]; | ||
597 | dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-13]; | ||
598 | dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-14]; | ||
599 | dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-15]; | ||
600 | dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-16]; | ||
601 | break; | 604 | break; |
602 | 605 | ||
603 | case -1: | 606 | case -1: |
@@ -639,6 +642,8 @@ static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long | |||
639 | dpp->weight_B = weight_B; | 642 | dpp->weight_B = weight_B; |
640 | } | 643 | } |
641 | 644 | ||
645 | #endif | ||
646 | |||
642 | static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count) | 647 | static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count) |
643 | { | 648 | { |
644 | long delta = dpp->delta, weight_A = dpp->weight_A; | 649 | long delta = dpp->delta, weight_A = dpp->weight_A; |
diff --git a/apps/codecs/libwavpack/wputils.c b/apps/codecs/libwavpack/wputils.c index 9227b66e46..8d58b3b4d7 100644 --- a/apps/codecs/libwavpack/wputils.c +++ b/apps/codecs/libwavpack/wputils.c | |||
@@ -45,7 +45,7 @@ static ulong read_next_header (read_stream infile, WavpackHeader *wphdr); | |||
45 | // large integer or floating point files (but always provides at least 24 bits | 45 | // large integer or floating point files (but always provides at least 24 bits |
46 | // of resolution). | 46 | // of resolution). |
47 | 47 | ||
48 | static WavpackContext wpc; | 48 | static WavpackContext wpc IDATA_ATTR; |
49 | 49 | ||
50 | WavpackContext *WavpackOpenFileInput (read_stream infile, char *error) | 50 | WavpackContext *WavpackOpenFileInput (read_stream infile, char *error) |
51 | { | 51 | { |
diff --git a/apps/plugins/wv2wav.c b/apps/plugins/wv2wav.c index c0bc05cf12..909a0c3c63 100644 --- a/apps/plugins/wv2wav.c +++ b/apps/plugins/wv2wav.c | |||
@@ -29,7 +29,7 @@ | |||
29 | 29 | ||
30 | static struct plugin_api* rb; | 30 | static struct plugin_api* rb; |
31 | static file_info_struct file_info; | 31 | static file_info_struct file_info; |
32 | static long temp_buffer [BUFFER_SIZE]; | 32 | static long temp_buffer [BUFFER_SIZE] IDATA_ATTR; |
33 | 33 | ||
34 | /* Reformat samples from longs in processor's native endian mode to | 34 | /* Reformat samples from longs in processor's native endian mode to |
35 | little-endian data with 2 bytes / sample. */ | 35 | little-endian data with 2 bytes / sample. */ |