summaryrefslogtreecommitdiff
path: root/apps/codecs/libwavpack/coldfire.S
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/libwavpack/coldfire.S')
-rw-r--r--apps/codecs/libwavpack/coldfire.S535
1 files changed, 535 insertions, 0 deletions
diff --git a/apps/codecs/libwavpack/coldfire.S b/apps/codecs/libwavpack/coldfire.S
new file mode 100644
index 0000000000..9c7e098e88
--- /dev/null
+++ b/apps/codecs/libwavpack/coldfire.S
@@ -0,0 +1,535 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2005 by David Bryant
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20/* This is an assembly optimized version of the following WavPack function:
21 *
22 * void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp,
23 * long *buffer, long sample_count);
24 *
25 * It performs a single pass of stereo decorrelation on the provided buffer.
26 * Note that this version of the function requires that the 8 previous stereo
27 * samples are visible and correct. In other words, it ignores the "samples_*"
28 * fields in the decorr_pass structure and gets the history data directly
29 * from the buffer. It does, however, return the appropriate history samples
30 * to the decorr_pass structure before returning.
31 *
32 * This is written to work on a MCF5249 processor, or any processor based on
33 * the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for
34 * the "apply_weight" function of WavPack decorrelation because it provides
35 * the required 40-bit product. The fractional rounding mode of the EMAC is not
36 * configurable and uses "round to even" while WavPack uses "round to larger",
37 * so the rounding has to be done manually.
38 */
39
40 .text
41 .align 2
42 .global decorr_stereo_pass_cont_mcf5249
43
| Entry point: save registers, fetch the three stack arguments, pre-scale the
| weights and delta, set up the EMAC, then dispatch on dpp->term to one of the
| per-term loops that follow. A zero sample_count returns immediately without
| touching the EMAC or the decorr_pass structure.
| Arguments sit above the 44 byte save area:
|   44+4(%sp) = dpp, 44+8(%sp) = buffer, 44+12(%sp) = sample_count
44decorr_stereo_pass_cont_mcf5249:
45
46 lea (-44, %sp), %sp | room for 11 registers (11 * 4 bytes)
47 movem.l %d2-%d7/%a2-%a6, (%sp) | NOTE(review): a4-a6 are saved but no instruction below uses them
48 move.l 44+4(%sp), %a2 | a2 = dpp->
49 move.l 44+8(%sp), %a1 | a1 = bptr
50 move.w 2(%a2), %a3 | a3 = dpp->delta
51 move.w 4(%a2), %d3 | d3 = dpp->weight_A (sign extended)
52 ext.l %d3
53 move.w 6(%a2), %d4 | d4 = dpp->weight_B (sign extended)
54 ext.l %d4
55 move.l 44+12(%sp), %d0 | d0 = sample_count
56 jbeq return_only | if zero, nothing to do
57
58 lsl.l #3, %d0 | d5 = eptr = bptr + (sample_count * 8)
59 move.l %d0, %d5
60 add.l %a1, %d5
61
62 moveq.l #17, %d0 | left shift weights & delta 17 places
63 asl.l %d0, %d3
64 asl.l %d0, %d4
65 move.l %a3, %d1 | delta is shifted in d1 and moved back to a3
66 asl.l %d0, %d1
67 move.l %d1, %a3
68
69 move.l #0x20, %macsr | set fractional mode for MAC
70 move.l #0, %acc1 | acc1 = 0x00 0000 80 (for rounding)
71 move.l #0x800000, %accext01 | rounding constant is loaded through the extension register
72
73 move.l #1024<<17, %d6 | d6 & d7 are weight clipping limits
74 move.l #-1024<<17, %d7 | (only used by negative terms)
75
76 move.w (%a2), %d0 | d0 = term
77 ext.l %d0
78 cmp.l #17, %d0
79 jbeq term_17 | term = 17
80 cmp.l #18, %d0
81 jbeq term_18 | term = 18
82 addq.l #1, %d0 | term + 1 == 0 ?
83 jbeq term_minus_1 | term = -1
84 addq.l #1, %d0 | term + 2 == 0 ?
85 jbeq term_minus_2 | term = -2
86 addq.l #1, %d0 | term + 3 == 0 ?
87 jbeq term_minus_3 | term = -3
88 jbra term_default | default term = 1 - 8
89
90|------------------------------------------------------------------------------
91| Loop to handle term = 17 condition
92|
93| a0 = d0 = (2 * bptr [-1]) - bptr [-2]
94| a1 = bptr d1 = initial bptr [0]
95| a2 = dpp-> d2 = updated bptr [0]
96| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
97| a4 = d4 = dpp->weight_B << 17
98| a5 = d5 = eptr
99| macsr = 0x20 acc1 = 0x00 0000 80
100|------------------------------------------------------------------------------
101
| term = 17 loop. Each pass handles one stereo pair (two longwords at a1).
| The offsets -8 and -16 step back by whole pairs, so "bptr [-1]" and
| "bptr [-2]" in the comments below mean the two previous samples of the same
| channel. A zero prediction leaves both the sample and the weight untouched.
102term_17:
103 move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2]
104 add.l %d0, %d0
105 sub.l -16(%a1), %d0
106 beq .L251 | if zero, skip calculation
107 move.l %acc1, %acc0 | preload acc0 with the rounding constant
108 asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A
109 mac.l %d0, %d3, %acc0
110 move.l (%a1), %d1 | d1 = initial bptr [0]
111 beq .L255 | if bptr [0] is zero, leave weight alone
112 eor.l %d1, %d0 | else compare signs
113 bge .L256 | if same, add delta to weight
114 sub.l %a3, %d3 | else subtract delta from weight
115 sub.l %a3, %d3 | subtract again instead of branch (net: weight -= delta)
116.L256: add.l %a3, %d3 | add delta to weight
117
118.L255: move.l %acc0, %d2 | d2 = rounded product
119 add.l %d1, %d2 | update bptr [0] and store
120 move.l %d2, (%a1)+
121
122.L253: move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2]
123 add.l %d0, %d0
124 sub.l -16(%a1), %d0
125 beq .L257 | if zero, skip calculations
126 move.l %acc1, %acc0 | preload acc0 with the rounding constant
127 asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B
128 mac.l %d0, %d4, %acc0
129 move.l (%a1), %d1 | d1 = initial value of second sample of the pair
130 beq .L254 | if it is zero, leave weight alone
131 eor.l %d1, %d0 | else compare signs
132 bge .L259 | if same, add delta to weight
133 sub.l %a3, %d4 | else subtract delta from weight
134 sub.l %a3, %d4 | subtract again instead of branch (net: weight -= delta)
135.L259: add.l %a3, %d4 | add delta to weight
136
137.L254: move.l %acc0, %d2 | d2 = rounded product
138 add.l %d1, %d2 | update second sample of the pair and store
139 move.l %d2, (%a1)+
140
141.L252: cmp.l %a1, %d5 | loop if bptr < eptr
142 jbhi term_17
143 bra term_17_18_finish | exit through common path
144
145.L251: addq.l #4, %a1 | bump pointer past unchanged sample and rejoin loop
146 bra .L253
147
148.L257: addq.l #4, %a1 | bump pointer past unchanged sample and rejoin loop
149 bra .L252
150
151|------------------------------------------------------------------------------
152| Loop to handle term = 18 condition
153|
154| a0 = d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1
155| a1 = bptr d1 = initial bptr [0]
156| a2 = dpp-> d2 = updated bptr [0]
157| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
158| a4 = d4 = dpp->weight_B << 17
159| a5 = d5 = eptr
160| macsr = 0x20 acc1 = 0x00 0000 80
161|------------------------------------------------------------------------------
162
| term = 18 loop. Same structure as the term = 17 loop, only the prediction
| differs: (3 * previous - one before previous) >> 1, per channel. The multiply
| by three is formed in a0 with lea. Each pass handles one stereo pair.
163term_18:
164 move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1
165 lea (%a0,%a0.l*2), %a0 | a0 = a0 + 2 * a0
166 move.l %a0, %d0
167 sub.l -16(%a1), %d0
168 asr.l #1, %d0
169 beq .L260 | if zero, skip calculation
170 move.l %acc1, %acc0 | preload acc0 with the rounding constant
171 asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A
172 mac.l %d0, %d3, %acc0
173 move.l (%a1), %d1 | d1 = initial bptr [0]
174 beq .L266 | if bptr [0] is zero, leave weight alone
175 eor.l %d1, %d0 | else compare signs
176 bge .L267 | if same, add delta to weight
177 sub.l %a3, %d3 | else subtract delta from weight
178 sub.l %a3, %d3 | subtract again instead of branch (net: weight -= delta)
179.L267: add.l %a3, %d3 | add delta to weight
180
181.L266: move.l %acc0, %d2 | d2 = rounded product
182 add.l %d1, %d2 | add applied weight to bptr [0], store
183 move.l %d2, (%a1)+
184
185.L268: move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1
186 lea (%a0,%a0.l*2), %a0 | a0 = a0 + 2 * a0
187 move.l %a0, %d0
188 sub.l -16(%a1), %d0
189 asr.l #1, %d0
190 beq .L261 | if zero, skip calculation
191 move.l %acc1, %acc0 | preload acc0 with the rounding constant
192 asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B
193 mac.l %d0, %d4, %acc0
194 move.l (%a1), %d1 | d1 = initial value of second sample of the pair
195 beq .L265 | if it is zero, leave weight alone
196 eor.l %d1, %d0 | else compare signs
197 bge .L270 | if same, add delta to weight
198 sub.l %a3, %d4 | else subtract delta from weight
199 sub.l %a3, %d4 | subtract again instead of branch (net: weight -= delta)
200.L270: add.l %a3, %d4 | add delta to weight
201
202.L265: move.l %acc0, %d2 | d2 = rounded product
203 add.l %d1, %d2 | add applied weight to second sample of the pair, store
204 move.l %d2, (%a1)+
205
206.L269: cmp.l %a1, %d5 | loop if bptr < eptr
207 jbhi term_18
208 bra term_17_18_finish | exit through common path
209
210.L260: addq.l #4, %a1 | bump pointer and jump back into loop
211 bra .L268
212
213.L261: addq.l #4, %a1 | bump pointer and jump back into loop
214 bra .L269
215
| Common exit for the term = 17 and term = 18 loops: copy the last four
| longwords written to the buffer (the two most recent stereo pairs) into the
| dpp history fields, then join finish_up. Indices below are in longwords
| relative to the final bptr (a1 == eptr here).
216term_17_18_finish:
217 move.l -4(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-1]
218 move.l -8(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-2]
219 move.l -12(%a1), 44(%a2) | dpp->samples_B [1] = bptr [-3]
220 move.l -16(%a1), 12(%a2) | dpp->samples_A [1] = bptr [-4]
221 jbra finish_up
222
223|------------------------------------------------------------------------------
224| Loop to handle default terms (i.e. 1 - 8)
225|
226| a0 = tptr d0 = tptr [0]
227| a1 = bptr d1 = initial bptr [0]
228| a2 = dpp-> d2 = updated bptr [0]
229| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
230| a4 = d4 = dpp->weight_B << 17
231| a5 = d5 = eptr
232| macsr = 0x20 acc1 = 0x00 0000 80
233|------------------------------------------------------------------------------
234
| Default terms (dispatch comment: 1 - 8). tptr (a0) trails bptr (a1) by
| term * 8 bytes, i.e. by term stereo pairs, and supplies the decorrelation
| sample for each channel. After the loop the last 8 stereo pairs are copied
| back into dpp->samples_A / samples_B, walking the buffer backwards.
235term_default:
236 move.w (%a2), %d0 | a0 = a1 - (dpp->term * 8)
237 ext.l %d0
238 lsl.l #3, %d0
239 move.l %a1, %a0
240 sub.l %d0, %a0
241
242term_default_loop:
243 move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero
244 beq .L271
245 move.l %acc1, %acc0 | preload acc0 with the rounding constant
246 asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A
247 mac.l %d0, %d3, %acc0
248 move.l (%a1), %d1 | d1 = initial bptr [0]
249 beq .L277 | if bptr [0] is zero, leave weight alone
250 eor.l %d1, %d0 | else compare signs
251 bge .L278 | if same, add delta to weight
252 sub.l %a3, %d3 | else subtract delta from weight
253 sub.l %a3, %d3 | subtract again instead of branch (net: weight -= delta)
254.L278: add.l %a3, %d3 | add delta to weight
255
256.L277: move.l %acc0, %d2 | d2 = rounded product
257 add.l %d1, %d2 | add applied weight to bptr [0], store
258 move.l %d2, (%a1)+
259
260.L275: move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero
261 beq .L272
262 move.l %acc1, %acc0 | preload acc0 with the rounding constant
263 asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B
264 mac.l %d0, %d4, %acc0
265 move.l (%a1), %d1 | d1 = initial value of second sample of the pair
266 beq .L276 | if it is zero, leave weight alone
267 eor.l %d1, %d0 | else compare signs
268 bge .L281 | if same, add delta to weight
269 sub.l %a3, %d4 | else subtract delta from weight
270 sub.l %a3, %d4 | subtract again instead of branch (net: weight -= delta)
271.L281: add.l %a3, %d4 | add delta to weight
272
273.L276: move.l %acc0, %d2 | d2 = rounded product
274 add.l %d1, %d2 | add applied weight to second sample of the pair, store
275 move.l %d2, (%a1)+
276
277.L274: cmp.l %a1, %d5 | loop back if bptr < eptr
278 jbhi term_default_loop
279 move.w (%a2), %d0 | d0 = term (low word only; high word is masked off below)
280 moveq.l #8, %d1 | d1 = loop counter
281
282.L323: subq.l #1, %d0 | back up & mask index (first pass gives (term - 1) & 7)
283 and.l #7, %d0
284 move.l -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0]
285 move.l -(%a1), 8(%a2,%d0.l*4) | store dpp->samples_A [d0]
286 subq.l #1, %d1 | loop on count
287 jbne .L323
288 jbra finish_up
289
290.L271: addq.l #4, %a1 | bump pointer and jump back into loop
291 bra .L275
292
293.L272: addq.l #4, %a1 | bump pointer and jump back into loop
294 bra .L274
295
296
297|------------------------------------------------------------------------------
298| Loop to handle term = -1 condition
299|
300| a0 = d0 = decorrelation sample
301| a1 = bptr d1 = initial bptr [0]
302| a2 = dpp-> d2 = updated bptr [0]
303| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
304| a4 = d4 = dpp->weight_B << 17
305| a5 = d5 = eptr
306| a6 = d6 = 1024 << 17
307| a7 = d7 = -1024 << 17
308| macsr = 0x20 acc1 = 0x00 0000 80
309|------------------------------------------------------------------------------
310
| term = -1 loop. bptr [0] is predicted from bptr [-1] with weight_A, then
| bptr [1] is predicted from the freshly written bptr [0] with weight_B.
| Weights are clipped to the limits held in d6 / d7 after each update.
| The new bptr [0] is kept in d0 (not d2) so it can feed the second multiply.
311term_minus_1:
312 move.l -4(%a1), %d0 | d0 = bptr [-1]
313 beq .L402 | if zero, bptr [0] is left unchanged
314 move.l %acc1, %acc0 | preload acc0 with the rounding constant
315 asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A)
316 mac.l %d0, %d3, %acc0
317 move.l (%a1), %d1 | d1 = initial bptr [0]
318 beq .L405 | if bptr [0] is zero, leave weight alone
319 eor.l %d1, %d0 | else compare signs
320 bge .L404 | if same, add delta to weight
321 sub.l %a3, %d3 | else subtract delta from weight
322 cmp.l %d7, %d3 | check for negative clip limit
323 bge .L405
324 move.l %d7, %d3
325 bra .L405
326
327.L404: add.l %a3, %d3 | add delta to weight
328 cmp.l %d6, %d3 | check for positive clip limit
329 ble .L405
330 move.l %d6, %d3
331
332.L405: move.l %acc0, %d0 | d0 = rounded product
333 add.l %d1, %d0 | add applied weight to bptr [0], store
334 move.l %d0, (%a1)+
335 beq .L401 | flags come from the store: new bptr [0] == 0, skip bptr [1]
336
337.L410: move.l %acc1, %acc0 | preload acc0 with the rounding constant
338 asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B)
339 mac.l %d0, %d4, %acc0
340 move.l (%a1), %d1 | d1 = initial bptr [1]
341 beq .L403 | if bptr [1] is zero, leave weight alone
342 eor.l %d1, %d0 | else compare signs
343 bge .L407 | if same, add delta to weight
344 sub.l %a3, %d4 | else subtract delta from weight
345 cmp.l %d7, %d4 | check for negative clip limit
346 bge .L403
347 move.l %d7, %d4
348 bra .L403
349
350.L407: add.l %a3, %d4 | add delta to weight
351 cmp.l %d6, %d4 | check for positive clip limit
352 ble .L403
353 move.l %d6, %d4
354
355.L403: move.l %acc0, %d2 | d2 = rounded product
356 add.l %d1, %d2 | add applied weight to bptr [1], store
357 move.l %d2, (%a1)+
358
359.L411: cmp.l %a1, %d5 | loop back if bptr < eptr
360 jbhi term_minus_1
361 move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1]
362 jbra finish_up
363
364.L402: move.l (%a1)+, %d0 | bptr [-1] was zero: d0 = unchanged bptr [0]
365 bne .L410 | if nonzero it still predicts bptr [1]
366
367.L401: addq.l #4, %a1 | bptr [0] is zero: step over unchanged bptr [1]
368 bra .L411
369
370
371|------------------------------------------------------------------------------
372| Loop to handle term = -2 condition
373|
374| a0 = d0 = decorrelation sample
375| a1 = bptr d1 = initial bptr [0]
376| a2 = dpp-> d2 = updated bptr [0]
377| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
378| a4 = d4 = dpp->weight_B << 17
379| a5 = d5 = eptr
380| a6 = d6 = 1024 << 17
381| a7 = d7 = -1024 << 17
382| macsr = 0x20 acc1 = 0x00 0000 80
383|------------------------------------------------------------------------------
384
| term = -2 loop. The second sample of the pair, bptr [1], is processed first:
| it is predicted from bptr [-2] with weight_B. Then bptr [0] is predicted from
| the freshly written bptr [1] with weight_A. a1 is not advanced until the end
| of the pass, so the pair is addressed as (%a1) and 4(%a1).
| Weights are clipped to the limits held in d6 / d7 after each update.
385term_minus_2:
386 move.l -8(%a1), %d0 | d0 = bptr [-2]
387 beq .L511 | if zero, bptr [1] is left unchanged
388 move.l %acc1, %acc0 | preload acc0 with the rounding constant
389 asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B)
390 mac.l %d0, %d4, %acc0
391 move.l 4(%a1), %d1 | d1 = initial bptr [1]
392 beq .L505 | if bptr [1] is zero, leave weight alone
393 eor.l %d1, %d0 | else compare signs
394 bge .L504 | if same, add delta to weight
395 sub.l %a3, %d4 | else subtract delta from weight
396 cmp.l %d7, %d4 | check for negative clip limit
397 bge .L505
398 move.l %d7, %d4
399 bra .L505
400
401.L504: add.l %a3, %d4 | add delta to weight
402 cmp.l %d6, %d4 | check for positive clip limit
403 ble .L505
404 move.l %d6, %d4
405
406.L505: move.l %acc0, %d0 | d0 = rounded product
407 add.l %d1, %d0 | add applied weight to bptr [1], store
408 move.l %d0, 4(%a1)
409 beq .L512 | flags come from the store: new bptr [1] == 0, skip bptr [0]
410
411.L510: move.l %acc1, %acc0 | preload acc0 with the rounding constant
412 asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A)
413 mac.l %d0, %d3, %acc0
414 move.l (%a1), %d1 | d1 = initial bptr [0]
415 beq .L503 | if bptr [0] is zero, leave weight alone
416 eor.l %d1, %d0 | else compare signs
417 bge .L507 | if same, add delta to weight
418 sub.l %a3, %d3 | else subtract delta from weight
419 cmp.l %d7, %d3 | check for negative clip limit
420 bge .L503
421 move.l %d7, %d3
422 bra .L503
423
424.L507: add.l %a3, %d3 | add delta to weight
425 cmp.l %d6, %d3 | check for positive clip limit
426 ble .L503
427 move.l %d6, %d3
428
429.L503: move.l %acc0, %d2 | d2 = rounded product
430 add.l %d1, %d2 | add applied weight to bptr [0], store
431 move.l %d2, (%a1)
432
433.L512: addq.l #8, %a1 | advance to the next stereo pair
434 cmp.l %a1, %d5 | loop if bptr < eptr
435 jbhi term_minus_2
436 move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2]
437 jbra finish_up
438
439.L511: move.l 4(%a1), %d0 | bptr [-2] was zero: d0 = unchanged bptr [1]
440 beq .L512 | if that is zero too, nothing to do for this pair
441 bra .L510 | else it still predicts bptr [0]
442
443
444|------------------------------------------------------------------------------
445| Loop to handle term = -3 condition
446|
447| a0 = d0 = decorrelation sample
448| a1 = bptr d1 = initial bptr [0]
449| a2 = dpp-> d2 = updated bptr [0]
450| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
451| a4 = d4 = dpp->weight_B << 17
452| a5 = d5 = eptr
453| a6 = d6 = 1024 << 17
454| a7 = d7 = -1024 << 17
455| macsr = 0x20 acc1 = 0x00 0000 80
456|------------------------------------------------------------------------------
457
| term = -3 loop. bptr [0] is predicted from bptr [-1] with weight_A and
| bptr [1] is predicted from bptr [-2] with weight_B; both sources are read
| from the previous stereo pair. Weights are clipped to the limits held in
| d6 / d7 after each update.
458term_minus_3:
459 move.l -4(%a1), %d0 | d0 = bptr [-1]
460 beq .L301 | if zero, bptr [0] is left unchanged
461 move.l %acc1, %acc0 | preload acc0 with the rounding constant
462 asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A)
463 mac.l %d0, %d3, %acc0
464 move.l (%a1), %d1 | d1 = initial bptr [0]
465 beq .L320 | if bptr [0] is zero, leave weight alone
466 eor.l %d1, %d0 | else compare signs
467 bge .L319 | if same, add delta to weight
468 sub.l %a3, %d3 | else subtract delta from weight
469 cmp.l %d7, %d3 | check for negative clip limit
470 bge .L320
471 move.l %d7, %d3
472 bra .L320
473
474.L319: add.l %a3, %d3 | add delta to weight
475 cmp.l %d6, %d3 | check for positive clip limit
476 ble .L320
477 move.l %d6, %d3
478
479.L320: move.l %acc0, %d2 | d2 = rounded product
480 add.l %d1, %d2 | add applied weight to bptr [0], store
481 move.l %d2, (%a1)+
482
483.L330: move.l -12(%a1), %d0 | d0 = bptr [-2] (offset is -12 because a1 has already advanced by 4)
484 beq .L302 | if zero, bptr [1] is left unchanged
485 move.l %acc1, %acc0 | preload acc0 with the rounding constant
486 asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B)
487 mac.l %d0, %d4, %acc0
488 move.l (%a1), %d1 | d1 = initial bptr [1]
489 beq .L318 | if bptr [1] is zero, leave weight alone
490 eor.l %d1, %d0 | else compare signs
491 bge .L322 | if same, add delta to weight
492 sub.l %a3, %d4 | else subtract delta from weight
493 cmp.l %d7, %d4 | check for negative clip limit
494 bge .L318
495 move.l %d7, %d4
496 bra .L318
497
498.L322: add.l %a3, %d4 | add delta to weight
499 cmp.l %d6, %d4 | check for positive clip limit
500 ble .L318
501 move.l %d6, %d4
502
503.L318: move.l %acc0, %d2 | d2 = rounded product
504 add.l %d1, %d2 | add applied weight to bptr [1], store
505 move.l %d2, (%a1)+
506
507.L331: cmp.l %a1, %d5 | loop back if bptr < eptr
508 jbhi term_minus_3
509 move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1]
510 move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2]
511 jbra finish_up
512
513.L301: addq.l #4, %a1 | bump pointer past unchanged bptr [0] and rejoin loop
514 bra .L330
515
516.L302: addq.l #4, %a1 | bump pointer past unchanged bptr [1] and rejoin loop
517 bra .L331
518
519| finish and return
520
| Common exit for all term loops: scale the weights back down by the 17 bits
| applied at entry, store them in the decorr_pass structure, zero the two
| accumulators, then restore registers and return. return_only is the early
| exit for sample_count == 0; it skips the weight write-back and EMAC clear.
| macsr is left at the value set on entry to the loops.
521finish_up:
522 moveq.l #17, %d0 | undo the 17 place left shift of the weights
523 asr.l %d0, %d3
524 asr.l %d0, %d4
525 move.w %d3, 4(%a2) | weight_A, dpp->weight_A
526 move.w %d4, 6(%a2) | weight_B, dpp->weight_B
527
528 clr.l %d0 | clear up EMAC
529 move.l %d0, %acc0
530 move.l %d0, %acc1
531
532return_only:
533 movem.l (%sp), %d2-%d7/%a2-%a6 | restore the 11 registers saved on entry
534 lea (44,%sp), %sp | release the 44 byte save area
535 rts