diff options
author | Sean Bartell <wingedtachikoma@gmail.com> | 2011-06-25 21:32:25 -0400 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2012-04-25 22:13:20 +0200 |
commit | f40bfc9267b13b54e6379dfe7539447662879d24 (patch) | |
tree | 9b20069d5e62809ff434061ad730096836f916f2 /lib/rbcodec/codecs/libwavpack/coldfire.S | |
parent | a0009907de7a0107d49040d8a180f140e2eff299 (diff) | |
download | rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.tar.gz rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.zip |
Add codecs to librbcodec.
Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97
Reviewed-on: http://gerrit.rockbox.org/137
Reviewed-by: Nils Wallménius <nils@rockbox.org>
Tested-by: Nils Wallménius <nils@rockbox.org>
Diffstat (limited to 'lib/rbcodec/codecs/libwavpack/coldfire.S')
-rw-r--r-- | lib/rbcodec/codecs/libwavpack/coldfire.S | 537 |
1 files changed, 537 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libwavpack/coldfire.S b/lib/rbcodec/codecs/libwavpack/coldfire.S new file mode 100644 index 0000000000..884a0ac90f --- /dev/null +++ b/lib/rbcodec/codecs/libwavpack/coldfire.S | |||
@@ -0,0 +1,537 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2005 by David Bryant | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version 2 | ||
15 | * of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
18 | * KIND, either express or implied. | ||
19 | * | ||
20 | ****************************************************************************/ | ||
21 | |||
22 | /* This is an assembly optimized version of the following WavPack function: | ||
23 | * | ||
24 | * void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, | ||
25 | * long *buffer, long sample_count); | ||
26 | * | ||
27 | * It performs a single pass of stereo decorrelation on the provided buffer. | ||
28 | * Note that this version of the function requires that the 8 previous stereo | ||
29 | * samples are visible and correct. In other words, it ignores the "samples_*" | ||
30 | * fields in the decorr_pass structure and gets the history data directly | ||
31 | * from the buffer. It does, however, return the appropriate history samples | ||
32 | * to the decorr_pass structure before returning. | ||
33 | * | ||
34 | * This is written to work on a MCF5249 processor, or any processor based on | ||
35 | * the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for | ||
36 | * the "apply_weight" function of WavPack decorrelation because it provides | ||
37 | * the required 40-bit product. The fractional rounding mode of the EMAC is not | ||
38 | * configurable and uses "round to even" while WavPack uses "round to larger", | ||
39 | * so the rounding has to be done manually. | ||
40 | */ | ||
41 | |||
42 | .text | ||
43 | .align 2 | ||
44 | .global decorr_stereo_pass_cont_mcf5249 | ||
45 | |||
46 | decorr_stereo_pass_cont_mcf5249: | entry: save registers, load arguments, dispatch on dpp->term | ||
47 | |||
48 | lea (-44, %sp), %sp | reserve 44 bytes (11 registers * 4) | ||
49 | movem.l %d2-%d7/%a2-%a6, (%sp) | save d2-d7 and a2-a6 | ||
50 | move.l 44+4(%sp), %a2 | a2 = dpp-> | ||
51 | move.l 44+8(%sp), %a1 | a1 = bptr | ||
52 | move.w 2(%a2), %a3 | a3 = dpp->delta | ||
53 | move.w 4(%a2), %d3 | d3 = dpp->weight_A (sign extended) | ||
54 | ext.l %d3 | ||
55 | move.w 6(%a2), %d4 | d4 = dpp->weight_B (sign extended) | ||
56 | ext.l %d4 | ||
57 | move.l 44+12(%sp), %d0 | d0 = sample_count | ||
58 | jbeq return_only | if zero, nothing to do (dpp is left untouched) | ||
59 | |||
60 | lsl.l #3, %d0 | d5 = bptr + (sample_count * 8) | ||
61 | move.l %d0, %d5 | ||
62 | add.l %a1, %d5 | d5 = eptr, one past the last sample to process | ||
63 | |||
64 | moveq.l #17, %d0 | left shift weights & delta 17 places | ||
65 | asl.l %d0, %d3 | ||
66 | asl.l %d0, %d4 | ||
67 | move.l %a3, %d1 | delta is shifted in d1, then moved back to a3 | ||
68 | asl.l %d0, %d1 | ||
69 | move.l %d1, %a3 | ||
70 | |||
71 | moveq.l #0x20, %d6 | d6 = macsr value (d6 is reloaded below) | ||
72 | move.l %d6, %macsr | set fractional mode for MAC | ||
73 | move.l #0x800000, %accext01 | acc1 = 0x00 0000 80 (for rounding); only the extension word is written here, so acc1 itself is presumably zero on entry - TODO confirm | ||
74 | |||
75 | move.l #1024<<17, %d6 | d6 & d7 are weight clipping limits | ||
76 | move.l #-1024<<17, %d7 | (only used by negative terms) | ||
77 | |||
78 | move.w (%a2), %d0 | d0 = term | ||
79 | ext.l %d0 | sign extend term to 32 bits | ||
80 | cmp.l #17, %d0 | ||
81 | jbeq term_17 | term = 17 | ||
82 | cmp.l #18, %d0 | ||
83 | jbeq term_18 | term = 18 | ||
84 | addq.l #1, %d0 | d0 = term + 1, zero when term = -1 | ||
85 | jbeq term_minus_1 | term = -1 | ||
86 | addq.l #1, %d0 | d0 = term + 2, zero when term = -2 | ||
87 | jbeq term_minus_2 | term = -2 | ||
88 | addq.l #1, %d0 | d0 = term + 3, zero when term = -3 | ||
89 | jbeq term_minus_3 | term = -3 | ||
90 | jbra term_default | default term = 1 - 8 | ||
91 | |||
92 | |------------------------------------------------------------------------------ | ||
93 | | Loop to handle term = 17 condition | ||
94 | | | ||
95 | | a0 = d0 = (2 * bptr [-1]) - bptr [-2] | ||
96 | | a1 = bptr d1 = initial bptr [0] | ||
97 | | a2 = dpp-> d2 = updated bptr [0] | ||
98 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
99 | | a4 = d4 = dpp->weight_B << 17 | ||
100 | | a5 = d5 = eptr | ||
101 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
102 | |------------------------------------------------------------------------------ | ||
103 | |||
104 | term_17: | one loop pass handles one stereo sample (two longs) | ||
105 | move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2] (offsets -8 and -16 bytes: same channel, one and two stereo samples back) | ||
106 | add.l %d0, %d0 | ||
107 | sub.l -16(%a1), %d0 | ||
108 | beq .L251 | if zero, skip calculation | ||
109 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
110 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A | ||
111 | mac.l %d0, %d3, %acc0 | ||
112 | move.l (%a1), %d1 | d1 = bptr [0] | ||
113 | beq .L255 | if zero, leave weight_A unchanged | ||
114 | eor.l %d1, %d0 | else compare signs | ||
115 | bge .L256 | if same, add delta to weight | ||
116 | sub.l %a3, %d3 | else subtract delta from weight | ||
117 | sub.l %a3, %d3 | subtract again instead of branch | ||
118 | .L256: add.l %a3, %d3 | add delta to weight | ||
119 | |||
120 | .L255: move.l %acc0, %d2 | d2 = rounded product | ||
121 | add.l %d1, %d2 | update bptr [0] and store | ||
122 | move.l %d2, (%a1)+ | ||
123 | |||
124 | .L253: move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2] (second channel; a1 already advanced by 4) | ||
125 | add.l %d0, %d0 | ||
126 | sub.l -16(%a1), %d0 | ||
127 | beq .L257 | if zero, skip calculations | ||
128 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
129 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B | ||
130 | mac.l %d0, %d4, %acc0 | ||
131 | move.l (%a1), %d1 | d1 = current value of the second channel | ||
132 | beq .L254 | if zero, leave weight_B unchanged | ||
133 | eor.l %d1, %d0 | else compare signs | ||
134 | bge .L259 | if same, add delta to weight | ||
135 | sub.l %a3, %d4 | else subtract delta from weight | ||
136 | sub.l %a3, %d4 | subtract again instead of branch | ||
137 | .L259: add.l %a3, %d4 | add delta to weight | ||
138 | |||
139 | .L254: move.l %acc0, %d2 | d2 = rounded product | ||
140 | add.l %d1, %d2 | update bptr [0] and store | ||
141 | move.l %d2, (%a1)+ | ||
142 | |||
143 | .L252: cmp.l %a1, %d5 | loop if bptr < eptr | ||
144 | jbhi term_17 | ||
145 | bra term_17_18_finish | exit through common path | ||
146 | |||
147 | .L251: addq.l #4, %a1 | update pointer and jump back into loop | ||
148 | bra .L253 | ||
149 | |||
150 | .L257: addq.l #4, %a1 | update pointer and jump back into loop | ||
151 | bra .L252 | ||
152 | |||
153 | |------------------------------------------------------------------------------ | ||
154 | | Loop to handle term = 18 condition | ||
155 | | | ||
156 | | a0 = d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1 | ||
157 | | a1 = bptr d1 = initial bptr [0] | ||
158 | | a2 = dpp-> d2 = updated bptr [0] | ||
159 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
160 | | a4 = d4 = dpp->weight_B << 17 | ||
161 | | a5 = d5 = eptr | ||
162 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
163 | |------------------------------------------------------------------------------ | ||
164 | |||
165 | term_18: | one loop pass handles one stereo sample (two longs) | ||
166 | move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1 | ||
167 | lea (%a0,%a0.l*2), %a0 | a0 = 3 * bptr [-1] | ||
168 | move.l %a0, %d0 | ||
169 | sub.l -16(%a1), %d0 | d0 = 3 * bptr [-1] - bptr [-2] | ||
170 | asr.l #1, %d0 | arithmetic shift right by one | ||
171 | beq .L260 | if zero, skip calculation | ||
172 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
173 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A | ||
174 | mac.l %d0, %d3, %acc0 | ||
175 | move.l (%a1), %d1 | d1 = bptr [0] | ||
176 | beq .L266 | if zero, leave weight_A unchanged | ||
177 | eor.l %d1, %d0 | else compare signs | ||
178 | bge .L267 | if same, add delta to weight | ||
179 | sub.l %a3, %d3 | else subtract delta from weight | ||
180 | sub.l %a3, %d3 | subtract again instead of branch | ||
181 | .L267: add.l %a3, %d3 | add delta to weight | ||
182 | |||
183 | .L266: move.l %acc0, %d2 | d2 = rounded product | ||
184 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
185 | move.l %d2, (%a1)+ | ||
186 | |||
187 | .L268: move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1 (second channel; a1 already advanced by 4) | ||
188 | lea (%a0,%a0.l*2), %a0 | a0 = 3 * bptr [-1] | ||
189 | move.l %a0, %d0 | ||
190 | sub.l -16(%a1), %d0 | d0 = 3 * bptr [-1] - bptr [-2] | ||
191 | asr.l #1, %d0 | arithmetic shift right by one | ||
192 | beq .L261 | if zero, skip calculation | ||
193 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
194 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B | ||
195 | mac.l %d0, %d4, %acc0 | ||
196 | move.l (%a1), %d1 | d1 = current value of the second channel | ||
197 | beq .L265 | if zero, leave weight_B unchanged | ||
198 | eor.l %d1, %d0 | else compare signs | ||
199 | bge .L270 | if same, add delta to weight | ||
200 | sub.l %a3, %d4 | else subtract delta from weight | ||
201 | sub.l %a3, %d4 | subtract again instead of branch | ||
202 | .L270: add.l %a3, %d4 | add delta to weight | ||
203 | |||
204 | .L265: move.l %acc0, %d2 | d2 = rounded product | ||
205 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
206 | move.l %d2, (%a1)+ | ||
207 | |||
208 | .L269: cmp.l %a1, %d5 | loop if bptr < eptr | ||
209 | jbhi term_18 | ||
210 | bra term_17_18_finish | exit through common path | ||
211 | |||
212 | .L260: addq.l #4, %a1 | bump pointer and jump back into loop | ||
213 | bra .L268 | ||
214 | |||
215 | .L261: addq.l #4, %a1 | bump pointer and jump back into loop | ||
216 | bra .L269 | ||
217 | |||
218 | term_17_18_finish: | common exit for terms 17 and 18; a1 = eptr here | ||
219 | move.l -4(%a1), 40(%a2) | store dpp->samples_A [0-1], B [0-1]: offset 40 = samples_B [0] | ||
220 | move.l -8(%a1), 8(%a2) | offset 8 = samples_A [0] | ||
221 | move.l -12(%a1), 44(%a2) | offset 44 = samples_B [1] | ||
222 | move.l -16(%a1), 12(%a2) | offset 12 = samples_A [1] | ||
223 | jbra finish_up | ||
224 | |||
225 | |------------------------------------------------------------------------------ | ||
226 | | Loop to handle default terms (i.e. 1 - 8) | ||
227 | | | ||
228 | | a0 = tptr d0 = tptr [0] | ||
229 | | a1 = bptr d1 = initial bptr [0] | ||
230 | | a2 = dpp-> d2 = updated bptr [0] | ||
231 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
232 | | a4 = d4 = dpp->weight_B << 17 | ||
233 | | a5 = d5 = eptr | ||
234 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
235 | |------------------------------------------------------------------------------ | ||
236 | |||
237 | term_default: | ||
238 | move.w (%a2), %d0 | a0 = a1 - (dpp->term * 8) | ||
239 | ext.l %d0 | ||
240 | lsl.l #3, %d0 | d0 = term * 8 bytes (one stereo sample per term step) | ||
241 | move.l %a1, %a0 | ||
242 | sub.l %d0, %a0 | a0 = tptr, trails bptr by term stereo samples | ||
243 | |||
244 | term_default_loop: | ||
245 | move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero | ||
246 | beq .L271 | ||
247 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
248 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A | ||
249 | mac.l %d0, %d3, %acc0 | ||
250 | move.l (%a1), %d1 | d1 = bptr [0] | ||
251 | beq .L277 | if zero, leave weight_A unchanged | ||
252 | eor.l %d1, %d0 | else compare signs | ||
253 | bge .L278 | if same, add delta to weight | ||
254 | sub.l %a3, %d3 | else subtract delta from weight | ||
255 | sub.l %a3, %d3 | subtract again instead of branch | ||
256 | .L278: add.l %a3, %d3 | add delta to weight | ||
257 | |||
258 | .L277: move.l %acc0, %d2 | d2 = rounded product | ||
259 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
260 | move.l %d2, (%a1)+ | ||
261 | |||
262 | .L275: move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero | ||
263 | beq .L272 | ||
264 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
265 | asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B | ||
266 | mac.l %d0, %d4, %acc0 | ||
267 | move.l (%a1), %d1 | d1 = current value of the second channel | ||
268 | beq .L276 | if zero, leave weight_B unchanged | ||
269 | eor.l %d1, %d0 | else compare signs | ||
270 | bge .L281 | if same, add delta to weight | ||
271 | sub.l %a3, %d4 | else subtract delta from weight | ||
272 | sub.l %a3, %d4 | subtract again instead of branch | ||
273 | .L281: add.l %a3, %d4 | add delta to weight | ||
274 | |||
275 | .L276: move.l %acc0, %d2 | d2 = rounded product | ||
276 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
277 | move.l %d2, (%a1)+ | ||
278 | |||
279 | .L274: cmp.l %a1, %d5 | loop back if bptr < eptr | ||
280 | jbhi term_default_loop | ||
281 | move.w (%a2), %d0 | d0.w = term (decremented at .L323; upper bits are masked off by the and.l there) | ||
282 | moveq.l #8, %d1 | d1 = loop counter (8 stereo samples are copied back) | ||
283 | |||
284 | .L323: subq.l #1, %d0 | back up & mask index | ||
285 | and.l #7, %d0 | ||
286 | move.l -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0] | ||
287 | move.l -(%a1), 8(%a2,%d0.l*4) | store dpp->samples_A [d0] | ||
288 | subq.l #1, %d1 | loop on count | ||
289 | jbne .L323 | ||
290 | jbra finish_up | ||
291 | |||
292 | .L271: addq.l #4, %a1 | bump pointer and jump back into loop | ||
293 | bra .L275 | ||
294 | |||
295 | .L272: addq.l #4, %a1 | bump pointer and jump back into loop | ||
296 | bra .L274 | ||
297 | |||
298 | |||
299 | |------------------------------------------------------------------------------ | ||
300 | | Loop to handle term = -1 condition | ||
301 | | | ||
302 | | a0 = d0 = decorrelation sample | ||
303 | | a1 = bptr d1 = initial bptr [0] | ||
304 | | a2 = dpp-> d2 = updated bptr [0] | ||
305 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
306 | | a4 = d4 = dpp->weight_B << 17 | ||
307 | | a5 = d5 = eptr | ||
308 | | a6 = d6 = 1024 << 17 | ||
309 | | a7 = d7 = -1024 << 17 | ||
310 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
311 | |------------------------------------------------------------------------------ | ||
312 | |||
313 | term_minus_1: | ||
314 | move.l -4(%a1), %d0 | d0 = bptr [-1] | ||
315 | beq .L402 | if zero, bptr [0] is left unchanged | ||
316 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
317 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) | ||
318 | mac.l %d0, %d3, %acc0 | ||
319 | move.l (%a1), %d1 | d1 = bptr [0] | ||
320 | beq .L405 | if zero, leave weight_A unchanged | ||
321 | eor.l %d1, %d0 | else compare signs | ||
322 | bge .L404 | if same, add delta to weight | ||
323 | sub.l %a3, %d3 | else subtract delta from weight | ||
324 | cmp.l %d7, %d3 | check for negative clip limit | ||
325 | bge .L405 | ||
326 | move.l %d7, %d3 | clip weight_A to -1024 << 17 | ||
327 | bra .L405 | ||
328 | |||
329 | .L404: add.l %a3, %d3 | add delta to weight | ||
330 | cmp.l %d6, %d3 | check for positive clip limit | ||
331 | ble .L405 | ||
332 | move.l %d6, %d3 | clip weight_A to 1024 << 17 | ||
333 | |||
334 | .L405: move.l %acc0, %d0 | d0 = rounded product | ||
335 | add.l %d1, %d0 | add applied weight to bptr [0], store | ||
336 | move.l %d0, (%a1)+ | ||
337 | beq .L401 | if the new bptr [0] is zero, skip bptr [1] | ||
338 | |||
339 | .L410: move.l %acc1, %acc0 | d0 = new bptr [0], used as the sample for bptr [1] | ||
340 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) | ||
341 | mac.l %d0, %d4, %acc0 | ||
342 | move.l (%a1), %d1 | d1 = bptr [1] | ||
343 | beq .L403 | if zero, leave weight_B unchanged | ||
344 | eor.l %d1, %d0 | else compare signs | ||
345 | bge .L407 | if same, add delta to weight | ||
346 | sub.l %a3, %d4 | else subtract delta from weight | ||
347 | cmp.l %d7, %d4 | check for negative clip limit | ||
348 | bge .L403 | ||
349 | move.l %d7, %d4 | clip weight_B to -1024 << 17 | ||
350 | bra .L403 | ||
351 | |||
352 | .L407: add.l %a3, %d4 | add delta to weight | ||
353 | cmp.l %d6, %d4 | check for positive clip limit | ||
354 | ble .L403 | ||
355 | move.l %d6, %d4 | clip weight_B to 1024 << 17 | ||
356 | |||
357 | .L403: move.l %acc0, %d2 | d2 = rounded product | ||
358 | add.l %d1, %d2 | add applied weight to bptr [1], store | ||
359 | move.l %d2, (%a1)+ | ||
360 | |||
361 | .L411: cmp.l %a1, %d5 | loop back if bptr < eptr | ||
362 | jbhi term_minus_1 | ||
363 | move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] | ||
364 | jbra finish_up | ||
365 | |||
366 | .L402: move.l (%a1)+, %d0 | bptr [-1] was zero: d0 = unchanged bptr [0], advance pointer | ||
367 | bne .L410 | if nonzero, go process bptr [1] | ||
368 | |||
369 | .L401: addq.l #4, %a1 | skip bptr [1] and rejoin loop | ||
370 | bra .L411 | ||
371 | |||
372 | |||
373 | |------------------------------------------------------------------------------ | ||
374 | | Loop to handle term = -2 condition | ||
375 | | | ||
376 | | a0 = d0 = decorrelation sample | ||
377 | | a1 = bptr d1 = initial bptr [0] | ||
378 | | a2 = dpp-> d2 = updated bptr [0] | ||
379 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
380 | | a4 = d4 = dpp->weight_B << 17 | ||
381 | | a5 = d5 = eptr | ||
382 | | a6 = d6 = 1024 << 17 | ||
383 | | a7 = d7 = -1024 << 17 | ||
384 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
385 | |------------------------------------------------------------------------------ | ||
386 | |||
387 | term_minus_2: | ||
388 | move.l -8(%a1), %d0 | d0 = bptr [-2] | ||
389 | beq .L511 | if zero, bptr [1] is left unchanged | ||
390 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
391 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) | ||
392 | mac.l %d0, %d4, %acc0 | ||
393 | move.l 4(%a1), %d1 | d1 = bptr [1] | ||
394 | beq .L505 | if zero, leave weight_B unchanged | ||
395 | eor.l %d1, %d0 | else compare signs | ||
396 | bge .L504 | if same, add delta to weight | ||
397 | sub.l %a3, %d4 | else subtract delta from weight | ||
398 | cmp.l %d7, %d4 | check for negative clip limit | ||
399 | bge .L505 | ||
400 | move.l %d7, %d4 | clip weight_B to -1024 << 17 | ||
401 | bra .L505 | ||
402 | |||
403 | .L504: add.l %a3, %d4 | add delta to weight | ||
404 | cmp.l %d6, %d4 | check for positive clip limit | ||
405 | ble .L505 | ||
406 | move.l %d6, %d4 | clip weight_B to 1024 << 17 | ||
407 | |||
408 | .L505: move.l %acc0, %d0 | d0 = rounded product | ||
409 | add.l %d1, %d0 | add applied weight to bptr [1], store | ||
410 | move.l %d0, 4(%a1) | ||
411 | beq .L512 | if the new bptr [1] is zero, skip bptr [0] | ||
412 | |||
413 | .L510: move.l %acc1, %acc0 | d0 = new bptr [1], used as the sample for bptr [0] | ||
414 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) | ||
415 | mac.l %d0, %d3, %acc0 | ||
416 | move.l (%a1), %d1 | d1 = bptr [0] | ||
417 | beq .L503 | if zero, leave weight_A unchanged | ||
418 | eor.l %d1, %d0 | else compare signs | ||
419 | bge .L507 | if same, add delta to weight | ||
420 | sub.l %a3, %d3 | else subtract delta from weight | ||
421 | cmp.l %d7, %d3 | check for negative clip limit | ||
422 | bge .L503 | ||
423 | move.l %d7, %d3 | clip weight_A to -1024 << 17 | ||
424 | bra .L503 | ||
425 | |||
426 | .L507: add.l %a3, %d3 | add delta to weight | ||
427 | cmp.l %d6, %d3 | check for positive clip limit | ||
428 | ble .L503 | ||
429 | move.l %d6, %d3 | clip weight_A to 1024 << 17 | ||
430 | |||
431 | .L503: move.l %acc0, %d2 | d2 = rounded product | ||
432 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
433 | move.l %d2, (%a1) | ||
434 | |||
435 | .L512: addq.l #8, %a1 | advance bptr by one stereo sample | ||
436 | cmp.l %a1, %d5 | loop if bptr < eptr | ||
437 | jbhi term_minus_2 | ||
438 | move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2] (8 bytes back) | ||
439 | jbra finish_up | ||
440 | |||
441 | .L511: move.l 4(%a1), %d0 | bptr [-2] was zero: d0 = unchanged bptr [1] | ||
442 | beq .L512 | if that is zero too, skip bptr [0] | ||
443 | bra .L510 | else go process bptr [0] | ||
444 | |||
445 | |||
446 | |------------------------------------------------------------------------------ | ||
447 | | Loop to handle term = -3 condition | ||
448 | | | ||
449 | | a0 = d0 = decorrelation sample | ||
450 | | a1 = bptr d1 = initial bptr [0] | ||
451 | | a2 = dpp-> d2 = updated bptr [0] | ||
452 | | a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 | ||
453 | | a4 = d4 = dpp->weight_B << 17 | ||
454 | | a5 = d5 = eptr | ||
455 | | a6 = d6 = 1024 << 17 | ||
456 | | a7 = d7 = -1024 << 17 | ||
457 | | macsr = 0x20 acc1 = 0x00 0000 80 | ||
458 | |------------------------------------------------------------------------------ | ||
459 | |||
460 | term_minus_3: | ||
461 | move.l -4(%a1), %d0 | d0 = bptr [-1] | ||
462 | beq .L301 | if zero, bptr [0] is left unchanged | ||
463 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
464 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) | ||
465 | mac.l %d0, %d3, %acc0 | ||
466 | move.l (%a1), %d1 | d1 = bptr [0] | ||
467 | beq .L320 | if zero, leave weight_A unchanged | ||
468 | eor.l %d1, %d0 | else compare signs | ||
469 | bge .L319 | if same, add delta to weight | ||
470 | sub.l %a3, %d3 | else subtract delta from weight | ||
471 | cmp.l %d7, %d3 | check for negative clip limit | ||
472 | bge .L320 | ||
473 | move.l %d7, %d3 | clip weight_A to -1024 << 17 | ||
474 | bra .L320 | ||
475 | |||
476 | .L319: add.l %a3, %d3 | add delta to weight | ||
477 | cmp.l %d6, %d3 | check for positive clip limit | ||
478 | ble .L320 | ||
479 | move.l %d6, %d3 | clip weight_A to 1024 << 17 | ||
480 | |||
481 | .L320: move.l %acc0, %d2 | d2 = rounded product | ||
482 | add.l %d1, %d2 | add applied weight to bptr [0], store | ||
483 | move.l %d2, (%a1)+ | ||
484 | |||
485 | .L330: move.l -12(%a1), %d0 | d0 = bptr [-2] (a1 already advanced by 4, hence -12) | ||
486 | beq .L302 | if zero, bptr [1] is left unchanged | ||
487 | move.l %acc1, %acc0 | preload acc0 with the rounding constant held in acc1 | ||
488 | asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) | ||
489 | mac.l %d0, %d4, %acc0 | ||
490 | move.l (%a1), %d1 | d1 = bptr [1] | ||
491 | beq .L318 | if zero, leave weight_B unchanged | ||
492 | eor.l %d1, %d0 | else compare signs | ||
493 | bge .L322 | if same, add delta to weight | ||
494 | sub.l %a3, %d4 | else subtract delta from weight | ||
495 | cmp.l %d7, %d4 | check for negative clip limit | ||
496 | bge .L318 | ||
497 | move.l %d7, %d4 | clip weight_B to -1024 << 17 | ||
498 | bra .L318 | ||
499 | |||
500 | .L322: add.l %a3, %d4 | add delta to weight | ||
501 | cmp.l %d6, %d4 | check for positive clip limit | ||
502 | ble .L318 | ||
503 | move.l %d6, %d4 | clip weight_B to 1024 << 17 | ||
504 | |||
505 | .L318: move.l %acc0, %d2 | d2 = rounded product | ||
506 | add.l %d1, %d2 | add applied weight to bptr [1], store | ||
507 | move.l %d2, (%a1)+ | ||
508 | |||
509 | .L331: cmp.l %a1, %d5 | loop back if bptr < eptr | ||
510 | jbhi term_minus_3 | ||
511 | move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] | ||
512 | move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2] | ||
513 | jbra finish_up | ||
514 | |||
515 | .L301: addq.l #4, %a1 | bump pointer and jump back into loop | ||
516 | bra .L330 | ||
517 | |||
518 | .L302: addq.l #4, %a1 | bump pointer and jump back into loop | ||
519 | bra .L331 | ||
520 | |||
521 | | finish and return | ||
522 | |||
523 | finish_up: | ||
524 | moveq.l #17, %d0 | shift weights back right 17 places | ||
525 | asr.l %d0, %d3 | ||
526 | asr.l %d0, %d4 | ||
527 | move.w %d3, 4(%a2) | dpp->weight_A = weight_A | ||
528 | move.w %d4, 6(%a2) | dpp->weight_B = weight_B | ||
529 | |||
530 | clr.l %d0 | clear up EMAC: zero acc0 and acc1 | ||
531 | move.l %d0, %acc0 | ||
532 | move.l %d0, %acc1 | NOTE(review): macsr is not written again after the setup code, so it stays at 0x20 | ||
533 | |||
534 | return_only: | ||
535 | movem.l (%sp), %d2-%d7/%a2-%a6 | restore d2-d7 and a2-a6 | ||
536 | lea (44,%sp), %sp | release the 44-byte save area | ||
537 | rts | ||