summaryrefslogtreecommitdiff
path: root/lib/rbcodec/dsp/dsp_cf.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/dsp/dsp_cf.S')
-rw-r--r--lib/rbcodec/dsp/dsp_cf.S611
1 files changed, 611 insertions, 0 deletions
diff --git a/lib/rbcodec/dsp/dsp_cf.S b/lib/rbcodec/dsp/dsp_cf.S
new file mode 100644
index 0000000000..cda811a7d5
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_cf.S
@@ -0,0 +1,611 @@
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2006 Thom Johansen
 * Portions Copyright (C) 2007 Michael Sevakis
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/****************************************************************************
 * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
 *
 * Multiplies every sample of every channel in place by data->gain (s8.23).
 *
 * Each output word is rebuilt from the EMAC as
 *     (movclr(%acc0) << 8) | (low byte of %accext01)
 *
 * Stack after the prologue (20 bytes of saved registers + return address):
 *     24(%sp) = count, 28(%sp) = data, 32(%sp) = buf
 *
 * Notes:
 *  - The first sample is multiplied before count is tested, so count is
 *    expected to be >= 1.
 *  - Loads run one sample ahead of stores (parallel mac load), so
 *    buf[ch][count] is read but never written.
 *  - %macsr is not touched here; the EMAC mode is whatever the caller set.
 */
    .section .text
    .align  2
    .global dsp_apply_gain
dsp_apply_gain:
    lea.l   -20(%sp), %sp              | save registers
    movem.l %d2-%d4/%a2-%a3, (%sp)     |
    movem.l 28(%sp), %a0-%a1           | %a0 = data,
                                       | %a1 = buf
    move.l  4(%a0), %d1                | %d1 = data->num_channels
    move.l  32(%a0), %a0               | %a0 = data->gain (in s8.23)
10: | channel loop                     |
    move.l  24(%sp), %d0               | %d0 = count (reloaded per channel)
    move.l  -4(%a1, %d1.l*4), %a2      | %a2 = s = buf[ch-1]
    move.l  %a2, %a3                   | %a3 = d = s
    move.l  (%a2)+, %d2                | %d2 = *s++,
    mac.l   %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
    subq.l  #1, %d0                    | --count > 0 ? : effectively n++
    ble.b   30f | loop done            | no? finish up
20: | loop                             |
    move.l  %accext01, %d4             | fetch S(n-1)[7:0]
    movclr.l %acc0, %d3                | fetch S(n-1)[40:8] in %d3[31:0]
    asl.l   #8, %d3                    | *d++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
    mac.l   %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
    move.b  %d4, %d3                   | merge in the low byte
    move.l  %d3, (%a3)+                |
    subq.l  #1, %d0                    | --count > 0 ? : effectively n++
    bgt.b   20b | loop                 | yes? do more samples
30: | loop done                        |
    move.l  %accext01, %d4             | fetch S(n-1)[7:0]
    movclr.l %acc0, %d3                | fetch S(n-1)[40:8] in %d3[31:0]
    asl.l   #8, %d3                    | *d = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
    move.b  %d4, %d3                   |
    move.l  %d3, (%a3)                 |
    subq.l  #1, %d1                    | next channel
    bgt.b   10b | channel loop         |
    movem.l (%sp), %d2-%d4/%a2-%a3     | restore registers
    lea.l   20(%sp), %sp               | cleanup stack
    rts                                |
    .size   dsp_apply_gain,.-dsp_apply_gain

/****************************************************************************
 * void apply_crossfeed(int count, int32_t *buf[])
 *
 * Runs the crossfeed filter over buf[0]/buf[1] in place, using and updating
 * the state in the external object crossfeed_data.
 *
 * Offsets into crossfeed_data as used by this routine:
 *      +0          direct gain
 *      +4  .. +12  filter coefs b0, b1, a1
 *      +16 .. +28  four filter history words (%d0-%d3)
 *      +136        saved delay line pointer; also the wrap limit address
 * The delay pointer is wrapped by subtracting 104 bytes once it reaches
 * crossfeed_data+136.
 *
 * Stack after the prologue (44 bytes of saved registers + return address):
 *      48(%sp) = count, 52(%sp) = buf
 *
 * The loop is entered at 20: so that outputs are written one iteration late;
 * the final pair is written after the loop. Input loads use (%a4)/(%a5)
 * without post-increment, so nothing past buf[ch][count-1] is read.
 * %macsr is not touched here.
 */
    .section .text
    .align  2
    .global apply_crossfeed
apply_crossfeed:
    lea.l   -44(%sp), %sp              |
    movem.l %d2-%d7/%a2-%a6, (%sp)     | save all regs
    movem.l 48(%sp), %d7/%a4           | %d7 = count, %a4 = src
    movem.l (%a4), %a4-%a5             | %a4 = src[0], %a5 = src[1]
    lea.l   crossfeed_data, %a1        | %a1 = &crossfeed_data
    move.l  (%a1)+, %d6                | %d6 = direct gain
    movem.l 12(%a1), %d0-%d3           | fetch filter history samples
    move.l  132(%a1), %a0              | fetch delay line address
    movem.l (%a1), %a1-%a3             | load filter coefs
    lea.l   crossfeed_data+136, %a6    | %a6 = delay line wrap limit
    bra.b   20f | loop start           | go to loop start point
    /* Register usage in loop:
     * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
     * %a4 = buf[0], %a5 = buf[1],
     * %a6 = delay line pointer wrap limit,
     * %d0..%d3 = history
     * %d4..%d5 = temp.
     * %d6 = direct gain,
     * %d7 = count
     *
     * NOTE(review): %acc0 takes its a1 feedback term from %d1, which is
     * reloaded from %acc1 each iteration, and %acc1 takes its feedback from
     * %d3, which is reloaded from %acc0. Confirm this cross-coupling is the
     * intended filter topology.
     */
10: | loop                             |
    movclr.l %acc0, %d4                | write outputs
    move.l  %d4, (%a4)+                | .
    movclr.l %acc1, %d5                | .
    move.l  %d5, (%a5)+                | .
20: | loop start                       |
    mac.l   %a2, %d0, (%a0)+, %d0, %acc0 | %acc0  = b1*dl[n - 1], %d0 = dl[n]
    mac.l   %a1, %d0             , %acc0 | %acc0 += b0*dl[n]
    mac.l   %a3, %d1, (%a5), %d5, %acc0  | %acc0 += a1*y_r[n - 1], load R
    mac.l   %a2, %d2, (%a0)+, %d2, %acc1 | %acc1  = b1*dr[n - 1], %d2 = dr[n]
    mac.l   %a1, %d2             , %acc1 | %acc1 += b0*dr[n]
    mac.l   %a3, %d3, (%a4), %d4, %acc1  | %acc1 += a1*y_l[n - 1], load L
    movem.l %d4-%d5, -8(%a0)           | save left & right inputs to delay line
    move.l  %acc0, %d3                 | get filtered delayed left sample (y_l[n])
    move.l  %acc1, %d1                 | get filtered delayed right sample (y_r[n])
    mac.l   %d6, %d4, %acc0            | %acc0 += gain*x_l[n]
    mac.l   %d6, %d5, %acc1            | %acc1 += gain*x_r[n]
    cmp.l   %a6, %a0                   | wrap %a0 if passed end
    bhs.b   30f | wrap buffer          |
    | tpf.l takes the next two words (the 4-byte lea below) as its operand,
    | so on the fall-through path the lea is skipped without a taken branch.
    .word   0x51fb  | tpf.l            | trap the buffer wrap
30: | wrap buffer                      | ...fwd taken branches more costly
    lea.l   -104(%a0), %a0             | wrap it up
    subq.l  #1, %d7                    | --count > 0 ?
    bgt.b   10b | loop                 | yes? do more
    movclr.l %acc0, %d4                | write last outputs
    move.l  %d4, (%a4)                 | .
    movclr.l %acc1, %d5                | .
    move.l  %d5, (%a5)                 | .
    lea.l   crossfeed_data+16, %a1     | save data back to struct
    movem.l %d0-%d3, (%a1)             | ...history
    move.l  %a0, 120(%a1)              | ...delay_p (crossfeed_data+136)
    movem.l (%sp), %d2-%d7/%a2-%a6     | restore all regs
    lea.l   44(%sp), %sp               |
    rts                                |
    .size   apply_crossfeed,.-apply_crossfeed

/****************************************************************************
 * int dsp_downsample(int count, struct dsp_data *data,
 *                    int32_t *src[], int32_t *dst[])
 *
 * Linear-interpolating resampler for delta >= 1.0 (16.16 fixed point phase
 * and delta). For each channel, output samples are produced while
 * pos = phase >> 16 is below count. Returns the number of samples written
 * to dst[0].
 *
 * Fields of *data as used here:
 *      4(data)            num_channels
 *      8(data)            resample_data.delta
 *      12(data)           resample_data.phase
 *      12 + ch*4 (data)   resample_data.last_sample[ch-1]
 *
 * Stack after the prologue (40 bytes of saved registers + return address):
 *      44(%sp) = count, 48(%sp) = data, 52(%sp) = src, 56(%sp) = dst
 *
 * Channels are processed from the highest index down to 0, so on exit %a4
 * is the write pointer for dst[0] and %d5 the phase after the last channel.
 * %macsr is not touched here.
 */
    .section .text
    .align  2
    .global dsp_downsample
dsp_downsample:
    lea.l   -40(%sp), %sp              | save non-clobberables
    movem.l %d2-%d7/%a2-%a5, (%sp)     |
    movem.l 44(%sp), %d2/%a0-%a2       | %d2 = count
                                       | %a0 = data
                                       | %a1 = src
                                       | %a2 = dst
    movem.l 4(%a0), %d3-%d4            | %d3 = ch = data->num_channels
                                       | %d4 = delta = data->resample_data.delta
    moveq.l #16, %d7                   | %d7 = shift
10: | channel loop                     |
    move.l  12(%a0), %d5               | %d5 = phase = data->resample_data.phase
    move.l  -4(%a1, %d3.l*4), %a3      | %a3 = s = src[ch-1]
    move.l  -4(%a2, %d3.l*4), %a4      | %a4 = d = dst[ch-1]
    lea.l   12(%a0, %d3.l*4), %a5      | %a5 = &data->resample_data.last_sample[ch-1]
    move.l  (%a5), %d0                 | %d0 = last = data->resample_data.last_sample[ch-1]
    move.l  -4(%a3, %d2.l*4), (%a5)    | data->resample_data.last_sample[ch-1] = s[count-1]
    move.l  %d5, %d6                   | %d6 = pos = phase >> 16
    lsr.l   %d7, %d6                   |
    cmp.l   %d2, %d6                   | past end of samples?
    bge.b   40f | skip resample loop   | yes? skip loop
    tst.l   %d6                        | need last sample of prev. frame?
    bne.b   20f | resample loop        | no? start main loop
    move.l  (%a3, %d6.l*4), %d1        | %d1 = s[pos]
    bra.b   30f | resample start last  | start with last (last in %d0)
20: | resample loop                    |
    lea.l   -4(%a3, %d6.l*4), %a5      | load s[pos-1] and s[pos]
    movem.l (%a5), %d0-%d1             |
30: | resample start last              |
    sub.l   %d0, %d1                   | %d1 = diff = s[pos] - s[pos-1]
    move.l  %d0, %acc0                 | %acc0 = previous sample
    move.l  %d5, %d0                   | frac = (phase << 16) >> 1
    lsl.l   %d7, %d0                   |
    lsr.l   #1, %d0                    |
    mac.l   %d0, %d1, %acc0            | %acc0 += frac * diff
    add.l   %d4, %d5                   | phase += delta
    move.l  %d5, %d6                   | pos = phase >> 16
    lsr.l   %d7, %d6                   |
    movclr.l %acc0, %d0                |
    move.l  %d0, (%a4)+                | *d++ = %d0
    cmp.l   %d2, %d6                   | pos < count?
    blt.b   20b | resample loop        | yes? continue resampling
40: | skip resample loop               |
    subq.l  #1, %d3                    | ch > 0?
    bgt.b   10b | channel loop         | yes? process next channel
    lsl.l   %d7, %d2                   | wrap phase to start of next frame
    sub.l   %d2, %d5                   | data->resample_data.phase =
    move.l  %d5, 12(%a0)               | ... phase - (count << 16)
    move.l  %a4, %d0                   | return d - dst[0]
    sub.l   (%a2), %d0                 |
    asr.l   #2, %d0                    | convert bytes->samples
    movem.l (%sp), %d2-%d7/%a2-%a5     | restore non-clobberables
    lea.l   40(%sp), %sp               | cleanup stack
    rts                                | buh-bye
    .size   dsp_downsample,.-dsp_downsample

/****************************************************************************
 * int dsp_upsample(int count, struct dsp_data *dsp,
 *                  const int32_t *src[], int32_t *dst[])
 *
 * Linear-interpolating resampler for delta < 1.0. Phase and delta are kept
 * word-swapped (fraction in the high word) so that the carry out of
 * "phase += delta" signals that the source position advanced by one.
 * Returns the number of samples written to dst[0].
 *
 * Fields of *data as used here:
 *      4(data)            num_channels
 *      8(data)            resample_data.delta
 *      12(data)           resample_data.phase
 *      12 + ch*4 (data)   resample_data.last_sample[ch-1]
 *
 * Stack after the prologue (40 bytes of saved registers + return address):
 *      44(%sp) = count, 48(%sp) = data, 52(%sp) = src, 56(%sp) = dst
 *
 * Channels are processed from the highest index down to 0, so on exit %a4
 * is the write pointer for dst[0]. %macsr is not touched here.
 */
    .section .text
    .align  2
    .global dsp_upsample
dsp_upsample:
    lea.l   -40(%sp), %sp              | save non-clobberables
    movem.l %d2-%d7/%a2-%a5, (%sp)     |
    movem.l 44(%sp), %d2/%a0-%a2       | %d2 = count
                                       | %a0 = data
                                       | %a1 = src
                                       | %a2 = dst
    movem.l 4(%a0), %d3-%d4            | %d3 = ch = channels
                                       | %d4 = delta = data->resample_data.delta
    swap    %d4                        | swap delta to high word to use...
                                       | ...carries to increment position
10: | channel loop                     |
    move.l  12(%a0), %d5               | %d5 = phase = data->resample_data.phase
    move.l  -4(%a1, %d3.l*4), %a3      | %a3 = s = src[ch-1]
    lea.l   12(%a0, %d3.l*4), %a4      | %a4 = &data->resample_data.last_sample[ch-1]
    lea.l   -4(%a3, %d2.l*4), %a5      | %a5 = src_end = &s[count-1]
    move.l  (%a4), %d0                 | %d0 = last = data->resample_data.last_sample[ch-1]
    move.l  (%a5), (%a4)               | data->resample_data.last_sample[ch-1] = s[count-1]
    move.l  -4(%a2, %d3.l*4), %a4      | %a4 = d = dst[ch-1]
    move.l  (%a3)+, %d1                | fetch first sample - might throw this...
                                       | ...away later but we'll be preincremented
    move.l  %d1, %d6                   | save sample value
    sub.l   %d0, %d1                   | %d1 = diff = s[0] - last
    swap    %d5                        | swap phase to high word to use
                                       | carries to increment position
    move.l  %d5, %d7                   | %d7 = pos = phase >> 16
    clr.w   %d5                        | %d5 = fraction only (high word)
    eor.l   %d5, %d7                   | %d7 = pos only; pos == 0?
    beq.b   40f | loop start           | yes? start loop
    cmp.l   %d2, %d7                   | past end of samples?
    bge.b   50f | skip resample loop   | yes? go to next channel and collect info
    lea.l   (%a3, %d7.l*4), %a3        | %a3 = s = &s[pos+1]
    movem.l -8(%a3), %d0-%d1           | %d0 = s[pos-1], %d1 = s[pos]
    move.l  %d1, %d6                   | save sample value
    sub.l   %d0, %d1                   | %d1 = diff = s[pos] - s[pos-1]
    bra.b   40f | loop start           |
20: | next sample loop                 |
    move.l  %d6, %d0                   | move previous sample to %d0
    move.l  (%a3)+, %d1                | fetch next sample
    move.l  %d1, %d6                   | save sample value
    sub.l   %d0, %d1                   | %d1 = diff = s[pos] - s[pos-1]
30: | same sample loop                 |
    movclr.l %acc0, %d7                | %d7 = result
    move.l  %d7, (%a4)+                | *d++ = %d7
40: | loop start                       |
    lsr.l   #1, %d5                    | make phase into frac
    move.l  %d0, %acc0                 | %acc0 = s[pos-1]
    mac.l   %d1, %d5, %acc0            | %acc0 += diff * frac
    lsl.l   #1, %d5                    | restore frac to phase
    add.l   %d4, %d5                   | phase += delta
    bcc.b   30b | same sample loop     | no carry? same source sample again
    cmp.l   %a5, %a3                   | src <= src_end?
    bls.b   20b | next sample loop     | yes? continue resampling
    movclr.l %acc0, %d7                | %d7 = result
    move.l  %d7, (%a4)+                | *d++ = %d7
50: | skip resample loop               |
    subq.l  #1, %d3                    | ch > 0?
    bgt.b   10b | channel loop         | yes? process next channel
    swap    %d5                        | wrap phase to start of next frame
    move.l  %d5, 12(%a0)               | ...and save in data->resample_data.phase
    move.l  %a4, %d0                   | return d - dst[0]
    sub.l   (%a2), %d0                 |
    movem.l (%sp), %d2-%d7/%a2-%a5     | restore non-clobberables
    asr.l   #2, %d0                    | convert bytes->samples
    lea.l   40(%sp), %sp               | cleanup stack
    rts                                | buh-bye
    .size   dsp_upsample,.-dsp_upsample

/****************************************************************************
 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
 *
 * Mix left and right channels 50/50 into a center channel: each output
 * sample of both buf[0] and buf[1] becomes l*0.5 + r*0.5 (0x40000000 is
 * used as the 0.5 multiplier).
 *
 * Notes:
 *  - The first sample pair is multiplied before count is tested, so count
 *    is expected to be >= 1.
 *  - Loads run one sample ahead of stores, so buf[ch][count] is read but
 *    never written.
 *  - %macsr is not touched here; the EMAC mode is whatever the caller set.
 */
    .section .text
    .align  2
    .global channels_process_sound_chan_mono
channels_process_sound_chan_mono:
    movem.l 4(%sp), %d0/%a0            | %d0 = count, %a0 = buf
    lea.l   -20(%sp), %sp              | save registers
    movem.l %d2-%d4/%a2-%a3, (%sp)     |
    movem.l (%a0), %a0-%a1             | get channel pointers
    move.l  %a0, %a2                   | use separate dst pointers since read
    move.l  %a1, %a3                   | pointers run one ahead of write
    move.l  #0x40000000, %d3           | %d3 = 0.5
    move.l  (%a0)+, %d1                | prime the input registers
    move.l  (%a1)+, %d2                |
    mac.l   %d1, %d3, (%a0)+, %d1, %acc0 | %acc0  = l/2, load next l
    mac.l   %d2, %d3, (%a1)+, %d2, %acc0 | %acc0 += r/2, load next r
    subq.l  #1, %d0                    | --count > 0 ?
    ble.b   20f | loop done            | no? only one sample to output
10: | loop                             |
    movclr.l %acc0, %d4                | L = R = l/2 + r/2
    mac.l   %d1, %d3, (%a0)+, %d1, %acc0 |
    mac.l   %d2, %d3, (%a1)+, %d2, %acc0 |
    move.l  %d4, (%a2)+                | output to original buffer
    move.l  %d4, (%a3)+                |
    subq.l  #1, %d0                    |
    bgt.b   10b | loop                 |
20: | loop done                        |
    movclr.l %acc0, %d4                | output last sample
    move.l  %d4, (%a2)                 |
    move.l  %d4, (%a3)                 |
    movem.l (%sp), %d2-%d4/%a2-%a3     | restore registers
    lea.l   20(%sp), %sp               | cleanup
    rts                                |
    .size   channels_process_sound_chan_mono, \
          .-channels_process_sound_chan_mono

/****************************************************************************
 * void channels_process_sound_chan_custom(int count, int32_t *buf[])
 *
 * Apply stereo width (narrowing/expanding) effect, in place:
 *      L = l*dsp_sw_gain + r*dsp_sw_cross
 *      R = r*dsp_sw_gain + l*dsp_sw_cross
 * dsp_sw_gain and dsp_sw_cross are external longwords read once on entry.
 *
 * Notes:
 *  - The first sample pair is multiplied before count is tested, so count
 *    is expected to be >= 1.
 *  - Loads run one sample ahead of stores, so buf[ch][count] is read but
 *    never written.
 *  - %macsr is not touched here; the EMAC mode is whatever the caller set.
 */
    .section .text
    .align  2
    .global channels_process_sound_chan_custom
channels_process_sound_chan_custom:
    movem.l 4(%sp), %d0/%a0            | %d0 = count, %a0 = buf
    lea.l   -28(%sp), %sp              | save registers
    movem.l %d2-%d6/%a2-%a3, (%sp)     |
    movem.l (%a0), %a0-%a1             | get channel pointers
    move.l  %a0, %a2                   | use separate dst pointers since read
    move.l  %a1, %a3                   | pointers run one ahead of write
    move.l  dsp_sw_gain, %d3           | load straight (mid) gain
    move.l  dsp_sw_cross, %d4          | load cross (side) gain
    move.l  (%a0)+, %d1                | prime the input registers
    move.l  (%a1)+, %d2                |
    mac.l   %d1, %d3             , %acc0 | L = l*gain + r*cross
    mac.l   %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
    mac.l   %d2, %d4             , %acc0 |
    mac.l   %d2, %d3, (%a1)+, %d2, %acc1 |
    subq.l  #1, %d0                    | --count > 0 ?
    ble.b   20f | loop done            | no? only one sample to output
10: | loop                             |
    movclr.l %acc0, %d5                | %d5 = previous L result
    movclr.l %acc1, %d6                | %d6 = previous R result
    mac.l   %d1, %d3             , %acc0 | L = l*gain + r*cross
    mac.l   %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
    mac.l   %d2, %d4             , %acc0 |
    mac.l   %d2, %d3, (%a1)+, %d2, %acc1 |
    move.l  %d5, (%a2)+                |
    move.l  %d6, (%a3)+                |
    subq.l  #1, %d0                    |
    bgt.b   10b | loop                 |
20: | loop done                        |
    movclr.l %acc0, %d5                | output last sample
    movclr.l %acc1, %d6                |
    move.l  %d5, (%a2)                 |
    move.l  %d6, (%a3)                 |
    movem.l (%sp), %d2-%d6/%a2-%a3     | restore registers
    lea.l   28(%sp), %sp               | cleanup
    rts                                |
    .size   channels_process_sound_chan_custom, \
          .-channels_process_sound_chan_custom

/****************************************************************************
 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
 *
 * Separate channels into side channels, in place:
 *      L = l/2 - r/2
 *      R = -L
 * (0x40000000 is used as the 0.5 multiplier.)
 *
 * Notes:
 *  - The first sample pair is multiplied before count is tested, so count
 *    is expected to be >= 1.
 *  - Loads run one sample ahead of stores, so buf[ch][count] is read but
 *    never written.
 *  - %macsr is not touched here; the EMAC mode is whatever the caller set.
 */
    .section .text
    .align  2
    .global channels_process_sound_chan_karaoke
channels_process_sound_chan_karaoke:
    movem.l 4(%sp), %d0/%a0            | %d0 = count, %a0 = buf
    lea.l   -20(%sp), %sp              | save registers
    movem.l %d2-%d4/%a2-%a3, (%sp)     |
    movem.l (%a0), %a0-%a1             | get channel src pointers
    move.l  %a0, %a2                   | use separate dst pointers since read
    move.l  %a1, %a3                   | pointers run one ahead of write
    move.l  #0x40000000, %d3           | %d3 = 0.5
    move.l  (%a0)+, %d1                | prime the input registers
    move.l  (%a1)+, %d2                |
    mac.l   %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
    msac.l  %d2, %d3, (%a1)+, %d2, %acc0 |
    subq.l  #1, %d0                    | --count > 0 ?
    ble.b   20f | loop done            | no? only one sample to output
10: | loop                             |
    movclr.l %acc0, %d4                | %d4 = previous L result
    mac.l   %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
    msac.l  %d2, %d3, (%a1)+, %d2, %acc0 |
    move.l  %d4, (%a2)+                |
    neg.l   %d4                        | R = -L = -(l/2 - r/2) = r/2 - l/2
    move.l  %d4, (%a3)+                |
    subq.l  #1, %d0                    |
    bgt.b   10b | loop                 |
20: | loop done                        |
    movclr.l %acc0, %d4                | output last sample
    move.l  %d4, (%a2)                 |
    neg.l   %d4                        | R = -L = -(l/2 - r/2) = r/2 - l/2
    move.l  %d4, (%a3)                 |
    movem.l (%sp), %d2-%d4/%a2-%a3     | restore registers
    lea.l   20(%sp), %sp               | cleanup
    rts                                |
    .size   channels_process_sound_chan_karaoke, \
          .-channels_process_sound_chan_karaoke

/****************************************************************************
 * void sample_output_stereo(int count, struct dsp_data *data,
 *                           const int32_t *src[], int16_t *dst)
 *
 * Framework based on the ubiquitous Rockbox line transfer logic for
 * Coldfire CPUs.
 *
 * Does emac clamping and scaling (which proved faster than the usual
 * checks and branches - even single test clamping) and writes using
 * line burst transfers. Also better than writing a single L-R pair per
 * loop but a good deal more code.
 *
 * Attempting bursting during reads is rather futile since the source and
 * destination alignments rarely agree and too much complication will
 * slow us up. The parallel loads seem to do a bit better, at least until
 * a pcm buffer can always give line aligned chunks; aligning the dest
 * would then imply the source is aligned if the source buffers are.
 * For now longword alignment is assumed of both the source and dest.
 *
 * Each output longword is one interleaved L/R pair: the sample is
 * multiplied by (1 << (16 - scale)) on top of a 0x8000 rounding term, and
 * the high 16 bits of each channel's result are kept. "scale" is the
 * longword at offset 0 of *data.
 *
 * Structure: leading longwords up to the first 16-byte line boundary of
 * dst, then whole lines (four L/R pairs per movem), then trailing longwords.
 *
 * Stack after the prologue (48 bytes saved, including the caller's %macsr
 * in the %d1 slot, + return address):
 *      52(%sp) = count, 56(%sp) = data, 60(%sp) = src, 64(%sp) = dst
 * %macsr is switched to 0x80 for the duration and restored on exit.
 */
    .section .text
    .align  2
    .global sample_output_stereo
sample_output_stereo:
    lea.l   -48(%sp), %sp              | save registers
    move.l  %macsr, %d1                | caller's %macsr is saved via %d1...
    movem.l %d1-%d7/%a2-%a6, (%sp)     | ...together with the registers
    move.l  #0x80, %macsr              | put emac unit in signed int mode
    movem.l 52(%sp), %a0-%a2/%a4       | %a0 = count, %a1 = data,
                                       | %a2 = src, %a4 = dst
    lea.l   (%a4, %a0.l*4), %a0        | %a0 = end address
    move.l  (%a1), %d1                 | %d1 = scale
    sub.l   #16, %d1                   | %d1 = 16 - scale
    neg.l   %d1                        |
    moveq.l #1, %d0                    |
    asl.l   %d1, %d0                   |
    move.l  %d0, %a1                   | %a1 = multiplier: (1 << (16 - scale))
    move.l  #0x8000, %a6               | %a6 = rounding term
    movem.l (%a2), %a2-%a3             | get L/R channel pointers
    moveq.l #28, %d0                   | %d0 = second line bound
    add.l   %a4, %d0                   |
    and.l   #0xfffffff0, %d0           |
    cmp.l   %a0, %d0                   | at least a full line?
    bhi.w   40f | long loop 1 start    | no? do as trailing longwords
    sub.l   #16, %d0                   | %d0 = first line bound
    cmp.l   %a4, %d0                   | any leading longwords?
    bls.b   20f | line loop start      | no? start line loop
10: | long loop 0                      |
    move.l  (%a2)+, %d1                | read longword from L and R
    move.l  %a6, %acc0                 | preload rounding term
    move.l  %acc0, %acc1               |
    mac.l   %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
    mac.l   %d2, %a1, %acc1            | shift R to high word
    movclr.l %acc0, %d1                | get possibly saturated results
    movclr.l %acc1, %d2                |
    swap    %d2                        | move R to low word
    move.w  %d2, %d1                   | interleave MS 16 bits of each
    move.l  %d1, (%a4)+                | ...and write both
    cmp.l   %a4, %d0                   |
    bhi.b   10b | long loop 0          |
20: | line loop start                  |
    lea.l   -12(%a0), %a5              | %a5 = at or just before last line bound
30: | line loop                        |
    move.l  (%a3)+, %d4                | get next 4 R samples and scale
    move.l  %a6, %acc0                 | preload rounding term in all four
    move.l  %acc0, %acc1               |
    move.l  %acc1, %acc2               |
    move.l  %acc2, %acc3               |
    mac.l   %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
    mac.l   %d5, %a1, (%a3)+, %d6, %acc1 |
    mac.l   %d6, %a1, (%a3)+, %d7, %acc2 |
    mac.l   %d7, %a1, (%a2)+, %d0, %acc3 | ...and load first L sample
    lea.l   16(%a4), %a4               | increment dest here, mitigate stalls
    movclr.l %acc0, %d4                | obtain R results
    movclr.l %acc1, %d5                |
    movclr.l %acc2, %d6                |
    movclr.l %acc3, %d7                |
    move.l  %a6, %acc0                 | preload rounding term again
    move.l  %acc0, %acc1               |
    move.l  %acc1, %acc2               |
    move.l  %acc2, %acc3               |
    mac.l   %d0, %a1, (%a2)+, %d1, %acc0 | get next 4 L samples and scale
    mac.l   %d1, %a1, (%a2)+, %d2, %acc1 | with saturation
    mac.l   %d2, %a1, (%a2)+, %d3, %acc2 |
    mac.l   %d3, %a1             , %acc3 |
    swap    %d4                        | a) interleave most significant...
    swap    %d5                        |
    swap    %d6                        |
    swap    %d7                        |
    movclr.l %acc0, %d0                | obtain L results
    movclr.l %acc1, %d1                |
    movclr.l %acc2, %d2                |
    movclr.l %acc3, %d3                |
    move.w  %d4, %d0                   | a) ... 16 bits of L and R
    move.w  %d5, %d1                   |
    move.w  %d6, %d2                   |
    move.w  %d7, %d3                   |
    movem.l %d0-%d3, -16(%a4)          | write four stereo samples
    cmp.l   %a4, %a5                   |
    bhi.b   30b | line loop            |
40: | long loop 1 start                |
    cmp.l   %a4, %a0                   | any longwords left?
    bls.b   60f | output end           | no? stop
50: | long loop 1                      |
    move.l  (%a2)+, %d1                | handle trailing longwords
    move.l  %a6, %acc0                 |
    move.l  %acc0, %acc1               |
    mac.l   %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
    mac.l   %d2, %a1, %acc1            |
    movclr.l %acc0, %d1                |
    movclr.l %acc1, %d2                |
    swap    %d2                        |
    move.w  %d2, %d1                   |
    move.l  %d1, (%a4)+                |
    cmp.l   %a4, %a0                   |
    bhi.b   50b | long loop 1          |
60: | output end                       |
    movem.l (%sp), %d1-%d7/%a2-%a6     | restore registers
    move.l  %d1, %macsr                | ...and the caller's %macsr
    lea.l   48(%sp), %sp               | cleanup
    rts                                |
    .size   sample_output_stereo, .-sample_output_stereo

/****************************************************************************
 * void sample_output_mono(int count, struct dsp_data *data,
 *                         const int32_t *src[], int16_t *dst)
 *
 * Same treatment as sample_output_stereo but for one channel: only src[0]
 * is read, and the high 16 bits of each scaled, rounded result are written
 * to both halves of the output longword (L and R).
 *
 * "scale" is the longword at offset 0 of *data; the multiplier is
 * (1 << (16 - scale)) and 0x8000 is the rounding term.
 *
 * Stack after the prologue (32 bytes saved, including the caller's %macsr
 * in the %d1 slot, + return address):
 *      36(%sp) = count, 40(%sp) = data, 44(%sp) = src, 48(%sp) = dst
 * %macsr is switched to 0x80 for the duration and restored on exit.
 */
    .section .text
    .align  2
    .global sample_output_mono
sample_output_mono:
    lea.l   -32(%sp), %sp              | save registers
    move.l  %macsr, %d1                | caller's %macsr is saved via %d1...
    movem.l %d1-%d5/%a2-%a4, (%sp)     | ...together with the registers
    move.l  #0x80, %macsr              | put emac unit in signed int mode
    movem.l 36(%sp), %a0-%a3           | %a0 = count, %a1 = data,
                                       | %a2 = src, %a3 = dst
    lea.l   (%a3, %a0.l*4), %a0        | %a0 = end address
    move.l  (%a1), %d1                 | %d1 = scale
    sub.l   #16, %d1                   | %d1 = 16 - scale
    neg.l   %d1                        |
    moveq.l #1, %d5                    |
    asl.l   %d1, %d5                   | %d5 = multiplier: (1 << (16 - scale))
    move.l  #0x8000, %a4               | %a4 = rounding term
    movem.l (%a2), %a2                 | get source channel pointer
    moveq.l #28, %d0                   | %d0 = second line bound
    add.l   %a3, %d0                   |
    and.l   #0xfffffff0, %d0           |
    cmp.l   %a0, %d0                   | at least a full line?
    bhi.w   40f | long loop 1 start    | no? do as trailing longwords
    sub.l   #16, %d0                   | %d0 = first line bound
    cmp.l   %a3, %d0                   | any leading longwords?
    bls.b   20f | line loop start      | no? start line loop
10: | long loop 0                      |
    move.l  (%a2)+, %d1                | read one source sample
    move.l  %a4, %acc0                 | preload rounding term
    mac.l   %d1, %d5, %acc0            | shift sample to high word
    movclr.l %acc0, %d1                | get possibly saturated result
    move.l  %d1, %d2                   |
    swap    %d2                        | copy high word into low word
    move.w  %d2, %d1                   | duplicate single channel into
    move.l  %d1, (%a3)+                | L and R
    cmp.l   %a3, %d0                   |
    bhi.b   10b | long loop 0          |
20: | line loop start                  |
    lea.l   -12(%a0), %a1              | %a1 = at or just before last line bound
30: | line loop                        |
    move.l  (%a2)+, %d0                | get next 4 samples and scale
    move.l  %a4, %acc0                 | preload rounding term in all four
    move.l  %acc0, %acc1               |
    move.l  %acc1, %acc2               |
    move.l  %acc2, %acc3               |
    mac.l   %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
    mac.l   %d1, %d5, (%a2)+, %d2, %acc1 |
    mac.l   %d2, %d5, (%a2)+, %d3, %acc2 |
    mac.l   %d3, %d5             , %acc3 |
    lea.l   16(%a3), %a3               | increment dest here, mitigate stalls
    movclr.l %acc0, %d0                | obtain results
    movclr.l %acc1, %d1                |
    movclr.l %acc2, %d2                |
    movclr.l %acc3, %d3                |
    move.l  %d0, %d4                   | duplicate single channel
    swap    %d4                        | into L and R
    move.w  %d4, %d0                   |
    move.l  %d1, %d4                   |
    swap    %d4                        |
    move.w  %d4, %d1                   |
    move.l  %d2, %d4                   |
    swap    %d4                        |
    move.w  %d4, %d2                   |
    move.l  %d3, %d4                   |
    swap    %d4                        |
    move.w  %d4, %d3                   |
    movem.l %d0-%d3, -16(%a3)          | write four stereo samples
    cmp.l   %a3, %a1                   |
    bhi.b   30b | line loop            |
40: | long loop 1 start                |
    cmp.l   %a3, %a0                   | any longwords left?
    bls.b   60f | output end           | no? stop
50: | long loop 1                      |
    move.l  (%a2)+, %d1                | handle trailing longwords
    move.l  %a4, %acc0                 |
    mac.l   %d1, %d5, %acc0            | the same way as leading ones
    movclr.l %acc0, %d1                |
    move.l  %d1, %d2                   |
    swap    %d2                        |
    move.w  %d2, %d1                   |
    move.l  %d1, (%a3)+                |
    cmp.l   %a3, %a0                   |
    bhi.b   50b | long loop 1          |
60: | output end                       |
    movem.l (%sp), %d1-%d5/%a2-%a4     | restore registers
    move.l  %d1, %macsr                | ...and the caller's %macsr
    lea.l   32(%sp), %sp               | cleanup
    rts                                |
    .size   sample_output_mono, .-sample_output_mono