summaryrefslogtreecommitdiff
path: root/apps/dsp_cf.S
diff options
context:
space:
mode:
authorMichael Sevakis <jethead71@rockbox.org>2007-02-24 17:06:36 +0000
committerMichael Sevakis <jethead71@rockbox.org>2007-02-24 17:06:36 +0000
commitd4e904bf3557c63fb358d2d8e91bb103ca369e1a (patch)
tree2405fea04069c5d13286438d38ef7c246bb75075 /apps/dsp_cf.S
parentdbf772bae969703972a672a866f07edc9a9031a5 (diff)
downloadrockbox-d4e904bf3557c63fb358d2d8e91bb103ca369e1a.tar.gz
rockbox-d4e904bf3557c63fb358d2d8e91bb103ca369e1a.zip
SWCODEC: Dsp speed optimizations. Changes for more modularity. Removal of some useless stuff. Some assembly routines for Coldfire with speed in mind over size for the outputs but the channel modes remain compact. Miscellaneous coldfire asm updates to accommodate the changes. Codec API structure version has to increase so do a full update.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12472 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/dsp_cf.S')
-rw-r--r--apps/dsp_cf.S380
1 files changed, 341 insertions, 39 deletions
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
index 295ef05fe0..1f8dd48cee 100644
--- a/apps/dsp_cf.S
+++ b/apps/dsp_cf.S
@@ -18,7 +18,7 @@
18 ****************************************************************************/ 18 ****************************************************************************/
19 19
20/**************************************************************************** 20/****************************************************************************
21 * apply_crossfeed(int32_t* src[], int count) 21 * void apply_crossfeed(int32_t *src[], int count)
22 */ 22 */
23 .section .text 23 .section .text
24 .global apply_crossfeed 24 .global apply_crossfeed
@@ -88,32 +88,31 @@ apply_crossfeed:
88 .size apply_crossfeed,.cfend-apply_crossfeed 88 .size apply_crossfeed,.cfend-apply_crossfeed
89 89
90/**************************************************************************** 90/****************************************************************************
91 * dsp_downsample(int channels, int count, struct resample_data *r, 91 * int dsp_downsample(int count, struct dsp_data *data,
92 * in32_t **src, int32_t **dst) 92 * int32_t *src[], int32_t *dst[])
93 */ 93 */
94 .section .text 94 .section .text
95 .global dsp_downsample 95 .global dsp_downsample
96dsp_downsample: 96dsp_downsample:
97 lea.l -40(%sp), %sp | save non-clobberables 97 lea.l -40(%sp), %sp | save non-clobberables
98 movem.l %d2-%d7/%a2-%a5, (%sp) | 98 movem.l %d2-%d7/%a2-%a5, (%sp) |
99 movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels 99 movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count
100 | %d3 = count 100 | %a0 = data
101 | %a0 = r
102 | %a1 = src 101 | %a1 = src
103 | %a2 = dst 102 | %a2 = dst
104 move.l 4(%a0), %d4 | %d4 = delta = r->delta 103 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels
105 move.l #16, %d7 | %d7 = shift 104 | %d4 = delta = data->resample_data.delta
105 moveq.l #16, %d7 | %d7 = shift
106.dschannel_loop: 106.dschannel_loop:
107 move.l (%a0), %d5 | %d5 = phase = r->phase 107 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
108 move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1] 108 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
109 move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1] 109 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
110 lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1] 110 lea.l 12(%a0, %d3.l*4), %a5 | %a5 = &data->resample_data.ast_sample[ch-1]
111 move.l (%a5), %d0 | %d0 = last = r->last_sample[ch-1] 111 move.l (%a5), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
112 move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1] 112 move.l -4(%a3, %d2.l*4), (%a5) | data->resample_data.last_sample[ch-1] = s[count-1]
113 move.l %d1, (%a5) |
114 move.l %d5, %d6 | %d6 = pos = phase >> 16 113 move.l %d5, %d6 | %d6 = pos = phase >> 16
115 lsr.l %d7, %d6 | 114 lsr.l %d7, %d6 |
116 cmp.l %d3, %d6 | past end of samples? 115 cmp.l %d2, %d6 | past end of samples?
117 bge.b .dsloop_skip | yes? skip loop 116 bge.b .dsloop_skip | yes? skip loop
118 tst.l %d6 | need last sample of prev. frame? 117 tst.l %d6 | need last sample of prev. frame?
119 bne.b .dsloop | no? start main loop 118 bne.b .dsloop | no? start main loop
@@ -134,14 +133,14 @@ dsp_downsample:
134 move.l %d5, %d6 | pos = phase >> 16 133 move.l %d5, %d6 | pos = phase >> 16
135 lsr.l %d7, %d6 | 134 lsr.l %d7, %d6 |
136 move.l %d0, (%a4)+ | *d++ = %d0 135 move.l %d0, (%a4)+ | *d++ = %d0
137 cmp.l %d3, %d6 | pos < count? 136 cmp.l %d2, %d6 | pos < count?
138 blt.b .dsloop | yes? continue resampling 137 blt.b .dsloop | yes? continue resampling
139.dsloop_skip: 138.dsloop_skip:
140 subq.l #1, %d2 | ch > 0? 139 subq.l #1, %d3 | ch > 0?
141 bgt.b .dschannel_loop | yes? process next channel 140 bgt.b .dschannel_loop | yes? process next channel
142 asl.l %d7, %d3 | wrap phase to start of next frame 141 asl.l %d7, %d2 | wrap phase to start of next frame
143 sub.l %d3, %d5 | r->phase = phase - (count << 16) 142 sub.l %d2, %d5 | data->resample_data.phase =
144 move.l %d5, (%a0) | 143 move.l %d5, 12(%a0) | ... phase - (count << 16)
145 move.l %a4, %d0 | return d - d[0] 144 move.l %a4, %d0 | return d - d[0]
146 sub.l (%a2), %d0 | 145 sub.l (%a2), %d0 |
147 asr.l #2, %d0 | convert bytes->samples 146 asr.l #2, %d0 | convert bytes->samples
@@ -153,31 +152,30 @@ dsp_downsample:
153 .size dsp_downsample,.dsend-dsp_downsample 152 .size dsp_downsample,.dsend-dsp_downsample
154 153
155/**************************************************************************** 154/****************************************************************************
156 * dsp_upsample(int channels, int count, struct resample_data *r, 155 * int dsp_upsample(int count, struct dsp_data *dsp,
157 * in32_t **src, int32_t **dst) 156 * int32_t *src[], int32_t *dst[])
158 */ 157 */
159 .section .text 158 .section .text
160 .global dsp_upsample 159 .global dsp_upsample
161dsp_upsample: 160dsp_upsample:
162 lea.l -40(%sp), %sp | save non-clobberables 161 lea.l -40(%sp), %sp | save non-clobberables
163 movem.l %d2-%d7/%a2-%a5, (%sp) | 162 movem.l %d2-%d7/%a2-%a5, (%sp) |
164 movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels 163 movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count
165 | %d3 = count 164 | %a0 = data
166 | %a0 = r
167 | %a1 = src 165 | %a1 = src
168 | %a2 = dst 166 | %a2 = dst
169 move.l 4(%a0), %d4 | %d4 = delta = r->delta 167 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
168 | %d4 = delta = data->resample_data.delta
170 swap %d4 | swap delta to high word to use 169 swap %d4 | swap delta to high word to use
171 | carries to increment position 170 | carries to increment position
172.uschannel_loop: 171.uschannel_loop:
173 move.l (%a0), %d5 | %d5 = phase = r->phase 172 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
174 move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1] 173 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
175 lea.l 4(%a0, %d2.l*4), %a4 | %a4 = &r->last_sample[ch-1] 174 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
176 lea.l (%a3, %d3.l*4), %a5 | %a5 = src_end = &src[count] 175 lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count]
177 move.l (%a4), %d0 | %d0 = last = r->last_sample[ch-1] 176 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
178 move.l -4(%a5), %d1 | r->last_sample[ch-1] = s[count-1] 177 move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
179 move.l %d1, (%a4) | 178 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
180 move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1]
181 swap %d5 | swap phase to high word to use 179 swap %d5 | swap phase to high word to use
182 | carries to increment position 180 | carries to increment position
183 move.l %d5, %d6 | %d6 = pos = phase >> 16 181 move.l %d5, %d6 | %d6 = pos = phase >> 16
@@ -204,13 +202,13 @@ dsp_upsample:
204 move.l %d7, (%a4)+ | *d++ = %d7 202 move.l %d7, (%a4)+ | *d++ = %d7
205 add.l %d4, %d5 | phase += delta 203 add.l %d4, %d5 | phase += delta
206 bcc.b .usloop_0 | load next values? 204 bcc.b .usloop_0 | load next values?
207 cmp.l %a5, %a3 | src < src_end? 205 cmp.l %a5, %a3 | src <= src_end?
208 blt.b .usloop_1 | yes? continue resampling 206 ble.b .usloop_1 | yes? continue resampling
209.usloop_skip: 207.usloop_skip:
210 subq.l #1, %d2 | ch > 0? 208 subq.l #1, %d3 | ch > 0?
211 bgt.b .uschannel_loop | yes? process next channel 209 bgt.b .uschannel_loop | yes? process next channel
212 swap %d5 | wrap phase to start of next frame 210 swap %d5 | wrap phase to start of next frame
213 move.l %d5, (%a0) | ...and save in r->phase 211 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
214 move.l %a4, %d0 | return d - d[0] 212 move.l %a4, %d0 | return d - d[0]
215 sub.l (%a2), %d0 | 213 sub.l (%a2), %d0 |
216 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables 214 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
@@ -219,3 +217,307 @@ dsp_upsample:
219 rts | buh-bye 217 rts | buh-bye
220.usend: 218.usend:
221 .size dsp_upsample,.usend-dsp_upsample 219 .size dsp_upsample,.usend-dsp_upsample
220
221/* These routines might benefit from burst transfers but we'll keep them
222 * small for now since they're rather light weight
223 */
224
/****************************************************************************
 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
 *
 * Downmix to mono, in place: each sample of buf[0] and buf[1] is replaced
 * by half of the left sample plus half of the right sample.
 *
 * %macsr, %d2 and %d3 are saved on entry and restored on exit.  The sample
 * count is tested after the loop body, so one sample is always processed.
 */
    .section .text
    .global channels_process_sound_chan_mono
channels_process_sound_chan_mono:
    lea.l   -12(%sp), %sp               | frame: old %macsr, %d2, %d3
    move.l  %macsr, %d1                 | %d1 = EMAC mode on entry
    movem.l %d1-%d3, (%sp)              |
    movem.l 16(%sp), %d0/%a0            | %d0 = count, %a0 = buf
    move.l  #0xb0, %macsr               | EMAC: rounding fractional mode
    move.l  #0x40000000, %d3            | %d3 = 0.5
    movem.l (%a0), %a0-%a1              | %a0 = buf[0], %a1 = buf[1]
.cpmono_loop:
    move.l  (%a0), %d1                  | %d1 = l
    mac.l   %d1, %d3, (%a1), %d2, %acc0 | acc0 += l/2, %d2 = r
    mac.l   %d2, %d3, %acc0             | acc0 += r/2
    movclr.l %acc0, %d1                 | %d1 = mixed sample, acc0 cleared
    move.l  %d1, (%a1)+                 | same value goes to both channels
    move.l  %d1, (%a0)+                 |
    subq.l  #1, %d0                     | --count > 0?
    bgt.s   .cpmono_loop                | yes? mix next sample
    movem.l (%sp), %d1-%d3              | %d1 = saved EMAC mode, %d2-%d3 back
    move.l  %d1, %macsr                 |
    lea.l   12(%sp), %sp                | drop frame
    rts
.cpmono_end:
    .size   channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono
255
256
/****************************************************************************
 * void channels_process_sound_chan_custom(int count, int32_t *buf[])
 *
 * Stereo width (narrowing/expanding) effect, done in place on buf[0] and
 * buf[1] using the gains read from dsp_sw_gain and dsp_sw_cross:
 *
 *   L = l*gain  + r*cross
 *   R = l*cross + r*gain
 *
 * %macsr and %d2-%d4 are saved on entry and restored on exit.  The sample
 * count is tested after the loop body, so one sample is always processed.
 */
    .section .text
    .global channels_process_sound_chan_custom
channels_process_sound_chan_custom:
    lea.l   -16(%sp), %sp               | frame: old %macsr, %d2-%d4
    move.l  %macsr, %d1                 | %d1 = EMAC mode on entry
    movem.l %d1-%d4, (%sp)              |
    movem.l 20(%sp), %d0/%a0            | %d0 = count, %a0 = buf
    move.l  #0xb0, %macsr               | EMAC: rounding fractional mode
    move.l  dsp_sw_cross, %d4           | %d4 = cross (side) gain
    move.l  dsp_sw_gain, %d3            | %d3 = straight (mid) gain
    movem.l (%a0), %a0-%a1              | %a0 = buf[0], %a1 = buf[1]
.cpcustom_loop:
    move.l  (%a0), %d1                  | %d1 = l
    mac.l   %d1, %d3, (%a1), %d2, %acc0 | acc0 += l*gain, %d2 = r
    mac.l   %d2, %d4, %acc0             | acc0 += r*cross
    mac.l   %d1, %d4, %acc1             | acc1 += l*cross
    mac.l   %d2, %d3, %acc1             | acc1 += r*gain
    movclr.l %acc0, %d1                 | %d1 = L, acc0 cleared
    movclr.l %acc1, %d2                 | %d2 = R, acc1 cleared
    move.l  %d1, (%a0)+                 | write back in place
    move.l  %d2, (%a1)+                 |
    subq.l  #1, %d0                     | --count > 0?
    bgt.s   .cpcustom_loop              | yes? process next sample
    movem.l (%sp), %d1-%d4              | %d1 = saved EMAC mode, %d2-%d4 back
    move.l  %d1, %macsr                 |
    lea.l   16(%sp), %sp                | drop frame
    rts
.cpcustom_end:
    .size   channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom
291
/****************************************************************************
 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
 *
 * Separate the channels into side channels, in place:
 *
 *   L = l/2 - r/2
 *   R = r/2 - l/2
 *
 * %macsr and %d2-%d4 are saved on entry and restored on exit.  The sample
 * count is tested after the loop body, so one sample is always processed.
 */
    .section .text
    .global channels_process_sound_chan_karaoke
channels_process_sound_chan_karaoke:
    lea.l   -16(%sp), %sp               | frame: old %macsr, %d2-%d4
    move.l  %macsr, %d1                 | %d1 = EMAC mode on entry
    movem.l %d1-%d4, (%sp)              |
    movem.l 20(%sp), %d0/%a0            | %d0 = count, %a0 = buf
    move.l  #0xb0, %macsr               | EMAC: rounding fractional mode
    move.l  #0x40000000, %d4            | %d4 = 0.5
    movem.l (%a0), %a0-%a1              | %a0 = buf[0], %a1 = buf[1]
.cpkaraoke_loop:
    move.l  (%a0), %d1                  | %d1 = l
    mac.l   %d1, %d4, (%a1), %d2, %acc0 | acc0 += l/2, %d2 = r
    mac.l   %d2, %d4, %acc1             | acc1 += r/2
    movclr.l %acc0, %d1                 | %d1 = l/2, acc0 cleared
    movclr.l %acc1, %d2                 | %d2 = r/2, acc1 cleared
    move.l  %d1, %d3                    | keep l/2 for the right channel
    sub.l   %d2, %d1                    | %d1 = L = l/2 - r/2
    sub.l   %d3, %d2                    | %d2 = R = r/2 - l/2
    move.l  %d1, (%a0)+                 | write back in place
    move.l  %d2, (%a1)+                 |
    subq.l  #1, %d0                     | --count > 0?
    bgt.s   .cpkaraoke_loop             | yes? process next sample
    movem.l (%sp), %d1-%d4              | %d1 = saved EMAC mode, %d2-%d4 back
    move.l  %d1, %macsr                 |
    lea.l   16(%sp), %sp                | drop frame
    rts
.cpkaraoke_end:
    .size   channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke
326
/****************************************************************************
 * void sample_output_stereo(int count, struct dsp_data *data,
 *                           int32_t *src[], int16_t *dst)
 *
 * Scales count samples from src[0] (left) and src[1] (right) and writes
 * them to dst as interleaved 16-bit L-R pairs, one longword per pair.
 * The scale is read from the first longword of *data; each sample is
 * multiplied by (1 << (16 - scale)) and the upper 16 bits of the result
 * are output.
 *
 * Framework based on the ubiquitous Rockbox line transfer logic for
 * Coldfire CPUs.
 *
 * Does emac clamping and scaling (which proved faster than the usual
 * checks and branches - even single test clamping) and writes using
 * line burst transfers. Also better than writing a single L-R pair per
 * loop but a good deal more code.
 *
 * Attempting bursting during reads is rather futile since the source and
 * destination alignments rarely agree and too much complication will
 * slow us up. The parallel loads seem to do a bit better at least until
 * a pcm buffer can always give line aligned chunk and then aligning the
 * dest can then imply the source is aligned if the source buffers are.
 * For now longword alignment is assumed of both the source and dest.
 *
 * Output is produced in three phases: leading longwords up to the first
 * 16-byte line bound, whole lines, then trailing longwords.  When the
 * output does not reach the end of the first whole line, everything is
 * written by the trailing longword loop, so nothing is stored at or
 * beyond dst + count*4.
 */
    .section .text
    .global sample_output_stereo
sample_output_stereo:
    lea.l   -44(%sp), %sp               | save registers
    move.l  %macsr, %d1                 | done up front since the line loop
    movem.l %d1-%d7/%a2-%a5, (%sp)      | is expected to be the common path
    move.l  #0x80, %macsr               | put emac unit in signed int mode
    movem.l 48(%sp), %a0-%a2/%a4        | %a0 = count, %a1 = data,
                                        | %a2 = src, %a4 = dst
    lea.l   (%a4, %a0.l*4), %a0         | %a0 = end address
    move.l  (%a1), %d1                  | %d1 = scale
    sub.l   #16, %d1                    |
    neg.l   %d1                         | %d1 = 16 - scale
    moveq.l #1, %d0                     |
    asl.l   %d1, %d0                    |
    move.l  %d0, %a1                    | %a1 = multiplier: (1 << (16 - scale))
    movem.l (%a2), %a2-%a3              | get L/R channel pointers
    moveq.l #28, %d0                    | %d0 = second line bound
    add.l   %a4, %d0                    |
    and.l   #0xfffffff0, %d0            |
    cmp.l   %a0, %d0                    | at least a full line before the end?
    bhi.w   .sos_longloop_1_start       | no? do all as trailing longwords
    sub.l   #16, %d0                    | %d0 = first line bound
    cmp.l   %a4, %d0                    | any leading longwords?
    bls.b   .sos_lineloop_start         | no? jump to line loop
.sos_longloop_0:
    move.l  (%a2)+, %d1                 | read longword from L and R
    mac.l   %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
    mac.l   %d2, %a1, %acc1             | shift R to high word
    movclr.l %acc0, %d1                 | get possibly saturated results
    movclr.l %acc1, %d2                 |
    swap    %d2                         | move R to low word
    move.w  %d2, %d1                    | interleave MS 16 bits of each
    move.l  %d1, (%a4)+                 | ...and write both
    cmp.l   %a4, %d0                    | reached the first line bound?
    bhi.b   .sos_longloop_0             | no? next leading longword
.sos_lineloop_start:
    lea.l   -12(%a0), %a5               | %a5 = at or just before last line bound
.sos_lineloop:
    move.l  (%a2)+, %d0                 | get next 4 L samples and scale
    mac.l   %d0, %a1, (%a2)+, %d1, %acc0 | with saturation
    mac.l   %d1, %a1, (%a2)+, %d2, %acc1 |
    mac.l   %d2, %a1, (%a2)+, %d3, %acc2 |
    mac.l   %d3, %a1, %acc3             |
    movclr.l %acc0, %d0                 | obtain results
    movclr.l %acc1, %d1                 |
    movclr.l %acc2, %d2                 |
    movclr.l %acc3, %d3                 |
    move.l  (%a3)+, %d4                 | get next 4 R samples and scale
    mac.l   %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
    mac.l   %d5, %a1, (%a3)+, %d6, %acc1 |
    mac.l   %d6, %a1, (%a3)+, %d7, %acc2 |
    mac.l   %d7, %a1, %acc3             |
    movclr.l %acc0, %d4                 | obtain results
    movclr.l %acc1, %d5                 |
    movclr.l %acc2, %d6                 |
    movclr.l %acc3, %d7                 |
    swap    %d4                         | interleave most significant
    move.w  %d4, %d0                    | 16 bits of L and R
    swap    %d5                         |
    move.w  %d5, %d1                    |
    swap    %d6                         |
    move.w  %d6, %d2                    |
    swap    %d7                         |
    move.w  %d7, %d3                    |
    movem.l %d0-%d3, (%a4)              | write four stereo samples
    lea.l   16(%a4), %a4                |
    cmp.l   %a4, %a5                    | another full line left?
    bhi.b   .sos_lineloop               | yes? keep bursting
.sos_longloop_1_start:
    cmp.l   %a4, %a0                    | any longwords left?
    bls.b   .sos_done                   | no? finished.
.sos_longloop_1:
    move.l  (%a2)+, %d1                 | handle trailing longwords
    mac.l   %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
    mac.l   %d2, %a1, %acc1             |
    movclr.l %acc0, %d1                 |
    movclr.l %acc1, %d2                 |
    swap    %d2                         |
    move.w  %d2, %d1                    |
    move.l  %d1, (%a4)+                 |
    cmp.l   %a4, %a0                    | reached the end address?
    bhi.b   .sos_longloop_1             | no? next trailing longword
.sos_done:
    movem.l (%sp), %d1-%d7/%a2-%a5      | restore registers
    move.l  %d1, %macsr                 |
    lea.l   44(%sp), %sp                | cleanup
    rts                                 |
.sos_end:
    .size   sample_output_stereo, .sos_end-sample_output_stereo
436
/****************************************************************************
 * void sample_output_mono(int count, struct dsp_data *data,
 *                         int32_t *src[], int16_t *dst)
 *
 * Same treatment as sample_output_stereo but for one channel: count
 * samples from src[0] are scaled by (1 << (16 - scale)), with the scale
 * read from the first longword of *data, and the upper 16 bits of each
 * result are written to dst twice, as both the L and the R half of one
 * output longword.
 *
 * Output is produced in three phases: leading longwords up to the first
 * 16-byte line bound, whole lines, then trailing longwords.  When the
 * output does not reach the end of the first whole line, everything is
 * written by the trailing longword loop, so nothing is stored at or
 * beyond dst + count*4.
 */
    .section .text
    .global sample_output_mono
sample_output_mono:
    lea.l   -28(%sp), %sp               | save registers
    move.l  %macsr, %d1                 | done up front since the line loop
    movem.l %d1-%d5/%a2-%a3, (%sp)      | is expected to be the common path
    move.l  #0x80, %macsr               | put emac unit in signed int mode
    movem.l 32(%sp), %a0-%a3            | %a0 = count, %a1 = data,
                                        | %a2 = src, %a3 = dst
    lea.l   (%a3, %a0.l*4), %a0         | %a0 = end address
    move.l  (%a1), %d1                  | %d1 = scale
    sub.l   #16, %d1                    |
    neg.l   %d1                         | %d1 = 16 - scale
    moveq.l #1, %d5                     |
    asl.l   %d1, %d5                    | %d5 = multiplier: (1 << (16 - scale))
    movem.l (%a2), %a2                  | get source channel pointer
    moveq.l #28, %d0                    | %d0 = second line bound
    add.l   %a3, %d0                    |
    and.l   #0xfffffff0, %d0            |
    cmp.l   %a0, %d0                    | at least a full line before the end?
    bhi.w   .som_longloop_1_start       | no? do all as trailing longwords
    sub.l   #16, %d0                    | %d0 = first line bound
    cmp.l   %a3, %d0                    | any leading longwords?
    bls.b   .som_lineloop_start         | no? jump to line loop
.som_longloop_0:
    move.l  (%a2)+, %d1                 | read one source sample
    mac.l   %d1, %d5, %acc0             | shift it to the high word
    movclr.l %acc0, %d1                 | get possibly saturated result
    move.l  %d1, %d2                    |
    swap    %d2                         | copy MS 16 bits to the low word
    move.w  %d2, %d1                    | duplicate single channel into
    move.l  %d1, (%a3)+                 | L and R
    cmp.l   %a3, %d0                    | reached the first line bound?
    bhi.b   .som_longloop_0             | no? next leading longword
.som_lineloop_start:
    lea.l   -12(%a0), %a1               | %a1 = at or just before last line bound
.som_lineloop:
    move.l  (%a2)+, %d0                 | get next 4 samples and scale
    mac.l   %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
    mac.l   %d1, %d5, (%a2)+, %d2, %acc1 |
    mac.l   %d2, %d5, (%a2)+, %d3, %acc2 |
    mac.l   %d3, %d5, %acc3             |
    movclr.l %acc0, %d0                 | obtain results
    movclr.l %acc1, %d1                 |
    movclr.l %acc2, %d2                 |
    movclr.l %acc3, %d3                 |
    move.l  %d0, %d4                    | duplicate single channel
    swap    %d4                         | into L and R
    move.w  %d4, %d0                    |
    move.l  %d1, %d4                    |
    swap    %d4                         |
    move.w  %d4, %d1                    |
    move.l  %d2, %d4                    |
    swap    %d4                         |
    move.w  %d4, %d2                    |
    move.l  %d3, %d4                    |
    swap    %d4                         |
    move.w  %d4, %d3                    |
    movem.l %d0-%d3, (%a3)              | write four stereo samples
    lea.l   16(%a3), %a3                |
    cmp.l   %a3, %a1                    | another full line left?
    bhi.b   .som_lineloop               | yes? keep bursting
.som_longloop_1_start:
    cmp.l   %a3, %a0                    | any longwords left?
    bls.b   .som_done                   | no? finished.
.som_longloop_1:
    move.l  (%a2)+, %d1                 | handle trailing longwords
    mac.l   %d1, %d5, %acc0             | the same way as leading ones
    movclr.l %acc0, %d1                 |
    move.l  %d1, %d2                    |
    swap    %d2                         |
    move.w  %d2, %d1                    |
    move.l  %d1, (%a3)+                 |
    cmp.l   %a3, %a0                    | reached the end address?
    bhi.b   .som_longloop_1             | no? next trailing longword
.som_done:
    movem.l (%sp), %d1-%d5/%a2-%a3      | restore registers
    move.l  %d1, %macsr                 |
    lea.l   28(%sp), %sp                | cleanup
    rts                                 |
.som_end:
    .size   sample_output_mono, .som_end-sample_output_mono