diff options
author | Michael Sevakis <jethead71@rockbox.org> | 2007-02-24 17:06:36 +0000 |
---|---|---|
committer | Michael Sevakis <jethead71@rockbox.org> | 2007-02-24 17:06:36 +0000 |
commit | d4e904bf3557c63fb358d2d8e91bb103ca369e1a (patch) | |
tree | 2405fea04069c5d13286438d38ef7c246bb75075 /apps/dsp_cf.S | |
parent | dbf772bae969703972a672a866f07edc9a9031a5 (diff) | |
download | rockbox-d4e904bf3557c63fb358d2d8e91bb103ca369e1a.tar.gz rockbox-d4e904bf3557c63fb358d2d8e91bb103ca369e1a.zip |
SWCODEC: Dsp speed optimizations. Changes for more modularity. Removal of some useless stuff. Some assembly routines for Coldfire with speed in mind over size for the outputs but the channel modes remain compact. Miscellaneous coldfire asm updates to accommodate the changes. Codec API structure version has to increase so do a full update.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12472 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/dsp_cf.S')
-rw-r--r-- | apps/dsp_cf.S | 380 |
1 files changed, 341 insertions, 39 deletions
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S index 295ef05fe0..1f8dd48cee 100644 --- a/apps/dsp_cf.S +++ b/apps/dsp_cf.S | |||
@@ -18,7 +18,7 @@ | |||
18 | ****************************************************************************/ | 18 | ****************************************************************************/ |
19 | 19 | ||
20 | /**************************************************************************** | 20 | /**************************************************************************** |
21 | * apply_crossfeed(int32_t* src[], int count) | 21 | * void apply_crossfeed(int32_t *src[], int count) |
22 | */ | 22 | */ |
23 | .section .text | 23 | .section .text |
24 | .global apply_crossfeed | 24 | .global apply_crossfeed |
@@ -88,32 +88,31 @@ apply_crossfeed: | |||
88 | .size apply_crossfeed,.cfend-apply_crossfeed | 88 | .size apply_crossfeed,.cfend-apply_crossfeed |
89 | 89 | ||
90 | /**************************************************************************** | 90 | /**************************************************************************** |
91 | * dsp_downsample(int channels, int count, struct resample_data *r, | 91 | * int dsp_downsample(int count, struct dsp_data *data, |
92 | * in32_t **src, int32_t **dst) | 92 | * int32_t *src[], int32_t *dst[]) |
93 | */ | 93 | */ |
94 | .section .text | 94 | .section .text |
95 | .global dsp_downsample | 95 | .global dsp_downsample |
96 | dsp_downsample: | 96 | dsp_downsample: |
97 | lea.l -40(%sp), %sp | save non-clobberables | 97 | lea.l -40(%sp), %sp | save non-clobberables |
98 | movem.l %d2-%d7/%a2-%a5, (%sp) | | 98 | movem.l %d2-%d7/%a2-%a5, (%sp) | |
99 | movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels | 99 | movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count |
100 | | %d3 = count | 100 | | %a0 = data |
101 | | %a0 = r | ||
102 | | %a1 = src | 101 | | %a1 = src |
103 | | %a2 = dst | 102 | | %a2 = dst |
104 | move.l 4(%a0), %d4 | %d4 = delta = r->delta | 103 | movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels |
105 | move.l #16, %d7 | %d7 = shift | 104 | | %d4 = delta = data->resample_data.delta |
105 | moveq.l #16, %d7 | %d7 = shift | ||
106 | .dschannel_loop: | 106 | .dschannel_loop: |
107 | move.l (%a0), %d5 | %d5 = phase = r->phase | 107 | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase |
108 | move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1] | 108 | move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] |
109 | move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1] | 109 | move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] |
110 | lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1] | 110 | lea.l 12(%a0, %d3.l*4), %a5 | %a5 = &data->resample_data.last_sample[ch-1] |
111 | move.l (%a5), %d0 | %d0 = last = r->last_sample[ch-1] | 111 | move.l (%a5), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] |
112 | move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1] | 112 | move.l -4(%a3, %d2.l*4), (%a5) | data->resample_data.last_sample[ch-1] = s[count-1] |
113 | move.l %d1, (%a5) | | ||
114 | move.l %d5, %d6 | %d6 = pos = phase >> 16 | 113 | move.l %d5, %d6 | %d6 = pos = phase >> 16 |
115 | lsr.l %d7, %d6 | | 114 | lsr.l %d7, %d6 | |
116 | cmp.l %d3, %d6 | past end of samples? | 115 | cmp.l %d2, %d6 | past end of samples? |
117 | bge.b .dsloop_skip | yes? skip loop | 116 | bge.b .dsloop_skip | yes? skip loop |
118 | tst.l %d6 | need last sample of prev. frame? | 117 | tst.l %d6 | need last sample of prev. frame? |
119 | bne.b .dsloop | no? start main loop | 118 | bne.b .dsloop | no? start main loop |
@@ -134,14 +133,14 @@ dsp_downsample: | |||
134 | move.l %d5, %d6 | pos = phase >> 16 | 133 | move.l %d5, %d6 | pos = phase >> 16 |
135 | lsr.l %d7, %d6 | | 134 | lsr.l %d7, %d6 | |
136 | move.l %d0, (%a4)+ | *d++ = %d0 | 135 | move.l %d0, (%a4)+ | *d++ = %d0 |
137 | cmp.l %d3, %d6 | pos < count? | 136 | cmp.l %d2, %d6 | pos < count? |
138 | blt.b .dsloop | yes? continue resampling | 137 | blt.b .dsloop | yes? continue resampling |
139 | .dsloop_skip: | 138 | .dsloop_skip: |
140 | subq.l #1, %d2 | ch > 0? | 139 | subq.l #1, %d3 | ch > 0? |
141 | bgt.b .dschannel_loop | yes? process next channel | 140 | bgt.b .dschannel_loop | yes? process next channel |
142 | asl.l %d7, %d3 | wrap phase to start of next frame | 141 | asl.l %d7, %d2 | wrap phase to start of next frame |
143 | sub.l %d3, %d5 | r->phase = phase - (count << 16) | 142 | sub.l %d2, %d5 | data->resample_data.phase = |
144 | move.l %d5, (%a0) | | 143 | move.l %d5, 12(%a0) | ... phase - (count << 16) |
145 | move.l %a4, %d0 | return d - d[0] | 144 | move.l %a4, %d0 | return d - d[0] |
146 | sub.l (%a2), %d0 | | 145 | sub.l (%a2), %d0 | |
147 | asr.l #2, %d0 | convert bytes->samples | 146 | asr.l #2, %d0 | convert bytes->samples |
@@ -153,31 +152,30 @@ dsp_downsample: | |||
153 | .size dsp_downsample,.dsend-dsp_downsample | 152 | .size dsp_downsample,.dsend-dsp_downsample |
154 | 153 | ||
155 | /**************************************************************************** | 154 | /**************************************************************************** |
156 | * dsp_upsample(int channels, int count, struct resample_data *r, | 155 | * int dsp_upsample(int count, struct dsp_data *dsp, |
157 | * in32_t **src, int32_t **dst) | 156 | * int32_t *src[], int32_t *dst[]) |
158 | */ | 157 | */ |
159 | .section .text | 158 | .section .text |
160 | .global dsp_upsample | 159 | .global dsp_upsample |
161 | dsp_upsample: | 160 | dsp_upsample: |
162 | lea.l -40(%sp), %sp | save non-clobberables | 161 | lea.l -40(%sp), %sp | save non-clobberables |
163 | movem.l %d2-%d7/%a2-%a5, (%sp) | | 162 | movem.l %d2-%d7/%a2-%a5, (%sp) | |
164 | movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels | 163 | movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count |
165 | | %d3 = count | 164 | | %a0 = data |
166 | | %a0 = r | ||
167 | | %a1 = src | 165 | | %a1 = src |
168 | | %a2 = dst | 166 | | %a2 = dst |
169 | move.l 4(%a0), %d4 | %d4 = delta = r->delta | 167 | movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels |
168 | | %d4 = delta = data->resample_data.delta | ||
170 | swap %d4 | swap delta to high word to use | 169 | swap %d4 | swap delta to high word to use |
171 | | carries to increment position | 170 | | carries to increment position |
172 | .uschannel_loop: | 171 | .uschannel_loop: |
173 | move.l (%a0), %d5 | %d5 = phase = r->phase | 172 | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase |
174 | move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1] | 173 | move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] |
175 | lea.l 4(%a0, %d2.l*4), %a4 | %a4 = &r->last_sample[ch-1] | 174 | lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] |
176 | lea.l (%a3, %d3.l*4), %a5 | %a5 = src_end = &src[count] | 175 | lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count] |
177 | move.l (%a4), %d0 | %d0 = last = r->last_sample[ch-1] | 176 | move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] |
178 | move.l -4(%a5), %d1 | r->last_sample[ch-1] = s[count-1] | 177 | move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] |
179 | move.l %d1, (%a4) | | 178 | move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] |
180 | move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1] | ||
181 | swap %d5 | swap phase to high word to use | 179 | swap %d5 | swap phase to high word to use |
182 | | carries to increment position | 180 | | carries to increment position |
183 | move.l %d5, %d6 | %d6 = pos = phase >> 16 | 181 | move.l %d5, %d6 | %d6 = pos = phase >> 16 |
@@ -204,13 +202,13 @@ dsp_upsample: | |||
204 | move.l %d7, (%a4)+ | *d++ = %d7 | 202 | move.l %d7, (%a4)+ | *d++ = %d7 |
205 | add.l %d4, %d5 | phase += delta | 203 | add.l %d4, %d5 | phase += delta |
206 | bcc.b .usloop_0 | load next values? | 204 | bcc.b .usloop_0 | load next values? |
207 | cmp.l %a5, %a3 | src < src_end? | 205 | cmp.l %a5, %a3 | src <= src_end? |
208 | blt.b .usloop_1 | yes? continue resampling | 206 | ble.b .usloop_1 | yes? continue resampling |
209 | .usloop_skip: | 207 | .usloop_skip: |
210 | subq.l #1, %d2 | ch > 0? | 208 | subq.l #1, %d3 | ch > 0? |
211 | bgt.b .uschannel_loop | yes? process next channel | 209 | bgt.b .uschannel_loop | yes? process next channel |
212 | swap %d5 | wrap phase to start of next frame | 210 | swap %d5 | wrap phase to start of next frame |
213 | move.l %d5, (%a0) | ...and save in r->phase | 211 | move.l %d5, 12(%a0) | ...and save in data->resample_data.phase |
214 | move.l %a4, %d0 | return d - d[0] | 212 | move.l %a4, %d0 | return d - d[0] |
215 | sub.l (%a2), %d0 | | 213 | sub.l (%a2), %d0 | |
216 | movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables | 214 | movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables |
@@ -219,3 +217,307 @@ dsp_upsample: | |||
219 | rts | buh-bye | 217 | rts | buh-bye |
220 | .usend: | 218 | .usend: |
221 | .size dsp_upsample,.usend-dsp_upsample | 219 | .size dsp_upsample,.usend-dsp_upsample |
220 | |||
221 | /* These routines might benefit from burst transfers but we'll keep them | ||
222 | * small for now since they're rather light weight | ||
223 | */ | ||
224 | |||
225 | /**************************************************************************** | ||
226 | * void channels_process_sound_chan_mono(int count, int32_t *buf[]) | ||
227 | * | ||
228 | * Mix left and right channels 50/50 into a center channel. | ||
229 | */ | ||
230 | .section .text | ||
231 | .global channels_process_sound_chan_mono | ||
232 | channels_process_sound_chan_mono: | ||
233 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf | ||
234 | lea.l -12(%sp), %sp | save registers | ||
235 | move.l %macsr, %d1 | | ||
236 | movem.l %d1-%d3, (%sp) | | ||
237 | move.l #0xb0, %macsr | put emac in rounding fractional mode | ||
238 | movem.l (%a0), %a0-%a1 | get channel pointers | ||
239 | move.l #0x40000000, %d3 | %d3 = 0.5 | ||
240 | 1: | ||
241 | move.l (%a0), %d1 | L = R = l/2 + r/2 | ||
242 | mac.l %d1, %d3, (%a1), %d2, %acc0 | | ||
243 | mac.l %d2, %d3, %acc0 | | ||
244 | movclr.l %acc0, %d1 | | ||
245 | move.l %d1, (%a0)+ | output to original buffer | ||
246 | move.l %d1, (%a1)+ | | ||
247 | subq.l #1, %d0 | | ||
248 | bgt.s 1b | | ||
249 | movem.l (%sp), %d1-%d3 | restore registers | ||
250 | move.l %d1, %macsr | | ||
251 | lea.l 12(%sp), %sp | cleanup | ||
252 | rts | ||
253 | .cpmono_end: | ||
254 | .size channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono | ||
255 | |||
256 | |||
257 | /**************************************************************************** | ||
258 | * void channels_process_sound_chan_custom(int count, int32_t *buf[]) | ||
259 | * | ||
260 | * Apply stereo width (narrowing/expanding) effect. | ||
261 | */ | ||
262 | .section .text | ||
263 | .global channels_process_sound_chan_custom | ||
264 | channels_process_sound_chan_custom: | ||
265 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf | ||
266 | lea.l -16(%sp), %sp | save registers | ||
267 | move.l %macsr, %d1 | | ||
268 | movem.l %d1-%d4, (%sp) | | ||
269 | move.l #0xb0, %macsr | put emac in rounding fractional mode | ||
270 | movem.l (%a0), %a0-%a1 | get channel pointers | ||
271 | move.l dsp_sw_gain, %d3 | load straight (mid) gain | ||
272 | move.l dsp_sw_cross, %d4 | load cross (side) gain | ||
273 | 1: | ||
274 | move.l (%a0), %d1 | | ||
275 | mac.l %d1, %d3 , (%a1), %d2, %acc0 | L = l*gain + r*cross | ||
276 | mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross | ||
277 | mac.l %d2, %d4 , %acc0 | | ||
278 | mac.l %d2, %d3 , %acc1 | | ||
279 | movclr.l %acc0, %d1 | | ||
280 | movclr.l %acc1, %d2 | | ||
281 | move.l %d1, (%a0)+ | | ||
282 | move.l %d2, (%a1)+ | | ||
283 | subq.l #1, %d0 | | ||
284 | bgt.s 1b | | ||
285 | movem.l (%sp), %d1-%d4 | restore registers | ||
286 | move.l %d1, %macsr | | ||
287 | lea.l 16(%sp), %sp | cleanup | ||
288 | rts | ||
289 | .cpcustom_end: | ||
290 | .size channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom | ||
291 | |||
292 | /**************************************************************************** | ||
293 | * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) | ||
294 | * | ||
295 | * Separate channels into side channels. | ||
296 | */ | ||
297 | .section .text | ||
298 | .global channels_process_sound_chan_karaoke | ||
299 | channels_process_sound_chan_karaoke: | ||
300 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf | ||
301 | lea.l -16(%sp), %sp | save registers | ||
302 | move.l %macsr, %d1 | | ||
303 | movem.l %d1-%d4, (%sp) | | ||
304 | move.l #0xb0, %macsr | put emac in rounding fractional mode | ||
305 | movem.l (%a0), %a0-%a1 | get channel pointers | ||
306 | move.l #0x40000000, %d4 | %d4 = 0.5 | ||
307 | 1: | ||
308 | move.l (%a0), %d1 | | ||
309 | mac.l %d1, %d4, (%a1), %d2, %acc0 | L = l/2 - r/2 | ||
310 | mac.l %d2, %d4, %acc1 | R = r/2 - l/2 | ||
311 | movclr.l %acc0, %d1 | | ||
312 | movclr.l %acc1, %d2 | | ||
313 | move.l %d1, %d3 | | ||
314 | sub.l %d2, %d1 | | ||
315 | sub.l %d3, %d2 | | ||
316 | move.l %d1, (%a0)+ | | ||
317 | move.l %d2, (%a1)+ | | ||
318 | subq.l #1, %d0 | | ||
319 | bgt.s 1b | | ||
320 | movem.l (%sp), %d1-%d4 | restore registers | ||
321 | move.l %d1, %macsr | | ||
322 | lea.l 16(%sp), %sp | cleanup | ||
323 | rts | ||
324 | .cpkaraoke_end: | ||
325 | .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke | ||
326 | |||
327 | /**************************************************************************** | ||
328 | * void sample_output_stereo(int count, struct dsp_data *data, | ||
329 | * int32_t *src[], int16_t *dst) | ||
330 | * | ||
331 | * Framework based on the ubiquitous Rockbox line transfer logic for | ||
332 | * Coldfire CPUs. | ||
333 | * | ||
334 | * Does emac clamping and scaling (which proved faster than the usual | ||
335 | * checks and branches - even single test clamping) and writes using | ||
336 | * line burst transfers. Also better than writing a single L-R pair per | ||
337 | * loop but a good deal more code. | ||
338 | * | ||
339 | * Attempting bursting during reads is rather futile since the source and | ||
340 | * destination alignments rarely agree and too much complication will | ||
341 | * slow us up. The parallel loads seem to do a bit better at least until | ||
342 | * a pcm buffer can always give line aligned chunk and then aligning the | ||
343 | * dest can then imply the source is aligned if the source buffers are. | ||
344 | * For now longword alignment is assumed of both the source and dest. | ||
345 | * | ||
346 | */ | ||
347 | .section .text | ||
348 | .global sample_output_stereo | ||
349 | sample_output_stereo: | ||
350 | lea.l -44(%sp), %sp | save registers | ||
351 | move.l %macsr, %d1 | do it now as at many lines will | ||
352 | movem.l %d1-%d7/%a2-%a5, (%sp) | be the far more common condition | ||
353 | move.l #0x80, %macsr | put emac unit in signed int mode | ||
354 | movem.l 48(%sp), %a0-%a2/%a4 | | ||
355 | lea.l (%a4, %a0.l*4), %a0 | %a0 = end address | ||
356 | move.l (%a1), %d1 | %a1 = multiplier: (1 << (16 - scale)) | ||
357 | sub.l #16, %d1 | | ||
358 | neg.l %d1 | | ||
359 | move.q #1, %d0 | | ||
360 | asl.l %d1, %d0 | | ||
361 | move.l %d0, %a1 | | ||
362 | movem.l (%a2), %a2-%a3 | get L/R channel pointers | ||
363 | moveq.l #28, %d0 | %d0 = second line bound | ||
364 | add.l %a4, %d0 | | ||
365 | and.l #0xfffffff0, %d0 | | ||
366 | cmp.l %a4, %d0 | at least a full line? | ||
367 | blo.w .sos_longloop_1_start | no? jump to trailing longword | ||
368 | sub.l #16, %d0 | %d0 = first line bound | ||
369 | cmp.l %a4, %d0 | any leading longwords? | ||
370 | bls.b .sos_lineloop_start | no? jump to line loop | ||
371 | .sos_longloop_0: | ||
372 | move.l (%a2)+, %d1 | read longword from L and R | ||
373 | mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word | ||
374 | mac.l %d2, %a1, %acc1 | shift R to high word | ||
375 | movclr.l %acc0, %d1 | get possibly saturated results | ||
376 | movclr.l %acc1, %d2 | | ||
377 | swap %d2 | move R to low word | ||
378 | move.w %d2, %d1 | interleave MS 16 bits of each | ||
379 | move.l %d1, (%a4)+ | ...and write both | ||
380 | cmp.l %a4, %d0 | | ||
381 | bhi.b .sos_longloop_0 | | ||
382 | .sos_lineloop_start: | ||
383 | lea.l -12(%a0), %a5 | %a5 = at or just before last line bound | ||
384 | .sos_lineloop: | ||
385 | move.l (%a2)+, %d0 | get next 4 L samples and scale | ||
386 | mac.l %d0, %a1, (%a2)+, %d1, %acc0 | with saturation | ||
387 | mac.l %d1, %a1, (%a2)+, %d2, %acc1 | | ||
388 | mac.l %d2, %a1, (%a2)+, %d3, %acc2 | | ||
389 | mac.l %d3, %a1, %acc3 | | ||
390 | movclr.l %acc0, %d0 | obtain results | ||
391 | movclr.l %acc1, %d1 | | ||
392 | movclr.l %acc2, %d2 | | ||
393 | movclr.l %acc3, %d3 | | ||
394 | move.l (%a3)+, %d4 | get next 4 R samples and scale | ||
395 | mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation | ||
396 | mac.l %d5, %a1, (%a3)+, %d6, %acc1 | | ||
397 | mac.l %d6, %a1, (%a3)+, %d7, %acc2 | | ||
398 | mac.l %d7, %a1, %acc3 | | ||
399 | movclr.l %acc0, %d4 | obtain results | ||
400 | movclr.l %acc1, %d5 | | ||
401 | movclr.l %acc2, %d6 | | ||
402 | movclr.l %acc3, %d7 | | ||
403 | swap %d4 | interleave most significant | ||
404 | move.w %d4, %d0 | 16 bits of L and R | ||
405 | swap %d5 | | ||
406 | move.w %d5, %d1 | | ||
407 | swap %d6 | | ||
408 | move.w %d6, %d2 | | ||
409 | swap %d7 | | ||
410 | move.w %d7, %d3 | | ||
411 | movem.l %d0-%d3, (%a4) | write four stereo samples | ||
412 | lea.l 16(%a4), %a4 | | ||
413 | cmp.l %a4, %a5 | | ||
414 | bhi.b .sos_lineloop | | ||
415 | .sos_longloop_1_start: | ||
416 | cmp.l %a4, %a0 | any longwords left? | ||
417 | bls.b .sos_done | no? finished. | ||
418 | .sos_longloop_1: | ||
419 | move.l (%a2)+, %d1 | handle trailing longwords | ||
420 | mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones | ||
421 | mac.l %d2, %a1, %acc1 | | ||
422 | movclr.l %acc0, %d1 | | ||
423 | movclr.l %acc1, %d2 | | ||
424 | swap %d2 | | ||
425 | move.w %d2, %d1 | | ||
426 | move.l %d1, (%a4)+ | | ||
427 | cmp.l %a4, %a0 | | ||
428 | bhi.b .sos_longloop_1 | | ||
429 | .sos_done: | ||
430 | movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers | ||
431 | move.l %d1, %macsr | | ||
432 | lea.l 44(%sp), %sp | cleanup | ||
433 | rts | | ||
434 | .sos_end: | ||
435 | .size sample_output_stereo, .sos_end-sample_output_stereo | ||
436 | |||
437 | /**************************************************************************** | ||
438 | * void sample_output_mono(int count, struct dsp_data *data, | ||
439 | * int32_t *src[], int16_t *dst) | ||
440 | * | ||
441 | * Same treatment as sample_output_stereo but for one channel. | ||
442 | */ | ||
443 | .section .text | ||
444 | .global sample_output_mono | ||
445 | sample_output_mono: | ||
446 | lea.l -28(%sp), %sp | save registers | ||
447 | move.l %macsr, %d1 | do it now as at many lines will | ||
448 | movem.l %d1-%d5/%a2-%a3, (%sp) | be the far more common condition | ||
449 | move.l #0x80, %macsr | put emac unit in signed int mode | ||
450 | movem.l 32(%sp), %a0-%a3 | | ||
451 | lea.l (%a3, %a0.l*4), %a0 | %a0 = end address | ||
452 | move.l (%a1), %d1 | %d5 = multiplier: (1 << (16 - scale)) | ||
453 | sub.l #16, %d1 | | ||
454 | neg.l %d1 | | ||
455 | move.q #1, %d5 | | ||
456 | asl.l %d1, %d5 | | ||
457 | movem.l (%a2), %a2 | get source channel pointer | ||
458 | moveq.l #28, %d0 | %d0 = second line bound | ||
459 | add.l %a3, %d0 | | ||
460 | and.l #0xfffffff0, %d0 | | ||
461 | cmp.l %a3, %d0 | at least a full line? | ||
462 | blo.w .som_longloop_1_start | no? jump to trailing longword | ||
463 | sub.l #16, %d0 | %d0 = first line bound | ||
464 | cmp.l %a3, %d0 | any leading longwords? | ||
465 | bls.b .som_lineloop_start | no? jump to line loop | ||
466 | .som_longloop_0: | ||
467 | move.l (%a2)+, %d1 | read longword from source channel | ||
468 | mac.l %d1, %d5, %acc0 | shift sample to high word | ||
469 | movclr.l %acc0, %d1 | get possibly saturated result | ||
470 | move.l %d1, %d2 | | ||
471 | swap %d2 | move copy of sample to low word | ||
472 | move.w %d2, %d1 | duplicate single channel into | ||
473 | move.l %d1, (%a3)+ | L and R | ||
474 | cmp.l %a3, %d0 | | ||
475 | bhi.b .som_longloop_0 | | ||
476 | .som_lineloop_start: | ||
477 | lea.l -12(%a0), %a1 | %a1 = at or just before last line bound | ||
478 | .som_lineloop: | ||
479 | move.l (%a2)+, %d0 | get next 4 L samples and scale | ||
480 | mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation | ||
481 | mac.l %d1, %d5, (%a2)+, %d2, %acc1 | | ||
482 | mac.l %d2, %d5, (%a2)+, %d3, %acc2 | | ||
483 | mac.l %d3, %d5, %acc3 | | ||
484 | movclr.l %acc0, %d0 | obtain results | ||
485 | movclr.l %acc1, %d1 | | ||
486 | movclr.l %acc2, %d2 | | ||
487 | movclr.l %acc3, %d3 | | ||
488 | move.l %d0, %d4 | duplicate single channel | ||
489 | swap %d4 | into L and R | ||
490 | move.w %d4, %d0 | | ||
491 | move.l %d1, %d4 | | ||
492 | swap %d4 | | ||
493 | move.w %d4, %d1 | | ||
494 | move.l %d2, %d4 | | ||
495 | swap %d4 | | ||
496 | move.w %d4, %d2 | | ||
497 | move.l %d3, %d4 | | ||
498 | swap %d4 | | ||
499 | move.w %d4, %d3 | | ||
500 | movem.l %d0-%d3, (%a3) | write four stereo samples | ||
501 | lea.l 16(%a3), %a3 | | ||
502 | cmp.l %a3, %a1 | | ||
503 | bhi.b .som_lineloop | | ||
504 | .som_longloop_1_start: | ||
505 | cmp.l %a3, %a0 | any longwords left? | ||
506 | bls.b .som_done | no? finished. | ||
507 | .som_longloop_1: | ||
508 | move.l (%a2)+, %d1 | handle trailing longwords | ||
509 | mac.l %d1, %d5, %acc0 | the same way as leading ones | ||
510 | movclr.l %acc0, %d1 | | ||
511 | move.l %d1, %d2 | | ||
512 | swap %d2 | | ||
513 | move.w %d2, %d1 | | ||
514 | move.l %d1, (%a3)+ | | ||
515 | cmp.l %a3, %a0 | | ||
516 | bhi.b .som_longloop_1 | | ||
517 | .som_done: | ||
518 | movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers | ||
519 | move.l %d1, %macsr | | ||
520 | lea.l 28(%sp), %sp | cleanup | ||
521 | rts | | ||
522 | .som_end: | ||
523 | .size sample_output_mono, .som_end-sample_output_mono | ||