summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Sevakis <jethead71@rockbox.org>2007-03-25 04:03:44 +0000
committerMichael Sevakis <jethead71@rockbox.org>2007-03-25 04:03:44 +0000
commit369c2a37b7176e4f9c44f00a31b3b74e62b0b5d7 (patch)
tree7620c7da1d611d0d9a339487b6b264e44c6201bd
parentcd630c9e0a2e0aa259a6e53a5af1369f36984b1c (diff)
downloadrockbox-369c2a37b7176e4f9c44f00a31b3b74e62b0b5d7.tar.gz
rockbox-369c2a37b7176e4f9c44f00a31b3b74e62b0b5d7.zip
SWCODEC & Coldfire: Do some more DSP straightening out. Do as much Coldfire optimizing as seems reasonably possible by jumping through some hoops to avoid stalls. Further boost reduction will just be fractional points if taken to extremes-- not worth it. Wrap up the ASM for a while.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12905 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/dsp.c354
-rw-r--r--apps/dsp_asm.h59
-rw-r--r--apps/dsp_cf.S424
3 files changed, 468 insertions, 369 deletions
diff --git a/apps/dsp.c b/apps/dsp.c
index be851e2305..3b95145b39 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -38,9 +38,14 @@
38#define WORD_FRACBITS 27 38#define WORD_FRACBITS 27
39 39
40#define NATIVE_DEPTH 16 40#define NATIVE_DEPTH 16
41/* If the buffer sizes change, check the assembly code! */
41#define SAMPLE_BUF_COUNT 256 42#define SAMPLE_BUF_COUNT 256
42#define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/ 43#define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/
43#define DEFAULT_GAIN 0x01000000 44#define DEFAULT_GAIN 0x01000000
45#define SAMPLE_BUF_LEFT_CHANNEL 0
46#define SAMPLE_BUF_RIGHT_CHANNEL (SAMPLE_BUF_COUNT/2)
47#define RESAMPLE_BUF_LEFT_CHANNEL 0
48#define RESAMPLE_BUF_RIGHT_CHANNEL (RESAMPLE_BUF_COUNT/2)
44 49
45/* enums to index conversion properly with stereo mode and other settings */ 50/* enums to index conversion properly with stereo mode and other settings */
46enum 51enum
@@ -66,11 +71,10 @@ enum
66 * NOTE: Any assembly routines that use these structures must be updated 71 * NOTE: Any assembly routines that use these structures must be updated
67 * if current data members are moved or changed. 72 * if current data members are moved or changed.
68 */ 73 */
69 /* 32-bit achitecture offset */
70struct resample_data 74struct resample_data
71{ 75{
72 long delta; /* 00h */ 76 uint32_t delta; /* 00h */
73 long phase; /* 04h */ 77 uint32_t phase; /* 04h */
74 int32_t last_sample[2]; /* 08h */ 78 int32_t last_sample[2]; /* 08h */
75 /* 10h */ 79 /* 10h */
76}; 80};
@@ -93,9 +97,10 @@ struct dsp_data
93 int output_scale; /* 00h */ 97 int output_scale; /* 00h */
94 int num_channels; /* 04h */ 98 int num_channels; /* 04h */
95 struct resample_data resample_data; /* 08h */ 99 struct resample_data resample_data; /* 08h */
96 int clip_min; /* 18h */ 100 int32_t clip_min; /* 18h */
97 int clip_max; /* 2ch */ 101 int32_t clip_max; /* 1ch */
98 /* 30h */ 102 int32_t gain; /* 20h - Note that this is in S8.23 format. */
103 /* 24h */
99}; 104};
100 105
101/* No asm...yet */ 106/* No asm...yet */
@@ -132,13 +137,18 @@ struct eq_state
132#include <dsp_asm.h> 137#include <dsp_asm.h>
133 138
134/* Typedefs keep things much neater in this case */ 139/* Typedefs keep things much neater in this case */
135typedef int (*sample_input_fn_type)(int count, const char *src[], 140typedef void (*sample_input_fn_type)(int count, const char *src[],
136 int32_t *dst[]); 141 int32_t *dst[]);
137typedef int (*resample_fn_type)(int count, struct dsp_data *data, 142typedef int (*resample_fn_type)(int count, struct dsp_data *data,
138 int32_t *src[], int32_t *dst[]); 143 int32_t *src[], int32_t *dst[]);
139typedef void (*sample_output_fn_type)(int count, struct dsp_data *data, 144typedef void (*sample_output_fn_type)(int count, struct dsp_data *data,
140 int32_t *src[], int16_t *dst); 145 int32_t *src[], int16_t *dst);
146/* Single-DSP channel processing in place */
141typedef void (*channels_process_fn_type)(int count, int32_t *buf[]); 147typedef void (*channels_process_fn_type)(int count, int32_t *buf[]);
148/* DSP local channel processing in place */
149typedef void (*channels_process_dsp_fn_type)(int count, struct dsp_data *data,
150 int32_t *buf[]);
151
142 152
143/* 153/*
144 ***************************************************************************/ 154 ***************************************************************************/
@@ -152,16 +162,16 @@ struct dsp_config
152 int sample_bytes; 162 int sample_bytes;
153 int stereo_mode; 163 int stereo_mode;
154 int frac_bits; 164 int frac_bits;
155 long gain; /* Note that this is in S8.23 format. */
156 /* Functions that change depending upon settings - NULL if stage is 165 /* Functions that change depending upon settings - NULL if stage is
157 disabled */ 166 disabled */
158 sample_input_fn_type input_samples; 167 sample_input_fn_type input_samples;
159 resample_fn_type resample; 168 resample_fn_type resample;
160 sample_output_fn_type output_samples; 169 sample_output_fn_type output_samples;
161 /* These will be NULL for the voice codec and is more economical that 170 /* These will be NULL for the voice codec and is more economical that
162 way */ 171 way */
163 channels_process_fn_type apply_crossfeed; 172 channels_process_dsp_fn_type apply_gain;
164 channels_process_fn_type channels_process; 173 channels_process_fn_type apply_crossfeed;
174 channels_process_fn_type channels_process;
165}; 175};
166 176
167/* General DSP config */ 177/* General DSP config */
@@ -211,7 +221,7 @@ static struct dsp_config *dsp IDATA_ATTR = audio_dsp;
211 * of copying needed is minimized for that case. 221 * of copying needed is minimized for that case.
212 */ 222 */
213 223
214static int32_t sample_buf[SAMPLE_BUF_COUNT] IBSS_ATTR; 224int32_t sample_buf[SAMPLE_BUF_COUNT] IBSS_ATTR;
215static int32_t resample_buf[RESAMPLE_BUF_COUNT] IBSS_ATTR; 225static int32_t resample_buf[RESAMPLE_BUF_COUNT] IBSS_ATTR;
216 226
217/* set a new dsp and return old one */ 227/* set a new dsp and return old one */
@@ -258,23 +268,20 @@ void sound_set_pitch(int permille)
258 dsp_configure(DSP_SWITCH_FREQUENCY, dsp->codec_frequency); 268 dsp_configure(DSP_SWITCH_FREQUENCY, dsp->codec_frequency);
259} 269}
260 270
261/* Convert at most count samples to the internal format, if needed. Returns 271/* Convert count samples to the internal format, if needed. Updates src
262 * number of samples ready for further processing. Updates src to point 272 * to point past the samples "consumed" and dst is set to point to the
263 * past the samples "consumed" and dst is set to point to the samples to 273 * samples to consume. Note that for mono, dst[0] equals dst[1], as there
264 * consume. Note that for mono, dst[0] equals dst[1], as there is no point 274 * is no point in processing the same data twice.
265 * in processing the same data twice.
266 */ 275 */
267 276
268/* convert count 16-bit mono to 32-bit mono */ 277/* convert count 16-bit mono to 32-bit mono */
269static int sample_input_lte_native_mono( 278static void sample_input_lte_native_mono(
270 int count, const char *src[], int32_t *dst[]) 279 int count, const char *src[], int32_t *dst[])
271{ 280{
272 count = MIN(SAMPLE_BUF_COUNT/2, count);
273
274 const int16_t *s = (int16_t *) src[0]; 281 const int16_t *s = (int16_t *) src[0];
275 const int16_t * const send = s + count; 282 const int16_t * const send = s + count;
276 int32_t *d = dst[0] = dst[1] = sample_buf; 283 int32_t *d = dst[0] = dst[1] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
277 const int scale = WORD_SHIFT; 284 int scale = WORD_SHIFT;
278 285
279 do 286 do
280 { 287 {
@@ -283,21 +290,17 @@ static int sample_input_lte_native_mono(
283 while (s < send); 290 while (s < send);
284 291
285 src[0] = (char *)s; 292 src[0] = (char *)s;
286
287 return count;
288} 293}
289 294
290/* convert count 16-bit interleaved stereo to 32-bit noninterleaved */ 295/* convert count 16-bit interleaved stereo to 32-bit noninterleaved */
291static int sample_input_lte_native_i_stereo( 296static void sample_input_lte_native_i_stereo(
292 int count, const char *src[], int32_t *dst[]) 297 int count, const char *src[], int32_t *dst[])
293{ 298{
294 count = MIN(SAMPLE_BUF_COUNT/2, count);
295
296 const int32_t *s = (int32_t *) src[0]; 299 const int32_t *s = (int32_t *) src[0];
297 const int32_t * const send = s + count; 300 const int32_t * const send = s + count;
298 int32_t *dl = dst[0] = sample_buf; 301 int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
299 int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2; 302 int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL];
300 const int scale = WORD_SHIFT; 303 int scale = WORD_SHIFT;
301 304
302 do 305 do
303 { 306 {
@@ -313,22 +316,18 @@ static int sample_input_lte_native_i_stereo(
313 while (s < send); 316 while (s < send);
314 317
315 src[0] = (char *)s; 318 src[0] = (char *)s;
316
317 return count;
318} 319}
319 320
320/* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */ 321/* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */
321static int sample_input_lte_native_ni_stereo( 322static void sample_input_lte_native_ni_stereo(
322 int count, const char *src[], int32_t *dst[]) 323 int count, const char *src[], int32_t *dst[])
323{ 324{
324 count = MIN(SAMPLE_BUF_COUNT/2, count);
325
326 const int16_t *sl = (int16_t *) src[0]; 325 const int16_t *sl = (int16_t *) src[0];
327 const int16_t *sr = (int16_t *) src[1]; 326 const int16_t *sr = (int16_t *) src[1];
328 const int16_t * const slend = sl + count; 327 const int16_t * const slend = sl + count;
329 int32_t *dl = dst[0] = sample_buf; 328 int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
330 int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2; 329 int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL];
331 const int scale = WORD_SHIFT; 330 int scale = WORD_SHIFT;
332 331
333 do 332 do
334 { 333 {
@@ -339,35 +338,24 @@ static int sample_input_lte_native_ni_stereo(
339 338
340 src[0] = (char *)sl; 339 src[0] = (char *)sl;
341 src[1] = (char *)sr; 340 src[1] = (char *)sr;
342
343 return count;
344} 341}
345 342
346/* convert count 32-bit mono to 32-bit mono */ 343/* convert count 32-bit mono to 32-bit mono */
347static int sample_input_gt_native_mono( 344static void sample_input_gt_native_mono(
348 int count, const char *src[], int32_t *dst[]) 345 int count, const char *src[], int32_t *dst[])
349{ 346{
350 count = MIN(SAMPLE_BUF_COUNT/2, count);
351
352 dst[0] = dst[1] = (int32_t *)src[0]; 347 dst[0] = dst[1] = (int32_t *)src[0];
353 src[0] = (char *)(dst[0] + count); 348 src[0] = (char *)(dst[0] + count);
354
355 return count;
356} 349}
357 350
358/* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */ 351/* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */
359static int sample_input_gt_native_i_stereo( 352static void sample_input_gt_native_i_stereo(
360 int count, const char *src[], int32_t *dst[]) 353 int count, const char *src[], int32_t *dst[])
361{ 354{
362 count = MIN(SAMPLE_BUF_COUNT/2, count);
363
364 const int32_t *s = (int32_t *)src[0]; 355 const int32_t *s = (int32_t *)src[0];
365 const int32_t * const send = s + 2*count; 356 const int32_t * const send = s + 2*count;
366 int32_t *dl = sample_buf; 357 int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
367 int32_t *dr = sample_buf + SAMPLE_BUF_COUNT/2; 358 int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL];
368
369 dst[0] = dl;
370 dst[1] = dr;
371 359
372 do 360 do
373 { 361 {
@@ -377,22 +365,16 @@ static int sample_input_gt_native_i_stereo(
377 while (s < send); 365 while (s < send);
378 366
379 src[0] = (char *)send; 367 src[0] = (char *)send;
380
381 return count;
382} 368}
383 369
384/* convert 32 bit-noninterleaved stereo to 32-bit noninterleaved stereo */ 370/* convert 32 bit-noninterleaved stereo to 32-bit noninterleaved stereo */
385static int sample_input_gt_native_ni_stereo( 371static void sample_input_gt_native_ni_stereo(
386 int count, const char *src[], int32_t *dst[]) 372 int count, const char *src[], int32_t *dst[])
387{ 373{
388 count = MIN(SAMPLE_BUF_COUNT/2, count);
389
390 dst[0] = (int32_t *)src[0]; 374 dst[0] = (int32_t *)src[0];
391 dst[1] = (int32_t *)src[1]; 375 dst[1] = (int32_t *)src[1];
392 src[0] = (char *)(dst[0] + count); 376 src[0] = (char *)(dst[0] + count);
393 src[1] = (char *)(dst[1] + count); 377 src[1] = (char *)(dst[1] + count);
394
395 return count;
396} 378}
397 379
398/** 380/**
@@ -573,12 +555,6 @@ static void sample_output_new_format(void)
573 dsp->output_samples = sample_output_functions[out]; 555 dsp->output_samples = sample_output_functions[out];
574} 556}
575 557
576static void resampler_set_delta(int frequency)
577{
578 dsp->data.resample_data.delta = (unsigned long)
579 frequency * 65536LL / NATIVE_FREQUENCY;
580}
581
582/** 558/**
583 * Linear interpolation resampling that introduces a one sample delay because 559 * Linear interpolation resampling that introduces a one sample delay because
584 * of our inability to look into the future at the end of a frame. 560 * of our inability to look into the future at the end of a frame.
@@ -587,9 +563,9 @@ static void resampler_set_delta(int frequency)
587static int dsp_downsample(int count, struct dsp_data *data, 563static int dsp_downsample(int count, struct dsp_data *data,
588 int32_t *src[], int32_t *dst[]) 564 int32_t *src[], int32_t *dst[])
589{ 565{
590 int ch = data->num_channels - 1; 566 int ch = data->num_channels - 1;
591 long delta = data->resample_data.delta; 567 uint32_t delta = data->resample_data.delta;
592 long phase, pos; 568 uint32_t phase, pos;
593 int32_t *d; 569 int32_t *d;
594 570
595 /* Rolled channel loop actually showed slightly faster. */ 571 /* Rolled channel loop actually showed slightly faster. */
@@ -610,7 +586,7 @@ static int dsp_downsample(int count, struct dsp_data *data,
610 if (pos > 0) 586 if (pos > 0)
611 last = s[pos - 1]; 587 last = s[pos - 1];
612 588
613 while (pos < count) 589 while (pos < (uint32_t)count)
614 { 590 {
615 *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); 591 *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
616 phase += delta; 592 phase += delta;
@@ -625,12 +601,12 @@ static int dsp_downsample(int count, struct dsp_data *data,
625 return d - dst[0]; 601 return d - dst[0];
626} 602}
627 603
628static int dsp_upsample(int count, struct dsp_data *data, 604static int dsp_upsample(int count, struct dsp_data *data,
629 int32_t *src[], int32_t *dst[]) 605 int32_t *src[], int32_t *dst[])
630{ 606{
631 int ch = data->num_channels - 1; 607 int ch = data->num_channels - 1;
632 long delta = data->resample_data.delta; 608 uint32_t delta = data->resample_data.delta;
633 long phase, pos; 609 uint32_t phase, pos;
634 int32_t *d; 610 int32_t *d;
635 611
636 /* Rolled channel loop actually showed slightly faster. */ 612 /* Rolled channel loop actually showed slightly faster. */
@@ -653,7 +629,7 @@ static int dsp_upsample(int count, struct dsp_data *data,
653 pos = phase >> 16; 629 pos = phase >> 16;
654 } 630 }
655 631
656 while (pos < count) 632 while (pos < (uint32_t)count)
657 { 633 {
658 last = s[pos - 1]; 634 last = s[pos - 1];
659 *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); 635 *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
@@ -669,24 +645,43 @@ static int dsp_upsample(int count, struct dsp_data *data,
669} 645}
670#endif /* DSP_HAVE_ASM_RESAMPLING */ 646#endif /* DSP_HAVE_ASM_RESAMPLING */
671 647
648static void resampler_new_delta(void)
649{
650 dsp->data.resample_data.delta = (unsigned long)
651 dsp->frequency * 65536LL / NATIVE_FREQUENCY;
652
653 if (dsp->frequency == NATIVE_FREQUENCY)
654 {
655 /* NOTE: If fully glitch-free transitions from no resampling to
656 resampling are desired, last_sample history should be maintained
657 even when not resampling. */
658 dsp->resample = NULL;
659 dsp->data.resample_data.phase = 0;
660 dsp->data.resample_data.last_sample[0] = 0;
661 dsp->data.resample_data.last_sample[1] = 0;
662 }
663 else if (dsp->frequency < NATIVE_FREQUENCY)
664 dsp->resample = dsp_upsample;
665 else
666 dsp->resample = dsp_downsample;
667}
668
672/* Resample count stereo samples. Updates the src array, if resampling is 669/* Resample count stereo samples. Updates the src array, if resampling is
673 * done, to refer to the resampled data. Returns number of stereo samples 670 * done, to refer to the resampled data. Returns number of stereo samples
674 * for further processing. 671 * for further processing.
675 */ 672 */
676static inline int resample(int count, int32_t *src[]) 673static inline int resample(int count, int32_t *src[])
677{ 674{
678 if (dsp->resample) 675 int32_t *dst[2] =
679 { 676 {
680 int32_t *dst[2] = 677 &resample_buf[RESAMPLE_BUF_LEFT_CHANNEL],
681 { 678 &resample_buf[RESAMPLE_BUF_RIGHT_CHANNEL],
682 resample_buf, 679 };
683 resample_buf + RESAMPLE_BUF_COUNT/2,
684 };
685 680
686 count = dsp->resample(count, &dsp->data, src, dst); 681 count = dsp->resample(count, &dsp->data, src, dst);
687 src[0] = dst[0]; 682
688 src[1] = dst[dsp->data.num_channels - 1]; 683 src[0] = dst[0];
689 } 684 src[1] = dst[dsp->data.num_channels - 1];
690 685
691 return count; 686 return count;
692} 687}
@@ -810,30 +805,59 @@ void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff)
810 c[2] <<= 4; 805 c[2] <<= 4;
811} 806}
812 807
808/* Apply a constant gain to the samples (e.g., for ReplayGain).
809 * Note that this must be called before the resampler.
810 */
811#ifndef DSP_HAVE_ASM_APPLY_GAIN
812static void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
813{
814 const int32_t gain = data->gain;
815 int ch = data->num_channels - 1;
816
817 do
818 {
819 int32_t *s = buf[ch];
820 int32_t *d = buf[ch];
821 int32_t samp = *s++;
822 int i = 0;
823
824 do
825 {
826 FRACMUL_8_LOOP(samp, gain, s, d);
827 }
828 while (++i < count);
829 }
830 while (--ch >= 0);
831}
832#endif /* DSP_HAVE_ASM_APPLY_GAIN */
833
813/* Combine all gains to a global gain. */ 834/* Combine all gains to a global gain. */
814static void set_gain(struct dsp_config *dsp) 835static void set_gain(struct dsp_config *dsp)
815{ 836{
816 dsp->gain = DEFAULT_GAIN; 837 dsp->data.gain = DEFAULT_GAIN;
817 838
818 /* Replay gain not relevant to voice */ 839 /* Replay gain not relevant to voice */
819 if (dsp == audio_dsp && replaygain) 840 if (dsp == audio_dsp && replaygain)
820 { 841 {
821 dsp->gain = replaygain; 842 dsp->data.gain = replaygain;
822 } 843 }
823 844
824 if (eq_enabled && eq_precut) 845 if (eq_enabled && eq_precut)
825 { 846 {
826 dsp->gain = (long) (((int64_t) dsp->gain * eq_precut) >> 24); 847 dsp->data.gain =
848 (long) (((int64_t) dsp->data.gain * eq_precut) >> 24);
827 } 849 }
828 850
829 if (dsp->gain == DEFAULT_GAIN) 851 if (dsp->data.gain == DEFAULT_GAIN)
830 { 852 {
831 dsp->gain = 0; 853 dsp->data.gain = 0;
832 } 854 }
833 else 855 else
834 { 856 {
835 dsp->gain >>= 1; 857 dsp->data.gain >>= 1;
836 } 858 }
859
860 dsp->apply_gain = dsp->data.gain != 0 ? dsp_apply_gain : NULL;
837} 861}
838 862
839/** 863/**
@@ -927,50 +951,6 @@ static void eq_process(int count, int32_t *buf[])
927 } 951 }
928} 952}
929 953
930/* Apply a constant gain to the samples (e.g., for ReplayGain). May update
931 * the src array if gain was applied.
932 * Note that this must be called before the resampler.
933 */
934static void apply_gain(int count, int32_t *buf[])
935{
936 int32_t *sl, *sr;
937 int32_t s, *d;
938 long gain;
939 int i;
940
941 if (new_gain)
942 {
943 /* Gain has changed */
944 dsp_set_replaygain();
945 if (dsp->gain == 0)
946 return; /* No gain to apply now */
947 }
948
949 sl = buf[0], sr = buf[1];
950 gain = dsp->gain;
951
952 if (sl != sr)
953 {
954 d = &sample_buf[SAMPLE_BUF_COUNT / 2];
955 buf[1] = d;
956 s = *sr++;
957
958 for (i = 0; i < count; i++)
959 FRACMUL_8_LOOP(s, gain, sr, d);
960 }
961 else
962 {
963 buf[1] = &sample_buf[0];
964 }
965
966 d = &sample_buf[0];
967 buf[0] = d;
968 s = *sl++;
969
970 for (i = 0; i < count; i++)
971 FRACMUL_8_LOOP(s, gain, sl, d);
972}
973
974void dsp_set_stereo_width(int value) 954void dsp_set_stereo_width(int value)
975{ 955{
976 long width, straight, cross; 956 long width, straight, cross;
@@ -993,35 +973,6 @@ void dsp_set_stereo_width(int value)
993 dsp_sw_cross = cross << 8; 973 dsp_sw_cross = cross << 8;
994} 974}
995 975
996/**
997 * Implements the different channel configurations and stereo width.
998 */
999
1000/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for
1001 * completeness. */
1002#if 0
1003static void channels_process_sound_chan_stereo(int count, int32_t *buf[])
1004{
1005 /* The channels are each just themselves */
1006 (void)count; (void)buf;
1007}
1008#endif
1009
1010#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO
1011static void channels_process_sound_chan_mono(int count, int32_t *buf[])
1012{
1013 int32_t *sl = buf[0], *sr = buf[1];
1014
1015 do
1016 {
1017 int32_t lr = *sl/2 + *sr/2;
1018 *sl++ = lr;
1019 *sr++ = lr;
1020 }
1021 while (--count > 0);
1022}
1023#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */
1024
1025#if CONFIG_CODEC == SWCODEC 976#if CONFIG_CODEC == SWCODEC
1026 977
1027#ifdef HAVE_SW_TONE_CONTROLS 978#ifdef HAVE_SW_TONE_CONTROLS
@@ -1063,6 +1014,35 @@ int dsp_callback(int msg, intptr_t param)
1063} 1014}
1064#endif 1015#endif
1065 1016
1017/**
1018 * Implements the different channel configurations and stereo width.
1019 */
1020
1021/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for
1022 * completeness. */
1023#if 0
1024static void channels_process_sound_chan_stereo(int count, int32_t *buf[])
1025{
1026 /* The channels are each just themselves */
1027 (void)count; (void)buf;
1028}
1029#endif
1030
1031#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO
1032static void channels_process_sound_chan_mono(int count, int32_t *buf[])
1033{
1034 int32_t *sl = buf[0], *sr = buf[1];
1035
1036 do
1037 {
1038 int32_t lr = *sl/2 + *sr/2;
1039 *sl++ = lr;
1040 *sr++ = lr;
1041 }
1042 while (--count > 0);
1043}
1044#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */
1045
1066#ifndef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM 1046#ifndef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
1067static void channels_process_sound_chan_custom(int count, int32_t *buf[]) 1047static void channels_process_sound_chan_custom(int count, int32_t *buf[])
1068{ 1048{
@@ -1151,30 +1131,47 @@ int dsp_process(char *dst, const char *src[], int count)
1151 coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE); 1131 coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
1152#endif 1132#endif
1153 1133
1134 if (new_gain)
1135 dsp_set_replaygain(); /* Gain has changed */
1136
1137 /* Testing function pointers for NULL is preferred since the pointer
1138 will be preloaded to be used for the call if not. */
1154 while (count > 0) 1139 while (count > 0)
1155 { 1140 {
1156 samples = dsp->input_samples(count, src, tmp); 1141 samples = MIN(SAMPLE_BUF_COUNT/2, count);
1157 count -= samples; 1142 count -= samples;
1158 if (dsp->gain != 0) 1143
1159 apply_gain(samples, tmp); 1144 dsp->input_samples(samples, src, tmp);
1160 if ((samples = resample(samples, tmp)) <= 0) 1145
1146 if (dsp->apply_gain)
1147 dsp->apply_gain(samples, &dsp->data, tmp);
1148
1149 if (dsp->resample && (samples = resample(samples, tmp)) <= 0)
1161 break; /* I'm pretty sure we're downsampling here */ 1150 break; /* I'm pretty sure we're downsampling here */
1151
1162 if (dsp->apply_crossfeed) 1152 if (dsp->apply_crossfeed)
1163 dsp->apply_crossfeed(samples, tmp); 1153 dsp->apply_crossfeed(samples, tmp);
1154
1164 /* TODO: EQ and tone controls need separate structs for audio and voice 1155 /* TODO: EQ and tone controls need separate structs for audio and voice
1165 * DSP processing thanks to filter history. isn't really audible now, but 1156 * DSP processing thanks to filter history. isn't really audible now, but
1166 * might be the day we start handling voice more delicately. 1157 * might be the day we start handling voice more delicately. Planned
1158 * changes may well run all relevant channels through the same EQ so
1159 * perhaps not.
1167 */ 1160 */
1168 if (eq_enabled) 1161 if (eq_enabled)
1169 eq_process(samples, tmp); 1162 eq_process(samples, tmp);
1163
1170#ifdef HAVE_SW_TONE_CONTROLS 1164#ifdef HAVE_SW_TONE_CONTROLS
1171 if ((bass | treble) != 0) 1165 if ((bass | treble) != 0)
1172 eq_filter(tmp, &tone_filter, samples, dsp->data.num_channels, 1166 eq_filter(tmp, &tone_filter, samples, dsp->data.num_channels,
1173 FILTER_BISHELF_SHIFT); 1167 FILTER_BISHELF_SHIFT);
1174#endif 1168#endif
1169
1175 if (dsp->channels_process) 1170 if (dsp->channels_process)
1176 dsp->channels_process(samples, tmp); 1171 dsp->channels_process(samples, tmp);
1172
1177 dsp->output_samples(samples, &dsp->data, tmp, (int16_t *)dst); 1173 dsp->output_samples(samples, &dsp->data, tmp, (int16_t *)dst);
1174
1178 written += samples; 1175 written += samples;
1179 dst += samples * sizeof (int16_t) * 2; 1176 dst += samples * sizeof (int16_t) * 2;
1180 yield(); 1177 yield();
@@ -1245,9 +1242,6 @@ bool dsp_configure(int setting, intptr_t value)
1245 if (dsp == audio_dsp) 1242 if (dsp == audio_dsp)
1246 { 1243 {
1247 *var = value; 1244 *var = value;
1248 /* In case current gain is zero, force at least one call
1249 to apply_gain or apply_gain won't pick up on new_gain */
1250 audio_dsp->gain = -1;
1251 new_gain = true; 1245 new_gain = true;
1252 } 1246 }
1253 } 1247 }
@@ -1282,15 +1276,7 @@ bool dsp_configure(int setting, intptr_t value)
1282 else 1276 else
1283 dsp->frequency = dsp->codec_frequency; 1277 dsp->frequency = dsp->codec_frequency;
1284 1278
1285 resampler_set_delta(dsp->frequency); 1279 resampler_new_delta();
1286
1287 if (dsp->frequency == NATIVE_FREQUENCY)
1288 dsp->resample = NULL;
1289 else if (dsp->frequency < NATIVE_FREQUENCY)
1290 dsp->resample = dsp_upsample;
1291 else
1292 dsp->resample = dsp_downsample;
1293
1294 break; 1280 break;
1295 1281
1296 case DSP_SET_SAMPLE_DEPTH: 1282 case DSP_SET_SAMPLE_DEPTH:
@@ -1348,7 +1334,7 @@ bool dsp_configure(int setting, intptr_t value)
1348 case DSP_FLUSH: 1334 case DSP_FLUSH:
1349 memset(&dsp->data.resample_data, 0, 1335 memset(&dsp->data.resample_data, 0,
1350 sizeof (dsp->data.resample_data)); 1336 sizeof (dsp->data.resample_data));
1351 resampler_set_delta(dsp->frequency); 1337 resampler_new_delta();
1352 dither_init(); 1338 dither_init();
1353 break; 1339 break;
1354 1340
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index f8df337b37..14875d21d8 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -22,32 +22,61 @@
22#ifndef _DSP_ASM_H 22#ifndef _DSP_ASM_H
23#define _DSP_ASM_H 23#define _DSP_ASM_H
24 24
25/* Set the appropriate #defines based on CPU or whatever matters */
25#ifndef SIMULATOR 26#ifndef SIMULATOR
26 27
27#if defined(CPU_COLDFIRE) || defined(CPU_ARM) 28#if defined(CPU_ARM)
29#define DSP_HAVE_ASM_RESAMPLING
28#define DSP_HAVE_ASM_CROSSFEED 30#define DSP_HAVE_ASM_CROSSFEED
29void apply_crossfeed(int count, int32_t *buf[]); 31#elif defined (CPU_COLDFIRE)
32#define DSP_HAVE_ASM_APPLY_GAIN
30#define DSP_HAVE_ASM_RESAMPLING 33#define DSP_HAVE_ASM_RESAMPLING
31int dsp_downsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); 34#define DSP_HAVE_ASM_CROSSFEED
32int dsp_upsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]);
33#endif /* defined(CPU_COLDFIRE) || defined(CPU_ARM) */
34
35#if defined (CPU_COLDFIRE)
36#define DSP_HAVE_ASM_SOUND_CHAN_MONO 35#define DSP_HAVE_ASM_SOUND_CHAN_MONO
37void channels_process_sound_chan_mono(int count, int32_t *buf[]);
38#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM 36#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
39void channels_process_sound_chan_custom(int count, int32_t *buf[]);
40#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE 37#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
41void channels_process_sound_chan_karaoke(int count, int32_t *buf[]);
42
43#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO 38#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
44void sample_output_mono(int count, struct dsp_data *data,
45 int32_t *src[], int16_t *dst);
46#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO 39#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
47void sample_output_stereo(int count, struct dsp_data *data,
48 int32_t *src[], int16_t *dst);
49#endif /* CPU_COLDFIRE */ 40#endif /* CPU_COLDFIRE */
50 41
51#endif /* SIMULATOR */ 42#endif /* SIMULATOR */
52 43
44/* Declare prototypes based upon what's #defined above */
45#ifdef DSP_HAVE_ASM_CROSSFEED
46void apply_crossfeed(int count, int32_t *buf[]);
47#endif
48
49#ifdef DSP_HAVE_ASM_APPLY_GAIN
50void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]);
51#endif /* DSP_HAVE_ASM_APPLY_GAIN* */
52
53#ifdef DSP_HAVE_ASM_RESAMPLING
54int dsp_upsample(int count, struct dsp_data *data,
55 int32_t *src[], int32_t *dst[]);
56int dsp_downsample(int count, struct dsp_data *data,
57 int32_t *src[], int32_t *dst[]);
58#endif /* DSP_HAVE_ASM_RESAMPLING */
59
60#ifdef DSP_HAVE_ASM_SOUND_CHAN_MONO
61void channels_process_sound_chan_mono(int count, int32_t *buf[]);
62#endif
63
64#ifdef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
65void channels_process_sound_chan_custom(int count, int32_t *buf[]);
66#endif
67
68#ifdef DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
69void channels_process_sound_chan_karaoke(int count, int32_t *buf[]);
70#endif
71
72#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
73void sample_output_stereo(int count, struct dsp_data *data,
74 int32_t *src[], int16_t *dst);
75#endif
76
77#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
78void sample_output_mono(int count, struct dsp_data *data,
79 int32_t *src[], int16_t *dst);
80#endif
81
53#endif /* _DSP_ASM_H */ 82#endif /* _DSP_ASM_H */
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
index af9ac1fa4b..e5d3ee8c55 100644
--- a/apps/dsp_cf.S
+++ b/apps/dsp_cf.S
@@ -19,68 +19,117 @@
19 ****************************************************************************/ 19 ****************************************************************************/
20 20
21/**************************************************************************** 21/****************************************************************************
22 * void apply_crossfeed(int count, int32_t *src[]) 22 * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
23 */ 23 */
24 .section .text 24 .section .text
25 .align 2
26 .global dsp_apply_gain
27dsp_apply_gain:
28 lea.l -20(%sp), %sp | save registers
29 movem.l %d2-%d4/%a2-%a3, (%sp) |
30 movem.l 28(%sp), %a0-%a1 | %a0 = data,
31 | %a1 = buf
32 move.l 4(%a0), %d1 | %d1 = data->num_channels
33 move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23)
3410: | channel loop |
35 move.l 24(%sp), %d0 | %d0 = count
36 move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1]
37 move.l %a2, %a3 | %a3 = d = s
38 move.l (%a2)+, %d2 | %d2 = *s++,
39 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
40 subq.l #1, %d0 | --count > 0 ? : effectively n++
41 ble.b 30f | loop done | no? finish up
4220: | loop |
43 move.l %accext01, %d4 | fetch S(n-1)[7:0]
44 movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d5[31:0]
45 asl.l #8, %d3 | *s++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
46 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
47 move.b %d4, %d3 |
48 move.l %d3, (%a3)+ |
49 subq.l #1, %d0 | --count > 0 ? : effectively n++
50 bgt.b 20b | loop | yes? do more samples
5130: | loop done |
52 move.l %accext01, %d4 | fetch S(n-1)[7:0]
53 movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
54 asl.l #8, %d3 | *s = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
55 move.b %d4, %d3 |
56 move.l %d3, (%a3) |
57 subq.l #1, %d1 | next channel
58 bgt.b 10b | channel loop |
59 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
60 lea.l 20(%sp), %sp | cleanup stack
61 rts |
62 .size dsp_apply_gain,.-dsp_apply_gain
63
64/****************************************************************************
65 * void apply_crossfeed(int count, int32_t *buf[])
66 */
67 .section .text
68 .align 2
25 .global apply_crossfeed 69 .global apply_crossfeed
26apply_crossfeed: 70apply_crossfeed:
27 lea.l -44(%sp), %sp 71 lea.l -44(%sp), %sp |
28 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs 72 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
29 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src 73 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src
30 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] 74 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1]
31 lea.l crossfeed_data, %a1 75 lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data
32 move.l (%a1)+, %a6 | a6 = direct gain 76 move.l (%a1)+, %d6 | %d6 = direct gain
33 movem.l 12(%a1), %d0-%d3 | fetch filter history samples 77 movem.l 12(%a1), %d0-%d3 | fetch filter history samples
34 move.l 132(%a1), %a0 | fetch delay line address 78 move.l 132(%a1), %a0 | fetch delay line address
35 movem.l (%a1), %a1-%a3 | load filter coefs 79 movem.l (%a1), %a1-%a3 | load filter coefs
80 lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit
81 bra.b 20f | loop start | go to loop start point
36 /* Register usage in loop: 82 /* Register usage in loop:
37 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), 83 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
38 * %a4 = src[0], %a5 = src[1], %a6 = direct gain, 84 * %a4 = buf[0], %a5 = buf[1],
85 * %a6 = delay line pointer wrap limit,
39 * %d0..%d3 = history 86 * %d0..%d3 = history
40 * %d4..%d6 = temp. 87 * %d4..%d5 = temp.
88 * %d6 = direct gain,
41 * %d7 = count 89 * %d7 = count
42 */ 90 */
43.cfloop: 9110: | loop |
44 mac.l %a2, %d0, 4(%a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n] 92 movclr.l %acc0, %d4 | write outputs
45 mac.l %a1, %d0 , %acc0 | acc += b0*dr[n] 93 move.l %d4, (%a4)+ | .
46 mac.l %a3, %d1, (%a4), %d4, %acc0 | acc += a1*y_l[n - 1], load L 94 movclr.l %acc1, %d5 | .
47 move.l %acc0, %d1 | get filtered delayed sample 95 move.l %d5, (%a5)+ | .
48 mac.l %a6, %d4, %acc0 | acc += gain*x_l[n] 9620: | loop start |
49 movclr.l %acc0, %d6 | 97 mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n]
50 move.l %d6, (%a4)+ | write result 98 mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n]
51 99 mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R
52 mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n] 100 mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n]
53 mac.l %a1, %d2 , %acc0 | acc += b0*dl[n] 101 mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n]
54 mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load R 102 mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L
55 movem.l %d4-%d5, (%a0) | save left & right inputs to delay line 103 movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line
56 move.l %acc0, %d3 | get filtered delayed sample 104 move.l %acc0, %d3 | get filtered delayed left sample (y_l[n])
57 mac.l %a6, %d5, %acc0 | acc += gain*x_r[n] 105 move.l %acc1, %d1 | get filtered delayed right sample (y_r[n])
58 lea.l 8(%a0), %a0 | increment delay pointer 106 mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n]
59 movclr.l %acc0, %d6 | 107 mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n]
60 move.l %d6, (%a5)+ | write result 108 cmp.l %a6, %a0 | wrap %a0 if passed end
61 109 bhs.b 30f | wrap buffer |
62 cmpa.l #crossfeed_data+136, %a0| wrap a0 if passed end 110 .word 0x51fb | tpf.l | trap the buffer wrap
63 bge.b .cfwrap | 11130: | wrap buffer | ...fwd taken branches more costly
64 .word 0x51fb | tpf.l - trap the buffer wrap 112 lea.l -104(%a0), %a0 | wrap it up
65.cfwrap: 113 subq.l #1, %d7 | --count > 0 ?
66 lea.l -104(%a0), %a0 | wrap 114 bgt.b 10b | loop | yes? do more
67 subq.l #1, %d7 | --count < 0 ? 115 movclr.l %acc0, %d4 | write last outputs
68 bgt.b .cfloop | 116 move.l %d4, (%a4) | .
117 movclr.l %acc1, %d5 | .
118 move.l %d5, (%a5) | .
69 lea.l crossfeed_data+16, %a1 | save data back to struct 119 lea.l crossfeed_data+16, %a1 | save data back to struct
70 movem.l %d0-%d3, (%a1) | ...history 120 movem.l %d0-%d3, (%a1) | ...history
71 move.l %a0, 120(%a1) | ...delay_p 121 move.l %a0, 120(%a1) | ...delay_p
72 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs 122 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
73 lea.l 44(%sp), %sp 123 lea.l 44(%sp), %sp |
74 rts 124 rts |
75.cfend: 125 .size apply_crossfeed,.-apply_crossfeed
76 .size apply_crossfeed,.cfend-apply_crossfeed
77
78 126
79/**************************************************************************** 127/****************************************************************************
80 * int dsp_downsample(int count, struct dsp_data *data, 128 * int dsp_downsample(int count, struct dsp_data *data,
81 * int32_t *src[], int32_t *dst[]) 129 * int32_t *src[], int32_t *dst[])
82 */ 130 */
83 .section .text 131 .section .text
132 .align 2
84 .global dsp_downsample 133 .global dsp_downsample
85dsp_downsample: 134dsp_downsample:
86 lea.l -40(%sp), %sp | save non-clobberables 135 lea.l -40(%sp), %sp | save non-clobberables
@@ -92,7 +141,7 @@ dsp_downsample:
92 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels 141 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels
93 | %d4 = delta = data->resample_data.delta 142 | %d4 = delta = data->resample_data.delta
94 moveq.l #16, %d7 | %d7 = shift 143 moveq.l #16, %d7 | %d7 = shift
95.dschannel_loop: 14410: | channel loop |
96 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase 145 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
97 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] 146 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
98 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] 147 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
@@ -102,15 +151,15 @@ dsp_downsample:
102 move.l %d5, %d6 | %d6 = pos = phase >> 16 151 move.l %d5, %d6 | %d6 = pos = phase >> 16
103 lsr.l %d7, %d6 | 152 lsr.l %d7, %d6 |
104 cmp.l %d2, %d6 | past end of samples? 153 cmp.l %d2, %d6 | past end of samples?
105 bge.b .dsloop_skip | yes? skip loop 154 bge.b 40f | skip resample loop| yes? skip loop
106 tst.l %d6 | need last sample of prev. frame? 155 tst.l %d6 | need last sample of prev. frame?
107 bne.b .dsloop | no? start main loop 156 bne.b 20f | resample loop | no? start main loop
108 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] 157 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
109 bra.b .dsuse_last_start | start with last (last in %d0) 158 bra.b 30f | resample start last | start with last (last in %d0)
110.dsloop: 15920: | resample loop |
111 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] 160 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
112 movem.l (%a5), %d0-%d1 | 161 movem.l (%a5), %d0-%d1 |
113.dsuse_last_start: 16230: | resample start last |
114 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] 163 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
115 move.l %d0, %acc0 | %acc0 = previous sample 164 move.l %d0, %acc0 | %acc0 = previous sample
116 move.l %d5, %d0 | frac = (phase << 16) >> 1 165 move.l %d5, %d0 | frac = (phase << 16) >> 1
@@ -123,11 +172,11 @@ dsp_downsample:
123 movclr.l %acc0, %d0 | 172 movclr.l %acc0, %d0 |
124 move.l %d0, (%a4)+ | *d++ = %d0 173 move.l %d0, (%a4)+ | *d++ = %d0
125 cmp.l %d2, %d6 | pos < count? 174 cmp.l %d2, %d6 | pos < count?
126 blt.b .dsloop | yes? continue resampling 175 blt.b 20b | resample loop | yes? continue resampling
127.dsloop_skip: 17640: | skip resample loop |
128 subq.l #1, %d3 | ch > 0? 177 subq.l #1, %d3 | ch > 0?
129 bgt.b .dschannel_loop | yes? process next channel 178 bgt.b 10b | channel loop | yes? process next channel
130 asl.l %d7, %d2 | wrap phase to start of next frame 179 lsl.l %d7, %d2 | wrap phase to start of next frame
131 sub.l %d2, %d5 | data->resample_data.phase = 180 sub.l %d2, %d5 | data->resample_data.phase =
132 move.l %d5, 12(%a0) | ... phase - (count << 16) 181 move.l %d5, 12(%a0) | ... phase - (count << 16)
133 move.l %a4, %d0 | return d - d[0] 182 move.l %a4, %d0 | return d - d[0]
@@ -136,14 +185,14 @@ dsp_downsample:
136 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables 185 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
137 lea.l 40(%sp), %sp | cleanup stack 186 lea.l 40(%sp), %sp | cleanup stack
138 rts | buh-bye 187 rts | buh-bye
139.dsend: 188 .size dsp_downsample,.-dsp_downsample
140 .size dsp_downsample,.dsend-dsp_downsample
141 189
142/**************************************************************************** 190/****************************************************************************
143 * int dsp_upsample(int count, struct dsp_data *dsp, 191 * int dsp_upsample(int count, struct dsp_data *dsp,
144 * in32_t *src[], int32_t *dst[]) 192 * int32_t *src[], int32_t *dst[])
145 */ 193 */
146 .section .text 194 .section .text
195 .align 2
147 .global dsp_upsample 196 .global dsp_upsample
148dsp_upsample: 197dsp_upsample:
149 lea.l -40(%sp), %sp | save non-clobberables 198 lea.l -40(%sp), %sp | save non-clobberables
@@ -154,47 +203,55 @@ dsp_upsample:
154 | %a2 = dst 203 | %a2 = dst
155 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels 204 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
156 | %d4 = delta = data->resample_data.delta 205 | %d4 = delta = data->resample_data.delta
157 swap %d4 | swap delta to high word to use 206 swap %d4 | swap delta to high word to use...
158 | carries to increment position 207 | ...carries to increment position
159.uschannel_loop: 20810: | channel loop |
160 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase 209 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
161 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] 210 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
162 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] 211 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
163 lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count] 212 lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1]
164 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] 213 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
165 move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] 214 move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
166 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] 215 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
216 move.l (%a3)+, %d1 | fetch first sample - might throw this...
217 | ...away later but we'll be preincremented
218 move.l %d1, %d6 | save sample value
219 sub.l %d0, %d1 | %d1 = diff = s[0] - last
167 swap %d5 | swap phase to high word to use 220 swap %d5 | swap phase to high word to use
168 | carries to increment position 221 | carries to increment position
169 move.l %d5, %d6 | %d6 = pos = phase >> 16 222 move.l %d5, %d7 | %d7 = pos = phase >> 16
170 clr.w %d5 | 223 clr.w %d5 |
171 eor.l %d5, %d6 | pos == 0? 224 eor.l %d5, %d7 | pos == 0?
172 beq.b .usstart_0 | no? transistion from down 225 beq.b 40f | loop start | yes? start loop
173 cmp.l %d2, %d6 | past end of samples? 226 cmp.l %d2, %d7 | past end of samples?
174 bge.b .usloop_skip | yes? skip loop 227 bge.b 50f | skip resample loop| yes? go to next channel and collect info
175 lea.l -4(%a3, %d6.l*4), %a3 | %a3 = s = &s[pos-1] (previous) 228 lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1]
176 move.l (%a3)+, %d0 | %d0 = *s++ 229 movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos]
177 .word 0x51fa | tpf.w - trap next instruction 230 move.l %d1, %d6 | save sample value
178.usloop_1: 231 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
232 bra.b 40f | loop start |
23320: | next sample loop |
179 move.l %d6, %d0 | move previous sample to %d0 234 move.l %d6, %d0 | move previous sample to %d0
180.usstart_0:
181 move.l (%a3)+, %d1 | fetch next sample 235 move.l (%a3)+, %d1 | fetch next sample
182 move.l %d1, %d6 | save sample value 236 move.l %d1, %d6 | save sample value
183 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] 237 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
184.usloop_0: 23830: | same sample loop |
239 movclr.l %acc0, %d7 | %d7 = result
240 move.l %d7, (%a4)+ | *d++ = %d7
24140: | loop start |
185 lsr.l #1, %d5 | make phase into frac 242 lsr.l #1, %d5 | make phase into frac
243 move.l %d0, %acc0 | %acc0 = s[pos-1]
186 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac 244 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac
187 lsl.l #1, %d5 | restore frac to phase 245 lsl.l #1, %d5 | restore frac to phase
188 movclr.l %acc0, %d7 | %d7 = product
189 add.l %d0, %d7 | %d7 = last + product
190 move.l %d7, (%a4)+ | *d++ = %d7
191 add.l %d4, %d5 | phase += delta 246 add.l %d4, %d5 | phase += delta
192 bcc.b .usloop_0 | load next values? 247 bcc.b 30b | same sample loop | load next values?
193 cmp.l %a5, %a3 | src <= src_end? 248 cmp.l %a5, %a3 | src <= src_end?
194 ble.b .usloop_1 | yes? continue resampling 249 bls.b 20b | next sample loop | yes? continue resampling
195.usloop_skip: 250 movclr.l %acc0, %d7 | %d7 = result
251 move.l %d7, (%a4)+ | *d++ = %d7
25250: | skip resample loop |
196 subq.l #1, %d3 | ch > 0? 253 subq.l #1, %d3 | ch > 0?
197 bgt.b .uschannel_loop | yes? process next channel 254 bgt.b 10b | channel loop | yes? process next channel
198 swap %d5 | wrap phase to start of next frame 255 swap %d5 | wrap phase to start of next frame
199 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase 256 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
200 move.l %a4, %d0 | return d - d[0] 257 move.l %a4, %d0 | return d - d[0]
@@ -203,12 +260,7 @@ dsp_upsample:
203 asr.l #2, %d0 | convert bytes->samples 260 asr.l #2, %d0 | convert bytes->samples
204 lea.l 40(%sp), %sp | cleanup stack 261 lea.l 40(%sp), %sp | cleanup stack
205 rts | buh-bye 262 rts | buh-bye
206.usend: 263 .size dsp_upsample,.-dsp_upsample
207 .size dsp_upsample,.usend-dsp_upsample
208
209/* These routines might benefit from burst transfers but we'll keep them
210 * small for now since they're rather light weight
211 */
212 264
213/**************************************************************************** 265/****************************************************************************
214 * void channels_process_sound_chan_mono(int count, int32_t *buf[]) 266 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
@@ -216,31 +268,39 @@ dsp_upsample:
216 * Mix left and right channels 50/50 into a center channel. 268 * Mix left and right channels 50/50 into a center channel.
217 */ 269 */
218 .section .text 270 .section .text
271 .align 2
219 .global channels_process_sound_chan_mono 272 .global channels_process_sound_chan_mono
220channels_process_sound_chan_mono: 273channels_process_sound_chan_mono:
221 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 274 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
222 lea.l -12(%sp), %sp | save registers 275 lea.l -20(%sp), %sp | save registers
223 move.l %macsr, %d1 | 276 movem.l %d2-%d4/%a2-%a3, (%sp) |
224 movem.l %d1-%d3, (%sp) |
225 move.l #0xb0, %macsr | put emac in rounding fractional mode
226 movem.l (%a0), %a0-%a1 | get channel pointers 277 movem.l (%a0), %a0-%a1 | get channel pointers
278 move.l %a0, %a2 | use separate dst pointers since read
279 move.l %a1, %a3 | pointers run one ahead of write
227 move.l #0x40000000, %d3 | %d3 = 0.5 280 move.l #0x40000000, %d3 | %d3 = 0.5
2281: 281 move.l (%a0)+, %d1 | prime the input registers
229 move.l (%a0), %d1 | L = R = l/2 + r/2 282 move.l (%a1)+, %d2 |
230 mac.l %d1, %d3, (%a1), %d2, %acc0 | 283 mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
231 mac.l %d2, %d3, %acc0 | 284 mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
232 movclr.l %acc0, %d1 | 285 subq.l #1, %d0 |
233 move.l %d1, (%a0)+ | output to original buffer 286 ble.s 20f | loop done |
234 move.l %d1, (%a1)+ | 28710: | loop |
235 subq.l #1, %d0 | 288 movclr.l %acc0, %d4 | L = R = l/2 + r/2
236 bgt.s 1b | 289 mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
237 movem.l (%sp), %d1-%d3 | restore registers 290 mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
238 move.l %d1, %macsr | 291 move.l %d4, (%a2)+ | output to original buffer
239 lea.l 12(%sp), %sp | cleanup 292 move.l %d4, (%a3)+ |
240 rts 293 subq.l #1, %d0 |
241.cpmono_end: 294 bgt.s 10b | loop |
242 .size channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono 29520: | loop done |
243 296 movclr.l %acc0, %d4 | output last sample
297 move.l %d4, (%a2) |
298 move.l %d4, (%a3) |
299 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
300 lea.l 20(%sp), %sp | cleanup
301 rts |
302 .size channels_process_sound_chan_mono, \
303 .-channels_process_sound_chan_mono
244 304
245/**************************************************************************** 305/****************************************************************************
246 * void channels_process_sound_chan_custom(int count, int32_t *buf[]) 306 * void channels_process_sound_chan_custom(int count, int32_t *buf[])
@@ -248,34 +308,47 @@ channels_process_sound_chan_mono:
248 * Apply stereo width (narrowing/expanding) effect. 308 * Apply stereo width (narrowing/expanding) effect.
249 */ 309 */
250 .section .text 310 .section .text
311 .align 2
251 .global channels_process_sound_chan_custom 312 .global channels_process_sound_chan_custom
252channels_process_sound_chan_custom: 313channels_process_sound_chan_custom:
253 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 314 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
254 lea.l -16(%sp), %sp | save registers 315 lea.l -28(%sp), %sp | save registers
255 move.l %macsr, %d1 | 316 movem.l %d2-%d6/%a2-%a3, (%sp) |
256 movem.l %d1-%d4, (%sp) |
257 move.l #0xb0, %macsr | put emac in rounding fractional mode
258 movem.l (%a0), %a0-%a1 | get channel pointers 317 movem.l (%a0), %a0-%a1 | get channel pointers
318 move.l %a0, %a2 | use separate dst pointers since read
319 move.l %a1, %a3 | pointers run one ahead of write
259 move.l dsp_sw_gain, %d3 | load straight (mid) gain 320 move.l dsp_sw_gain, %d3 | load straight (mid) gain
260 move.l dsp_sw_cross, %d4 | load cross (side) gain 321 move.l dsp_sw_cross, %d4 | load cross (side) gain
2611: 322 move.l (%a0)+, %d1 | prime the input registers
262 move.l (%a0), %d1 | 323 move.l (%a1)+, %d2 |
263 mac.l %d1, %d3, (%a1), %d2, %acc0 | L = l*gain + r*cross 324 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
264 mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross 325 mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
265 mac.l %d2, %d4 , %acc0 | 326 mac.l %d2, %d4 , %acc0 |
266 mac.l %d2, %d3 , %acc1 | 327 mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
267 movclr.l %acc0, %d1 |
268 movclr.l %acc1, %d2 |
269 move.l %d1, (%a0)+ |
270 move.l %d2, (%a1)+ |
271 subq.l #1, %d0 | 328 subq.l #1, %d0 |
272 bgt.s 1b | 329 ble.b 20f | loop done |
273 movem.l (%sp), %d1-%d4 | restore registers 33010: | loop |
274 move.l %d1, %macsr | 331 movclr.l %acc0, %d5 |
275 lea.l 16(%sp), %sp | cleanup 332 movclr.l %acc1, %d6 |
276 rts 33315: | loop start |
277.cpcustom_end: 334 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
278 .size channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom 335 mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
336 mac.l %d2, %d4 , %acc0 |
337 mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
338 move.l %d5, (%a2)+ |
339 move.l %d6, (%a3)+ |
340 subq.l #1, %d0 |
341 bgt.s 10b | loop |
34220: | loop done |
343 movclr.l %acc0, %d5 | output last sample
344 movclr.l %acc1, %d6 |
345 move.l %d5, (%a2) |
346 move.l %d6, (%a3) |
347 movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers
348 lea.l 28(%sp), %sp | cleanup
349 rts |
350 .size channels_process_sound_chan_custom, \
351 .-channels_process_sound_chan_custom
279 352
280/**************************************************************************** 353/****************************************************************************
281 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) 354 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
@@ -283,31 +356,42 @@ channels_process_sound_chan_custom:
283 * Separate channels into side channels. 356 * Separate channels into side channels.
284 */ 357 */
285 .section .text 358 .section .text
359 .align 2
286 .global channels_process_sound_chan_karaoke 360 .global channels_process_sound_chan_karaoke
287channels_process_sound_chan_karaoke: 361channels_process_sound_chan_karaoke:
288 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 362 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
289 lea.l -16(%sp), %sp | save registers 363 lea.l -20(%sp), %sp | save registers
290 move.l %macsr, %d1 | 364 movem.l %d2-%d4/%a2-%a3, (%sp) |
291 movem.l %d1-%d4, (%sp) | 365 movem.l (%a0), %a0-%a1 | get channel src pointers
292 move.l #0xb0, %macsr | put emac in rounding fractional mode 366 move.l %a0, %a2 | use separate dst pointers since read
293 movem.l (%a0), %a0-%a1 | get channel pointers 367 move.l %a1, %a3 | pointers run one ahead of write
294 move.l #0x40000000, %d4 | %d3 = 0.5 368 move.l #0x40000000, %d3 | %d3 = 0.5
2951: 369 move.l (%a0)+, %d1 | prime the input registers
296 move.l (%a0), %d1 | 370 move.l (%a1)+, %d2 |
297 msac.l %d1, %d4, (%a1), %d2, %acc0 | R = r/2 - l/2 371 mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
298 mac.l %d2, %d4 , %acc0 | 372 msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
299 movclr.l %acc0, %d1 | 373 subq.l #1, %d0 |
300 move.l %d1, (%a1)+ | 374 ble.b 20f | loop done |
301 neg.l %d1 | L = -R = -(r/2 - l/2) = l/2 - r/2 37510: | loop |
302 move.l %d1, (%a0)+ | 376 movclr.l %acc0, %d4 |
303 subq.l #1, %d0 | 377 mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
304 bgt.s 1b | 378 msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
305 movem.l (%sp), %d1-%d4 | restore registers 379 move.l %d4, (%a2)+ |
306 move.l %d1, %macsr | 380 neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
307 lea.l 16(%sp), %sp | cleanup 381 move.l %d4, (%a3)+ |
308 rts 382 subq.l #1, %d0 |
309.cpkaraoke_end: 383 bgt.s 10b | loop |
310 .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke 38420: | loop done |
385 movclr.l %acc0, %d4 | output last sample
386 move.l %d4, (%a2) |
387 neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
388 move.l %d4, (%a3) |
389 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
390 lea.l 20(%sp), %sp | cleanup
391 rts |
392 .size channels_process_sound_chan_karaoke, \
393 .-channels_process_sound_chan_karaoke
394
311/**************************************************************************** 395/****************************************************************************
312 * void sample_output_stereo(int count, struct dsp_data *data, 396 * void sample_output_stereo(int count, struct dsp_data *data,
313 * int32_t *src[], int16_t *dst) 397 * int32_t *src[], int16_t *dst)
@@ -329,6 +413,7 @@ channels_process_sound_chan_karaoke:
329 * 413 *
330 */ 414 */
331 .section .text 415 .section .text
416 .align 2
332 .global sample_output_stereo 417 .global sample_output_stereo
333sample_output_stereo: 418sample_output_stereo:
334 lea.l -44(%sp), %sp | save registers 419 lea.l -44(%sp), %sp | save registers
@@ -348,11 +433,11 @@ sample_output_stereo:
348 add.l %a4, %d0 | 433 add.l %a4, %d0 |
349 and.l #0xfffffff0, %d0 | 434 and.l #0xfffffff0, %d0 |
350 cmp.l %a0, %d0 | at least a full line? 435 cmp.l %a0, %d0 | at least a full line?
351 bhi.w .sos_longloop_1_start | no? jump to trailing longword 436 bhi.w 40f | long loop 1 start | no? do as trailing longwords
352 sub.l #16, %d0 | %d0 = first line bound 437 sub.l #16, %d0 | %d0 = first line bound
353 cmp.l %a4, %d0 | any leading longwords? 438 cmp.l %a4, %d0 | any leading longwords?
354 bls.b .sos_lineloop_start | no? jump to line loop 439 bls.b 20f | line loop start | no? start line loop
355.sos_longloop_0: 44010: | long loop 0 |
356 move.l (%a2)+, %d1 | read longword from L and R 441 move.l (%a2)+, %d1 | read longword from L and R
357 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word 442 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
358 mac.l %d2, %a1, %acc1 | shift R to high word 443 mac.l %d2, %a1, %acc1 | shift R to high word
@@ -362,10 +447,10 @@ sample_output_stereo:
362 move.w %d2, %d1 | interleave MS 16 bits of each 447 move.w %d2, %d1 | interleave MS 16 bits of each
363 move.l %d1, (%a4)+ | ...and write both 448 move.l %d1, (%a4)+ | ...and write both
364 cmp.l %a4, %d0 | 449 cmp.l %a4, %d0 |
365 bhi.b .sos_longloop_0 | 450 bhi.b 10b | long loop 0 |
366.sos_lineloop_start: 45120: | line loop start |
367 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound 452 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound
368.sos_lineloop: 45330: | line loop |
369 move.l (%a3)+, %d4 | get next 4 R samples and scale 454 move.l (%a3)+, %d4 | get next 4 R samples and scale
370 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation 455 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
371 mac.l %d5, %a1, (%a3)+, %d6, %acc1 | 456 mac.l %d5, %a1, (%a3)+, %d6, %acc1 |
@@ -394,11 +479,11 @@ sample_output_stereo:
394 move.w %d7, %d3 | 479 move.w %d7, %d3 |
395 movem.l %d0-%d3, -16(%a4) | write four stereo samples 480 movem.l %d0-%d3, -16(%a4) | write four stereo samples
396 cmp.l %a4, %a5 | 481 cmp.l %a4, %a5 |
397 bhi.b .sos_lineloop | 482 bhi.b 30b | line loop |
398.sos_longloop_1_start: 48340: | long loop 1 start |
399 cmp.l %a4, %a0 | any longwords left? 484 cmp.l %a4, %a0 | any longwords left?
400 bls.b .sos_done | no? finished. 485 bls.b 60f | output end | no? stop
401.sos_longloop_1: 48650: | long loop 1 |
402 move.l (%a2)+, %d1 | handle trailing longwords 487 move.l (%a2)+, %d1 | handle trailing longwords
403 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones 488 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
404 mac.l %d2, %a1, %acc1 | 489 mac.l %d2, %a1, %acc1 |
@@ -408,14 +493,13 @@ sample_output_stereo:
408 move.w %d2, %d1 | 493 move.w %d2, %d1 |
409 move.l %d1, (%a4)+ | 494 move.l %d1, (%a4)+ |
410 cmp.l %a4, %a0 | 495 cmp.l %a4, %a0 |
411 bhi.b .sos_longloop_1 | 496 bhi.b 50b | long loop 1
412.sos_done: 49760: | output end |
413 movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers 498 movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers
414 move.l %d1, %macsr | 499 move.l %d1, %macsr |
415 lea.l 44(%sp), %sp | cleanup 500 lea.l 44(%sp), %sp | cleanup
416 rts | 501 rts |
417.sos_end: 502 .size sample_output_stereo, .-sample_output_stereo
418 .size sample_output_stereo, .sos_end-sample_output_stereo
419 503
420/**************************************************************************** 504/****************************************************************************
421 * void sample_output_mono(int count, struct dsp_data *data, 505 * void sample_output_mono(int count, struct dsp_data *data,
@@ -424,6 +508,7 @@ sample_output_stereo:
424 * Same treatment as sample_output_stereo but for one channel. 508 * Same treatment as sample_output_stereo but for one channel.
425 */ 509 */
426 .section .text 510 .section .text
511 .align 2
427 .global sample_output_mono 512 .global sample_output_mono
428sample_output_mono: 513sample_output_mono:
429 lea.l -28(%sp), %sp | save registers 514 lea.l -28(%sp), %sp | save registers
@@ -442,11 +527,11 @@ sample_output_mono:
442 add.l %a3, %d0 | 527 add.l %a3, %d0 |
443 and.l #0xfffffff0, %d0 | 528 and.l #0xfffffff0, %d0 |
444 cmp.l %a0, %d0 | at least a full line? 529 cmp.l %a0, %d0 | at least a full line?
445 bhi.w .som_longloop_1_start | no? jump to trailing longword 530 bhi.w 40f | long loop 1 start | no? do as trailing longwords
446 sub.l #16, %d0 | %d0 = first line bound 531 sub.l #16, %d0 | %d0 = first line bound
447 cmp.l %a3, %d0 | any leading longwords? 532 cmp.l %a3, %d0 | any leading longwords?
448 bls.b .som_lineloop_start | no? jump to line loop 533 bls.b 20f | line loop start | no? start line loop
449.som_longloop_0: 53410: | long loop 0 |
450 move.l (%a2)+, %d1 | read longword from L and R 535 move.l (%a2)+, %d1 | read longword from L and R
451 mac.l %d1, %d5, %acc0 | shift L to high word 536 mac.l %d1, %d5, %acc0 | shift L to high word
452 movclr.l %acc0, %d1 | get possibly saturated results 537 movclr.l %acc0, %d1 | get possibly saturated results
@@ -455,10 +540,10 @@ sample_output_mono:
455 move.w %d2, %d1 | duplicate single channel into 540 move.w %d2, %d1 | duplicate single channel into
456 move.l %d1, (%a3)+ | L and R 541 move.l %d1, (%a3)+ | L and R
457 cmp.l %a3, %d0 | 542 cmp.l %a3, %d0 |
458 bhi.b .som_longloop_0 | 543 bhi.b 10b | long loop 0 |
459.som_lineloop_start: 54420: | line loop start |
460 lea.l -12(%a0), %a1 | %a1 = at or just before last line bound 545 lea.l -12(%a0), %a1 | %a1 = at or just before last line bound
461.som_lineloop: 54630: | line loop |
462 move.l (%a2)+, %d0 | get next 4 L samples and scale 547 move.l (%a2)+, %d0 | get next 4 L samples and scale
463 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation 548 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
464 mac.l %d1, %d5, (%a2)+, %d2, %acc1 | 549 mac.l %d1, %d5, (%a2)+, %d2, %acc1 |
@@ -483,11 +568,11 @@ sample_output_mono:
483 move.w %d4, %d3 | 568 move.w %d4, %d3 |
484 movem.l %d0-%d3, -16(%a3) | write four stereo samples 569 movem.l %d0-%d3, -16(%a3) | write four stereo samples
485 cmp.l %a3, %a1 | 570 cmp.l %a3, %a1 |
486 bhi.b .som_lineloop | 571 bhi.b 30b | line loop |
487.som_longloop_1_start: 57240: | long loop 1 start |
488 cmp.l %a3, %a0 | any longwords left? 573 cmp.l %a3, %a0 | any longwords left?
489 bls.b .som_done | no? finished. 574 bls.b 60f | output end | no? stop
490.som_longloop_1: 57550: | long loop 1 |
491 move.l (%a2)+, %d1 | handle trailing longwords 576 move.l (%a2)+, %d1 | handle trailing longwords
492 mac.l %d1, %d5, %acc0 | the same way as leading ones 577 mac.l %d1, %d5, %acc0 | the same way as leading ones
493 movclr.l %acc0, %d1 | 578 movclr.l %acc0, %d1 |
@@ -496,11 +581,10 @@ sample_output_mono:
496 move.w %d2, %d1 | 581 move.w %d2, %d1 |
497 move.l %d1, (%a3)+ | 582 move.l %d1, (%a3)+ |
498 cmp.l %a3, %a0 | 583 cmp.l %a3, %a0 |
499 bhi.b .som_longloop_1 | 584 bhi.b 50b | long loop 1 |
500.som_done: 58560: | output end |
501 movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers 586 movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers
502 move.l %d1, %macsr | 587 move.l %d1, %macsr |
503 lea.l 28(%sp), %sp | cleanup 588 lea.l 28(%sp), %sp | cleanup
504 rts | 589 rts |
505.som_end: 590 .size sample_output_mono, .-sample_output_mono
506 .size sample_output_mono, .som_end-sample_output_mono