From 36175ac9453999d2d079c521126ecc5ac7a8d984 Mon Sep 17 00:00:00 2001 From: Michael Sevakis Date: Mon, 19 Feb 2007 02:49:26 +0000 Subject: SWCODEC: DSP optimizations for conversion to internal format and resampling. Assembly resampling for Coldfire. Word has it ARM will get that soon. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12399 a1c6a512-1295-4272-9138-f99709370657 --- apps/dsp.c | 388 ++++++++++++++++++++++++++++++++++----------------------- apps/dsp.h | 10 +- apps/dsp_asm.h | 8 ++ apps/dsp_cf.S | 145 ++++++++++++++++++++- 4 files changed, 391 insertions(+), 160 deletions(-) diff --git a/apps/dsp.c b/apps/dsp.c index c7eed8bd76..c062f2c088 100644 --- a/apps/dsp.c +++ b/apps/dsp.c @@ -46,6 +46,18 @@ #define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/ #define DEFAULT_GAIN 0x01000000 + +enum +{ + CONVERT_LE_NATIVE_I_STEREO = STEREO_INTERLEAVED, + CONVERT_LE_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED, + CONVERT_LE_NATIVE_MONO = STEREO_MONO, + CONVERT_GT_NATIVE_I_STEREO = STEREO_INTERLEAVED + STEREO_NUM_MODES, + CONVERT_GT_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED + STEREO_NUM_MODES, + CONVERT_GT_NATIVE_MONO = STEREO_MONO + STEREO_NUM_MODES, + CONVERT_GT_NATIVE_1ST_INDEX = STEREO_NUM_MODES +}; + struct dsp_config { long codec_frequency; /* Sample rate of data coming from the codec */ @@ -60,6 +72,7 @@ struct dsp_config int sample_depth; int sample_bytes; int stereo_mode; + int num_channels; int frac_bits; bool dither_enabled; long dither_bias; @@ -69,11 +82,13 @@ struct dsp_config bool eq_enabled; long eq_precut; long gain; /* Note that this is in S8.23 format. */ + int (*convert_to_internal)(const char* src[], int32_t* dst[], int count); }; struct resample_data { - long phase, delta; + long phase; + long delta; int32_t last_sample[2]; }; @@ -139,88 +154,157 @@ void sound_set_pitch(int permille) * consume. Note that for mono, dst[0] equals dst[1], as there is no point * in processing the same data twice. 
*/ -static int convert_to_internal(const char* src[], int count, int32_t* dst[]) + +/* convert count 16-bit mono to 32-bit mono */ +static int convert_lte_native_mono( + const char *src[], int32_t *dst[], int count) { - count = MIN(SAMPLE_BUF_COUNT / 2, count); + count = MIN(SAMPLE_BUF_COUNT/2, count); - if ((dsp->sample_depth <= NATIVE_DEPTH) - || (dsp->stereo_mode == STEREO_INTERLEAVED)) - { - dst[0] = &sample_buf[0]; - dst[1] = (dsp->stereo_mode == STEREO_MONO) - ? dst[0] : &sample_buf[SAMPLE_BUF_COUNT / 2]; - } - else + const short *s = (short*) src[0]; + const short * const send = s + count; + int32_t *d = dst[0] = dst[1] = sample_buf; + const int scale = WORD_SHIFT; + + do { - dst[0] = (int32_t*) src[0]; - dst[1] = (int32_t*) ((dsp->stereo_mode == STEREO_MONO) ? src[0] : src[1]); + *d++ = *s++ << scale; } + while (s < send); - if (dsp->sample_depth <= NATIVE_DEPTH) - { - short* s0 = (short*) src[0]; - int32_t* d0 = dst[0]; - int32_t* d1 = dst[1]; - int scale = WORD_SHIFT; - int i; + src[0] = (char *)s; - if (dsp->stereo_mode == STEREO_INTERLEAVED) - { - for (i = 0; i < count; i++) - { - *d0++ = *s0++ << scale; - *d1++ = *s0++ << scale; - } - } - else if (dsp->stereo_mode == STEREO_NONINTERLEAVED) - { - short* s1 = (short*) src[1]; + return count; +} - for (i = 0; i < count; i++) - { - *d0++ = *s0++ << scale; - *d1++ = *s1++ << scale; - } - } - else - { - for (i = 0; i < count; i++) - { - *d0++ = *s0++ << scale; - } - } - } - else if (dsp->stereo_mode == STEREO_INTERLEAVED) - { - int32_t* s0 = (int32_t*) src[0]; - int32_t* d0 = dst[0]; - int32_t* d1 = dst[1]; - int i; +/* convert count 16-bit interleaved stereo to 32-bit noninterleaved */ +static int convert_lte_native_interleaved_stereo( + const char *src[], int32_t *dst[], int count) +{ + count = MIN(SAMPLE_BUF_COUNT/2, count); - for (i = 0; i < count; i++) - { - *d0++ = *s0++; - *d1++ = *s0++; - } - } + const int32_t *s = (int32_t *) src[0]; + const int32_t * const send = s + count; + int32_t *dl = dst[0] = 
sample_buf; + int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2; + const int scale = WORD_SHIFT; - if (dsp->stereo_mode == STEREO_NONINTERLEAVED) + do { - src[0] += count * dsp->sample_bytes; - src[1] += count * dsp->sample_bytes; + short slr = *s++; +#ifdef ROCKBOX_LITTLE_ENDIAN + *dl++ = (slr >> 16) << scale; + *dr++ = (int32_t)(short)slr << scale; +#else /* ROCKBOX_BIG_ENDIAN */ + *dl++ = (int32_t)(short)slr << scale; + *dr++ = (slr >> 16) << scale; +#endif } - else if (dsp->stereo_mode == STEREO_INTERLEAVED) + while (s < send); + + src[0] = (char *)s; + + return count; +} + +/* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */ +static int convert_lte_native_noninterleaved_stereo( + const char *src[], int32_t *dst[], int count) +{ + const short *sl = (short *) src[0]; + const short *sr = (short *) src[1]; + const short * const slend = sl + count; + int32_t *dl = dst[0] = sample_buf; + int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2; + const int scale = WORD_SHIFT; + + do { - src[0] += count * dsp->sample_bytes * 2; + *dl++ = *sl++ << scale; + *dr++ = *sr++ << scale; } - else + while (sl < slend); + + src[0] = (char *)sl; + src[1] = (char *)sr; + + return count; +} + +/* convert count 32-bit mono to 32-bit mono */ +static int convert_gt_native_mono( + const char *src[], int32_t *dst[], int count) +{ + count = MIN(SAMPLE_BUF_COUNT/2, count); + + dst[0] = dst[1] = (int32_t *)src[0]; + src[0] = (char *)(dst[0] + count); + + return count; +} + +/* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */ +static int convert_gt_native_interleaved_stereo( + const char *src[], int32_t *dst[], int count) +{ + count = MIN(SAMPLE_BUF_COUNT/2, count); + + const int32_t *s = (int32_t *)src[0]; + const int32_t * const send = s + 2*count; + int32_t *dl = sample_buf; + int32_t *dr = sample_buf + SAMPLE_BUF_COUNT/2; + + dst[0] = dl; + dst[1] = dr; + + do { - src[0] += count * dsp->sample_bytes; + *dl++ = *s++; + *dr++ = *s++; } + 
while (s < send); + + src[0] = (char *)send; + + return count; +} + +/* convert 32 bit-noninterleaved stereo to 32-bit noninterleaved stereo */ +static int convert_gt_native_noninterleaved_stereo( + const char *src[], int32_t *dst[], int count) +{ + count = MIN(SAMPLE_BUF_COUNT/2, count); + + dst[0] = (int32_t *)src[0]; + dst[1] = (int32_t *)src[1]; + src[0] = (char *)(dst[0] + count); + src[1] = (char *)(dst[1] + count); return count; } +/* set the to-native sample conversion function based on dsp sample parameters */ +static void new_sample_conversion(void) +{ + static int (*convert_to_internal_functions[])( + const char* src[], int32_t *dst[], int count) = + { + [CONVERT_LE_NATIVE_MONO] = convert_lte_native_mono, + [CONVERT_LE_NATIVE_I_STEREO] = convert_lte_native_interleaved_stereo, + [CONVERT_LE_NATIVE_NI_STEREO] = convert_lte_native_noninterleaved_stereo, + [CONVERT_GT_NATIVE_MONO] = convert_gt_native_mono, + [CONVERT_GT_NATIVE_I_STEREO] = convert_gt_native_interleaved_stereo, + [CONVERT_GT_NATIVE_NI_STEREO] = convert_gt_native_noninterleaved_stereo, + }; + + int convert = dsp->stereo_mode; + + if (dsp->sample_depth > NATIVE_DEPTH) + convert += CONVERT_GT_NATIVE_1ST_INDEX; + + dsp->convert_to_internal = convert_to_internal_functions[convert]; +} + static void resampler_set_delta(int frequency) { resample_data[current_codec].delta = (unsigned long) @@ -230,124 +314,118 @@ static void resampler_set_delta(int frequency) /* Linear interpolation resampling that introduces a one sample delay because * of our inability to look into the future at the end of a frame. */ - -/* TODO: we really should have a separate set of resample functions for both - mono and stereo to avoid all this internal branching and looping. 
*/ -static int downsample(int32_t **dst, int32_t **src, int count, - struct resample_data *r) +#ifndef DSP_HAVE_ASM_RESAMPLING +static int dsp_downsample(int channels, int count, struct resample_data *r, + int32_t **src, int32_t **dst) { - long phase = r->phase; long delta = r->delta; - int32_t last_sample; - int32_t *d[2] = { dst[0], dst[1] }; - int pos = phase >> 16; - int i = 1, j; - int num_channels = dsp->stereo_mode == STEREO_MONO ? 1 : 2; - - for (j = 0; j < num_channels; j++) { - last_sample = r->last_sample[j]; + long phase, pos; + int32_t *d; + + /* Rolled channel loop actually showed slightly faster. */ + do + { + /* Just initialize things and not worry too much about the relatively + * uncommon case of not being able to spit out a sample for the frame. + */ + int32_t *s = src[--channels]; + int32_t last = r->last_sample[channels]; + + r->last_sample[channels] = s[count - 1]; + d = dst[channels]; + phase = r->phase; + pos = phase >> 16; + /* Do we need last sample of previous frame for interpolation? */ if (pos > 0) - last_sample = src[j][pos - 1]; + last = s[pos - 1]; - /* Be sure starting position isn't passed the available data */ - if (pos < count) - *d[j]++ = last_sample + FRACMUL((phase & 0xffff) << 15, - src[j][pos] - last_sample); - else + while (pos < count) { - /* No samples can be output here since were already passed the - end. Keep phase, save the last sample and return nothing. */ - i = 0; - goto done; + *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); + phase += delta; + pos = phase >> 16; + last = s[pos - 1]; } } - - phase += delta; - - while ((pos = phase >> 16) < count) - { - for (j = 0; j < num_channels; j++) - *d[j]++ = src[j][pos - 1] + FRACMUL((phase & 0xffff) << 15, - src[j][pos] - src[j][pos - 1]); - phase += delta; - i++; - } + while (channels > 0); /* Wrap phase accumulator back to start of next frame. 
*/ -done: r->phase = phase - (count << 16); - r->last_sample[0] = src[0][count - 1]; - r->last_sample[1] = src[1][count - 1]; - return i; + return d - dst[0]; } -static long upsample(int32_t **dst, int32_t **src, int count, struct resample_data *r) +static int dsp_upsample(int channels, int count, struct resample_data *r, + int32_t **src, int32_t **dst) { - long phase = r->phase; long delta = r->delta; - int32_t *d[2] = { dst[0], dst[1] }; - int i = 0, j; - int pos; - int num_channels = dsp->stereo_mode == STEREO_MONO ? 1 : 2; - - while ((phase >> 16) == 0) - { - for (j = 0; j < num_channels; j++) - *d[j]++ = r->last_sample[j] + FRACMUL((phase & 0xffff) << 15, - src[j][0] - r->last_sample[j]); - phase += delta; - i++; - } + long phase, pos; + int32_t *d; - while ((pos = phase >> 16) < count) + /* Rolled channel loop actually showed slightly faster. */ + do { - for (j = 0; j < num_channels; j++) - *d[j]++ = src[j][pos - 1] + FRACMUL((phase & 0xffff) << 15, - src[j][pos] - src[j][pos - 1]); - phase += delta; - i++; + /* Should always be able to output a sample for a ratio up to + RESAMPLE_BUF_COUNT / SAMPLE_BUF_COUNT. */ + int32_t *s = src[--channels]; + int32_t last = r->last_sample[channels]; + + r->last_sample[channels] = s[count - 1]; + d = dst[channels]; + phase = r->phase; + pos = phase >> 16; + + while (pos == 0) + { + *d++ = last + FRACMUL((phase & 0xffff) << 15, s[0] - last); + phase += delta; + pos = phase >> 16; + } + + while (pos < count) + { + last = s[pos - 1]; + *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); + phase += delta; + pos = phase >> 16; + } } + while (channels > 0); /* Wrap phase accumulator back to start of next frame. */ - r->phase = phase - (count << 16); - r->last_sample[0] = src[0][count - 1]; - r->last_sample[1] = src[1][count - 1]; - return i; + r->phase = phase & 0xffff; + return d - dst[0]; } +#endif /* DSP_HAVE_ASM_RESAMPLING */ /* Resample count stereo samples. 
Updates the src array, if resampling is * done, to refer to the resampled data. Returns number of stereo samples * for further processing. */ -static inline int resample(int32_t* src[], int count) +static inline int resample(int32_t *src[], int count) { - long new_count; + long new_count = count; if (dsp->frequency != NATIVE_FREQUENCY) { - int32_t* dst[2] = {&resample_buf[0], &resample_buf[RESAMPLE_BUF_COUNT / 2]}; + int32_t *dst[2] = + { + resample_buf, + resample_buf + RESAMPLE_BUF_COUNT/2, + }; + int channels = dsp->num_channels; if (dsp->frequency < NATIVE_FREQUENCY) - { - new_count = upsample(dst, src, count, - &resample_data[current_codec]); - } + new_count = dsp_upsample(channels, count, + &resample_data[current_codec], + src, dst); else - { - new_count = downsample(dst, src, count, - &resample_data[current_codec]); - } + new_count = dsp_downsample(channels, count, + &resample_data[current_codec], + src, dst); src[0] = dst[0]; - if (dsp->stereo_mode != STEREO_MONO) - src[1] = dst[1]; - else - src[1] = dst[0]; - } - else - { - new_count = count; + src[1] = dst[channels - 1]; } return new_count; @@ -378,8 +456,7 @@ void dsp_dither_enable(bool enable) static void dither_init(void) { - memset(&dither_data[0], 0, sizeof(struct dither_data)); - memset(&dither_data[1], 0, sizeof(struct dither_data)); + memset(dither_data, 0, sizeof(dither_data)); dsp->dither_bias = (1L << (dsp->frac_bits - NATIVE_DEPTH)); dsp->dither_mask = (1L << (dsp->frac_bits + 1 - NATIVE_DEPTH)) - 1; } @@ -592,7 +669,7 @@ void dsp_set_eq_coefs(int band) static void eq_process(int32_t **x, unsigned num) { int i; - unsigned int channels = dsp->stereo_mode != STEREO_MONO ? 
2 : 1; + unsigned int channels = dsp->num_channels; unsigned shift; /* filter configuration currently is 1 low shelf filter, 3 band peaking @@ -772,7 +849,7 @@ int dsp_process(char *dst, const char *src[], int count) while (count > 0) { - samples = convert_to_internal(src, count, tmp); + samples = dsp->convert_to_internal(src, tmp, count); count -= samples; apply_gain(tmp, samples); samples = resample(tmp, samples); @@ -886,7 +963,7 @@ bool dsp_configure(int setting, intptr_t value) case DSP_SET_SAMPLE_DEPTH: dsp->sample_depth = value; - + if (dsp->sample_depth <= NATIVE_DEPTH) { dsp->frac_bits = WORD_FRACBITS; @@ -902,15 +979,19 @@ bool dsp_configure(int setting, intptr_t value) dsp->clip_min = -(1 << value); } + new_sample_conversion(); dither_init(); break; case DSP_SET_STEREO_MODE: - dsp->stereo_mode = (long) value; + dsp->stereo_mode = value; + dsp->num_channels = value == STEREO_MONO ? 1 : 2; + new_sample_conversion(); break; case DSP_RESET: dsp->stereo_mode = STEREO_NONINTERLEAVED; + dsp->num_channels = 2; dsp->clip_max = ((1 << WORD_FRACBITS) - 1); dsp->clip_min = -((1 << WORD_FRACBITS)); dsp->track_gain = 0; @@ -921,6 +1002,7 @@ bool dsp_configure(int setting, intptr_t value) dsp->sample_depth = NATIVE_DEPTH; dsp->frac_bits = WORD_FRACBITS; dsp->new_gain = true; + new_sample_conversion(); break; case DSP_FLUSH: diff --git a/apps/dsp.h b/apps/dsp.h index 8e82b6118d..b99ac213ab 100644 --- a/apps/dsp.h +++ b/apps/dsp.h @@ -24,9 +24,13 @@ #include #define NATIVE_FREQUENCY 44100 -#define STEREO_INTERLEAVED 0 -#define STEREO_NONINTERLEAVED 1 -#define STEREO_MONO 2 +enum +{ + STEREO_INTERLEAVED = 0, + STEREO_NONINTERLEAVED, + STEREO_MONO, + STEREO_NUM_MODES, +}; enum { CODEC_SET_FILEBUF_WATERMARK = 1, diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h index 04c2848a98..add76a07f8 100644 --- a/apps/dsp_asm.h +++ b/apps/dsp_asm.h @@ -27,5 +27,13 @@ void apply_crossfeed(int32_t* src[], int count); #endif +#if defined (CPU_COLDFIRE) +#define DSP_HAVE_ASM_RESAMPLING +int 
dsp_downsample(int channels, int count, void *resample_data, + int32_t **src, int32_t **dst); +int dsp_upsample(int channels, int count, void *resample_data, + int32_t **src, int32_t **dst); #endif +#endif /* _DSP_ASM_H */ + diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S index 719d1db1d5..233be82860 100644 --- a/apps/dsp_cf.S +++ b/apps/dsp_cf.S @@ -17,8 +17,11 @@ * ****************************************************************************/ - .section .text - .global apply_crossfeed +/**************************************************************************** + * apply_crossfeed(int32_t* src[], int count) + */ + .section .text + .global apply_crossfeed apply_crossfeed: lea.l (-44, %sp), %sp movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs @@ -67,11 +70,11 @@ apply_crossfeed: addq.l #1, %d4 | index++ moveq.l #13, %d6 cmp.l %d6, %d4 | wrap index to 0 if it overflows - jlt .nowrap + jlt .cfnowrap moveq.l #13*8, %d4 sub.l %d4, %a0 | wrap back delay line ptr as well clr.l %d4 -.nowrap: +.cfnowrap: subq.l #1, %d7 jne .cfloop | save data back to struct @@ -81,4 +84,138 @@ apply_crossfeed: movem.l (%sp), %d2-%d7/%a2-%a6 lea.l (44, %sp), %sp rts +.cfend: + .size apply_crossfeed,.cfend-apply_crossfeed +/**************************************************************************** + * dsp_downsample(int channels, int count, struct resample_data *r, + * int32_t **src, int32_t **dst) + */ + .section .text + .global dsp_downsample +dsp_downsample: + lea.l -40(%sp), %sp | save non-clobberables + movem.l %d2-%d7/%a2-%a5, (%sp) | + movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels + | %d3 = count + | %a0 = r + | %a1 = src + | %a2 = dst + move.l 4(%a0), %d4 | %d4 = delta = r->delta + move.l #16, %d7 | %d7 = shift +.dschannel_loop: + move.l (%a0), %d5 | %d5 = phase = r->phase + move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1] + move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1] + lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1] + move.l (%a5), %d0 | %d0 = last =
r->last_sample[ch-1] + move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1] + move.l %d1, (%a5) | + move.l %d5, %d6 | %d6 = pos = phase >> 16 + lsr.l %d7, %d6 | + cmp.l %d3, %d6 | past end of samples? + bge.b .dsloop_skip | yes? skip loop + tst.l %d6 | need last sample of prev. frame? + bne.b .dsloop | no? start main loop + move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] + bra.b .dsuse_last_start | start with last (last in %d0) +.dsloop: + lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] + movem.l (%a5), %d0-%d1 | +.dsuse_last_start: + sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] + move.l %d0, %acc0 | %acc0 = previous sample + move.l %d5, %d0 | frac = (phase << 16) >> 1 + lsl.l %d7, %d0 | + lsr.l #1, %d0 | + mac.l %d0, %d1, %acc0 | %acc0 += frac * diff + move.l %acc0, %d0 | + add.l %d4, %d5 | phase += delta + move.l %d5, %d6 | pos = phase >> 16 + lsr.l %d7, %d6 | + move.l %d0, (%a4)+ | *d++ = %d0 + cmp.l %d3, %d6 | pos < count? + blt.b .dsloop | yes? continue resampling +.dsloop_skip: + subq.l #1, %d2 | ch > 0? + bgt.b .dschannel_loop | yes? 
process next channel + asl.l %d7, %d3 | wrap phase to start of next frame + sub.l %d3, %d5 | r->phase = phase - (count << 16) + move.l %d5, (%a0) | + move.l %a4, %d0 | return d - dst[0] + sub.l (%a2), %d0 | + asr.l #2, %d0 | convert bytes->samples + movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables + move.l %acc1, %acc0 | clear %acc0 + lea.l 40(%sp), %sp | cleanup stack + rts | buh-bye +.dsend: + .size dsp_downsample,.dsend-dsp_downsample + +/**************************************************************************** + * dsp_upsample(int channels, int count, struct resample_data *r, + * int32_t **src, int32_t **dst) + */ + .section .text + .global dsp_upsample +dsp_upsample: + lea.l -40(%sp), %sp | save non-clobberables + movem.l %d2-%d7/%a2-%a5, (%sp) | + movem.l 44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels + | %d3 = count + | %a0 = r + | %a1 = src + | %a2 = dst + move.l 4(%a0), %d4 | %d4 = delta = r->delta + swap %d4 | swap delta to high word to use + | carries to increment position +.uschannel_loop: + move.l (%a0), %d5 | %d5 = phase = r->phase + move.l -4(%a1, %d2.l*4), %a3 | %a3 = s = src[ch-1] + move.l -4(%a2, %d2.l*4), %a4 | %a4 = d = dst[ch-1] + lea.l 4(%a0, %d2.l*4), %a5 | %a5 = &r->last_sample[ch-1] + move.l (%a5), %d0 | %d0 = last = r->last_sample[ch-1] + move.l -4(%a3, %d3.l*4), %d1 | r->last_sample[ch-1] = s[count-1] + move.l %d1, (%a5) | + moveq.l #16, %d1 | %d1 = shift + move.l %d5, %d6 | %d6 = pos = phase >> 16 + lsl.l %d1, %d5 | swap phase to high word to use + | carries to increment position + lsr.l %d1, %d6 | pos == 0? + bne.b .usstart_1 | no? transition from down + move.l (%a3), %d1 | %d1 = s[0] + sub.l %d0, %d1 | diff = s[pos] - last + bra.b .usloop_0 | jump to typical start point +.usstart_1: + cmp.l %d3, %d6 | past end of samples? + bge.b .usloop_skip | yes?
skip loop +.usloop_1: + lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] + movem.l (%a5), %d0-%d1 | + sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] +.usloop_0: + move.l %d0, %acc0 | %acc0 = previous sample + lsr.l #1, %d5 | make phase into frac + mac.l %d1, %d5, %acc0 | %acc0 += diff * frac + move.l %acc0, %d7 | + lsl.l #1, %d5 | restore frac to phase + move.l %d7, (%a4)+ | *d++ = %d7 + add.l %d4, %d5 | phase += delta + bcc.b .usloop_0 | load next values? + addq.l #1, %d6 | increment position + cmp.l %d3, %d6 | pos < count? + blt.b .usloop_1 | yes? continue resampling +.usloop_skip: + subq.l #1, %d2 | ch > 0? + bgt.b .uschannel_loop | yes? process next channel + swap %d5 | wrap phase to start of next frame + move.l %d5, (%a0) | ...and save in r->phase + move.l %a4, %d0 | return d - dst[0] + sub.l (%a2), %d0 | + asr.l #2, %d0 | convert bytes->samples + movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables + move.l %acc1, %acc0 | clear %acc0 + lea.l 40(%sp), %sp | cleanup stack + rts | buh-bye +.usend: + .size dsp_upsample,.usend-dsp_upsample -- cgit v1.2.3