SWCODEC: DSP optimizations for conversion to internal format and resampling. Assembly resampling for Coldfire. Word has it ARM will get that soon.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12399 a1c6a512-1295-4272-9138-f99709370657
author: Michael Sevakis <jethead71@rockbox.org> 2007-02-19 02:49:26 +0000
committer: Michael Sevakis <jethead71@rockbox.org> 2007-02-19 02:49:26 +0000
commit: 36175ac9453999d2d079c521126ecc5ac7a8d984 (patch)
tree: a37e87b5fd7283d1456b7a346e16c1a5ed590a2c
parent: 2801a87d543f38cadd076330f329c84e23852997 (diff)
download: rockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.tar.gz
rockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.zip
4 files changed, 391 insertions, 160 deletions
diff --git a/apps/dsp.c b/apps/dsp.c
index c7eed8bd76..c062f2c088 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -46,6 +46,18 @@
 #define RESAMPLE_BUF_COUNT  (256 * 4)   /* Enough for 11,025 Hz -> 44,100 Hz*/
 #define DEFAULT_GAIN        0x01000000
+/* Indices into the table of sample conversion functions. The stereo mode
+ * picks the entry; STEREO_NUM_MODES is added when the sample depth is greater
+ * than NATIVE_DEPTH ("GT"), otherwise the "LE" (less than or equal) entries
+ * are used. */
+enum
+{
+    CONVERT_LE_NATIVE_I_STEREO  = STEREO_INTERLEAVED,
+    CONVERT_LE_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED,
+    CONVERT_LE_NATIVE_MONO      = STEREO_MONO,
+    CONVERT_GT_NATIVE_I_STEREO  = STEREO_INTERLEAVED + STEREO_NUM_MODES,
+    CONVERT_GT_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED + STEREO_NUM_MODES,
+    CONVERT_GT_NATIVE_MONO      = STEREO_MONO + STEREO_NUM_MODES,
+    CONVERT_GT_NATIVE_1ST_INDEX = STEREO_NUM_MODES
+};
 struct dsp_config
 {
    long codec_frequency; /* Sample rate of data coming from the codec */
@@ -60,6 +72,7 @@ struct dsp_config
    int sample_depth;
    int sample_bytes;
    int stereo_mode;
+    int num_channels;
    int frac_bits;
    bool dither_enabled;
    long dither_bias;
@@ -69,11 +82,13 @@ struct dsp_config
    bool eq_enabled;
    long eq_precut;
    long gain;          /* Note that this is in S8.23 format. */
+    int (*convert_to_internal)(const char* src[], int32_t* dst[], int count);
 };
+/* Resampler state. phase is a 16.16 fixed-point read position within the
+ * current frame, delta is added to it for every output sample, and
+ * last_sample[] holds the final input sample of the previous frame for each
+ * channel. NOTE: the Coldfire routines in dsp_cf.S read phase at offset 0,
+ * delta at offset 4 and last_sample[] from offset 8, so the member order
+ * must not change. */
 struct resample_data
 {
-    long phase, delta;
+    long phase;
+    long delta;
     int32_t last_sample[2];
 };
@@ -139,88 +154,157 @@ void sound_set_pitch(int permille)
 * consume. Note that for mono, dst[0] equals dst[1], as there is no point
 * in processing the same data twice.
 */
-static int convert_to_internal(const char* src[], int count, int32_t* dst[])
+/* Convert up to count 16-bit mono samples to 32-bit mono.
+ * At most SAMPLE_BUF_COUNT/2 samples are consumed per call. The output is
+ * written to sample_buf, dst[0] and dst[1] are both pointed at it, and
+ * src[0] is advanced past the consumed input. Returns the number of samples
+ * converted. The do/while loop always converts one sample before testing
+ * the end condition, so count must be greater than zero. */
+static int convert_lte_native_mono(
+    const char *src[], int32_t *dst[], int count)
 {
-    count = MIN(SAMPLE_BUF_COUNT / 2, count);
+    count = MIN(SAMPLE_BUF_COUNT/2, count);
-    if ((dsp->sample_depth <= NATIVE_DEPTH)
+    const short *s = (short*) src[0];
-        || (dsp->stereo_mode == STEREO_INTERLEAVED))
+    const short * const send = s + count;
-    {
+    int32_t *d = dst[0] = dst[1] = sample_buf;
-        dst[0] = &sample_buf[0];
+    const int scale = WORD_SHIFT;
-        dst[1] = (dsp->stereo_mode == STEREO_MONO)
-            ? dst[0] : &sample_buf[SAMPLE_BUF_COUNT / 2];
+    do
-    }
-    else
     {
-        dst[0] = (int32_t*) src[0];
+        *d++ = *s++ << scale;
-        dst[1] = (int32_t*) ((dsp->stereo_mode == STEREO_MONO) ? src[0] : src[1]);
     }
+    while (s < send);
-    if (dsp->sample_depth <= NATIVE_DEPTH)
+    src[0] = (char *)s;
-    {
-        short* s0 = (short*) src[0];
-        int32_t* d0 = dst[0];
-        int32_t* d1 = dst[1];
-        int scale = WORD_SHIFT;
-        int i;
-        if (dsp->stereo_mode == STEREO_INTERLEAVED)
+    return count;
-        {
+}
-            for (i = 0; i < count; i++)
-            {
-                *d0++ = *s0++ << scale;
-                *d1++ = *s0++ << scale;
-            }
-        }
-        else if (dsp->stereo_mode == STEREO_NONINTERLEAVED)
-        {
-            short* s1 = (short*) src[1];
-            for (i = 0; i < count; i++)
+/* Convert up to count 16-bit interleaved stereo frames to 32-bit
+ * noninterleaved.
+ * At most SAMPLE_BUF_COUNT/2 frames are consumed per call. Left samples go
+ * to the first half of sample_buf (dst[0]) and right samples to the second
+ * half (dst[1]); src[0] is advanced past the consumed input. Returns the
+ * number of frames converted; count must be greater than zero because the
+ * do/while loop converts one frame before testing the end condition.
+ * Each L/R frame is fetched with a single 32-bit load (src[0] is presumably
+ * 32-bit aligned - confirm against the callers). */
-            {
+static int convert_lte_native_interleaved_stereo(
-                *d0++ = *s0++ << scale;
+    const char *src[], int32_t *dst[], int count)
-                *d1++ = *s1++ << scale;
+{
-            }
+    count = MIN(SAMPLE_BUF_COUNT/2, count);
-        }
-        else
-        {
-            for (i = 0; i < count; i++)
-            {
-                *d0++ = *s0++ << scale;
-            }
-        }
-    }
-    else if (dsp->stereo_mode == STEREO_INTERLEAVED)
-    {
-        int32_t* s0 = (int32_t*) src[0];
-        int32_t* d0 = dst[0];
-        int32_t* d1 = dst[1];
-        int i;
-        for (i = 0; i < count; i++)
+    const int32_t *s = (int32_t *) src[0];
-        {
+    const int32_t * const send = s + count;
-            *d0++ = *s0++;
+    int32_t *dl = dst[0] = sample_buf;
-            *d1++ = *s0++;
+    int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2;
-        }
+    const int scale = WORD_SHIFT;
-    }
-    if (dsp->stereo_mode == STEREO_NONINTERLEAVED)
+    do
     {
-        src[0] += count * dsp->sample_bytes;
+        /* Keep the whole 32-bit word: a short would drop one channel. */
+        int32_t slr = *s++;
-        src[1] += count * dsp->sample_bytes;
+#ifdef ROCKBOX_LITTLE_ENDIAN
+        /* The first (left) sample in memory is the low halfword. */
+        *dl++ = (int32_t)(short)slr << scale;
+        *dr++ = (slr >> 16) << scale;
+#else  /* ROCKBOX_BIG_ENDIAN */
+        /* The first (left) sample in memory is the high halfword. */
+        *dl++ = (slr >> 16) << scale;
+        *dr++ = (int32_t)(short)slr << scale;
+#endif
     }
-    else if (dsp->stereo_mode == STEREO_INTERLEAVED)
+    while (s < send);
+    src[0] = (char *)s;
+    return count;
+}
+/* Convert up to count 16-bit noninterleaved stereo samples per channel to
+ * 32-bit noninterleaved.
+ * At most SAMPLE_BUF_COUNT/2 samples per channel are consumed per call so
+ * that neither half of sample_buf can be overrun. Left output goes to the
+ * first half of sample_buf (dst[0]), right output to the second half
+ * (dst[1]); src[0] and src[1] are advanced past the consumed input. Returns
+ * the number of samples converted per channel; count must be greater than
+ * zero because the do/while loop converts one sample before testing the end
+ * condition. */
+static int convert_lte_native_noninterleaved_stereo(
+    const char *src[], int32_t *dst[], int count)
+{
+    count = MIN(SAMPLE_BUF_COUNT/2, count);
+    const short *sl = (short *) src[0];
+    const short *sr = (short *) src[1];
+    const short * const slend = sl + count;
+    int32_t *dl = dst[0] = sample_buf;
+    int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2;
+    const int scale = WORD_SHIFT;
+    do
     {
-        src[0] += count * dsp->sample_bytes * 2;
+        *dl++ = *sl++ << scale;
+        *dr++ = *sr++ << scale;
     }
-    else
+    while (sl < slend);
+    src[0] = (char *)sl;
+    src[1] = (char *)sr;
+    return count;
+}
+/* 32-bit mono input needs no conversion, so nothing is copied: both dst
+ * channels are pointed at the caller's buffer and src[0] is advanced past
+ * the samples handed out (at most SAMPLE_BUF_COUNT/2 per call). Returns the
+ * number of samples made available. */
+static int convert_gt_native_mono(
+    const char *src[], int32_t *dst[], int count)
+{
+    int32_t *in = (int32_t *)src[0];
+    count = MIN(SAMPLE_BUF_COUNT/2, count);
+    dst[0] = in;
+    dst[1] = in;
+    src[0] = (char *)(in + count);
+    return count;
+}
+/* Convert up to count 32-bit interleaved stereo frames to 32-bit
+ * noninterleaved stereo.
+ * At most SAMPLE_BUF_COUNT/2 frames are consumed per call. Each frame's
+ * first word is copied to the first half of sample_buf (dst[0]) and its
+ * second word to the second half (dst[1]); no scaling is applied. src[0] is
+ * advanced by 2*count words. Returns the number of frames converted; count
+ * must be greater than zero because the do/while loop copies one frame
+ * before testing the end condition. */
+static int convert_gt_native_interleaved_stereo(
+    const char *src[], int32_t *dst[], int count)
+{
+    count = MIN(SAMPLE_BUF_COUNT/2, count);
+    const int32_t *s = (int32_t *)src[0];
+    const int32_t * const send = s + 2*count;
+    int32_t *dl = sample_buf;
+    int32_t *dr = sample_buf + SAMPLE_BUF_COUNT/2;
+    dst[0] = dl;
+    dst[1] = dr;
+    do
     {
-        src[0] += count * dsp->sample_bytes;
+        *dl++ = *s++;
+        *dr++ = *s++;
     }
+    while (s < send);
+    src[0] = (char *)send;
+    return count;
+}
+/* 32-bit noninterleaved stereo input needs no conversion, so nothing is
+ * copied: dst[0] and dst[1] are pointed at the caller's channel buffers and
+ * both src pointers are advanced past the samples handed out (at most
+ * SAMPLE_BUF_COUNT/2 per channel per call). */
+static int convert_gt_native_noninterleaved_stereo(
+    const char *src[], int32_t *dst[], int count)
+{
+    int32_t *left  = (int32_t *)src[0];
+    int32_t *right = (int32_t *)src[1];
+    count = MIN(SAMPLE_BUF_COUNT/2, count);
+    dst[0] = left;
+    dst[1] = right;
+    src[0] = (char *)(left + count);
+    src[1] = (char *)(right + count);
     return count;
 }
+/* Select the to-internal sample conversion function from the current dsp
+ * stereo mode and sample depth and store it in dsp->convert_to_internal.
+ * An out-of-range stereo mode leaves the previously selected function in
+ * place instead of indexing past the end of the table. */
+static void new_sample_conversion(void)
+{
+    static int (* const convert_to_internal_functions[])(
+        const char* src[], int32_t *dst[], int count) =
+    {
+        [CONVERT_LE_NATIVE_MONO]      = convert_lte_native_mono,
+        [CONVERT_LE_NATIVE_I_STEREO]  = convert_lte_native_interleaved_stereo,
+        [CONVERT_LE_NATIVE_NI_STEREO] = convert_lte_native_noninterleaved_stereo,
+        [CONVERT_GT_NATIVE_MONO]      = convert_gt_native_mono,
+        [CONVERT_GT_NATIVE_I_STEREO]  = convert_gt_native_interleaved_stereo,
+        [CONVERT_GT_NATIVE_NI_STEREO] = convert_gt_native_noninterleaved_stereo,
+    };
+    int convert = dsp->stereo_mode;
+    /* stereo_mode is stored unchecked by dsp_configure(); never index the
+     * table (and later call through the result) with an invalid mode. */
+    if (convert < 0 || convert >= STEREO_NUM_MODES)
+        return;
+    if (dsp->sample_depth > NATIVE_DEPTH)
+        convert += CONVERT_GT_NATIVE_1ST_INDEX;
+    dsp->convert_to_internal = convert_to_internal_functions[convert];
+}
 static void resampler_set_delta(int frequency)
 {
    resample_data[current_codec].delta = (unsigned long) 
@@ -230,124 +314,118 @@ static void resampler_set_delta(int frequency)
 /* Linear interpolation resampling that introduces a one sample delay because
 * of our inability to look into the future at the end of a frame.
 */
+#ifndef DSP_HAVE_ASM_RESAMPLING
-/* TODO: we really should have a separate set of resample functions for both
+/* Linear-interpolation downsampler (C version, compiled only when
+ * DSP_HAVE_ASM_RESAMPLING is not defined).
+ * channels: number of channels to process (src/dst entries used)
+ * count:    number of input samples per channel
+ * r:        resampler state; r->phase and r->last_sample[] are updated
+ * src, dst: per-channel input and output buffers
+ * Channels are processed from the highest index down to 0, each starting
+ * from the same r->phase. Returns the number of samples written to dst[0],
+ * which may be zero when the starting position is already at or beyond
+ * count.
+ * NOTE(review): s[pos - 1] is read after pos has been advanced and before
+ * pos < count is re-tested (and once up front when pos > 0), so a read just
+ * past the frame looks possible; the value is not used in that case. */
+static int dsp_downsample(int channels, int count, struct resample_data *r,
-   mono and stereo to avoid all this internal branching and looping. */
+                          int32_t **src, int32_t **dst)
-static int downsample(int32_t **dst, int32_t **src, int count,
-    struct resample_data *r)
 {
-    long phase = r->phase;
     long delta = r->delta;
-    int32_t last_sample;
+    long phase, pos;
-    int32_t *d[2] = { dst[0], dst[1] };
+    int32_t *d;
-    int pos = phase >> 16;
-    int i = 1, j;
+    /* Rolled channel loop actually showed slightly faster. */
-    int num_channels = dsp->stereo_mode == STEREO_MONO ? 1 : 2;
+    do
-    
+    {
-    for (j = 0; j < num_channels; j++) {
+        /* Just initialize things and not worry too much about the relatively
-        last_sample = r->last_sample[j];
+         * uncommon case of not being able to spit out a sample for the frame.
+         */
+        int32_t *s = src[--channels];
+        int32_t last = r->last_sample[channels];
+        
+        r->last_sample[channels] = s[count - 1];
+        d = dst[channels];
+        phase = r->phase;
+        pos = phase >> 16;
         /* Do we need last sample of previous frame for interpolation? */
         if (pos > 0)
-            last_sample = src[j][pos - 1];
+            last = s[pos - 1];
-        /* Be sure starting position isn't passed the available data */
+        while (pos < count)
-        if (pos < count)
-            *d[j]++ = last_sample + FRACMUL((phase & 0xffff) << 15,
-                src[j][pos] - last_sample);
-        else
         {
-            /* No samples can be output here since were already passed the
+            *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
-               end. Keep phase, save the last sample and return nothing. */
+            phase += delta;
-            i = 0;
+            pos = phase >> 16;
-            goto done;
+            last = s[pos - 1];
         }
     }
+    while (channels > 0);
-    phase += delta;
- 
-    while ((pos = phase >> 16) < count)
-    {
-        for (j = 0; j < num_channels; j++)
-            *d[j]++ = src[j][pos - 1] + FRACMUL((phase & 0xffff) << 15,
-                src[j][pos] - src[j][pos - 1]);
-         phase += delta;
-         i++;
-    }
     /* Wrap phase accumulator back to start of next frame. */
-done:
     r->phase = phase - (count << 16);
-    r->last_sample[0] = src[0][count - 1];
+    return d - dst[0];
-    r->last_sample[1] = src[1][count - 1];
-    return i;
 }
-static long upsample(int32_t **dst, int32_t **src, int count, struct resample_data *r)
+/* Linear-interpolation upsampler (C version, compiled only when
+ * DSP_HAVE_ASM_RESAMPLING is not defined).
+ * channels: number of channels to process (src/dst entries used)
+ * count:    number of input samples per channel
+ * r:        resampler state; r->phase and r->last_sample[] are updated
+ * src, dst: per-channel input and output buffers
+ * Channels are processed from the highest index down to 0, each starting
+ * from the same r->phase. While the position is 0 the previous frame's last
+ * sample is interpolated against s[0]; afterwards s[pos - 1] and s[pos] are
+ * used. Only the fractional part of the phase is kept for the next frame.
+ * Returns the number of samples written to dst[0]. */
+static int dsp_upsample(int channels, int count, struct resample_data *r,
+                        int32_t **src, int32_t **dst)
 {
-    long phase = r->phase;
     long delta = r->delta;
-    int32_t *d[2] = { dst[0], dst[1] };
+    long phase, pos;
-    int i = 0, j;
+    int32_t *d;
-    int pos;
-    int num_channels = dsp->stereo_mode == STEREO_MONO ? 1 : 2;
-   
-    while ((phase >> 16) == 0)
-    {
-       for (j = 0; j < num_channels; j++)
-           *d[j]++ = r->last_sample[j] + FRACMUL((phase & 0xffff) << 15,
-                src[j][0] - r->last_sample[j]);
-        phase += delta;
-        i++;
-    }
-    while ((pos = phase >> 16) < count)
+    /* Rolled channel loop actually showed slightly faster. */
+    do
     {
-        for (j = 0; j < num_channels; j++)
+        /* Should always be able to output a sample for a ratio up to
-            *d[j]++ = src[j][pos - 1] + FRACMUL((phase & 0xffff) << 15,
+           RESAMPLE_BUF_COUNT / SAMPLE_BUF_COUNT. */
-                src[j][pos] - src[j][pos - 1]);
+        int32_t *s = src[--channels];
-        phase += delta;
+        int32_t last = r->last_sample[channels];
-        i++;
+        
+        r->last_sample[channels] = s[count - 1];
+        d = dst[channels];
+        phase = r->phase;
+        pos = phase >> 16;
+        while (pos == 0)
+        {
+            *d++ = last + FRACMUL((phase & 0xffff) << 15, s[0] - last);
+            phase += delta;
+            pos = phase >> 16;
+        }
+        while (pos < count)
+        {
+            last = s[pos - 1];
+            *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
+            phase += delta;
+            pos = phase >> 16;
+        }
     }
+    while (channels > 0);
     /* Wrap phase accumulator back to start of next frame. */
-    r->phase = phase - (count << 16);
+    r->phase = phase & 0xffff;
-    r->last_sample[0] = src[0][count - 1];
+    return d - dst[0];
-    r->last_sample[1] = src[1][count - 1];
-    return i;
 }
+#endif /* DSP_HAVE_ASM_RESAMPLING */
 /* Resample count stereo samples. Updates the src array, if resampling is
 * done, to refer to the resampled data. Returns number of stereo samples
 * for further processing.
 */
-static inline int resample(int32_t* src[], int count)
+static inline int resample(int32_t *src[], int count)
 {
-    long new_count;
+    long new_count = count;
    if (dsp->frequency != NATIVE_FREQUENCY)
    {
-        int32_t* dst[2] = {&resample_buf[0], &resample_buf[RESAMPLE_BUF_COUNT / 2]};
+        int32_t *dst[2] =
+        {
+            resample_buf,
+            resample_buf + RESAMPLE_BUF_COUNT/2,
+        };
+        int channels = dsp->num_channels;
        if (dsp->frequency < NATIVE_FREQUENCY)
-        {
+            new_count = dsp_upsample(channels, count,
-            new_count = upsample(dst, src, count, 
+                                     &resample_data[current_codec],
-                            &resample_data[current_codec]);
+                                     src, dst);
-        }
        else
-        {
+            new_count = dsp_downsample(channels, count,
-            new_count = downsample(dst, src, count,
+                                       &resample_data[current_codec],
-                            &resample_data[current_codec]);
+                                       src, dst);
-        }
        src[0] = dst[0];
-        if (dsp->stereo_mode != STEREO_MONO)
+        src[1] = dst[channels - 1];
-            src[1] = dst[1];
-        else
-            src[1] = dst[0];
-    }
-    else
-    {
-        new_count = count;
    }
    return new_count;
@@ -378,8 +456,7 @@ void dsp_dither_enable(bool enable)
+/* Clear the dither state and recompute dsp->dither_bias and
+ * dsp->dither_mask from the current dsp->frac_bits. */
 static void dither_init(void)
 {
-    memset(&dither_data[0], 0, sizeof(struct dither_data));
+    memset(dither_data, 0, sizeof(dither_data));
-    memset(&dither_data[1], 0, sizeof(struct dither_data));
     dsp->dither_bias = (1L << (dsp->frac_bits - NATIVE_DEPTH));
     dsp->dither_mask = (1L << (dsp->frac_bits + 1 - NATIVE_DEPTH)) - 1;
 }
@@ -592,7 +669,7 @@ void dsp_set_eq_coefs(int band)
 static void eq_process(int32_t **x, unsigned num)
 {
    int i;
-    unsigned int channels = dsp->stereo_mode != STEREO_MONO ? 2 : 1;
+    unsigned int channels = dsp->num_channels;
    unsigned shift;
    /* filter configuration currently is 1 low shelf filter, 3 band peaking
@@ -772,7 +849,7 @@ int dsp_process(char *dst, const char *src[], int count)
    while (count > 0)
    {
-        samples = convert_to_internal(src, count, tmp);
+        samples = dsp->convert_to_internal(src, tmp, count);
        count -= samples;
        apply_gain(tmp, samples);
        samples = resample(tmp, samples);
@@ -886,7 +963,7 @@ bool dsp_configure(int setting, intptr_t value)
    case DSP_SET_SAMPLE_DEPTH:
        dsp->sample_depth = value;
- 
        if (dsp->sample_depth <= NATIVE_DEPTH)
        {
            dsp->frac_bits = WORD_FRACBITS;
@@ -902,15 +979,19 @@ bool dsp_configure(int setting, intptr_t value)
            dsp->clip_min = -(1 << value);
        }
+        new_sample_conversion();
        dither_init(); 
        break;
    case DSP_SET_STEREO_MODE:
-        dsp->stereo_mode = (long) value;
+        dsp->stereo_mode = value;
+        dsp->num_channels = value == STEREO_MONO ? 1 : 2;
+        new_sample_conversion();
        break;
    case DSP_RESET:
        dsp->stereo_mode = STEREO_NONINTERLEAVED;
+        dsp->num_channels = 2;
        dsp->clip_max =  ((1 << WORD_FRACBITS) - 1);
        dsp->clip_min = -((1 << WORD_FRACBITS));
        dsp->track_gain = 0;
@@ -921,6 +1002,7 @@ bool dsp_configure(int setting, intptr_t value)
        dsp->sample_depth = NATIVE_DEPTH;
        dsp->frac_bits = WORD_FRACBITS;
        dsp->new_gain = true;
+        new_sample_conversion();
        break;
    case DSP_FLUSH:
diff --git a/apps/dsp.h b/apps/dsp.h
index 8e82b6118d..b99ac213ab 100644
--- a/apps/dsp.h
+++ b/apps/dsp.h
@@ -24,9 +24,13 @@
 #include <stdbool.h>
 #define NATIVE_FREQUENCY       44100
-#define STEREO_INTERLEAVED     0
+/* Stereo modes of the incoming sample data. The values are consecutive
+ * starting at 0 and STEREO_NUM_MODES is the number of modes, so new modes
+ * must be added before STEREO_NUM_MODES. */
+enum
-#define STEREO_NONINTERLEAVED  1
+{
-#define STEREO_MONO            2
+    STEREO_INTERLEAVED = 0,
+    STEREO_NONINTERLEAVED,
+    STEREO_MONO,
+    STEREO_NUM_MODES,
+};
 enum {
    CODEC_SET_FILEBUF_WATERMARK = 1,
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index 04c2848a98..add76a07f8 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -27,5 +27,13 @@
 void apply_crossfeed(int32_t* src[], int count);
 #endif
+/* Coldfire builds get assembly resamplers. Defining
+ * DSP_HAVE_ASM_RESAMPLING tells dsp.c to leave out its C versions of
+ * dsp_downsample() and dsp_upsample(). resample_data is declared as void *
+ * here; dsp.c passes a struct resample_data pointer. */
+#if defined (CPU_COLDFIRE)
+#define DSP_HAVE_ASM_RESAMPLING
+int dsp_downsample(int channels, int count, void *resample_data,
+                   int32_t **src, int32_t **dst);
+int dsp_upsample(int channels, int count, void *resample_data,
+                 int32_t **src, int32_t **dst);
 #endif
+#endif /* _DSP_ASM_H */
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
index 719d1db1d5..233be82860 100644
--- a/apps/dsp_cf.S
+++ b/apps/dsp_cf.S
@@ -17,8 +17,11 @@
 *
 ****************************************************************************/
-    .section .text
+/****************************************************************************
-    .global apply_crossfeed 
+ * apply_crossfeed(int32_t* src[], int count)
+ */
+    .section    .text
+    .global     apply_crossfeed 
 apply_crossfeed:
    lea.l (-44, %sp), %sp
    movem.l %d2-%d7/%a2-%a6, (%sp)      | save all regs
@@ -67,11 +70,11 @@ apply_crossfeed:
    addq.l #1, %d4                      | index++
    moveq.l #13, %d6
    cmp.l %d6, %d4                      | wrap index to 0 if it overflows
-    jlt .nowrap
+    jlt .cfnowrap
    moveq.l #13*8, %d4
    sub.l %d4, %a0                      | wrap back delay line ptr as well
    clr.l %d4
-.nowrap:
+.cfnowrap:
    subq.l #1, %d7
    jne .cfloop
    | save data back to struct
@@ -81,4 +84,138 @@ apply_crossfeed:
    movem.l (%sp), %d2-%d7/%a2-%a6
    lea.l (44, %sp), %sp
    rts
+.cfend:
+    .size       apply_crossfeed,.cfend-apply_crossfeed
+/****************************************************************************
+ * dsp_downsample(int channels, int count, struct resample_data *r,
+ *                int32_t **src, int32_t **dst)
+ *
+ * Linear-interpolation downsampler. Channels are processed from the last
+ * one down to the first, each starting from r->phase. Returns in %d0 the
+ * number of samples written to dst[0]. Expects r->phase at offset 0,
+ * r->delta at offset 4 and r->last_sample[] from offset 8.
+ */
+    .section    .text
+    .global     dsp_downsample
+dsp_downsample:
+        lea.l           -40(%sp), %sp           | save non-clobberables
+        movem.l     %d2-%d7/%a2-%a5, (%sp)  |
+        movem.l         44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels
+                                        | %d3 = count
+                                        | %a0 = r
+                                                | %a1 = src
+                                        | %a2 = dst
+        move.l          4(%a0), %d4                             | %d4 = delta = r->delta
+        move.l          #16, %d7                | %d7 = shift
+.dschannel_loop:
+        move.l          (%a0), %d5                              | %d5 = phase = r->phase
+        move.l      -4(%a1, %d2.l*4), %a3       | %a3 = s = src[ch-1]
+        move.l      -4(%a2, %d2.l*4), %a4       | %a4 = d = dst[ch-1]
+        lea.l       4(%a0, %d2.l*4), %a5    | %a5 = &r->last_sample[ch-1]
+        move.l          (%a5), %d0                              | %d0 = last = r->last_sample[ch-1]
+        move.l      -4(%a3, %d3.l*4), %d1   | r->last_sample[ch-1] = s[count-1]
+        move.l      %d1, (%a5)                          |
+        move.l      %d5, %d6                            | %d6 = pos = phase >> 16
+        lsr.l       %d7, %d6                |
+        cmp.l       %d3, %d6                            | past end of samples?
+        bge.b       .dsloop_skip                        | yes? skip loop
+        tst.l       %d6                     | need last sample of prev. frame?
+        bne.b       .dsloop                             | no? start main loop
+        move.l      (%a3, %d6.l*4), %d1         | %d1 = s[pos]
+        bra.b       .dsuse_last_start           | start with last (last in %d0)
+.dsloop:
+        lea.l       -4(%a3, %d6.l*4), %a5   | load s[pos-1] and s[pos]
+        movem.l     (%a5), %d0-%d1          |
+.dsuse_last_start:
+        sub.l       %d0, %d1                | %d1 = diff = s[pos] - s[pos-1]
+        move.l      %d0, %acc0              | %acc0 = previous sample
+        move.l      %d5, %d0                | frac = (phase << 16) >> 1
+        lsl.l       %d7, %d0                |
+        lsr.l       #1, %d0                 |
+        mac.l       %d0, %d1, %acc0         | %acc0 += frac * diff
+        move.l      %acc0, %d0              |
+        add.l       %d4, %d5                | phase += delta
+        move.l      %d5, %d6                | pos = phase >> 16
+        lsr.l       %d7, %d6                |
+        move.l      %d0, (%a4)+             | *d++ = %d0
+        cmp.l       %d3, %d6                | pos < count?
+        blt.b       .dsloop                 | yes? continue resampling
+.dsloop_skip:
+        subq.l      #1, %d2                                     | ch > 0?
+        bgt.b           .dschannel_loop                 | yes? process next channel
+        asl.l       %d7, %d3                            | wrap phase to start of next frame
+        sub.l       %d3, %d5                | r->phase = phase - (count << 16)
+        move.l      %d5, (%a0)                          |
+        move.l      %a4, %d0                            | return d - dst[0]
+        sub.l       (%a2), %d0                          |
+        asr.l       #2, %d0                 | convert bytes->samples
+        movem.l     (%sp), %d2-%d7/%a2-%a5  | restore non-clobberables
+        move.l          %acc1, %acc0                    | clear %acc0 (presumably %acc1 is
+                                        | expected to hold zero - confirm)
+        lea.l       40(%sp), %sp            | cleanup stack
+        rts                                 | buh-bye
+.dsend:
+    .size       dsp_downsample,.dsend-dsp_downsample
+/****************************************************************************
+ * dsp_upsample(int channels, int count, struct resample_data *r,
+ *              int32_t **src, int32_t **dst)
+ *
+ * Linear-interpolation upsampler. Channels are processed from the last one
+ * down to the first, each starting from r->phase. The fractional phase and
+ * delta are kept in the high word of their registers so that the carry from
+ * "phase += delta" signals a step to the next input sample. Returns in %d0
+ * the number of samples written to dst[0]. Expects r->phase at offset 0,
+ * r->delta at offset 4 and r->last_sample[] from offset 8.
+ */
+    .section    .text
+    .global     dsp_upsample
+dsp_upsample:
+        lea.l       -40(%sp), %sp                       | save non-clobberables
+        movem.l     %d2-%d7/%a2-%a5, (%sp)  |
+        movem.l         44(%sp), %d2-%d3/%a0-%a2| %d2 = ch = channels
+                                        | %d3 = count
+                                        | %a0 = r
+                                                | %a1 = src
+                                        | %a2 = dst
+        move.l          4(%a0), %d4                             | %d4 = delta = r->delta
+        swap        %d4                     | swap delta to high word to use
+                                        | carries to increment position
+.uschannel_loop:
+        move.l          (%a0), %d5                              | %d5 = phase = r->phase
+        move.l      -4(%a1, %d2.l*4), %a3       | %a3 = s = src[ch-1]
+        move.l      -4(%a2, %d2.l*4), %a4       | %a4 = d = dst[ch-1]
+        lea.l       4(%a0, %d2.l*4), %a5    | %a5 = &r->last_sample[ch-1]
+        move.l          (%a5), %d0                              | %d0 = last = r->last_sample[ch-1]
+        move.l      -4(%a3, %d3.l*4), %d1   | r->last_sample[ch-1] = s[count-1]
+        move.l      %d1, (%a5)                          |
+        moveq.l         #16, %d1                | %d1 = shift
+        move.l      %d5, %d6                            | %d6 = pos = phase >> 16
+        lsl.l       %d1, %d5                | swap phase to high word to use
+                                        | carries to increment position
+        lsr.l       %d1, %d6                | pos == 0?
+        bne.b       .usstart_1                  | no? transition from down
+        move.l      (%a3), %d1                          | %d1 = s[0]
+        sub.l       %d0, %d1                | diff = s[pos] - last
+        bra.b       .usloop_0               | jump to typical start point
+.usstart_1:
+        cmp.l       %d3, %d6                | past end of samples?
+        bge.b       .usloop_skip            | yes? skip loop
+.usloop_1:
+        lea.l       -4(%a3, %d6.l*4), %a5   | load s[pos-1] and s[pos]
+        movem.l     (%a5), %d0-%d1          |
+        sub.l       %d0, %d1                | %d1 = diff = s[pos] - s[pos-1]
+.usloop_0:
+        move.l      %d0, %acc0                          | %acc0 = previous sample
+        lsr.l       #1, %d5                 | make phase into frac
+        mac.l       %d1, %d5, %acc0         | %acc0 += diff * frac
+        move.l          %acc0, %d7              |
+        lsl.l       #1, %d5                 | restore frac to phase
+        move.l      %d7, (%a4)+                         | *d++ = %d7
+        add.l       %d4, %d5                | phase += delta
+        bcc.b       .usloop_0                   | load next values?
+        addq.l      #1, %d6                 | increment position
+        cmp.l       %d3, %d6                            | pos < count?
+        blt.b       .usloop_1                   | yes? continue resampling
+.usloop_skip:
+        subq.l      #1, %d2                                     | ch > 0?
+        bgt.b           .uschannel_loop                 | yes? process next channel
+        swap        %d5                                         | wrap phase to start of next frame
+        move.l      %d5, (%a0)                          | ...and save in r->phase
+        move.l      %a4, %d0                            | return d - dst[0]
+        sub.l       (%a2), %d0                          |
+        asr.l       #2, %d0                 | convert bytes->samples
+        movem.l     (%sp), %d2-%d7/%a2-%a5  | restore non-clobberables
+        move.l          %acc1, %acc0                    | clear %acc0 (presumably %acc1 is
+                                        | expected to hold zero - confirm)
+        lea.l       40(%sp), %sp            | cleanup stack
+        rts                                 | buh-bye
+.usend:
+    .size       dsp_upsample,.usend-dsp_upsample
author	Michael Sevakis <jethead71@rockbox.org>	2007-02-19 02:49:26 +0000
committer	Michael Sevakis <jethead71@rockbox.org>	2007-02-19 02:49:26 +0000
commit	36175ac9453999d2d079c521126ecc5ac7a8d984 (patch)
tree	a37e87b5fd7283d1456b7a346e16c1a5ed590a2c
parent	2801a87d543f38cadd076330f329c84e23852997 (diff)
download	rockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.tar.gz rockbox-36175ac9453999d2d079c521126ecc5ac7a8d984.zip