From b5716df4cb2837bbbc42195cf1aefcf03e21d6a6 Mon Sep 17 00:00:00 2001 From: Sean Bartell Date: Fri, 24 Jun 2011 01:25:21 -0400 Subject: Build librbcodec with DSP and metadata. All associated files are moved to /lib/rbcodec. Change-Id: I572ddd2b8a996aae1e98c081d06b1ed356dce222 --- lib/rbcodec/dsp/compressor.c | 363 +++++++++ lib/rbcodec/dsp/compressor.h | 29 + lib/rbcodec/dsp/dsp.c | 1573 ++++++++++++++++++++++++++++++++++++ lib/rbcodec/dsp/dsp.h | 125 +++ lib/rbcodec/dsp/dsp_arm.S | 561 +++++++++++++ lib/rbcodec/dsp/dsp_arm_v6.S | 127 +++ lib/rbcodec/dsp/dsp_asm.h | 86 ++ lib/rbcodec/dsp/dsp_cf.S | 611 ++++++++++++++ lib/rbcodec/dsp/eq.c | 268 ++++++ lib/rbcodec/dsp/eq.h | 50 ++ lib/rbcodec/dsp/eq_arm.S | 89 ++ lib/rbcodec/dsp/eq_cf.S | 91 +++ lib/rbcodec/dsp/eqs/Acoustic.cfg | 17 + lib/rbcodec/dsp/eqs/Bass.cfg | 17 + lib/rbcodec/dsp/eqs/Classical.cfg | 17 + lib/rbcodec/dsp/eqs/Default.cfg | 17 + lib/rbcodec/dsp/eqs/Disco.cfg | 17 + lib/rbcodec/dsp/eqs/Electronic.cfg | 17 + lib/rbcodec/dsp/eqs/Hip-Hop.cfg | 17 + lib/rbcodec/dsp/eqs/Jazz.cfg | 17 + lib/rbcodec/dsp/eqs/Lounge.cfg | 17 + lib/rbcodec/dsp/eqs/Pop.cfg | 17 + lib/rbcodec/dsp/eqs/R&B.cfg | 17 + lib/rbcodec/dsp/eqs/Rock.cfg | 17 + lib/rbcodec/dsp/eqs/Vocal.cfg | 17 + lib/rbcodec/dsp/tdspeed.c | 450 +++++++++++ lib/rbcodec/dsp/tdspeed.h | 49 ++ 27 files changed, 4693 insertions(+) create mode 100644 lib/rbcodec/dsp/compressor.c create mode 100644 lib/rbcodec/dsp/compressor.h create mode 100644 lib/rbcodec/dsp/dsp.c create mode 100644 lib/rbcodec/dsp/dsp.h create mode 100644 lib/rbcodec/dsp/dsp_arm.S create mode 100644 lib/rbcodec/dsp/dsp_arm_v6.S create mode 100644 lib/rbcodec/dsp/dsp_asm.h create mode 100644 lib/rbcodec/dsp/dsp_cf.S create mode 100644 lib/rbcodec/dsp/eq.c create mode 100644 lib/rbcodec/dsp/eq.h create mode 100644 lib/rbcodec/dsp/eq_arm.S create mode 100644 lib/rbcodec/dsp/eq_cf.S create mode 100644 lib/rbcodec/dsp/eqs/Acoustic.cfg create mode 100644 lib/rbcodec/dsp/eqs/Bass.cfg create mode 
100644 lib/rbcodec/dsp/eqs/Classical.cfg create mode 100644 lib/rbcodec/dsp/eqs/Default.cfg create mode 100644 lib/rbcodec/dsp/eqs/Disco.cfg create mode 100644 lib/rbcodec/dsp/eqs/Electronic.cfg create mode 100644 lib/rbcodec/dsp/eqs/Hip-Hop.cfg create mode 100644 lib/rbcodec/dsp/eqs/Jazz.cfg create mode 100644 lib/rbcodec/dsp/eqs/Lounge.cfg create mode 100644 lib/rbcodec/dsp/eqs/Pop.cfg create mode 100644 lib/rbcodec/dsp/eqs/R&B.cfg create mode 100644 lib/rbcodec/dsp/eqs/Rock.cfg create mode 100644 lib/rbcodec/dsp/eqs/Vocal.cfg create mode 100644 lib/rbcodec/dsp/tdspeed.c create mode 100644 lib/rbcodec/dsp/tdspeed.h (limited to 'lib/rbcodec/dsp') diff --git a/lib/rbcodec/dsp/compressor.c b/lib/rbcodec/dsp/compressor.c new file mode 100644 index 0000000000..3a8d52e4da --- /dev/null +++ b/lib/rbcodec/dsp/compressor.c @@ -0,0 +1,363 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2009 Jeffrey Goode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ +#include "config.h" +#include "fixedpoint.h" +#include "fracmul.h" +#include "settings.h" +#include "dsp.h" +#include "compressor.h" + +/* Define LOGF_ENABLE to enable logf output in this file */ +/*#define LOGF_ENABLE*/ +#include "logf.h" + +static int32_t comp_rel_slope IBSS_ATTR; /* S7.24 format */ +static int32_t comp_makeup_gain IBSS_ATTR; /* S7.24 format */ +static int32_t comp_curve[66] IBSS_ATTR; /* S7.24 format */ +static int32_t release_gain IBSS_ATTR; /* S7.24 format */ + +#define UNITY (1L << 24) /* unity gain in S7.24 format */ + +/** COMPRESSOR UPDATE + * Called via the menu system to configure the compressor process */ +bool compressor_update(void) +{ + static int curr_set[5]; + int new_set[5] = { + global_settings.compressor_threshold, + global_settings.compressor_makeup_gain, + global_settings.compressor_ratio, + global_settings.compressor_knee, + global_settings.compressor_release_time}; + + /* make menu values useful */ + int threshold = new_set[0]; + bool auto_gain = (new_set[1] == 1); + const int comp_ratios[] = {2, 4, 6, 10, 0}; + int ratio = comp_ratios[new_set[2]]; + bool soft_knee = (new_set[3] == 1); + int release = new_set[4] * NATIVE_FREQUENCY / 1000; + + bool changed = false; + bool active = (threshold < 0); + + for (int i = 0; i < 5; i++) + { + if (curr_set[i] != new_set[i]) + { + changed = true; + curr_set[i] = new_set[i]; + +#if defined(ROCKBOX_HAS_LOGF) && defined(LOGF_ENABLE) + switch (i) + { + case 0: + logf(" Compressor Threshold: %d dB\tEnabled: %s", + threshold, active ? "Yes" : "No"); + break; + case 1: + logf(" Compressor Makeup Gain: %s", + auto_gain ? 
"Auto" : "Off"); + break; + case 2: + if (ratio) + { logf(" Compressor Ratio: %d:1", ratio); } + else + { logf(" Compressor Ratio: Limit"); } + break; + case 3: + logf(" Compressor Knee: %s", soft_knee?"Soft":"Hard"); + break; + case 4: + logf(" Compressor Release: %d", release); + break; + } +#endif + } + } + + if (changed && active) + { + /* configure variables for compressor operation */ + static const int32_t db[] = { + /* positive db equivalents in S15.16 format */ + 0x000000, 0x241FA4, 0x1E1A5E, 0x1A94C8, + 0x181518, 0x1624EA, 0x148F82, 0x1338BD, + 0x120FD2, 0x1109EB, 0x101FA4, 0x0F4BB6, + 0x0E8A3C, 0x0DD840, 0x0D3377, 0x0C9A0E, + 0x0C0A8C, 0x0B83BE, 0x0B04A5, 0x0A8C6C, + 0x0A1A5E, 0x09ADE1, 0x094670, 0x08E398, + 0x0884F6, 0x082A30, 0x07D2FA, 0x077F0F, + 0x072E31, 0x06E02A, 0x0694C8, 0x064BDF, + 0x060546, 0x05C0DA, 0x057E78, 0x053E03, + 0x04FF5F, 0x04C273, 0x048726, 0x044D64, + 0x041518, 0x03DE30, 0x03A89B, 0x037448, + 0x03412A, 0x030F32, 0x02DE52, 0x02AE80, + 0x027FB0, 0x0251D6, 0x0224EA, 0x01F8E2, + 0x01CDB4, 0x01A359, 0x0179C9, 0x0150FC, + 0x0128EB, 0x010190, 0x00DAE4, 0x00B4E1, + 0x008F82, 0x006AC1, 0x004699, 0x002305}; + + struct curve_point + { + int32_t db; /* S15.16 format */ + int32_t offset; /* S15.16 format */ + } db_curve[5]; + + /** Set up the shape of the compression curve first as decibel + values */ + /* db_curve[0] = bottom of knee + [1] = threshold + [2] = top of knee + [3] = 0 db input + [4] = ~+12db input (2 bits clipping overhead) */ + + db_curve[1].db = threshold << 16; + if (soft_knee) + { + /* bottom of knee is 3dB below the threshold for soft knee*/ + db_curve[0].db = db_curve[1].db - (3 << 16); + /* top of knee is 3dB above the threshold for soft knee */ + db_curve[2].db = db_curve[1].db + (3 << 16); + if (ratio) + /* offset = -3db * (ratio - 1) / ratio */ + db_curve[2].offset = (int32_t)((long long)(-3 << 16) + * (ratio - 1) / ratio); + else + /* offset = -3db for hard limit */ + db_curve[2].offset = (-3 << 16); + } + else + { + /* 
bottom of knee is at the threshold for hard knee */ + db_curve[0].db = threshold << 16; + /* top of knee is at the threshold for hard knee */ + db_curve[2].db = threshold << 16; + db_curve[2].offset = 0; + } + + /* Calculate 0db and ~+12db offsets */ + db_curve[4].db = 0xC0A8C; /* db of 2 bits clipping */ + if (ratio) + { + /* offset = threshold * (ratio - 1) / ratio */ + db_curve[3].offset = (int32_t)((long long)(threshold << 16) + * (ratio - 1) / ratio); + db_curve[4].offset = (int32_t)((long long)-db_curve[4].db + * (ratio - 1) / ratio) + db_curve[3].offset; + } + else + { + /* offset = threshold for hard limit */ + db_curve[3].offset = (threshold << 16); + db_curve[4].offset = -db_curve[4].db + db_curve[3].offset; + } + + /** Now set up the comp_curve table with compression offsets in the + form of gain factors in S7.24 format */ + /* comp_curve[0] is 0 (-infinity db) input */ + comp_curve[0] = UNITY; + /* comp_curve[1 to 63] are intermediate compression values + corresponding to the 6 MSB of the input values of a non-clipped + signal */ + for (int i = 1; i < 64; i++) + { + /* db constants are stored as positive numbers; + make them negative here */ + int32_t this_db = -db[i]; + + /* no compression below the knee */ + if (this_db <= db_curve[0].db) + comp_curve[i] = UNITY; + + /* if soft knee and below top of knee, + interpolate along soft knee slope */ + else if (soft_knee && (this_db <= db_curve[2].db)) + comp_curve[i] = fp_factor(fp_mul( + ((this_db - db_curve[0].db) / 6), + db_curve[2].offset, 16), 16) << 8; + + /* interpolate along ratio slope above the knee */ + else + comp_curve[i] = fp_factor(fp_mul( + fp_div((db_curve[1].db - this_db), db_curve[1].db, 16), + db_curve[3].offset, 16), 16) << 8; + } + /* comp_curve[64] is the compression level of a maximum level, + non-clipped signal */ + comp_curve[64] = fp_factor(db_curve[3].offset, 16) << 8; + + /* comp_curve[65] is the compression level of a maximum level, + clipped signal */ + comp_curve[65] = 
fp_factor(db_curve[4].offset, 16) << 8; + +#if defined(ROCKBOX_HAS_LOGF) && defined(LOGF_ENABLE) + logf("\n *** Compression Offsets ***"); + /* some settings for display only, not used in calculations */ + db_curve[0].offset = 0; + db_curve[1].offset = 0; + db_curve[3].db = 0; + + for (int i = 0; i <= 4; i++) + { + logf("Curve[%d]: db: % 6.2f\toffset: % 6.2f", i, + (float)db_curve[i].db / (1 << 16), + (float)db_curve[i].offset / (1 << 16)); + } + + logf("\nGain factors:"); + for (int i = 1; i <= 65; i++) + { + debugf("%02d: %.6f ", i, (float)comp_curve[i] / UNITY); + if (i % 4 == 0) debugf("\n"); + } + debugf("\n"); +#endif + + /* if using auto peak, then makeup gain is max offset - + .1dB headroom */ + comp_makeup_gain = auto_gain ? + fp_factor(-(db_curve[3].offset) - 0x199A, 16) << 8 : UNITY; + logf("Makeup gain:\t%.6f", (float)comp_makeup_gain / UNITY); + + /* calculate per-sample gain change a rate of 10db over release time + */ + comp_rel_slope = 0xAF0BB2 / release; + logf("Release slope:\t%.6f", (float)comp_rel_slope / UNITY); + + release_gain = UNITY; + } + + return active; +} + +/** GET COMPRESSION GAIN + * Returns the required gain factor in S7.24 format in order to compress the + * sample in accordance with the compression curve. Always 1 or less. 
+ */ +static inline int32_t get_compression_gain(struct dsp_data *data, + int32_t sample) +{ + const int frac_bits_offset = data->frac_bits - 15; + + /* sample must be positive */ + if (sample < 0) + sample = -(sample + 1); + + /* shift sample into 15 frac bit range */ + if (frac_bits_offset > 0) + sample >>= frac_bits_offset; + if (frac_bits_offset < 0) + sample <<= -frac_bits_offset; + + /* normal case: sample isn't clipped */ + if (sample < (1 << 15)) + { + /* index is 6 MSB, rem is 9 LSB */ + int index = sample >> 9; + int32_t rem = (sample & 0x1FF) << 22; + + /* interpolate from the compression curve: + higher gain - ((rem / (1 << 31)) * (higher gain - lower gain)) */ + return comp_curve[index] - (FRACMUL(rem, + (comp_curve[index] - comp_curve[index + 1]))); + } + /* sample is somewhat clipped, up to 2 bits of overhead */ + if (sample < (1 << 17)) + { + /* straight interpolation: + higher gain - ((clipped portion of sample * 4/3 + / (1 << 31)) * (higher gain - lower gain)) */ + return comp_curve[64] - (FRACMUL(((sample - (1 << 15)) / 3) << 16, + (comp_curve[64] - comp_curve[65]))); + } + + /* sample is too clipped, return invalid value */ + return -1; +} + +/** COMPRESSOR PROCESS + * Changes the gain of the samples according to the compressor curve + */ +void compressor_process(int count, struct dsp_data *data, int32_t *buf[]) +{ + const int num_chan = data->num_channels; + int32_t *in_buf[2] = {buf[0], buf[1]}; + + while (count-- > 0) + { + int ch; + /* use lowest (most compressed) gain factor of the output buffer + sample pair for both samples (mono is also handled correctly here) + */ + int32_t sample_gain = UNITY; + for (ch = 0; ch < num_chan; ch++) + { + int32_t this_gain = get_compression_gain(data, *in_buf[ch]); + if (this_gain < sample_gain) + sample_gain = this_gain; + } + + /* perform release slope; skip if no compression and no release slope + */ + if ((sample_gain != UNITY) || (release_gain != UNITY)) + { + /* if larger offset than previous slope, 
start new release slope + */ + if ((sample_gain <= release_gain) && (sample_gain > 0)) + { + release_gain = sample_gain; + } + else + /* keep sloping towards unity gain (and ignore invalid value) */ + { + release_gain += comp_rel_slope; + if (release_gain > UNITY) + { + release_gain = UNITY; + } + } + } + + /* total gain factor is the product of release gain and makeup gain, + but avoid computation if possible */ + int32_t total_gain = ((release_gain == UNITY) ? comp_makeup_gain : + (comp_makeup_gain == UNITY) ? release_gain : + FRACMUL_SHL(release_gain, comp_makeup_gain, 7)); + + /* Implement the compressor: apply total gain factor (if any) to the + output buffer sample pair/mono sample */ + if (total_gain != UNITY) + { + for (ch = 0; ch < num_chan; ch++) + { + *in_buf[ch] = FRACMUL_SHL(total_gain, *in_buf[ch], 7); + } + } + in_buf[0]++; + in_buf[1]++; + } +} + +void compressor_reset(void) +{ + release_gain = UNITY; +} diff --git a/lib/rbcodec/dsp/compressor.h b/lib/rbcodec/dsp/compressor.h new file mode 100644 index 0000000000..6154372e05 --- /dev/null +++ b/lib/rbcodec/dsp/compressor.h @@ -0,0 +1,29 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2009 Jeffrey Goode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#ifndef COMPRESSOR_H +#define COMPRESSOR_H + +void compressor_process(int count, struct dsp_data *data, int32_t *buf[]); +bool compressor_update(void); +void compressor_reset(void); + +#endif /* COMPRESSOR_H */ diff --git a/lib/rbcodec/dsp/dsp.c b/lib/rbcodec/dsp/dsp.c new file mode 100644 index 0000000000..4da555747b --- /dev/null +++ b/lib/rbcodec/dsp/dsp.c @@ -0,0 +1,1573 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2005 Miika Pekkarinen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ +#include "config.h" +#include "system.h" +#include +#include "dsp.h" +#include "dsp-util.h" +#include "eq.h" +#include "compressor.h" +#include "kernel.h" +#include "settings.h" +#include "replaygain.h" +#include "tdspeed.h" +#include "core_alloc.h" +#include "fixedpoint.h" +#include "fracmul.h" + +/* Define LOGF_ENABLE to enable logf output in this file */ +/*#define LOGF_ENABLE*/ +#include "logf.h" + +/* 16-bit samples are scaled based on these constants. The shift should be + * no more than 15. 
+ */ +#define WORD_SHIFT 12 +#define WORD_FRACBITS 27 + +#define NATIVE_DEPTH 16 +#define SMALL_SAMPLE_BUF_COUNT 128 /* Per channel */ +#define DEFAULT_GAIN 0x01000000 + +/* enums to index conversion properly with stereo mode and other settings */ +enum +{ + SAMPLE_INPUT_LE_NATIVE_I_STEREO = STEREO_INTERLEAVED, + SAMPLE_INPUT_LE_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED, + SAMPLE_INPUT_LE_NATIVE_MONO = STEREO_MONO, + SAMPLE_INPUT_GT_NATIVE_I_STEREO = STEREO_INTERLEAVED + STEREO_NUM_MODES, + SAMPLE_INPUT_GT_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED + STEREO_NUM_MODES, + SAMPLE_INPUT_GT_NATIVE_MONO = STEREO_MONO + STEREO_NUM_MODES, + SAMPLE_INPUT_GT_NATIVE_1ST_INDEX = STEREO_NUM_MODES +}; + +enum +{ + SAMPLE_OUTPUT_MONO = 0, + SAMPLE_OUTPUT_STEREO, + SAMPLE_OUTPUT_DITHERED_MONO, + SAMPLE_OUTPUT_DITHERED_STEREO +}; + +/* No asm...yet */ +struct dither_data +{ + long error[3]; /* 00h */ + long random; /* 0ch */ + /* 10h */ +}; + +struct crossfeed_data +{ + int32_t gain; /* 00h - Direct path gain */ + int32_t coefs[3]; /* 04h - Coefficients for the shelving filter */ + int32_t history[4]; /* 10h - Format is x[n - 1], y[n - 1] for both channels */ + int32_t delay[13][2]; /* 20h */ + int32_t *index; /* 88h - Current pointer into the delay line */ + /* 8ch */ +}; + +/* Current setup is one lowshelf filters three peaking filters and one + * highshelf filter. Varying the number of shelving filters make no sense, + * but adding peaking filters is possible. + */ +struct eq_state +{ + char enabled[5]; /* 00h - Flags for active filters */ + struct eqfilter filters[5]; /* 08h - packing is 4? 
*/ + /* 10ch */ +}; + +/* Include header with defines which functions are implemented in assembly + code for the target */ +#include + +/* Typedefs keep things much neater in this case */ +typedef void (*sample_input_fn_type)(int count, const char *src[], + int32_t *dst[]); +typedef int (*resample_fn_type)(int count, struct dsp_data *data, + const int32_t *src[], int32_t *dst[]); +typedef void (*sample_output_fn_type)(int count, struct dsp_data *data, + const int32_t *src[], int16_t *dst); + +/* Single-DSP channel processing in place */ +typedef void (*channels_process_fn_type)(int count, int32_t *buf[]); +/* DSP local channel processing in place */ +typedef void (*channels_process_dsp_fn_type)(int count, struct dsp_data *data, + int32_t *buf[]); + +/* + ***************************************************************************/ + +struct dsp_config +{ + struct dsp_data data; /* Config members for use in external routines */ + long codec_frequency; /* Sample rate of data coming from the codec */ + long frequency; /* Effective sample rate after pitch shift (if any) */ + int sample_depth; + int sample_bytes; + int stereo_mode; + int32_t tdspeed_percent; /* Speed% * PITCH_SPEED_PRECISION */ +#ifdef HAVE_PITCHSCREEN + bool tdspeed_active; /* Timestretch is in use */ +#endif +#ifdef HAVE_SW_TONE_CONTROLS + /* Filter struct for software bass/treble controls */ + struct eqfilter tone_filter; +#endif + /* Functions that change depending upon settings - NULL if stage is + disabled */ + sample_input_fn_type input_samples; + resample_fn_type resample; + sample_output_fn_type output_samples; + /* These will be NULL for the voice codec and is more economical that + way */ + channels_process_dsp_fn_type apply_gain; + channels_process_fn_type apply_crossfeed; + channels_process_fn_type eq_process; + channels_process_fn_type channels_process; + channels_process_dsp_fn_type compressor_process; +}; + +/* General DSP config */ +static struct dsp_config dsp_conf[2] IBSS_ATTR; /* 0=A, 
1=V */ +/* Dithering */ +static struct dither_data dither_data[2] IBSS_ATTR; /* 0=left, 1=right */ +static long dither_mask IBSS_ATTR; +static long dither_bias IBSS_ATTR; +/* Crossfeed */ +struct crossfeed_data crossfeed_data IDATA_ATTR = /* A */ +{ + .index = (int32_t *)crossfeed_data.delay +}; + +/* Equalizer */ +static struct eq_state eq_data; /* A */ + +/* Software tone controls */ +#ifdef HAVE_SW_TONE_CONTROLS +static int prescale; /* A/V */ +static int bass; /* A/V */ +static int treble; /* A/V */ +#endif + +/* Settings applicable to audio codec only */ +#ifdef HAVE_PITCHSCREEN +static int32_t pitch_ratio = PITCH_SPEED_100; +static int big_sample_locks; +#endif +static int channels_mode; + long dsp_sw_gain; + long dsp_sw_cross; +static bool dither_enabled; +static long eq_precut; +static long track_gain; +static bool new_gain; +static long album_gain; +static long track_peak; +static long album_peak; +static long replaygain; +static bool crossfeed_enabled; + +#define AUDIO_DSP (dsp_conf[CODEC_IDX_AUDIO]) +#define VOICE_DSP (dsp_conf[CODEC_IDX_VOICE]) + +/* The internal format is 32-bit samples, non-interleaved, stereo. This + * format is similar to the raw output from several codecs, so the amount + * of copying needed is minimized for that case. 
+ */ + +#define RESAMPLE_RATIO 4 /* Enough for 11,025 Hz -> 44,100 Hz */ +#define SMALL_RESAMPLE_BUF_COUNT (SMALL_SAMPLE_BUF_COUNT * RESAMPLE_RATIO) +#define BIG_SAMPLE_BUF_COUNT SMALL_RESAMPLE_BUF_COUNT +#define BIG_RESAMPLE_BUF_COUNT (BIG_SAMPLE_BUF_COUNT * RESAMPLE_RATIO) + +static int32_t small_sample_buf[2][SMALL_SAMPLE_BUF_COUNT] IBSS_ATTR; +static int32_t small_resample_buf[2][SMALL_RESAMPLE_BUF_COUNT] IBSS_ATTR; + +#ifdef HAVE_PITCHSCREEN +static int32_t (* big_sample_buf)[BIG_SAMPLE_BUF_COUNT] = NULL; +static int32_t (* big_resample_buf)[BIG_RESAMPLE_BUF_COUNT] = NULL; +#endif + +static int sample_buf_count = SMALL_SAMPLE_BUF_COUNT; +static int32_t *sample_buf[2] = { small_sample_buf[0], small_sample_buf[1] }; +static int resample_buf_count = SMALL_RESAMPLE_BUF_COUNT; +static int32_t *resample_buf[2] = { small_resample_buf[0], small_resample_buf[1] }; + +#ifdef HAVE_PITCHSCREEN +int32_t sound_get_pitch(void) +{ + return pitch_ratio; +} + +void sound_set_pitch(int32_t percent) +{ + pitch_ratio = percent; + dsp_configure(&AUDIO_DSP, DSP_SWITCH_FREQUENCY, + AUDIO_DSP.codec_frequency); +} + +static void tdspeed_set_pointers( bool time_stretch_active ) +{ + if( time_stretch_active ) + { + sample_buf_count = BIG_SAMPLE_BUF_COUNT; + resample_buf_count = BIG_RESAMPLE_BUF_COUNT; + sample_buf[0] = big_sample_buf[0]; + sample_buf[1] = big_sample_buf[1]; + resample_buf[0] = big_resample_buf[0]; + resample_buf[1] = big_resample_buf[1]; + } + else + { + sample_buf_count = SMALL_SAMPLE_BUF_COUNT; + resample_buf_count = SMALL_RESAMPLE_BUF_COUNT; + sample_buf[0] = small_sample_buf[0]; + sample_buf[1] = small_sample_buf[1]; + resample_buf[0] = small_resample_buf[0]; + resample_buf[1] = small_resample_buf[1]; + } +} + +static void tdspeed_setup(struct dsp_config *dspc) +{ + /* Assume timestretch will not be used */ + dspc->tdspeed_active = false; + + tdspeed_set_pointers( false ); + + if (!dsp_timestretch_available()) + return; /* Timestretch not enabled or buffer not 
allocated */ + + if (dspc->tdspeed_percent == 0) + dspc->tdspeed_percent = PITCH_SPEED_100; + + if (!tdspeed_config( + dspc->codec_frequency == 0 ? NATIVE_FREQUENCY : dspc->codec_frequency, + dspc->stereo_mode != STEREO_MONO, + dspc->tdspeed_percent)) + return; /* Timestretch not possible or needed with these parameters */ + + /* Timestretch is to be used */ + dspc->tdspeed_active = true; + + tdspeed_set_pointers( true ); +} + + +static int move_callback(int handle, void* current, void* new) +{ + (void)handle;(void)current; + + if ( big_sample_locks > 0 ) + return BUFLIB_CB_CANNOT_MOVE; + + big_sample_buf = new; + + /* no allocation without timestretch enabled */ + tdspeed_set_pointers( true ); + return BUFLIB_CB_OK; +} + +static void lock_sample_buf( bool lock ) +{ + if ( lock ) + big_sample_locks++; + else + big_sample_locks--; +} + +static struct buflib_callbacks ops = { + .move_callback = move_callback, + .shrink_callback = NULL, +}; + + +void dsp_timestretch_enable(bool enabled) +{ + /* Hook to set up timestretch buffer on first call to settings_apply() */ + static int handle = -1; + if (enabled) + { + if (big_sample_buf) + return; /* already allocated and enabled */ + + /* Set up timestretch buffers */ + big_sample_buf = &small_resample_buf[0]; + handle = core_alloc_ex("resample buf", + 2 * BIG_RESAMPLE_BUF_COUNT * sizeof(int32_t), + &ops); + big_sample_locks = 0; + enabled = handle >= 0; + + if (enabled) + { + /* success, now setup tdspeed */ + big_resample_buf = core_get_data(handle); + + tdspeed_init(); + tdspeed_setup(&AUDIO_DSP); + } + } + + if (!enabled) + { + dsp_set_timestretch(PITCH_SPEED_100); + tdspeed_finish(); + + if (handle >= 0) + core_free(handle); + + handle = -1; + big_sample_buf = NULL; + } +} + +void dsp_set_timestretch(int32_t percent) +{ + AUDIO_DSP.tdspeed_percent = percent; + tdspeed_setup(&AUDIO_DSP); +} + +int32_t dsp_get_timestretch() +{ + return AUDIO_DSP.tdspeed_percent; +} + +bool dsp_timestretch_available() +{ + return 
(global_settings.timestretch_enabled && big_sample_buf); +} +#endif /* HAVE_PITCHSCREEN */ + +/* Convert count samples to the internal format, if needed. Updates src + * to point past the samples "consumed" and dst is set to point to the + * samples to consume. Note that for mono, dst[0] equals dst[1], as there + * is no point in processing the same data twice. + */ + +/* convert count 16-bit mono to 32-bit mono */ +static void sample_input_lte_native_mono( + int count, const char *src[], int32_t *dst[]) +{ + const int16_t *s = (int16_t *) src[0]; + const int16_t * const send = s + count; + int32_t *d = dst[0] = dst[1] = sample_buf[0]; + int scale = WORD_SHIFT; + + while (s < send) + { + *d++ = *s++ << scale; + } + + src[0] = (char *)s; +} + +/* convert count 16-bit interleaved stereo to 32-bit noninterleaved */ +static void sample_input_lte_native_i_stereo( + int count, const char *src[], int32_t *dst[]) +{ + const int32_t *s = (int32_t *) src[0]; + const int32_t * const send = s + count; + int32_t *dl = dst[0] = sample_buf[0]; + int32_t *dr = dst[1] = sample_buf[1]; + int scale = WORD_SHIFT; + + while (s < send) + { + int32_t slr = *s++; +#ifdef ROCKBOX_LITTLE_ENDIAN + *dl++ = (slr >> 16) << scale; + *dr++ = (int32_t)(int16_t)slr << scale; +#else /* ROCKBOX_BIG_ENDIAN */ + *dl++ = (int32_t)(int16_t)slr << scale; + *dr++ = (slr >> 16) << scale; +#endif + } + + src[0] = (char *)s; +} + +/* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */ +static void sample_input_lte_native_ni_stereo( + int count, const char *src[], int32_t *dst[]) +{ + const int16_t *sl = (int16_t *) src[0]; + const int16_t *sr = (int16_t *) src[1]; + const int16_t * const slend = sl + count; + int32_t *dl = dst[0] = sample_buf[0]; + int32_t *dr = dst[1] = sample_buf[1]; + int scale = WORD_SHIFT; + + while (sl < slend) + { + *dl++ = *sl++ << scale; + *dr++ = *sr++ << scale; + } + + src[0] = (char *)sl; + src[1] = (char *)sr; +} + +/* convert count 32-bit mono to 32-bit mono 
*/ +static void sample_input_gt_native_mono( + int count, const char *src[], int32_t *dst[]) +{ + dst[0] = dst[1] = (int32_t *)src[0]; + src[0] = (char *)(dst[0] + count); +} + +/* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */ +static void sample_input_gt_native_i_stereo( + int count, const char *src[], int32_t *dst[]) +{ + const int32_t *s = (int32_t *)src[0]; + const int32_t * const send = s + 2*count; + int32_t *dl = dst[0] = sample_buf[0]; + int32_t *dr = dst[1] = sample_buf[1]; + + while (s < send) + { + *dl++ = *s++; + *dr++ = *s++; + } + + src[0] = (char *)send; +} + +/* convert 32 bit-noninterleaved stereo to 32-bit noninterleaved stereo */ +static void sample_input_gt_native_ni_stereo( + int count, const char *src[], int32_t *dst[]) +{ + dst[0] = (int32_t *)src[0]; + dst[1] = (int32_t *)src[1]; + src[0] = (char *)(dst[0] + count); + src[1] = (char *)(dst[1] + count); +} + +/** + * sample_input_new_format() + * + * set the to-native sample conversion function based on dsp sample parameters + * + * !DSPPARAMSYNC + * needs syncing with changes to the following dsp parameters: + * * dsp->stereo_mode (A/V) + * * dsp->sample_depth (A/V) + */ +static void sample_input_new_format(struct dsp_config *dsp) +{ + static const sample_input_fn_type sample_input_functions[] = + { + [SAMPLE_INPUT_LE_NATIVE_I_STEREO] = sample_input_lte_native_i_stereo, + [SAMPLE_INPUT_LE_NATIVE_NI_STEREO] = sample_input_lte_native_ni_stereo, + [SAMPLE_INPUT_LE_NATIVE_MONO] = sample_input_lte_native_mono, + [SAMPLE_INPUT_GT_NATIVE_I_STEREO] = sample_input_gt_native_i_stereo, + [SAMPLE_INPUT_GT_NATIVE_NI_STEREO] = sample_input_gt_native_ni_stereo, + [SAMPLE_INPUT_GT_NATIVE_MONO] = sample_input_gt_native_mono, + }; + + int convert = dsp->stereo_mode; + + if (dsp->sample_depth > NATIVE_DEPTH) + convert += SAMPLE_INPUT_GT_NATIVE_1ST_INDEX; + + dsp->input_samples = sample_input_functions[convert]; +} + + +#ifndef DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO +/* write mono internal 
format to output format */ +static void sample_output_mono(int count, struct dsp_data *data, + const int32_t *src[], int16_t *dst) +{ + const int32_t *s0 = src[0]; + const int scale = data->output_scale; + const int dc_bias = 1 << (scale - 1); + + while (count-- > 0) + { + int32_t lr = clip_sample_16((*s0++ + dc_bias) >> scale); + *dst++ = lr; + *dst++ = lr; + } +} +#endif /* DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO */ + +/* write stereo internal format to output format */ +#ifndef DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO +static void sample_output_stereo(int count, struct dsp_data *data, + const int32_t *src[], int16_t *dst) +{ + const int32_t *s0 = src[0]; + const int32_t *s1 = src[1]; + const int scale = data->output_scale; + const int dc_bias = 1 << (scale - 1); + + while (count-- > 0) + { + *dst++ = clip_sample_16((*s0++ + dc_bias) >> scale); + *dst++ = clip_sample_16((*s1++ + dc_bias) >> scale); + } +} +#endif /* DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO */ + +/** + * The "dither" code to convert the 24-bit samples produced by libmad was + * taken from the coolplayer project - coolplayer.sourceforge.net + * + * This function handles mono and stereo outputs. 
+ */ +static void sample_output_dithered(int count, struct dsp_data *data, + const int32_t *src[], int16_t *dst) +{ + const int32_t mask = dither_mask; + const int32_t bias = dither_bias; + const int scale = data->output_scale; + const int32_t min = data->clip_min; + const int32_t max = data->clip_max; + const int32_t range = max - min; + int ch; + int16_t *d; + + for (ch = 0; ch < data->num_channels; ch++) + { + struct dither_data * const dither = &dither_data[ch]; + const int32_t *s = src[ch]; + int i; + + for (i = 0, d = &dst[ch]; i < count; i++, s++, d += 2) + { + int32_t output, sample; + int32_t random; + + /* Noise shape and bias (for correct rounding later) */ + sample = *s; + sample += dither->error[0] - dither->error[1] + dither->error[2]; + dither->error[2] = dither->error[1]; + dither->error[1] = dither->error[0]/2; + + output = sample + bias; + + /* Dither, highpass triangle PDF */ + random = dither->random*0x0019660dL + 0x3c6ef35fL; + output += (random & mask) - (dither->random & mask); + dither->random = random; + + /* Round sample to output range */ + output &= ~mask; + + /* Error feedback */ + dither->error[0] = sample - output; + + /* Clip */ + if ((uint32_t)(output - min) > (uint32_t)range) + { + int32_t c = min; + if (output > min) + c += range; + output = c; + } + + /* Quantize and store */ + *d = output >> scale; + } + } + + if (data->num_channels == 2) + return; + + /* Have to duplicate left samples into the right channel since + pcm buffer and hardware is interleaved stereo */ + d = &dst[0]; + + while (count-- > 0) + { + int16_t s = *d++; + *d++ = s; + } +} + +/** + * sample_output_new_format() + * + * set the from-native to ouput sample conversion routine + * + * !DSPPARAMSYNC + * needs syncing with changes to the following dsp parameters: + * * dsp->stereo_mode (A/V) + * * dither_enabled (A) + */ +static void sample_output_new_format(struct dsp_config *dsp) +{ + static const sample_output_fn_type sample_output_functions[] = + { + 
sample_output_mono, + sample_output_stereo, + sample_output_dithered, + sample_output_dithered + }; + + int out = dsp->data.num_channels - 1; + + if (dsp == &AUDIO_DSP && dither_enabled) + out += 2; + + dsp->output_samples = sample_output_functions[out]; +} + +/** + * Linear interpolation resampling that introduces a one sample delay because + * of our inability to look into the future at the end of a frame. + */ +#ifndef DSP_HAVE_ASM_RESAMPLING +static int dsp_downsample(int count, struct dsp_data *data, + const int32_t *src[], int32_t *dst[]) +{ + int ch = data->num_channels - 1; + uint32_t delta = data->resample_data.delta; + uint32_t phase, pos; + int32_t *d; + + /* Rolled channel loop actually showed slightly faster. */ + do + { + /* Just initialize things and not worry too much about the relatively + * uncommon case of not being able to spit out a sample for the frame. + */ + const int32_t *s = src[ch]; + int32_t last = data->resample_data.last_sample[ch]; + + data->resample_data.last_sample[ch] = s[count - 1]; + d = dst[ch]; + phase = data->resample_data.phase; + pos = phase >> 16; + + /* Do we need last sample of previous frame for interpolation? */ + if (pos > 0) + last = s[pos - 1]; + + while (pos < (uint32_t)count) + { + *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); + phase += delta; + pos = phase >> 16; + last = s[pos - 1]; + } + } + while (--ch >= 0); + + /* Wrap phase accumulator back to start of next frame. */ + data->resample_data.phase = phase - (count << 16); + return d - dst[0]; +} + +static int dsp_upsample(int count, struct dsp_data *data, + const int32_t *src[], int32_t *dst[]) +{ + int ch = data->num_channels - 1; + uint32_t delta = data->resample_data.delta; + uint32_t phase, pos; + int32_t *d; + + /* Rolled channel loop actually showed slightly faster. 
*/ + do + { + /* Should always be able to output a sample for a ratio up to RESAMPLE_RATIO */ + const int32_t *s = src[ch]; + int32_t last = data->resample_data.last_sample[ch]; + + data->resample_data.last_sample[ch] = s[count - 1]; + d = dst[ch]; + phase = data->resample_data.phase; + pos = phase >> 16; + + while (pos == 0) + { + *d++ = last + FRACMUL((phase & 0xffff) << 15, s[0] - last); + phase += delta; + pos = phase >> 16; + } + + while (pos < (uint32_t)count) + { + last = s[pos - 1]; + *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); + phase += delta; + pos = phase >> 16; + } + } + while (--ch >= 0); + + /* Wrap phase accumulator back to start of next frame. */ + data->resample_data.phase = phase & 0xffff; + return d - dst[0]; +} +#endif /* DSP_HAVE_ASM_RESAMPLING */ + +static void resampler_new_delta(struct dsp_config *dsp) +{ + dsp->data.resample_data.delta = (unsigned long) + dsp->frequency * 65536LL / NATIVE_FREQUENCY; + + if (dsp->frequency == NATIVE_FREQUENCY) + { + /* NOTE: If fully glitch-free transitions from no resampling to + resampling are desired, last_sample history should be maintained + even when not resampling. */ + dsp->resample = NULL; + dsp->data.resample_data.phase = 0; + dsp->data.resample_data.last_sample[0] = 0; + dsp->data.resample_data.last_sample[1] = 0; + } + else if (dsp->frequency < NATIVE_FREQUENCY) + dsp->resample = dsp_upsample; + else + dsp->resample = dsp_downsample; +} + +/* Resample count stereo samples. Updates the src array, if resampling is + * done, to refer to the resampled data. Returns number of stereo samples + * for further processing. 
+ */ +static inline int resample(struct dsp_config *dsp, int count, int32_t *src[]) +{ + int32_t *dst[2] = + { + resample_buf[0], + resample_buf[1] + }; + lock_sample_buf( true ); + count = dsp->resample(count, &dsp->data, (const int32_t **)src, dst); + + src[0] = dst[0]; + src[1] = dst[dsp->data.num_channels - 1]; + lock_sample_buf( false ); + return count; +} + +static void dither_init(struct dsp_config *dsp) +{ + memset(dither_data, 0, sizeof (dither_data)); + dither_bias = (1L << (dsp->data.frac_bits - NATIVE_DEPTH)); + dither_mask = (1L << (dsp->data.frac_bits + 1 - NATIVE_DEPTH)) - 1; +} + +void dsp_dither_enable(bool enable) +{ + struct dsp_config *dsp = &AUDIO_DSP; + dither_enabled = enable; + sample_output_new_format(dsp); +} + +/* Applies crossfeed to the stereo signal in src. + * Crossfeed is a process where listening over speakers is simulated. This + * is good for old hard panned stereo records, which might be quite fatiguing + * to listen to on headphones with no crossfeed. 
+ */ +#ifndef DSP_HAVE_ASM_CROSSFEED +static void apply_crossfeed(int count, int32_t *buf[]) +{ + int32_t *hist_l = &crossfeed_data.history[0]; + int32_t *hist_r = &crossfeed_data.history[2]; + int32_t *delay = &crossfeed_data.delay[0][0]; + int32_t *coefs = &crossfeed_data.coefs[0]; + int32_t gain = crossfeed_data.gain; + int32_t *di = crossfeed_data.index; + + int32_t acc; + int32_t left, right; + int i; + + for (i = 0; i < count; i++) + { + left = buf[0][i]; + right = buf[1][i]; + + /* Filter delayed sample from left speaker */ + acc = FRACMUL(*di, coefs[0]); + acc += FRACMUL(hist_l[0], coefs[1]); + acc += FRACMUL(hist_l[1], coefs[2]); + /* Save filter history for left speaker */ + hist_l[1] = acc; + hist_l[0] = *di; + *di++ = left; + /* Filter delayed sample from right speaker */ + acc = FRACMUL(*di, coefs[0]); + acc += FRACMUL(hist_r[0], coefs[1]); + acc += FRACMUL(hist_r[1], coefs[2]); + /* Save filter history for right speaker */ + hist_r[1] = acc; + hist_r[0] = *di; + *di++ = right; + /* Now add the attenuated direct sound and write to outputs */ + buf[0][i] = FRACMUL(left, gain) + hist_r[1]; + buf[1][i] = FRACMUL(right, gain) + hist_l[1]; + + /* Wrap delay line index if bigger than delay line size */ + if (di >= delay + 13*2) + di = delay; + } + /* Write back local copies of data we've modified */ + crossfeed_data.index = di; +} +#endif /* DSP_HAVE_ASM_CROSSFEED */ + +/** + * dsp_set_crossfeed(bool enable) + * + * !DSPPARAMSYNC + * needs syncing with changes to the following dsp parameters: + * * dsp->stereo_mode (A) + */ +void dsp_set_crossfeed(bool enable) +{ + crossfeed_enabled = enable; + AUDIO_DSP.apply_crossfeed = (enable && AUDIO_DSP.data.num_channels > 1) + ? 
apply_crossfeed : NULL; +} + +void dsp_set_crossfeed_direct_gain(int gain) +{ + crossfeed_data.gain = get_replaygain_int(gain * 10) << 7; + /* If gain is negative, the calculation overflowed and we need to clamp */ + if (crossfeed_data.gain < 0) + crossfeed_data.gain = 0x7fffffff; +} + +/* Both gains should be below 0 dB */ +void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff) +{ + int32_t *c = crossfeed_data.coefs; + long scaler = get_replaygain_int(lf_gain * 10) << 7; + + cutoff = 0xffffffff/NATIVE_FREQUENCY*cutoff; + hf_gain -= lf_gain; + /* Divide cutoff by sqrt(10^(hf_gain/20)) to place cutoff at the -3 dB + * point instead of shelf midpoint. This is for compatibility with the old + * crossfeed shelf filter and should be removed if crossfeed settings are + * ever made incompatible for any other good reason. + */ + cutoff = fp_div(cutoff, get_replaygain_int(hf_gain*5), 24); + filter_shelf_coefs(cutoff, hf_gain, false, c); + /* Scale coefs by LF gain and shift them to s0.31 format. We have no gains + * over 1 and can do this safely + */ + c[0] = FRACMUL_SHL(c[0], scaler, 4); + c[1] = FRACMUL_SHL(c[1], scaler, 4); + c[2] <<= 4; +} + +/* Apply a constant gain to the samples (e.g., for ReplayGain). + * Note that this must be called before the resampler. + */ +#ifndef DSP_HAVE_ASM_APPLY_GAIN +static void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) +{ + const int32_t gain = data->gain; + int ch; + + for (ch = 0; ch < data->num_channels; ch++) + { + int32_t *d = buf[ch]; + int i; + + for (i = 0; i < count; i++) + d[i] = FRACMUL_SHL(d[i], gain, 8); + } +} +#endif /* DSP_HAVE_ASM_APPLY_GAIN */ + +/* Combine all gains to a global gain. 
*/ +static void set_gain(struct dsp_config *dsp) +{ + /* gains are in S7.24 format */ + dsp->data.gain = DEFAULT_GAIN; + + /* Replay gain not relevant to voice */ + if (dsp == &AUDIO_DSP && replaygain) + { + dsp->data.gain = replaygain; + } + + if (dsp->eq_process && eq_precut) + { + dsp->data.gain = fp_mul(dsp->data.gain, eq_precut, 24); + } + +#ifdef HAVE_SW_VOLUME_CONTROL + if (global_settings.volume < SW_VOLUME_MAX || + global_settings.volume > SW_VOLUME_MIN) + { + int vol_gain = get_replaygain_int(global_settings.volume * 100); + dsp->data.gain = (long) (((int64_t) dsp->data.gain * vol_gain) >> 24); + } +#endif + + if (dsp->data.gain == DEFAULT_GAIN) + { + dsp->data.gain = 0; + } + else + { + dsp->data.gain >>= 1; /* convert gain to S8.23 format */ + } + + dsp->apply_gain = dsp->data.gain != 0 ? dsp_apply_gain : NULL; +} + +/** + * Update the amount to cut the audio before applying the equalizer. + * + * @param precut to apply in decibels (multiplied by 10) + */ +void dsp_set_eq_precut(int precut) +{ + eq_precut = get_replaygain_int(precut * -10); + set_gain(&AUDIO_DSP); +} + +/** + * Synchronize the equalizer filter coefficients with the global settings. + * + * @param band the equalizer band to synchronize + */ +void dsp_set_eq_coefs(int band) +{ + /* Adjust setting pointer to the band we actually want to change */ + struct eq_band_setting *setting = &global_settings.eq_band_settings[band]; + + /* Convert user settings to format required by coef generator functions */ + unsigned long cutoff = 0xffffffff / NATIVE_FREQUENCY * setting->cutoff; + unsigned long q = setting->q; + int gain = setting->gain; + + if (q == 0) + q = 1; + + /* NOTE: The coef functions assume the EMAC unit is in fractional mode, + which it should be, since we're executed from the main thread. 
*/ + + /* Assume a band is disabled if the gain is zero */ + if (gain == 0) + { + eq_data.enabled[band] = 0; + } + else + { + if (band == 0) + eq_ls_coefs(cutoff, q, gain, eq_data.filters[band].coefs); + else if (band == 4) + eq_hs_coefs(cutoff, q, gain, eq_data.filters[band].coefs); + else + eq_pk_coefs(cutoff, q, gain, eq_data.filters[band].coefs); + + eq_data.enabled[band] = 1; + } +} + +/* Apply EQ filters to those bands that have got it switched on. */ +static void eq_process(int count, int32_t *buf[]) +{ + static const int shifts[] = + { + EQ_SHELF_SHIFT, /* low shelf */ + EQ_PEAK_SHIFT, /* peaking */ + EQ_PEAK_SHIFT, /* peaking */ + EQ_PEAK_SHIFT, /* peaking */ + EQ_SHELF_SHIFT, /* high shelf */ + }; + unsigned int channels = AUDIO_DSP.data.num_channels; + int i; + + /* filter configuration currently is 1 low shelf filter, 3 band peaking + filters and 1 high shelf filter, in that order. we need to know this + so we can choose the correct shift factor. + */ + for (i = 0; i < 5; i++) + { + if (!eq_data.enabled[i]) + continue; + eq_filter(buf, &eq_data.filters[i], count, channels, shifts[i]); + } +} + +/** + * Use to enable the equalizer. + * + * @param enable true to enable the equalizer + */ +void dsp_set_eq(bool enable) +{ + AUDIO_DSP.eq_process = enable ? eq_process : NULL; + set_gain(&AUDIO_DSP); +} + +static void dsp_set_stereo_width(int value) +{ + long width, straight, cross; + + width = value * 0x7fffff / 100; + + if (value <= 100) + { + straight = (0x7fffff + width) / 2; + cross = straight - width; + } + else + { + /* straight = (1 + width) / (2 * width) */ + straight = ((int64_t)(0x7fffff + width) << 22) / width; + cross = straight - 0x7fffff; + } + + dsp_sw_gain = straight << 8; + dsp_sw_cross = cross << 8; +} + +/** + * Implements the different channel configurations and stereo width. + */ + +/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for + * completeness. 
*/ +#if 0 +static void channels_process_sound_chan_stereo(int count, int32_t *buf[]) +{ + /* The channels are each just themselves */ + (void)count; (void)buf; +} +#endif + +#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO +static void channels_process_sound_chan_mono(int count, int32_t *buf[]) +{ + int32_t *sl = buf[0], *sr = buf[1]; + + while (count-- > 0) + { + int32_t lr = *sl/2 + *sr/2; + *sl++ = lr; + *sr++ = lr; + } +} +#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */ + +#ifndef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM +static void channels_process_sound_chan_custom(int count, int32_t *buf[]) +{ + const int32_t gain = dsp_sw_gain; + const int32_t cross = dsp_sw_cross; + int32_t *sl = buf[0], *sr = buf[1]; + + while (count-- > 0) + { + int32_t l = *sl; + int32_t r = *sr; + *sl++ = FRACMUL(l, gain) + FRACMUL(r, cross); + *sr++ = FRACMUL(r, gain) + FRACMUL(l, cross); + } +} +#endif /* DSP_HAVE_ASM_SOUND_CHAN_CUSTOM */ + +static void channels_process_sound_chan_mono_left(int count, int32_t *buf[]) +{ + /* Copy left over right; size by sample (**buf), not by pointer (*buf) */ + memcpy(buf[1], buf[0], count * sizeof (**buf)); +} + +static void channels_process_sound_chan_mono_right(int count, int32_t *buf[]) +{ + /* Copy right over left; size by sample (**buf), not by pointer (*buf) */ + memcpy(buf[0], buf[1], count * sizeof (**buf)); +} + +#ifndef DSP_HAVE_ASM_SOUND_CHAN_KARAOKE +static void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) +{ + int32_t *sl = buf[0], *sr = buf[1]; + + while (count-- > 0) + { + int32_t ch = *sl/2 - *sr/2; + *sl++ = ch; + *sr++ = -ch; + } +} +#endif /* DSP_HAVE_ASM_SOUND_CHAN_KARAOKE */ + +static void dsp_set_channel_config(int value) +{ + static const channels_process_fn_type channels_process_functions[] = + { + /* SOUND_CHAN_STEREO = All-purpose index for no channel processing */ + [SOUND_CHAN_STEREO] = NULL, + [SOUND_CHAN_MONO] = channels_process_sound_chan_mono, + [SOUND_CHAN_CUSTOM] = channels_process_sound_chan_custom, + [SOUND_CHAN_MONO_LEFT] = channels_process_sound_chan_mono_left, + [SOUND_CHAN_MONO_RIGHT] = 
channels_process_sound_chan_mono_right, + [SOUND_CHAN_KARAOKE] = channels_process_sound_chan_karaoke, + }; + + if ((unsigned)value >= ARRAYLEN(channels_process_functions) || + AUDIO_DSP.stereo_mode == STEREO_MONO) + { + value = SOUND_CHAN_STEREO; + } + + /* This doesn't apply to voice */ + channels_mode = value; + AUDIO_DSP.channels_process = channels_process_functions[value]; +} + +#if CONFIG_CODEC == SWCODEC + +#ifdef HAVE_SW_TONE_CONTROLS +static void set_tone_controls(void) +{ + filter_bishelf_coefs(0xffffffff/NATIVE_FREQUENCY*200, + 0xffffffff/NATIVE_FREQUENCY*3500, + bass, treble, -prescale, + AUDIO_DSP.tone_filter.coefs); + /* Sync the voice dsp coefficients */ + memcpy(&VOICE_DSP.tone_filter.coefs, AUDIO_DSP.tone_filter.coefs, + sizeof (VOICE_DSP.tone_filter.coefs)); +} +#endif + +/* Hook back from firmware/ part of audio, which can't/shouldn't call apps/ + * code directly. + */ +int dsp_callback(int msg, intptr_t param) +{ + switch (msg) + { +#ifdef HAVE_SW_TONE_CONTROLS + case DSP_CALLBACK_SET_PRESCALE: + prescale = param; + set_tone_controls(); + break; + /* prescaler is always set after calling any of these, so we wait with + * calculating coefs until the above case is hit. + */ + case DSP_CALLBACK_SET_BASS: + bass = param; + break; + case DSP_CALLBACK_SET_TREBLE: + treble = param; + break; +#ifdef HAVE_SW_VOLUME_CONTROL + case DSP_CALLBACK_SET_SW_VOLUME: + set_gain(&AUDIO_DSP); + break; +#endif +#endif + case DSP_CALLBACK_SET_CHANNEL_CONFIG: + dsp_set_channel_config(param); + break; + case DSP_CALLBACK_SET_STEREO_WIDTH: + dsp_set_stereo_width(param); + break; + default: + break; + } + return 0; +} +#endif + +/* Process and convert src audio to dst based on the DSP configuration, + * reading count number of audio samples. dst is assumed to be large + * enough; use dsp_output_count() to get the required number. 
src is an + * array of pointers; for mono and interleaved stereo, it contains one + * pointer to the start of the audio data and the other is ignored; for + * non-interleaved stereo, it contains two pointers, one for each audio + * channel. Returns number of samples written to dst. + */ +int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count) +{ + static int32_t *tmp[2]; /* tdspeed_doit() needs it static */ + static long last_yield; + long tick; + int written = 0; + +#if defined(CPU_COLDFIRE) + /* set emac unit for dsp processing, and save old macsr, we're running in + codec thread context at this point, so can't clobber it */ + unsigned long old_macsr = coldfire_get_macsr(); + coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE); +#endif + + if (new_gain) + dsp_set_replaygain(); /* Gain has changed */ + + /* Perform at least one yield before starting */ + last_yield = current_tick; + yield(); + + /* Testing function pointers for NULL is preferred since the pointer + will be preloaded to be used for the call if not. 
*/ + while (count > 0) + { + int samples = MIN(sample_buf_count, count); + count -= samples; + + dsp->input_samples(samples, src, tmp); + +#ifdef HAVE_PITCHSCREEN + if (dsp->tdspeed_active) + samples = tdspeed_doit(tmp, samples); +#endif + + int chunk_offset = 0; + while (samples > 0) + { + int32_t *t2[2]; + t2[0] = tmp[0]+chunk_offset; + t2[1] = tmp[1]+chunk_offset; + + int chunk = MIN(sample_buf_count, samples); + chunk_offset += chunk; + samples -= chunk; + + if (dsp->apply_gain) + dsp->apply_gain(chunk, &dsp->data, t2); + + if (dsp->resample && (chunk = resample(dsp, chunk, t2)) <= 0) + break; /* I'm pretty sure we're downsampling here */ + + if (dsp->apply_crossfeed) + dsp->apply_crossfeed(chunk, t2); + + if (dsp->eq_process) + dsp->eq_process(chunk, t2); + +#ifdef HAVE_SW_TONE_CONTROLS + if ((bass | treble) != 0) + eq_filter(t2, &dsp->tone_filter, chunk, + dsp->data.num_channels, FILTER_BISHELF_SHIFT); +#endif + + if (dsp->channels_process) + dsp->channels_process(chunk, t2); + + if (dsp->compressor_process) + dsp->compressor_process(chunk, &dsp->data, t2); + + dsp->output_samples(chunk, &dsp->data, (const int32_t **)t2, (int16_t *)dst); + + written += chunk; + dst += chunk * sizeof (int16_t) * 2; + + /* yield at least once each tick */ + tick = current_tick; + if (TIME_AFTER(tick, last_yield)) + { + last_yield = tick; + yield(); + } + } + } + +#if defined(CPU_COLDFIRE) + /* set old macsr again */ + coldfire_set_macsr(old_macsr); +#endif + return written; +} + +/* Given count number of input samples, calculate the maximum number of + * samples of output data that would be generated (the calculation is not + * entirely exact and rounds upwards to be on the safe side; during + * resampling, the number of samples generated depends on the current state + * of the resampler). 
+ */ +/* dsp_input_count() MUST be called afterwards */ +int dsp_output_count(struct dsp_config *dsp, int count) +{ +#ifdef HAVE_PITCHSCREEN + if (dsp->tdspeed_active) + count = tdspeed_est_output_size(); +#endif + if (dsp->resample) + { + count = (int)(((unsigned long)count * NATIVE_FREQUENCY + + (dsp->frequency - 1)) / dsp->frequency); + } + + /* Now we have the resampled sample count which must not exceed + * resample_buf_count to avoid resample buffer overflow. One + * must call dsp_input_count() to get the correct input sample + * count. + */ + if (count > resample_buf_count) + count = resample_buf_count; + + return count; +} + +/* Given count output samples, calculate number of input samples + * that would be consumed in order to fill the output buffer. + */ +int dsp_input_count(struct dsp_config *dsp, int count) +{ + /* count is now the number of resampled input samples. Convert to + original input samples. */ + if (dsp->resample) + { + /* Use the real resampling delta = + * dsp->frequency * 65536 / NATIVE_FREQUENCY, and + * round towards zero to avoid buffer overflows. 
*/ + count = (int)(((unsigned long)count * + dsp->data.resample_data.delta) >> 16); + } + +#ifdef HAVE_PITCHSCREEN + if (dsp->tdspeed_active) + count = tdspeed_est_input_size(count); +#endif + + return count; +} + +static void dsp_set_gain_var(long *var, long value) +{ + *var = value; + new_gain = true; +} + +static void dsp_update_functions(struct dsp_config *dsp) +{ + sample_input_new_format(dsp); + sample_output_new_format(dsp); + if (dsp == &AUDIO_DSP) + dsp_set_crossfeed(crossfeed_enabled); +} + +intptr_t dsp_configure(struct dsp_config *dsp, int setting, intptr_t value) +{ + switch (setting) + { + case DSP_MYDSP: + switch (value) + { + case CODEC_IDX_AUDIO: + return (intptr_t)&AUDIO_DSP; + case CODEC_IDX_VOICE: + return (intptr_t)&VOICE_DSP; + default: + return (intptr_t)NULL; + } + + case DSP_SET_FREQUENCY: + memset(&dsp->data.resample_data, 0, sizeof (dsp->data.resample_data)); + /* Fall through!!! */ + case DSP_SWITCH_FREQUENCY: + dsp->codec_frequency = (value == 0) ? NATIVE_FREQUENCY : value; + /* Account for playback speed adjustment when setting dsp->frequency + if we're called from the main audio thread. Voice UI thread should + not need this feature. 
+ */ +#ifdef HAVE_PITCHSCREEN + if (dsp == &AUDIO_DSP) + dsp->frequency = pitch_ratio * dsp->codec_frequency / PITCH_SPEED_100; + else +#endif + dsp->frequency = dsp->codec_frequency; + + resampler_new_delta(dsp); +#ifdef HAVE_PITCHSCREEN + tdspeed_setup(dsp); +#endif + break; + + case DSP_SET_SAMPLE_DEPTH: + dsp->sample_depth = value; + + if (dsp->sample_depth <= NATIVE_DEPTH) + { + dsp->data.frac_bits = WORD_FRACBITS; + dsp->sample_bytes = sizeof (int16_t); /* samples are 16 bits */ + dsp->data.clip_max = ((1 << WORD_FRACBITS) - 1); + dsp->data.clip_min = -((1 << WORD_FRACBITS)); + } + else + { + dsp->data.frac_bits = value; + dsp->sample_bytes = sizeof (int32_t); /* samples are 32 bits */ + dsp->data.clip_max = (1 << value) - 1; + dsp->data.clip_min = -(1 << value); + } + + dsp->data.output_scale = dsp->data.frac_bits + 1 - NATIVE_DEPTH; + sample_input_new_format(dsp); + dither_init(dsp); + break; + + case DSP_SET_STEREO_MODE: + dsp->stereo_mode = value; + dsp->data.num_channels = value == STEREO_MONO ? 
1 : 2; + dsp_update_functions(dsp); +#ifdef HAVE_PITCHSCREEN + tdspeed_setup(dsp); +#endif + break; + + case DSP_RESET: + dsp->stereo_mode = STEREO_NONINTERLEAVED; + dsp->data.num_channels = 2; + dsp->sample_depth = NATIVE_DEPTH; + dsp->data.frac_bits = WORD_FRACBITS; + dsp->sample_bytes = sizeof (int16_t); + dsp->data.output_scale = dsp->data.frac_bits + 1 - NATIVE_DEPTH; + dsp->data.clip_max = ((1 << WORD_FRACBITS) - 1); + dsp->data.clip_min = -((1 << WORD_FRACBITS)); + dsp->codec_frequency = dsp->frequency = NATIVE_FREQUENCY; + + if (dsp == &AUDIO_DSP) + { + track_gain = 0; + album_gain = 0; + track_peak = 0; + album_peak = 0; + new_gain = true; + } + + dsp_update_functions(dsp); + resampler_new_delta(dsp); +#ifdef HAVE_PITCHSCREEN + tdspeed_setup(dsp); +#endif + if (dsp == &AUDIO_DSP) + compressor_reset(); + break; + + case DSP_FLUSH: + memset(&dsp->data.resample_data, 0, + sizeof (dsp->data.resample_data)); + resampler_new_delta(dsp); + dither_init(dsp); +#ifdef HAVE_PITCHSCREEN + tdspeed_setup(dsp); +#endif + if (dsp == &AUDIO_DSP) + compressor_reset(); + break; + + case DSP_SET_TRACK_GAIN: + if (dsp == &AUDIO_DSP) + dsp_set_gain_var(&track_gain, value); + break; + + case DSP_SET_ALBUM_GAIN: + if (dsp == &AUDIO_DSP) + dsp_set_gain_var(&album_gain, value); + break; + + case DSP_SET_TRACK_PEAK: + if (dsp == &AUDIO_DSP) + dsp_set_gain_var(&track_peak, value); + break; + + case DSP_SET_ALBUM_PEAK: + if (dsp == &AUDIO_DSP) + dsp_set_gain_var(&album_peak, value); + break; + + default: + return 0; + } + + return 1; +} + +int get_replaygain_mode(bool have_track_gain, bool have_album_gain) +{ + int type; + + bool track = ((global_settings.replaygain_type == REPLAYGAIN_TRACK) + || ((global_settings.replaygain_type == REPLAYGAIN_SHUFFLE) + && global_settings.playlist_shuffle)); + + type = (!track && have_album_gain) ? REPLAYGAIN_ALBUM + : have_track_gain ? 
REPLAYGAIN_TRACK : -1; + + return type; +} + +void dsp_set_replaygain(void) +{ + long gain = 0; + + new_gain = false; + + if ((global_settings.replaygain_type != REPLAYGAIN_OFF) || + global_settings.replaygain_noclip) + { + bool track_mode = get_replaygain_mode(track_gain != 0, + album_gain != 0) == REPLAYGAIN_TRACK; + long peak = (track_mode || !album_peak) ? track_peak : album_peak; + + if (global_settings.replaygain_type != REPLAYGAIN_OFF) + { + gain = (track_mode || !album_gain) ? track_gain : album_gain; + + if (global_settings.replaygain_preamp) + { + long preamp = get_replaygain_int( + global_settings.replaygain_preamp * 10); + + gain = (long) (((int64_t) gain * preamp) >> 24); + } + } + + if (gain == 0) + { + /* So that noclip can work even with no gain information. */ + gain = DEFAULT_GAIN; + } + + if (global_settings.replaygain_noclip && (peak != 0) + && ((((int64_t) gain * peak) >> 24) >= DEFAULT_GAIN)) + { + gain = (((int64_t) DEFAULT_GAIN << 24) / peak); + } + + if (gain == DEFAULT_GAIN) + { + /* Nothing to do, disable processing. */ + gain = 0; + } + } + + /* Store in S7.24 format to simplify calculations. */ + replaygain = gain; + set_gain(&AUDIO_DSP); +} + +/** SET COMPRESSOR + * Called by the menu system to configure the compressor process */ +void dsp_set_compressor(void) +{ + /* enable/disable the compressor */ + AUDIO_DSP.compressor_process = compressor_update() ? + compressor_process : NULL; +} diff --git a/lib/rbcodec/dsp/dsp.h b/lib/rbcodec/dsp/dsp.h new file mode 100644 index 0000000000..2a00f649f8 --- /dev/null +++ b/lib/rbcodec/dsp/dsp.h @@ -0,0 +1,125 @@ +/*************************************************************************** + * __________ __ ___. 
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2005 Miika Pekkarinen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#ifndef _DSP_H +#define _DSP_H + +#include +#include + +#define NATIVE_FREQUENCY 44100 + +enum +{ + STEREO_INTERLEAVED = 0, + STEREO_NONINTERLEAVED, + STEREO_MONO, + STEREO_NUM_MODES, +}; + +enum +{ + CODEC_IDX_AUDIO = 0, + CODEC_IDX_VOICE, +}; + +enum +{ + DSP_MYDSP = 1, + DSP_SET_FREQUENCY, + DSP_SWITCH_FREQUENCY, + DSP_SET_SAMPLE_DEPTH, + DSP_SET_STEREO_MODE, + DSP_RESET, + DSP_FLUSH, + DSP_SET_TRACK_GAIN, + DSP_SET_ALBUM_GAIN, + DSP_SET_TRACK_PEAK, + DSP_SET_ALBUM_PEAK, + DSP_CROSSFEED +}; + + +/**************************************************************************** + * NOTE: Any assembly routines that use these structures must be updated + * if current data members are moved or changed. + */ +struct resample_data +{ + uint32_t delta; /* 00h */ + uint32_t phase; /* 04h */ + int32_t last_sample[2]; /* 08h */ + /* 10h */ +}; + +/* This is for passing needed data to external dsp routines. If another + * dsp parameter needs to be passed, add to the end of the structure + * and remove from dsp_config. + * If another function type becomes assembly/external and requires dsp + * config info, add a pointer paramter of type "struct dsp_data *". 
+ * If removing something from other than the end, reserve the spot or + * else update every implementation for every target. + * Be sure to add the offset of the new member for easy viewing as well. :) + * It is the first member of dsp_config and all members can be accessed + * through the main aggregate but this is intended to make a safe haven + * for these items whereas the c part can be rearranged at will. dsp_data + * could even be moved within dsp_config without disturbing the order. + */ +struct dsp_data +{ + int output_scale; /* 00h */ + int num_channels; /* 04h */ + struct resample_data resample_data; /* 08h */ + int32_t clip_min; /* 18h */ + int32_t clip_max; /* 1ch */ + int32_t gain; /* 20h - Note that this is in S8.23 format. */ + int frac_bits; /* 24h */ + /* 28h */ +}; + +struct dsp_config; + +int dsp_process(struct dsp_config *dsp, char *dest, + const char *src[], int count); +int dsp_input_count(struct dsp_config *dsp, int count); +int dsp_output_count(struct dsp_config *dsp, int count); +intptr_t dsp_configure(struct dsp_config *dsp, int setting, + intptr_t value); +int get_replaygain_mode(bool have_track_gain, bool have_album_gain); +void dsp_set_replaygain(void); +void dsp_set_crossfeed(bool enable); +void dsp_set_crossfeed_direct_gain(int gain); +void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, + long cutoff); +void dsp_set_eq(bool enable); +void dsp_set_eq_precut(int precut); +void dsp_set_eq_coefs(int band); +void dsp_dither_enable(bool enable); +void dsp_timestretch_enable(bool enable); +bool dsp_timestretch_available(void); +void sound_set_pitch(int32_t r); +int32_t sound_get_pitch(void); +void dsp_set_timestretch(int32_t percent); +int32_t dsp_get_timestretch(void); +int dsp_callback(int msg, intptr_t param); +void dsp_set_compressor(void); + +#endif diff --git a/lib/rbcodec/dsp/dsp_arm.S b/lib/rbcodec/dsp/dsp_arm.S new file mode 100644 index 0000000000..7e360749a3 --- /dev/null +++ b/lib/rbcodec/dsp/dsp_arm.S @@ -0,0 +1,561 
@@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006-2007 Thom Johansen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + #include "config.h" + +/**************************************************************************** + * void channels_process_sound_chan_mono(int count, int32_t *buf[]) + */ + +#include "config.h" + + .section .icode, "ax", %progbits + .align 2 + .global channels_process_sound_chan_mono + .type channels_process_sound_chan_mono, %function +channels_process_sound_chan_mono: + @ input: r0 = count, r1 = buf + stmfd sp!, { r4, lr } @ + @ + ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1] + subs r0, r0, #1 @ odd: end at 0; even: end at -1 + beq .mono_singlesample @ Zero? Only one sample! 
+ @ +.monoloop: @ + ldmia r1, { r3, r4 } @ r3, r4 = Li0, Li1 + ldmia r2, { r12, r14 } @ r12, r14 = Ri0, Ri1 + mov r3, r3, asr #1 @ Mo0 = Li0 / 2 + Ri0 / 2 + mov r4, r4, asr #1 @ Mo1 = Li1 / 2 + Ri1 / 2 + add r12, r3, r12, asr #1 @ + add r14, r4, r14, asr #1 @ + subs r0, r0, #2 @ + stmia r1!, { r12, r14 } @ store Mo0, Mo1 + stmia r2!, { r12, r14 } @ store Mo0, Mo1 + bgt .monoloop @ + @ + ldmpc cond=lt, regs=r4 @ if count was even, we're done + @ +.mono_singlesample: @ + ldr r3, [r1] @ r3 = Ls + ldr r12, [r2] @ r12 = Rs + mov r3, r3, asr #1 @ Mo = Ls / 2 + Rs / 2 + add r12, r3, r12, asr #1 @ + str r12, [r1] @ store Mo + str r12, [r2] @ store Mo + @ + ldmpc regs=r4 @ + .size channels_process_sound_chan_mono, \ + .-channels_process_sound_chan_mono + +/**************************************************************************** + * void channels_process_sound_chan_custom(int count, int32_t *buf[]) + */ + .section .icode, "ax", %progbits + .align 2 + .global channels_process_sound_chan_custom + .type channels_process_sound_chan_custom, %function +channels_process_sound_chan_custom: + stmfd sp!, { r4-r10, lr } + + ldr r3, =dsp_sw_gain + ldr r4, =dsp_sw_cross + + ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1] + ldr r3, [r3] @ r3 = dsp_sw_gain + ldr r4, [r4] @ r4 = dsp_sw_cross + + subs r0, r0, #1 + beq .custom_single_sample @ Zero? Only one sample! 
+ +.custom_loop: + ldmia r1, { r5, r6 } @ r5 = Li0, r6 = Li1 + ldmia r2, { r7, r8 } @ r7 = Ri0, r8 = Ri1 + + subs r0, r0, #2 + + smull r9, r10, r5, r3 @ Lc0 = Li0*gain + smull r12, r14, r7, r3 @ Rc0 = Ri0*gain + smlal r9, r10, r7, r4 @ Lc0 += Ri0*cross + smlal r12, r14, r5, r4 @ Rc0 += Li0*cross + + mov r9, r9, lsr #31 @ Convert to s0.31 + mov r12, r12, lsr #31 + orr r5, r9, r10, asl #1 + orr r7, r12, r14, asl #1 + + smull r9, r10, r6, r3 @ Lc1 = Li1*gain + smull r12, r14, r8, r3 @ Rc1 = Ri1*gain + smlal r9, r10, r8, r4 @ Lc1 += Ri1*cross + smlal r12, r14, r6, r4 @ Rc1 += Li1*cross + + mov r9, r9, lsr #31 @ Convert to s0.31 + mov r12, r12, lsr #31 + orr r6, r9, r10, asl #1 + orr r8, r12, r14, asl #1 + + stmia r1!, { r5, r6 } @ Store Lc0, Lc1 + stmia r2!, { r7, r8 } @ Store Rc0, Rc1 + + bgt .custom_loop + + ldmpc cond=lt, regs=r4-r10 @ < 0? even count + +.custom_single_sample: + ldr r5, [r1] @ handle odd sample + ldr r7, [r2] + + smull r9, r10, r5, r3 @ Lc0 = Li0*gain + smull r12, r14, r7, r3 @ Rc0 = Ri0*gain + smlal r9, r10, r7, r4 @ Lc0 += Ri0*cross + smlal r12, r14, r5, r4 @ Rc0 += Li0*cross + + mov r9, r9, lsr #31 @ Convert to s0.31 + mov r12, r12, lsr #31 + orr r5, r9, r10, asl #1 + orr r7, r12, r14, asl #1 + + str r5, [r1] @ Store Lc0 + str r7, [r2] @ Store Rc0 + + ldmpc regs=r4-r10 + .size channels_process_sound_chan_custom, \ + .-channels_process_sound_chan_custom + +/**************************************************************************** + * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) + */ + .section .icode, "ax", %progbits + .align 2 + .global channels_process_sound_chan_karaoke + .type channels_process_sound_chan_karaoke, %function +channels_process_sound_chan_karaoke: + @ input: r0 = count, r1 = buf + stmfd sp!, { r4, lr } @ + @ + ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1] + subs r0, r0, #1 @ odd: end at 0; even: end at -1 + beq .karaoke_singlesample @ Zero? Only one sample! 
+ @ +.karaokeloop: @ + ldmia r1, { r3, r4 } @ r3, r4 = Li0, Li1 + ldmia r2, { r12, r14 } @ r12, r14 = Ri0, Ri1 + mov r3, r3, asr #1 @ Lo0 = Li0 / 2 - Ri0 / 2 + mov r4, r4, asr #1 @ Lo1 = Li1 / 2 - Ri1 / 2 + sub r3, r3, r12, asr #1 @ + sub r4, r4, r14, asr #1 @ + rsb r12, r3, #0 @ Ro0 = -Lo0 = Ri0 / 2 - Li0 / 2 + rsb r14, r4, #0 @ Ro1 = -Lo1 = Ri1 / 2 - Li1 / 2 + subs r0, r0, #2 @ + stmia r1!, { r3, r4 } @ store Lo0, Lo1 + stmia r2!, { r12, r14 } @ store Ro0, Ro1 + bgt .karaokeloop @ + @ + ldmpc cond=lt, regs=r4 @ if count was even, we're done + @ +.karaoke_singlesample: @ + ldr r3, [r1] @ r3 = Li + ldr r12, [r2] @ r12 = Ri + mov r3, r3, asr #1 @ Lo = Li / 2 - Ri / 2 + sub r3, r3, r12, asr #1 @ + rsb r12, r3, #0 @ Ro = -Lo = Ri / 2 - Li / 2 + str r3, [r1] @ store Lo + str r12, [r2] @ store Ro + @ + ldmpc regs=r4 @ + .size channels_process_sound_chan_karaoke, \ + .-channels_process_sound_chan_karaoke + +#if ARM_ARCH < 6 +/**************************************************************************** + * void sample_output_mono(int count, struct dsp_data *data, + * const int32_t *src[], int16_t *dst) + */ + .section .icode, "ax", %progbits + .align 2 + .global sample_output_mono + .type sample_output_mono, %function +sample_output_mono: + @ input: r0 = count, r1 = data, r2 = src, r3 = dst + stmfd sp!, { r4-r6, lr } + + ldr r1, [r1] @ r1 = data->output_scale + ldr r2, [r2] @ r2 = src[0] + + mov r4, #1 + mov r4, r4, lsl r1 @ r4 = 1 << (scale-1) + mov r4, r4, lsr #1 + mvn r14, #0x8000 @ r14 = 0xffff7fff, needed for + @ clipping and masking + subs r0, r0, #1 @ + beq .som_singlesample @ Zero? Only one sample! 
+ +.somloop: + ldmia r2!, { r5, r6 } + add r5, r5, r4 @ r5 = (r5 + 1<<(scale-1)) >> scale + mov r5, r5, asr r1 + mov r12, r5, asr #15 + teq r12, r12, asr #31 + eorne r5, r14, r5, asr #31 @ Clip (-32768...+32767) + add r6, r6, r4 + mov r6, r6, asr r1 @ r6 = (r6 + 1<<(scale-1)) >> scale + mov r12, r6, asr #15 + teq r12, r12, asr #31 + eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767) + + and r5, r5, r14, lsr #16 + and r6, r6, r14, lsr #16 + orr r5, r5, r5, lsl #16 @ pack first 2 halfwords into 1 word + orr r6, r6, r6, lsl #16 @ pack last 2 halfwords into 1 word + stmia r3!, { r5, r6 } + + subs r0, r0, #2 + bgt .somloop + + ldmpc cond=lt, regs=r4-r6 @ even 'count'? return + +.som_singlesample: + ldr r5, [r2] @ do odd sample + add r5, r5, r4 + mov r5, r5, asr r1 + mov r12, r5, asr #15 + teq r12, r12, asr #31 + eorne r5, r14, r5, asr #31 + + and r5, r5, r14, lsr #16 @ pack 2 halfwords into 1 word + orr r5, r5, r5, lsl #16 + str r5, [r3] + + ldmpc regs=r4-r6 + .size sample_output_mono, .-sample_output_mono + +/**************************************************************************** + * void sample_output_stereo(int count, struct dsp_data *data, + * const int32_t *src[], int16_t *dst) + */ + .section .icode, "ax", %progbits + .align 2 + .global sample_output_stereo + .type sample_output_stereo, %function +sample_output_stereo: + @ input: r0 = count, r1 = data, r2 = src, r3 = dst + stmfd sp!, { r4-r9, lr } + + ldr r1, [r1] @ r1 = data->output_scale + ldmia r2, { r2, r5 } @ r2 = src[0], r5 = src[1] + + mov r4, #1 + mov r4, r4, lsl r1 @ r4 = 1 << (scale-1) + mov r4, r4, lsr #1 @ + + mvn r14, #0x8000 @ r14 = 0xffff7fff, needed for + @ clipping and masking + subs r0, r0, #1 @ + beq .sos_singlesample @ Zero? Only one sample! 
+ +.sosloop: + ldmia r2!, { r6, r7 } @ 2 left + ldmia r5!, { r8, r9 } @ 2 right + + add r6, r6, r4 @ r6 = (r6 + 1<<(scale-1)) >> scale + mov r6, r6, asr r1 + mov r12, r6, asr #15 + teq r12, r12, asr #31 + eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767) + add r7, r7, r4 + mov r7, r7, asr r1 @ r7 = (r7 + 1<<(scale-1)) >> scale + mov r12, r7, asr #15 + teq r12, r12, asr #31 + eorne r7, r14, r7, asr #31 @ Clip (-32768...+32767) + + add r8, r8, r4 @ r8 = (r8 + 1<<(scale-1)) >> scale + mov r8, r8, asr r1 + mov r12, r8, asr #15 + teq r12, r12, asr #31 + eorne r8, r14, r8, asr #31 @ Clip (-32768...+32767) + add r9, r9, r4 @ r9 = (r9 + 1<<(scale-1)) >> scale + mov r9, r9, asr r1 + mov r12, r9, asr #15 + teq r12, r12, asr #31 + eorne r9, r14, r9, asr #31 @ Clip (-32768...+32767) + + and r6, r6, r14, lsr #16 @ pack first 2 halfwords into 1 word + orr r8, r6, r8, asl #16 + and r7, r7, r14, lsr #16 @ pack last 2 halfwords into 1 word + orr r9, r7, r9, asl #16 + + stmia r3!, { r8, r9 } + + subs r0, r0, #2 + bgt .sosloop + + ldmpc cond=lt, regs=r4-r9 @ even 'count'? 
return + +.sos_singlesample: + ldr r6, [r2] @ left odd sample + ldr r8, [r5] @ right odd sample + + add r6, r6, r4 @ r6 = (r6 + 1<<(scale-1)) >> scale + mov r6, r6, asr r1 + mov r12, r6, asr #15 + teq r12, r12, asr #31 + eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767) + add r8, r8, r4 @ r8 = (r8 + 1<<(scale-1)) >> scale + mov r8, r8, asr r1 + mov r12, r8, asr #15 + teq r12, r12, asr #31 + eorne r8, r14, r8, asr #31 @ Clip (-32768...+32767) + + and r6, r6, r14, lsr #16 @ pack 2 halfwords into 1 word + orr r8, r6, r8, asl #16 + + str r8, [r3] + + ldmpc regs=r4-r9 + .size sample_output_stereo, .-sample_output_stereo +#endif /* ARM_ARCH < 6 */ + +/**************************************************************************** + * void apply_crossfeed(int count, int32_t* src[]) + */ + .section .text + .global apply_crossfeed +apply_crossfeed: + @ unfortunately, we ended up in a bit of a register squeeze here, and need + @ to keep the count on the stack :/ + stmdb sp!, { r4-r11, lr } @ stack modified regs + ldmia r1, { r2-r3 } @ r2 = src[0], r3 = src[1] + + ldr r1, =crossfeed_data + ldmia r1!, { r4-r11 } @ load direct gain and filter data + mov r12, r0 @ better to ldm delay + count later + add r0, r1, #13*4*2 @ calculate end of delay + stmdb sp!, { r0, r12 } @ stack end of delay adr and count + ldr r0, [r1, #13*4*2] @ fetch current delay line address + + /* Register usage in loop: + * r0 = &delay[index][0], r1 = accumulator high, r2 = src[0], r3 = src[1], + * r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs), + * r8-r11 = filter history, r12 = temp, r14 = accumulator low + */ +.cfloop: + smull r14, r1, r6, r8 @ acc = b1*dr[n - 1] + smlal r14, r1, r7, r9 @ acc += a1*y_l[n - 1] + ldr r8, [r0, #4] @ r8 = dr[n] + smlal r14, r1, r5, r8 @ acc += b0*dr[n] + mov r9, r1, lsl #1 @ fix format for filter history + ldr r12, [r2] @ load left input + smlal r14, r1, r4, r12 @ acc += gain*x_l[n] + mov r1, r1, lsl #1 @ fix format + str r1, [r2], #4 @ save result + + smull r14, r1, r6, 
r10 @ acc = b1*dl[n - 1] + smlal r14, r1, r7, r11 @ acc += a1*y_r[n - 1] + ldr r10, [r0] @ r10 = dl[n] + str r12, [r0], #4 @ save left input to delay line + smlal r14, r1, r5, r10 @ acc += b0*dl[n] + mov r11, r1, lsl #1 @ fix format for filter history + ldr r12, [r3] @ load right input + smlal r14, r1, r4, r12 @ acc += gain*x_r[n] + str r12, [r0], #4 @ save right input to delay line + mov r1, r1, lsl #1 @ fix format + ldmia sp, { r12, r14 } @ fetch delay line end addr and count from stack + str r1, [r3], #4 @ save result + + cmp r0, r12 @ need to wrap to start of delay? + subeq r0, r0, #13*4*2 @ wrap back delay line ptr to start + + subs r14, r14, #1 @ are we finished? + strne r14, [sp, #4] @ nope, save count back to stack + bne .cfloop + + @ save data back to struct + ldr r12, =crossfeed_data + 4*4 + stmia r12, { r8-r11 } @ save filter history + str r0, [r12, #30*4] @ save delay line index + add sp, sp, #8 @ remove temp variables from stack + ldmpc regs=r4-r11 + .size apply_crossfeed, .-apply_crossfeed + +/**************************************************************************** + * int dsp_downsample(int count, struct dsp_data *data, + * const int32_t *src[], int32_t *dst[]) + */ + .section .text + .global dsp_downsample +dsp_downsample: + stmdb sp!, { r4-r11, lr } @ stack modified regs + ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta + sub r5, r5, #1 @ pre-decrement num_channels for use + add r4, r1, #12 @ r4 = &resample_data.phase + mov r12, #0xff + orr r12, r12, #0xff00 @ r12 = 0xffff +.dschannel_loop: + ldr r1, [r4] @ r1 = resample_data.phase + ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1] + ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1] + add r9, r4, #4 @ r9 = &last_sample[0] + ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1] + sub r11, r0, #1 + ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ... 
+ str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample + movs r9, r1, lsr #16 @ r9 = pos = phase >> 16 + ldreq r11, [r7] @ if pos = 0, load src[0] and jump into loop + beq .dsuse_last_start + cmp r9, r0 @ if pos >= count, we're already done + bge .dsloop_skip + + @ Register usage in loop: + @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel, + @ r6 = delta, r7 = s, r8 = d, r9 = pos, r10 = s[pos - 1], r11 = s[pos] +.dsloop: + add r9, r7, r9, lsl #2 @ r9 = &s[pos] + ldmda r9, { r10, r11 } @ r10 = s[pos - 1], r11 = s[pos] +.dsuse_last_start: + sub r11, r11, r10 @ r11 = diff = s[pos] - s[pos - 1] + @ keep frac in lower bits to take advantage of multiplier early termination + and r9, r1, r12 @ frac = phase & 0xffff + smull r9, r14, r11, r9 + add r1, r1, r6 @ phase += delta + add r10, r10, r9, lsr #16 @ r10 = out = s[pos - 1] + frac*diff + add r10, r10, r14, lsl #16 + str r10, [r8], #4 @ *d++ = out + mov r9, r1, lsr #16 @ pos = phase >> 16 + cmp r9, r0 @ pos < count? + blt .dsloop @ yup, do more samples +.dsloop_skip: + subs r5, r5, #1 + bpl .dschannel_loop @ if (--ch) >= 0, do another channel + sub r1, r1, r0, lsl #16 @ wrap phase back to start + str r1, [r4] @ store back + ldr r1, [r3] @ r1 = &dst[0] + sub r8, r8, r1 @ dst - &dst[0] + mov r0, r8, lsr #2 @ convert bytes->samples + ldmpc regs=r4-r11 @ ... 
and we're out + .size dsp_downsample, .-dsp_downsample + +/**************************************************************************** + * int dsp_upsample(int count, struct dsp_data *dsp, + * const int32_t *src[], int32_t *dst[]) + */ + .section .text + .global dsp_upsample +dsp_upsample: + stmfd sp!, { r4-r11, lr } @ stack modified regs + ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta + sub r5, r5, #1 @ pre-decrement num_channels for use + add r4, r1, #12 @ r4 = &resample_data.phase + mov r6, r6, lsl #16 @ we'll use carry to detect pos increments + stmfd sp!, { r0, r4 } @ stack count and &resample_data.phase +.uschannel_loop: + ldr r12, [r4] @ r12 = resample_data.phase + ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1] + ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1] + add r9, r4, #4 @ r9 = &last_sample[0] + mov r1, r12, lsl #16 @ we'll use carry to detect pos increments + sub r11, r0, #1 + ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ... + ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1] + str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample + movs r14, r12, lsr #16 @ pos = resample_data.phase >> 16 + beq .usstart_0 @ pos = 0 + cmp r14, r0 @ if pos >= count, we're already done + bge .usloop_skip + add r7, r7, r14, lsl #2 @ r7 = &s[pos] + ldr r10, [r7, #-4] @ r10 = s[pos - 1] + b .usstart_0 + + @ Register usage in loop: + @ r0 = count, r1 = phase, r4 = frac, r5 = cur_channel, + @ r6 = delta, r7 = s, r8 = d, r9 = diff, r10 = s[pos - 1], r11 = s[pos] +.usloop_1: + mov r10, r11 @ r10 = previous sample +.usstart_0: + ldr r11, [r7], #4 @ r11 = next sample + mov r4, r1, lsr #16 @ r4 = frac = phase >> 16 + sub r9, r11, r10 @ r9 = diff = s[pos] - s[pos - 1] +.usloop_0: + smull r12, r14, r4, r9 + adds r1, r1, r6 @ phase += delta << 16 + mov r4, r1, lsr #16 @ r4 = frac = phase >> 16 + add r14, r10, r14, lsl #16 + add r14, r14, r12, lsr #16 @ r14 = out = s[pos - 1] + frac*diff + str r14, [r8], #4 @ *d++ = out + bcc 
.usloop_0 @ if carry is set, pos is incremented + subs r0, r0, #1 @ if count > 0, do another sample + bgt .usloop_1 +.usloop_skip: + subs r5, r5, #1 + ldmfd sp, { r0, r4 } @ reload count and &resample_data.phase + bpl .uschannel_loop @ if (--ch) >= 0, do another channel + mov r1, r1, lsr #16 @ wrap phase back to start of next frame + ldr r2, [r3] @ r2 = &dst[0] + str r1, [r4] @ store phase + sub r8, r8, r2 @ dst - &dst[0] + mov r0, r8, lsr #2 @ convert bytes->samples + add sp, sp, #8 @ adjust stack for temp variables + ldmpc regs=r4-r11 @ ... and we're out + .size dsp_upsample, .-dsp_upsample + +/**************************************************************************** + * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) + */ + .section .icode, "ax", %progbits + .align 2 + .global dsp_apply_gain + .type dsp_apply_gain, %function +dsp_apply_gain: + @ input: r0 = count, r1 = data, r2 = buf[] + stmfd sp!, { r4-r8, lr } + + ldr r3, [r1, #4] @ r3 = data->num_channels + ldr r4, [r1, #32] @ r4 = data->gain + +.dag_outerloop: + ldr r1, [r2], #4 @ r1 = buf[0] and increment index of buf[] + subs r12, r0, #1 @ r12 = count - 1 + beq .dag_singlesample @ Zero? Only one sample! + +.dag_innerloop: + ldmia r1, { r5, r6 } @ load r5, r6 from r1 + smull r7, r8, r5, r4 @ r7 = FRACMUL_SHL(r5, r4, 8) + smull r14, r5, r6, r4 @ r14 = FRACMUL_SHL(r6, r4, 8) + subs r12, r12, #2 + mov r7, r7, lsr #23 + mov r14, r14, lsr #23 + orr r7, r7, r8, asl #9 + orr r14, r14, r5, asl #9 + stmia r1!, { r7, r14 } @ save r7, r14 to [r1] and increment r1 + bgt .dag_innerloop @ end of inner loop + + blt .dag_evencount @ < 0? 
even count + +.dag_singlesample: + ldr r5, [r1] @ handle odd sample + smull r7, r8, r5, r4 @ r7 = FRACMUL_SHL(r5, r4, 8) + mov r7, r7, lsr #23 + orr r7, r7, r8, asl #9 + str r7, [r1] + +.dag_evencount: + subs r3, r3, #1 + bgt .dag_outerloop @ end of outer loop + + ldmpc regs=r4-r8 + .size dsp_apply_gain, .-dsp_apply_gain diff --git a/lib/rbcodec/dsp/dsp_arm_v6.S b/lib/rbcodec/dsp/dsp_arm_v6.S new file mode 100644 index 0000000000..39949498ea --- /dev/null +++ b/lib/rbcodec/dsp/dsp_arm_v6.S @@ -0,0 +1,127 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2010 Michael Sevakis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +/**************************************************************************** + * void sample_output_mono(int count, struct dsp_data *data, + * const int32_t *src[], int16_t *dst) + */ + .section .text, "ax", %progbits + .align 2 + .global sample_output_mono + .type sample_output_mono, %function +sample_output_mono: + @ input: r0 = count, r1 = data, r2 = src, r3 = dst + stmfd sp!, { r4, lr } @ + @ + ldr r1, [r1] @ r1 = data->output_scale + ldr r2, [r2] @ r2 = src[0] + @ + mov r4, #1 @ r4 = 1 << (scale - 1) + mov r4, r4, lsl r1 @ + subs r0, r0, #1 @ odd: end at 0; even: end at -1 + mov r4, r4, lsr #1 @ + beq 2f @ Zero? 
Only one sample! + @ +1: @ + ldmia r2!, { r12, r14 } @ load Mi0, Mi1 + qadd r12, r12, r4 @ round, scale, saturate and + qadd r14, r14, r4 @ pack Mi0 to So0, Mi1 to So1 + mov r12, r12, asr r1 @ + mov r14, r14, asr r1 @ + ssat r12, #16, r12 @ + ssat r14, #16, r14 @ + pkhbt r12, r12, r12, asl #16 @ + pkhbt r14, r14, r14, asl #16 @ + subs r0, r0, #2 @ + stmia r3!, { r12, r14 } @ store So0, So1 + bgt 1b @ + @ + ldmltfd sp!, { r4, pc } @ if count was even, we're done + @ +2: @ + ldr r12, [r2] @ round, scale, saturate + qadd r12, r12, r4 @ and pack Mi to So + mov r12, r12, asr r1 @ + ssat r12, #16, r12 @ + pkhbt r12, r12, r12, asl #16 @ + str r12, [r3] @ store So + @ + ldmfd sp!, { r4, pc } @ + .size sample_output_mono, .-sample_output_mono + +/**************************************************************************** + * void sample_output_stereo(int count, struct dsp_data *data, + * const int32_t *src[], int16_t *dst) + */ + .section .text, "ax", %progbits + .align 2 + .global sample_output_stereo + .type sample_output_stereo, %function +sample_output_stereo: + @ input: r0 = count, r1 = data, r2 = src, r3 = dst + stmfd sp!, { r4-r7, lr } @ + @ + ldr r1, [r1] @ r1 = data->output_scale + ldmia r2, { r2, r4 } @ r2 = src[0], r4 = src[1] + @ + mov r5, #1 @ r5 = 1 << (scale - 1) + mov r5, r5, lsl r1 @ + subs r0, r0, #1 @ odd: end at 0; even: end at -1 + mov r5, r5, lsr #1 @ + beq 2f @ Zero? Only one sample! 
+ @ +1: @ + ldmia r2!, { r6, r7 } @ r6, r7 = Li0, Li1 + ldmia r4!, { r12, r14 } @ r12, r14 = Ri0, Ri1 + qadd r6, r6, r5 @ round, scale, saturate and pack + qadd r7, r7, r5 @ Li0+Ri0 to So0, Li1+Ri1 to So1 + qadd r12, r12, r5 @ + qadd r14, r14, r5 @ + mov r6, r6, asr r1 @ + mov r7, r7, asr r1 @ + mov r12, r12, asr r1 @ + mov r14, r14, asr r1 @ + ssat r6, #16, r6 @ + ssat r12, #16, r12 @ + ssat r7, #16, r7 @ + ssat r14, #16, r14 @ + pkhbt r6, r6, r12, asl #16 @ + pkhbt r7, r7, r14, asl #16 @ + subs r0, r0, #2 @ + stmia r3!, { r6, r7 } @ store So0, So1 + bgt 1b @ + @ + ldmltfd sp!, { r4-r7, pc } @ if count was even, we're done + @ +2: @ + ldr r6, [r2] @ r6 = Li + ldr r12, [r4] @ r12 = Ri + qadd r6, r6, r5 @ round, scale, saturate + qadd r12, r12, r5 @ and pack Li+Ri to So + mov r6, r6, asr r1 @ + mov r12, r12, asr r1 @ + ssat r6, #16, r6 @ + ssat r12, #16, r12 @ + pkhbt r6, r6, r12, asl #16 @ + str r6, [r3] @ store So + @ + ldmfd sp!, { r4-r7, pc } @ + .size sample_output_stereo, .-sample_output_stereo diff --git a/lib/rbcodec/dsp/dsp_asm.h b/lib/rbcodec/dsp/dsp_asm.h new file mode 100644 index 0000000000..7bf18370a3 --- /dev/null +++ b/lib/rbcodec/dsp/dsp_asm.h @@ -0,0 +1,86 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 Thom Johansen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +#include + +#ifndef _DSP_ASM_H +#define _DSP_ASM_H + +/* Set the appropriate #defines based on CPU or whatever matters */ +#if defined(CPU_ARM) +#define DSP_HAVE_ASM_APPLY_GAIN +#define DSP_HAVE_ASM_RESAMPLING +#define DSP_HAVE_ASM_CROSSFEED +#define DSP_HAVE_ASM_SOUND_CHAN_MONO +#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM +#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE +#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO +#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO +#elif defined (CPU_COLDFIRE) +#define DSP_HAVE_ASM_APPLY_GAIN +#define DSP_HAVE_ASM_RESAMPLING +#define DSP_HAVE_ASM_CROSSFEED +#define DSP_HAVE_ASM_SOUND_CHAN_MONO +#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM +#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE +#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO +#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO +#endif /* CPU_COLDFIRE */ + +/* Declare prototypes based upon what's #defined above */ +#ifdef DSP_HAVE_ASM_CROSSFEED +void apply_crossfeed(int count, int32_t *buf[]); +#endif + +#ifdef DSP_HAVE_ASM_APPLY_GAIN +void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]); +#endif /* DSP_HAVE_ASM_APPLY_GAIN* */ + +#ifdef DSP_HAVE_ASM_RESAMPLING +int dsp_upsample(int count, struct dsp_data *data, + const int32_t *src[], int32_t *dst[]); +int dsp_downsample(int count, struct dsp_data *data, + const int32_t *src[], int32_t *dst[]); +#endif /* DSP_HAVE_ASM_RESAMPLING */ + +#ifdef DSP_HAVE_ASM_SOUND_CHAN_MONO +void channels_process_sound_chan_mono(int count, int32_t *buf[]); +#endif + +#ifdef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM +void channels_process_sound_chan_custom(int count, int32_t *buf[]); +#endif + +#ifdef DSP_HAVE_ASM_SOUND_CHAN_KARAOKE +void channels_process_sound_chan_karaoke(int count, int32_t *buf[]); +#endif + +#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO +void sample_output_stereo(int count, struct dsp_data *data, + const int32_t *src[], int16_t *dst); +#endif + +#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO +void 
sample_output_mono(int count, struct dsp_data *data, + const int32_t *src[], int16_t *dst); +#endif + +#endif /* _DSP_ASM_H */ diff --git a/lib/rbcodec/dsp/dsp_cf.S b/lib/rbcodec/dsp/dsp_cf.S new file mode 100644 index 0000000000..cda811a7d5 --- /dev/null +++ b/lib/rbcodec/dsp/dsp_cf.S @@ -0,0 +1,611 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 Thom Johansen + * Portions Copyright (C) 2007 Michael Sevakis + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +/**************************************************************************** + * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) + */ + .section .text + .align 2 + .global dsp_apply_gain +dsp_apply_gain: + lea.l -20(%sp), %sp | save registers + movem.l %d2-%d4/%a2-%a3, (%sp) | + movem.l 28(%sp), %a0-%a1 | %a0 = data, + | %a1 = buf + move.l 4(%a0), %d1 | %d1 = data->num_channels + move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23) +10: | channel loop | + move.l 24(%sp), %d0 | %d0 = count + move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1] + move.l %a2, %a3 | %a3 = d = s + move.l (%a2)+, %d2 | %d2 = *s++, + mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1) + subq.l #1, %d0 | --count > 0 ? : effectively n++ + ble.b 30f | loop done | no? 
finish up +20: | loop | + move.l %accext01, %d4 | fetch S(n-1)[7:0] + movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0] + asl.l #8, %d3 | *s++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0] + mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1) + move.b %d4, %d3 | + move.l %d3, (%a3)+ | + subq.l #1, %d0 | --count > 0 ? : effectively n++ + bgt.b 20b | loop | yes? do more samples +30: | loop done | + move.l %accext01, %d4 | fetch S(n-1)[7:0] + movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0] + asl.l #8, %d3 | *s = (S(n-1)[40:8] << 8) | S(n-1)[7:0] + move.b %d4, %d3 | + move.l %d3, (%a3) | + subq.l #1, %d1 | next channel + bgt.b 10b | channel loop | + movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers + lea.l 20(%sp), %sp | cleanup stack + rts | + .size dsp_apply_gain,.-dsp_apply_gain + +/**************************************************************************** + * void apply_crossfeed(int count, int32_t *buf[]) + */ + .section .text + .align 2 + .global apply_crossfeed +apply_crossfeed: + lea.l -44(%sp), %sp | + movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs + movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src + movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] + lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data + move.l (%a1)+, %d6 | %d6 = direct gain + movem.l 12(%a1), %d0-%d3 | fetch filter history samples + move.l 132(%a1), %a0 | fetch delay line address + movem.l (%a1), %a1-%a3 | load filter coefs + lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit + bra.b 20f | loop start | go to loop start point + /* Register usage in loop: + * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), + * %a4 = buf[0], %a5 = buf[1], + * %a6 = delay line pointer wrap limit, + * %d0..%d3 = history + * %d4..%d5 = temp. + * %d6 = direct gain, + * %d7 = count + */ +10: | loop | + movclr.l %acc0, %d4 | write outputs + move.l %d4, (%a4)+ | . + movclr.l %acc1, %d5 | . + move.l %d5, (%a5)+ | . 
+20: | loop start | + mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n] + mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n] + mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R + mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n] + mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n] + mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L + movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line + move.l %acc0, %d3 | get filtered delayed left sample (y_l[n]) + move.l %acc1, %d1 | get filtered delayed right sample (y_r[n]) + mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n] + mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n] + cmp.l %a6, %a0 | wrap %a0 if passed end + bhs.b 30f | wrap buffer | + .word 0x51fb | tpf.l | trap the buffer wrap +30: | wrap buffer | ...fwd taken branches more costly + lea.l -104(%a0), %a0 | wrap it up + subq.l #1, %d7 | --count > 0 ? + bgt.b 10b | loop | yes? do more + movclr.l %acc0, %d4 | write last outputs + move.l %d4, (%a4) | . + movclr.l %acc1, %d5 | . + move.l %d5, (%a5) | . 
+ lea.l crossfeed_data+16, %a1 | save data back to struct + movem.l %d0-%d3, (%a1) | ...history + move.l %a0, 120(%a1) | ...delay_p + movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs + lea.l 44(%sp), %sp | + rts | + .size apply_crossfeed,.-apply_crossfeed + +/**************************************************************************** + * int dsp_downsample(int count, struct dsp_data *data, + * const int32_t *src[], int32_t *dst[]) + */ + .section .text + .align 2 + .global dsp_downsample +dsp_downsample: + lea.l -40(%sp), %sp | save non-clobberables + movem.l %d2-%d7/%a2-%a5, (%sp) | + movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count + | %a0 = data + | %a1 = src + | %a2 = dst + movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels + | %d4 = delta = data->resample_data.delta + moveq.l #16, %d7 | %d7 = shift +10: | channel loop | + move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase + move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] + move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] + lea.l 12(%a0, %d3.l*4), %a5 | %a5 = &data->resample_data.last_sample[ch-1] + move.l (%a5), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] + move.l -4(%a3, %d2.l*4), (%a5) | data->resample_data.last_sample[ch-1] = s[count-1] + move.l %d5, %d6 | %d6 = pos = phase >> 16 + lsr.l %d7, %d6 | + cmp.l %d2, %d6 | past end of samples? + bge.b 40f | skip resample loop| yes? skip loop + tst.l %d6 | need last sample of prev. frame? + bne.b 20f | resample loop | no? 
start main loop + move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] + bra.b 30f | resample start last | start with last (last in %d0) +20: | resample loop | + lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] + movem.l (%a5), %d0-%d1 | +30: | resample start last | + sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] + move.l %d0, %acc0 | %acc0 = previous sample + move.l %d5, %d0 | frac = (phase << 16) >> 1 + lsl.l %d7, %d0 | + lsr.l #1, %d0 | + mac.l %d0, %d1, %acc0 | %acc0 += frac * diff + add.l %d4, %d5 | phase += delta + move.l %d5, %d6 | pos = phase >> 16 + lsr.l %d7, %d6 | + movclr.l %acc0, %d0 | + move.l %d0, (%a4)+ | *d++ = %d0 + cmp.l %d2, %d6 | pos < count? + blt.b 20b | resample loop | yes? continue resampling +40: | skip resample loop | + subq.l #1, %d3 | ch > 0? + bgt.b 10b | channel loop | yes? process next channel + lsl.l %d7, %d2 | wrap phase to start of next frame + sub.l %d2, %d5 | data->resample_data.phase = + move.l %d5, 12(%a0) | ... phase - (count << 16) + move.l %a4, %d0 | return d - d[0] + sub.l (%a2), %d0 | + asr.l #2, %d0 | convert bytes->samples + movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables + lea.l 40(%sp), %sp | cleanup stack + rts | buh-bye + .size dsp_downsample,.-dsp_downsample + +/**************************************************************************** + * int dsp_upsample(int count, struct dsp_data *dsp, + * const int32_t *src[], int32_t *dst[]) + */ + .section .text + .align 2 + .global dsp_upsample +dsp_upsample: + lea.l -40(%sp), %sp | save non-clobberables + movem.l %d2-%d7/%a2-%a5, (%sp) | + movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count + | %a0 = data + | %a1 = src + | %a2 = dst + movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels + | %d4 = delta = data->resample_data.delta + swap %d4 | swap delta to high word to use... 
+ | ...carries to increment position +10: | channel loop | + move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase + move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] + lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] + lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1] + move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] + move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] + move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] + move.l (%a3)+, %d1 | fetch first sample - might throw this... + | ...away later but we'll be preincremented + move.l %d1, %d6 | save sample value + sub.l %d0, %d1 | %d1 = diff = s[0] - last + swap %d5 | swap phase to high word to use + | carries to increment position + move.l %d5, %d7 | %d7 = pos = phase >> 16 + clr.w %d5 | + eor.l %d5, %d7 | pos == 0? + beq.b 40f | loop start | yes? start loop + cmp.l %d2, %d7 | past end of samples? + bge.b 50f | skip resample loop| yes? go to next channel and collect info + lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1] + movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos] + move.l %d1, %d6 | save sample value + sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] + bra.b 40f | loop start | +20: | next sample loop | + move.l %d6, %d0 | move previous sample to %d0 + move.l (%a3)+, %d1 | fetch next sample + move.l %d1, %d6 | save sample value + sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] +30: | same sample loop | + movclr.l %acc0, %d7 | %d7 = result + move.l %d7, (%a4)+ | *d++ = %d7 +40: | loop start | + lsr.l #1, %d5 | make phase into frac + move.l %d0, %acc0 | %acc0 = s[pos-1] + mac.l %d1, %d5, %acc0 | %acc0 = diff * frac + lsl.l #1, %d5 | restore frac to phase + add.l %d4, %d5 | phase += delta + bcc.b 30b | same sample loop | load next values? + cmp.l %a5, %a3 | src <= src_end? + bls.b 20b | next sample loop | yes? 
continue resampling + movclr.l %acc0, %d7 | %d7 = result + move.l %d7, (%a4)+ | *d++ = %d7 +50: | skip resample loop | + subq.l #1, %d3 | ch > 0? + bgt.b 10b | channel loop | yes? process next channel + swap %d5 | wrap phase to start of next frame + move.l %d5, 12(%a0) | ...and save in data->resample_data.phase + move.l %a4, %d0 | return d - d[0] + sub.l (%a2), %d0 | + movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables + asr.l #2, %d0 | convert bytes->samples + lea.l 40(%sp), %sp | cleanup stack + rts | buh-bye + .size dsp_upsample,.-dsp_upsample + +/**************************************************************************** + * void channels_process_sound_chan_mono(int count, int32_t *buf[]) + * + * Mix left and right channels 50/50 into a center channel. + */ + .section .text + .align 2 + .global channels_process_sound_chan_mono +channels_process_sound_chan_mono: + movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf + lea.l -20(%sp), %sp | save registers + movem.l %d2-%d4/%a2-%a3, (%sp) | + movem.l (%a0), %a0-%a1 | get channel pointers + move.l %a0, %a2 | use separate dst pointers since read + move.l %a1, %a3 | pointers run one ahead of write + move.l #0x40000000, %d3 | %d3 = 0.5 + move.l (%a0)+, %d1 | prime the input registers + move.l (%a1)+, %d2 | + mac.l %d1, %d3, (%a0)+, %d1, %acc0 | + mac.l %d2, %d3, (%a1)+, %d2, %acc0 | + subq.l #1, %d0 | + ble.s 20f | loop done | +10: | loop | + movclr.l %acc0, %d4 | L = R = l/2 + r/2 + mac.l %d1, %d3, (%a0)+, %d1, %acc0 | + mac.l %d2, %d3, (%a1)+, %d2, %acc0 | + move.l %d4, (%a2)+ | output to original buffer + move.l %d4, (%a3)+ | + subq.l #1, %d0 | + bgt.s 10b | loop | +20: | loop done | + movclr.l %acc0, %d4 | output last sample + move.l %d4, (%a2) | + move.l %d4, (%a3) | + movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers + lea.l 20(%sp), %sp | cleanup + rts | + .size channels_process_sound_chan_mono, \ + .-channels_process_sound_chan_mono + 
+/**************************************************************************** + * void channels_process_sound_chan_custom(int count, int32_t *buf[]) + * + * Apply stereo width (narrowing/expanding) effect. + */ + .section .text + .align 2 + .global channels_process_sound_chan_custom +channels_process_sound_chan_custom: + movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf + lea.l -28(%sp), %sp | save registers + movem.l %d2-%d6/%a2-%a3, (%sp) | + movem.l (%a0), %a0-%a1 | get channel pointers + move.l %a0, %a2 | use separate dst pointers since read + move.l %a1, %a3 | pointers run one ahead of write + move.l dsp_sw_gain, %d3 | load straight (mid) gain + move.l dsp_sw_cross, %d4 | load cross (side) gain + move.l (%a0)+, %d1 | prime the input registers + move.l (%a1)+, %d2 | + mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross + mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross + mac.l %d2, %d4 , %acc0 | + mac.l %d2, %d3, (%a1)+, %d2, %acc1 | + subq.l #1, %d0 | + ble.b 20f | loop done | +10: | loop | + movclr.l %acc0, %d5 | + movclr.l %acc1, %d6 | + mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross + mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross + mac.l %d2, %d4 , %acc0 | + mac.l %d2, %d3, (%a1)+, %d2, %acc1 | + move.l %d5, (%a2)+ | + move.l %d6, (%a3)+ | + subq.l #1, %d0 | + bgt.s 10b | loop | +20: | loop done | + movclr.l %acc0, %d5 | output last sample + movclr.l %acc1, %d6 | + move.l %d5, (%a2) | + move.l %d6, (%a3) | + movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers + lea.l 28(%sp), %sp | cleanup + rts | + .size channels_process_sound_chan_custom, \ + .-channels_process_sound_chan_custom + +/**************************************************************************** + * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) + * + * Separate channels into side channels. 
+ */ + .section .text + .align 2 + .global channels_process_sound_chan_karaoke +channels_process_sound_chan_karaoke: + movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf + lea.l -20(%sp), %sp | save registers + movem.l %d2-%d4/%a2-%a3, (%sp) | + movem.l (%a0), %a0-%a1 | get channel src pointers + move.l %a0, %a2 | use separate dst pointers since read + move.l %a1, %a3 | pointers run one ahead of write + move.l #0x40000000, %d3 | %d3 = 0.5 + move.l (%a0)+, %d1 | prime the input registers + move.l (%a1)+, %d2 | + mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2 + msac.l %d2, %d3, (%a1)+, %d2, %acc0 | + subq.l #1, %d0 | + ble.b 20f | loop done | +10: | loop | + movclr.l %acc0, %d4 | + mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2 + msac.l %d2, %d3, (%a1)+, %d2, %acc0 | + move.l %d4, (%a2)+ | + neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2 + move.l %d4, (%a3)+ | + subq.l #1, %d0 | + bgt.s 10b | loop | +20: | loop done | + movclr.l %acc0, %d4 | output last sample + move.l %d4, (%a2) | + neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2 + move.l %d4, (%a3) | + movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers + lea.l 20(%sp), %sp | cleanup + rts | + .size channels_process_sound_chan_karaoke, \ + .-channels_process_sound_chan_karaoke + +/**************************************************************************** + * void sample_output_stereo(int count, struct dsp_data *data, + * const int32_t *src[], int16_t *dst) + * + * Framework based on the ubiquitous Rockbox line transfer logic for + * Coldfire CPUs. + * + * Does emac clamping and scaling (which proved faster than the usual + * checks and branches - even single test clamping) and writes using + * line burst transfers. Also better than writing a single L-R pair per + * loop but a good deal more code. + * + * Attempting bursting during reads is rather futile since the source and + * destination alignments rarely agree and too much complication will + * slow us up.
The parallel loads seem to do a bit better at least until + * a pcm buffer can always give line aligned chunk and then aligning the + * dest can then imply the source is aligned if the source buffers are. + * For now longword alignment is assumed of both the source and dest. + * + */ + .section .text + .align 2 + .global sample_output_stereo +sample_output_stereo: + lea.l -48(%sp), %sp | save registers + move.l %macsr, %d1 | do it now as at many lines will + movem.l %d1-%d7/%a2-%a6, (%sp) | be the far more common condition + move.l #0x80, %macsr | put emac unit in signed int mode + movem.l 52(%sp), %a0-%a2/%a4 | + lea.l (%a4, %a0.l*4), %a0 | %a0 = end address + move.l (%a1), %d1 | %a1 = multiplier: (1 << (16 - scale)) + sub.l #16, %d1 | + neg.l %d1 | + moveq.l #1, %d0 | + asl.l %d1, %d0 | + move.l %d0, %a1 | + move.l #0x8000, %a6 | %a6 = rounding term + movem.l (%a2), %a2-%a3 | get L/R channel pointers + moveq.l #28, %d0 | %d0 = second line bound + add.l %a4, %d0 | + and.l #0xfffffff0, %d0 | + cmp.l %a0, %d0 | at least a full line? + bhi.w 40f | long loop 1 start | no? do as trailing longwords + sub.l #16, %d0 | %d1 = first line bound + cmp.l %a4, %d0 | any leading longwords? + bls.b 20f | line loop start | no? 
start line loop +10: | long loop 0 | + move.l (%a2)+, %d1 | read longword from L and R + move.l %a6, %acc0 | + move.l %acc0, %acc1 | + mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word + mac.l %d2, %a1, %acc1 | shift R to high word + movclr.l %acc0, %d1 | get possibly saturated results + movclr.l %acc1, %d2 | + swap %d2 | move R to low word + move.w %d2, %d1 | interleave MS 16 bits of each + move.l %d1, (%a4)+ | ...and write both + cmp.l %a4, %d0 | + bhi.b 10b | long loop 0 | +20: | line loop start | + lea.l -12(%a0), %a5 | %a5 = at or just before last line bound +30: | line loop | + move.l (%a3)+, %d4 | get next 4 R samples and scale + move.l %a6, %acc0 | + move.l %acc0, %acc1 | + move.l %acc1, %acc2 | + move.l %acc2, %acc3 | + mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation + mac.l %d5, %a1, (%a3)+, %d6, %acc1 | + mac.l %d6, %a1, (%a3)+, %d7, %acc2 | + mac.l %d7, %a1, (%a2)+, %d0, %acc3 | + lea.l 16(%a4), %a4 | increment dest here, mitigate stalls + movclr.l %acc0, %d4 | obtain R results + movclr.l %acc1, %d5 | + movclr.l %acc2, %d6 | + movclr.l %acc3, %d7 | + move.l %a6, %acc0 | + move.l %acc0, %acc1 | + move.l %acc1, %acc2 | + move.l %acc2, %acc3 | + mac.l %d0, %a1, (%a2)+, %d1, %acc0 | get next 4 L samples and scale + mac.l %d1, %a1, (%a2)+, %d2, %acc1 | with saturation + mac.l %d2, %a1, (%a2)+, %d3, %acc2 | + mac.l %d3, %a1 , %acc3 | + swap %d4 | a) interleave most significant... + swap %d5 | + swap %d6 | + swap %d7 | + movclr.l %acc0, %d0 | obtain L results + movclr.l %acc1, %d1 | + movclr.l %acc2, %d2 | + movclr.l %acc3, %d3 | + move.w %d4, %d0 | a) ... 16 bits of L and R + move.w %d5, %d1 | + move.w %d6, %d2 | + move.w %d7, %d3 | + movem.l %d0-%d3, -16(%a4) | write four stereo samples + cmp.l %a4, %a5 | + bhi.b 30b | line loop | +40: | long loop 1 start | + cmp.l %a4, %a0 | any longwords left? + bls.b 60f | output end | no? 
stop +50: | long loop 1 | + move.l (%a2)+, %d1 | handle trailing longwords + move.l %a6, %acc0 | + move.l %acc0, %acc1 | + mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones + mac.l %d2, %a1, %acc1 | + movclr.l %acc0, %d1 | + movclr.l %acc1, %d2 | + swap %d2 | + move.w %d2, %d1 | + move.l %d1, (%a4)+ | + cmp.l %a4, %a0 | + bhi.b 50b | long loop 1 +60: | output end | + movem.l (%sp), %d1-%d7/%a2-%a6 | restore registers + move.l %d1, %macsr | + lea.l 48(%sp), %sp | cleanup + rts | + .size sample_output_stereo, .-sample_output_stereo + +/**************************************************************************** + * void sample_output_mono(int count, struct dsp_data *data, + * const int32_t *src[], int16_t *dst) + * + * Same treatment as sample_output_stereo but for one channel. + */ + .section .text + .align 2 + .global sample_output_mono +sample_output_mono: + lea.l -32(%sp), %sp | save registers + move.l %macsr, %d1 | do it now as at many lines will + movem.l %d1-%d5/%a2-%a4, (%sp) | be the far more common condition + move.l #0x80, %macsr | put emac unit in signed int mode + movem.l 36(%sp), %a0-%a3 | + lea.l (%a3, %a0.l*4), %a0 | %a0 = end address + move.l (%a1), %d1 | %d5 = multiplier: (1 << (16 - scale)) + sub.l #16, %d1 | + neg.l %d1 | + moveq.l #1, %d5 | + asl.l %d1, %d5 | + move.l #0x8000, %a4 | %a4 = rounding term + movem.l (%a2), %a2 | get source channel pointer + moveq.l #28, %d0 | %d0 = second line bound + add.l %a3, %d0 | + and.l #0xfffffff0, %d0 | + cmp.l %a0, %d0 | at least a full line? + bhi.w 40f | long loop 1 start | no? do as trailing longwords + sub.l #16, %d0 | %d1 = first line bound + cmp.l %a3, %d0 | any leading longwords? + bls.b 20f | line loop start | no? 
start line loop +10: | long loop 0 | + move.l (%a2)+, %d1 | read longword from source channel + move.l %a4, %acc0 | + mac.l %d1, %d5, %acc0 | shift sample to high word + movclr.l %acc0, %d1 | get possibly saturated results + move.l %d1, %d2 | + swap %d2 | copy high word to low word + move.w %d2, %d1 | duplicate single channel into + move.l %d1, (%a3)+ | L and R + cmp.l %a3, %d0 | + bhi.b 10b | long loop 0 | +20: | line loop start | + lea.l -12(%a0), %a1 | %a1 = at or just before last line bound +30: | line loop | + move.l (%a2)+, %d0 | get next 4 samples and scale + move.l %a4, %acc0 | + move.l %acc0, %acc1 | + move.l %acc1, %acc2 | + move.l %acc2, %acc3 | + mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation + mac.l %d1, %d5, (%a2)+, %d2, %acc1 | + mac.l %d2, %d5, (%a2)+, %d3, %acc2 | + mac.l %d3, %d5 , %acc3 | + lea.l 16(%a3), %a3 | increment dest here, mitigate stalls + movclr.l %acc0, %d0 | obtain results + movclr.l %acc1, %d1 | + movclr.l %acc2, %d2 | + movclr.l %acc3, %d3 | + move.l %d0, %d4 | duplicate single channel + swap %d4 | into L and R + move.w %d4, %d0 | + move.l %d1, %d4 | + swap %d4 | + move.w %d4, %d1 | + move.l %d2, %d4 | + swap %d4 | + move.w %d4, %d2 | + move.l %d3, %d4 | + swap %d4 | + move.w %d4, %d3 | + movem.l %d0-%d3, -16(%a3) | write four stereo samples + cmp.l %a3, %a1 | + bhi.b 30b | line loop | +40: | long loop 1 start | + cmp.l %a3, %a0 | any longwords left? + bls.b 60f | output end | no?
stop +50: | loop loop 1 | + move.l (%a2)+, %d1 | handle trailing longwords + move.l %a4, %acc0 | + mac.l %d1, %d5, %acc0 | the same way as leading ones + movclr.l %acc0, %d1 | + move.l %d1, %d2 | + swap %d2 | + move.w %d2, %d1 | + move.l %d1, (%a3)+ | + cmp.l %a3, %a0 | + bhi.b 50b | long loop 1 | +60: | output end | + movem.l (%sp), %d1-%d5/%a2-%a4 | restore registers + move.l %d1, %macsr | + lea.l 32(%sp), %sp | cleanup + rts | + .size sample_output_mono, .-sample_output_mono diff --git a/lib/rbcodec/dsp/eq.c b/lib/rbcodec/dsp/eq.c new file mode 100644 index 0000000000..122a46a4c5 --- /dev/null +++ b/lib/rbcodec/dsp/eq.c @@ -0,0 +1,268 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006-2007 Thom Johansen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include +#include "config.h" +#include "fixedpoint.h" +#include "fracmul.h" +#include "eq.h" +#include "replaygain.h" + +/** + * Calculate first order shelving filter. Filter is not directly usable by the + * eq_filter() function. + * @param cutoff shelf midpoint frequency. See eq_pk_coefs for format. + * @param A decibel value multiplied by ten, describing gain/attenuation of + * shelf. Max value is 24 dB. + * @param low true for low-shelf filter, false for high-shelf filter. 
+ * @param c pointer to coefficient storage. Coefficients are s4.27 format. + */ +void filter_shelf_coefs(unsigned long cutoff, long A, bool low, int32_t *c) +{ + long sin, cos; + int32_t b0, b1, a0, a1; /* s3.28 */ + const long g = get_replaygain_int(A*5) << 4; /* 10^(db/40), s3.28 */ + + sin = fp_sincos(cutoff/2, &cos); + if (low) { + const int32_t sin_div_g = fp_div(sin, g, 25); + const int32_t sin_g = FRACMUL(sin, g); + cos >>= 3; + b0 = sin_g + cos; /* 0.25 .. 4.10 */ + b1 = sin_g - cos; /* -1 .. 3.98 */ + a0 = sin_div_g + cos; /* 0.25 .. 4.10 */ + a1 = sin_div_g - cos; /* -1 .. 3.98 */ + } else { + const int32_t cos_div_g = fp_div(cos, g, 25); + const int32_t cos_g = FRACMUL(cos, g); + sin >>= 3; + b0 = sin + cos_g; /* 0.25 .. 4.10 */ + b1 = sin - cos_g; /* -3.98 .. 1 */ + a0 = sin + cos_div_g; /* 0.25 .. 4.10 */ + a1 = sin - cos_div_g; /* -3.98 .. 1 */ + } + + const int32_t rcp_a0 = fp_div(1, a0, 57); /* 0.24 .. 3.98, s2.29 */ + *c++ = FRACMUL_SHL(b0, rcp_a0, 1); /* 0.063 .. 15.85 */ + *c++ = FRACMUL_SHL(b1, rcp_a0, 1); /* -15.85 .. 15.85 */ + *c++ = -FRACMUL_SHL(a1, rcp_a0, 1); /* -1 .. 1 */ +} + +#ifdef HAVE_SW_TONE_CONTROLS +/** + * Calculate second order section filter consisting of one low-shelf and one + * high-shelf section. + * @param cutoff_low low-shelf midpoint frequency. See eq_pk_coefs for format. + * @param cutoff_high high-shelf midpoint frequency. + * @param A_low decibel value multiplied by ten, describing gain/attenuation of + * low-shelf part. Max value is 24 dB. + * @param A_high decibel value multiplied by ten, describing gain/attenuation of + * high-shelf part. Max value is 24 dB. + * @param A decibel value multiplied by ten, describing additional overall gain. + * @param c pointer to coefficient storage. Coefficients are s4.27 format. 
+ */ +void filter_bishelf_coefs(unsigned long cutoff_low, unsigned long cutoff_high, + long A_low, long A_high, long A, int32_t *c) +{ + const long g = get_replaygain_int(A*10) << 7; /* 10^(db/20), s0.31 */ + int32_t c_ls[3], c_hs[3]; + + filter_shelf_coefs(cutoff_low, A_low, true, c_ls); + filter_shelf_coefs(cutoff_high, A_high, false, c_hs); + c_ls[0] = FRACMUL(g, c_ls[0]); + c_ls[1] = FRACMUL(g, c_ls[1]); + + /* now we cascade the two first order filters to one second order filter + * which can be used by eq_filter(). these resulting coefficients have a + * really wide numerical range, so we use a fixed point format which will + * work for the selected cutoff frequencies (in dsp.c) only. + */ + const int32_t b0 = c_ls[0], b1 = c_ls[1], b2 = c_hs[0], b3 = c_hs[1]; + const int32_t a0 = c_ls[2], a1 = c_hs[2]; + *c++ = FRACMUL_SHL(b0, b2, 4); + *c++ = FRACMUL_SHL(b0, b3, 4) + FRACMUL_SHL(b1, b2, 4); + *c++ = FRACMUL_SHL(b1, b3, 4); + *c++ = a0 + a1; + *c++ = -FRACMUL_SHL(a0, a1, 4); +} +#endif + +/* Coef calculation taken from Audio-EQ-Cookbook.txt by Robert Bristow-Johnson. + * Slightly faster calculation can be done by deriving forms which use tan() + * instead of cos() and sin(), but the latter are far easier to use when doing + * fixed point math, and performance is not a big point in the calculation part. + * All the 'a' filter coefficients are negated so we can use only additions + * in the filtering equation. + */ + +/** + * Calculate second order section peaking filter coefficients. + * @param cutoff a value from 0 to 0x80000000, where 0 represents 0 Hz and + * 0x80000000 represents the Nyquist frequency (samplerate/2). + * @param Q Q factor value multiplied by ten. Lower bound is artificially set + * at 0.5. + * @param db decibel value multiplied by ten, describing gain/attenuation at + * peak freq. Max value is 24 dB. + * @param c pointer to coefficient storage. Coefficients are s3.28 format. 
+ */ +void eq_pk_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c) +{ + long cs; + const long one = 1 << 28; /* s3.28 */ + const long A = get_replaygain_int(db*5) << 5; /* 10^(db/40), s2.29 */ + const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */ + int32_t a0, a1, a2; /* these are all s3.28 format */ + int32_t b0, b1, b2; + const long alphadivA = fp_div(alpha, A, 27); + const long alphaA = FRACMUL(alpha, A); + + /* possible numerical ranges are in comments by each coef */ + b0 = one + alphaA; /* [1 .. 5] */ + b1 = a1 = -2*(cs >> 3); /* [-2 .. 2] */ + b2 = one - alphaA; /* [-3 .. 1] */ + a0 = one + alphadivA; /* [1 .. 5] */ + a2 = one - alphadivA; /* [-3 .. 1] */ + + /* range of this is roughly [0.2 .. 1], but we'll never hit 1 completely */ + const long rcp_a0 = fp_div(1, a0, 59); /* s0.31 */ + *c++ = FRACMUL(b0, rcp_a0); /* [0.25 .. 4] */ + *c++ = FRACMUL(b1, rcp_a0); /* [-2 .. 2] */ + *c++ = FRACMUL(b2, rcp_a0); /* [-2.4 .. 1] */ + *c++ = FRACMUL(-a1, rcp_a0); /* [-2 .. 2] */ + *c++ = FRACMUL(-a2, rcp_a0); /* [-0.6 .. 1] */ +} + +/** + * Calculate coefficients for lowshelf filter. Parameters are as for + * eq_pk_coefs, but the coefficient format is s5.26 fixed point. + */ +void eq_ls_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c) +{ + long cs; + const long one = 1 << 25; /* s6.25 */ + const long sqrtA = get_replaygain_int(db*5/2) << 2; /* 10^(db/80), s5.26 */ + const long A = FRACMUL_SHL(sqrtA, sqrtA, 8); /* s2.29 */ + const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */ + const long ap1 = (A >> 4) + one; + const long am1 = (A >> 4) - one; + const long ap1_cs = FRACMUL(ap1, cs); + const long am1_cs = FRACMUL(am1, cs); + const long twosqrtalpha = 2*FRACMUL(sqrtA, alpha); + int32_t a0, a1, a2; /* these are all s6.25 format */ + int32_t b0, b1, b2; + + /* [0.1 .. 40] */ + b0 = FRACMUL_SHL(A, ap1 - am1_cs + twosqrtalpha, 2); + /* [-16 .. 63.4] */ + b1 = FRACMUL_SHL(A, am1 - ap1_cs, 3); + /* [0 .. 
31.7] */ + b2 = FRACMUL_SHL(A, ap1 - am1_cs - twosqrtalpha, 2); + /* [0.5 .. 10] */ + a0 = ap1 + am1_cs + twosqrtalpha; + /* [-16 .. 4] */ + a1 = -2*(am1 + ap1_cs); + /* [0 .. 8] */ + a2 = ap1 + am1_cs - twosqrtalpha; + + /* [0.1 .. 1.99] */ + const long rcp_a0 = fp_div(1, a0, 55); /* s1.30 */ + *c++ = FRACMUL_SHL(b0, rcp_a0, 2); /* [0.06 .. 15.9] */ + *c++ = FRACMUL_SHL(b1, rcp_a0, 2); /* [-2 .. 31.7] */ + *c++ = FRACMUL_SHL(b2, rcp_a0, 2); /* [0 .. 15.9] */ + *c++ = FRACMUL_SHL(-a1, rcp_a0, 2); /* [-2 .. 2] */ + *c++ = FRACMUL_SHL(-a2, rcp_a0, 2); /* [0 .. 1] */ +} + +/** + * Calculate coefficients for highshelf filter. Parameters are as for + * eq_pk_coefs, but the coefficient format is s5.26 fixed point. + */ +void eq_hs_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c) +{ + long cs; + const long one = 1 << 25; /* s6.25 */ + const long sqrtA = get_replaygain_int(db*5/2) << 2; /* 10^(db/80), s5.26 */ + const long A = FRACMUL_SHL(sqrtA, sqrtA, 8); /* s2.29 */ + const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */ + const long ap1 = (A >> 4) + one; + const long am1 = (A >> 4) - one; + const long ap1_cs = FRACMUL(ap1, cs); + const long am1_cs = FRACMUL(am1, cs); + const long twosqrtalpha = 2*FRACMUL(sqrtA, alpha); + int32_t a0, a1, a2; /* these are all s6.25 format */ + int32_t b0, b1, b2; + + /* [0.1 .. 40] */ + b0 = FRACMUL_SHL(A, ap1 + am1_cs + twosqrtalpha, 2); + /* [-63.5 .. 16] */ + b1 = -FRACMUL_SHL(A, am1 + ap1_cs, 3); + /* [0 .. 32] */ + b2 = FRACMUL_SHL(A, ap1 + am1_cs - twosqrtalpha, 2); + /* [0.5 .. 10] */ + a0 = ap1 - am1_cs + twosqrtalpha; + /* [-4 .. 16] */ + a1 = 2*(am1 - ap1_cs); + /* [0 .. 8] */ + a2 = ap1 - am1_cs - twosqrtalpha; + + /* [0.1 .. 1.99] */ + const long rcp_a0 = fp_div(1, a0, 55); /* s1.30 */ + *c++ = FRACMUL_SHL(b0, rcp_a0, 2); /* [0 .. 16] */ + *c++ = FRACMUL_SHL(b1, rcp_a0, 2); /* [-31.7 .. 2] */ + *c++ = FRACMUL_SHL(b2, rcp_a0, 2); /* [0 .. 16] */ + *c++ = FRACMUL_SHL(-a1, rcp_a0, 2); /* [-2 .. 
2] */ + *c++ = FRACMUL_SHL(-a2, rcp_a0, 2); /* [0 .. 1] */ +} + +/* We realise the filters as a second order direct form 1 structure. Direct + * form 1 was chosen because of better numerical properties for fixed point + * implementations. + */ + +#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM)) +void eq_filter(int32_t **x, struct eqfilter *f, unsigned num, + unsigned channels, unsigned shift) +{ + unsigned c, i; + long long acc; + + /* Direct form 1 filtering code. + y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2], + where y[] is output and x[] is input. + */ + + for (c = 0; c < channels; c++) { + for (i = 0; i < num; i++) { + acc = (long long) x[c][i] * f->coefs[0]; + acc += (long long) f->history[c][0] * f->coefs[1]; + acc += (long long) f->history[c][1] * f->coefs[2]; + acc += (long long) f->history[c][2] * f->coefs[3]; + acc += (long long) f->history[c][3] * f->coefs[4]; + f->history[c][1] = f->history[c][0]; + f->history[c][0] = x[c][i]; + f->history[c][3] = f->history[c][2]; + x[c][i] = (acc << shift) >> 32; + f->history[c][2] = x[c][i]; + } + } +} +#endif + diff --git a/lib/rbcodec/dsp/eq.h b/lib/rbcodec/dsp/eq.h new file mode 100644 index 0000000000..a44e9153ac --- /dev/null +++ b/lib/rbcodec/dsp/eq.h @@ -0,0 +1,50 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006-2007 Thom Johansen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#ifndef _EQ_H +#define _EQ_H + +#include +#include + +/* These depend on the fixed point formats used by the different filter types + and need to be changed when they change. + */ +#define FILTER_BISHELF_SHIFT 5 +#define EQ_PEAK_SHIFT 4 +#define EQ_SHELF_SHIFT 6 + +struct eqfilter { + int32_t coefs[5]; /* Order is b0, b1, b2, a1, a2 */ + int32_t history[2][4]; +}; + +void filter_shelf_coefs(unsigned long cutoff, long A, bool low, int32_t *c); +void filter_bishelf_coefs(unsigned long cutoff_low, unsigned long cutoff_high, + long A_low, long A_high, long A, int32_t *c); +void eq_pk_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c); +void eq_ls_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c); +void eq_hs_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c); +void eq_filter(int32_t **x, struct eqfilter *f, unsigned num, + unsigned channels, unsigned shift); + +#endif + diff --git a/lib/rbcodec/dsp/eq_arm.S b/lib/rbcodec/dsp/eq_arm.S new file mode 100644 index 0000000000..b0e1771e89 --- /dev/null +++ b/lib/rbcodec/dsp/eq_arm.S @@ -0,0 +1,89 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006-2007 Thom Johansen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include "config.h" + +/* uncomment this to make filtering calculate lower bits after shifting. + * without this, "shift" of the lower bits will be lost here. + */ +/* #define HIGH_PRECISION */ + +/* + * void eq_filter(int32_t **x, struct eqfilter *f, unsigned num, + * unsigned channels, unsigned shift) + */ +#if CONFIG_CPU == PP5002 + .section .icode,"ax",%progbits +#else + .text +#endif + .global eq_filter +eq_filter: + ldr r12, [sp] @ get shift parameter + stmdb sp!, { r0-r11, lr } @ save all params and clobbered regs + ldmia r1!, { r4-r8 } @ load coefs + mov r10, r1 @ loop prelude expects filter struct addr in r10 + +.filterloop: + ldr r9, [sp] @ get pointer to this channels data + add r0, r9, #4 + str r0, [sp] @ save back pointer to next channels data + ldr r9, [r9] @ r9 = x[] + ldr r14, [sp, #8] @ r14 = numsamples + ldmia r10, { r0-r3 } @ load history, r10 should be filter struct addr + str r10, [sp, #4] @ save it for loop end + + /* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator, + * r12 = shift amount, r14 = number of samples. + */ +.loop: + /* Direct form 1 filtering code. + * y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2], + * where y[] is output and x[] is input. This is performed out of order to + * reuse registers, we're pretty short on regs. 
+ */ + smull r10, r11, r6, r1 @ acc = b2*x[i - 2] + mov r1, r0 @ fix input history + smlal r10, r11, r5, r0 @ acc += b1*x[i - 1] + ldr r0, [r9] @ load input and fix history in same operation + smlal r10, r11, r7, r2 @ acc += a1*y[i - 1] + smlal r10, r11, r8, r3 @ acc += a2*y[i - 2] + smlal r10, r11, r4, r0 @ acc += b0*x[i] /* avoid stall on arm9*/ + mov r3, r2 @ fix output history + mov r2, r11, asl r12 @ get upper part of result and shift left +#ifdef HIGH_PRECISION + rsb r11, r12, #32 @ get shift amount for lower part + orr r2, r2, r10, lsr r11 @ then mix in correctly shifted lower part +#endif + str r2, [r9], #4 @ save result + subs r14, r14, #1 @ are we done with this channel? + bne .loop + + ldr r10, [sp, #4] @ load filter struct pointer + stmia r10!, { r0-r3 } @ save back history + ldr r11, [sp, #12] @ load number of channels + subs r11, r11, #1 @ all channels processed? + strne r11, [sp, #12] + bne .filterloop + + add sp, sp, #16 @ compensate for temp storage + ldmpc regs=r4-r11 + diff --git a/lib/rbcodec/dsp/eq_cf.S b/lib/rbcodec/dsp/eq_cf.S new file mode 100644 index 0000000000..30a28b9d99 --- /dev/null +++ b/lib/rbcodec/dsp/eq_cf.S @@ -0,0 +1,91 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006-2007 Thom Johansen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + +/* uncomment this to make filtering calculate lower bits after shifting. + * without this, "shift" - 1 of the lower bits will be lost here. + */ +/* #define HIGH_PRECISION */ + +/* + * void eq_filter(int32_t **x, struct eqfilter *f, unsigned num, + * unsigned channels, unsigned shift) + * + * Filters 'num' samples of each of 'channels' channels in place: every + * result is stored over the input sample it was computed from. + * x: array of per-channel sample buffer pointers + * f: five coefficients (b0, b1, b2, a1, a2) followed, per channel, by + * four history words (x[i - 1], x[i - 2], y[i - 1], y[i - 2]); the + * history is written back when a channel is finished + * shift: left shift restoring the fixed point format; one bit comes from + * the EMAC, the remaining shift - 1 bits are shifted explicitly + * The stack slots of 'x' and 'channels' are used as working counters. + */ + .text + .global eq_filter +eq_filter: + lea.l (-11*4, %sp), %sp + movem.l %d2-%d7/%a2-%a6, (%sp) | save clobbered regs + move.l (11*4+8, %sp), %a5 | fetch filter structure address + move.l (11*4+20, %sp), %d7 | load shift count + subq.l #1, %d7 | EMAC gives us one free shift +#ifdef HIGH_PRECISION + moveq.l #8, %d6 + sub.l %d7, %d6 | shift for lower part of accumulator +#endif + movem.l (%a5), %a0-%a4 | load coefs + lea.l (5*4, %a5), %a5 | point to filter history + +.filterloop: + move.l (11*4+4, %sp), %a6 | load input channel pointer + addq.l #4, (11*4+4, %sp) | point x to next channel + move.l (%a6), %a6 + move.l (11*4+12, %sp), %d5 | number of samples + movem.l (%a5), %d0-%d3 | load filter history + + /* d0-d3 = history, d4 = temp, d5 = sample count, d6 = lower shift amount, + * d7 = upper shift amount, a0-a4 = coefs, a5 = history pointer, a6 = x[] + */ +.loop: + /* Direct form 1 filtering code. We assume DSP has put EMAC in frac mode. + * y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2], + * where y[] is output and x[] is input. This is performed out of order + * to do parallel load of input value. + * + * NOTE(review): the HIGH_PRECISION path below looks incomplete - the or.l + * leaves the combined value in d4 while d2 is what gets stored, and the + * move.b only replaces the low byte of d4 without clearing the rest. + * Confirm (including which accext01 byte belongs to acc0) before enabling. 
+ */ + mac.l %a2, %d1, %acc0 | acc = b2*x[i - 2] + move.l %d0, %d1 | fix input history + mac.l %a1, %d0, (%a6), %d0, %acc0 | acc += b1*x[i - 1], x[i] -> d0 + mac.l %a0, %d0, %acc0 | acc += b0*x[i] + mac.l %a3, %d2, %acc0 | acc += a1*y[i - 1] + mac.l %a4, %d3, %acc0 | acc += a2*y[i - 2] + move.l %d2, %d3 | fix output history +#ifdef HIGH_PRECISION + move.l %accext01, %d2 | fetch lower part of accumulator + move.b %d2, %d4 | clear upper three bytes + lsr.l %d6, %d4 | shift lower bits +#endif + movclr.l %acc0, %d2 | fetch upper part of result + asl.l %d7, %d2 | restore fixed point format +#ifdef HIGH_PRECISION + or.l %d2, %d4 | combine lower and upper parts +#endif + move.l %d2, (%a6)+ | save result + subq.l #1, %d5 | are we done with this channel? + jne .loop + + movem.l %d0-%d3, (%a5) | save history back to struct + lea.l (4*4, %a5), %a5 | point to next channel's history + subq.l #1, (11*4+16, %sp) | have we processed both channels? + jne .filterloop + + movem.l (%sp), %d2-%d7/%a2-%a6 + lea.l (11*4, %sp), %sp + rts + diff --git a/lib/rbcodec/dsp/eqs/Acoustic.cfg b/lib/rbcodec/dsp/eqs/Acoustic.cfg new file mode 100644 index 0000000000..34b5ed8a2b --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Acoustic.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 45 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 45 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 10 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 15 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 30 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 20 diff --git a/lib/rbcodec/dsp/eqs/Bass.cfg b/lib/rbcodec/dsp/eqs/Bass.cfg new file mode 100644 index 0000000000..2742459081 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Bass.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 50 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 50 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 35 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 15 +eq band 3 cutoff: 4000 +eq band 3 q: 
10 +eq band 3 gain: 5 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: -5 diff --git a/lib/rbcodec/dsp/eqs/Classical.cfg b/lib/rbcodec/dsp/eqs/Classical.cfg new file mode 100644 index 0000000000..bf2f9f9566 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Classical.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 50 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 50 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 40 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: -20 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 10 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 20 diff --git a/lib/rbcodec/dsp/eqs/Default.cfg b/lib/rbcodec/dsp/eqs/Default.cfg new file mode 100644 index 0000000000..d6f345fa9e --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Default.cfg @@ -0,0 +1,17 @@ +eq enabled: off +eq precut: 0 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 0 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 0 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 0 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 0 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 0 diff --git a/lib/rbcodec/dsp/eqs/Disco.cfg b/lib/rbcodec/dsp/eqs/Disco.cfg new file mode 100644 index 0000000000..f894f26da1 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Disco.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 45 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 30 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 10 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 45 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 25 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 10 diff --git a/lib/rbcodec/dsp/eqs/Electronic.cfg b/lib/rbcodec/dsp/eqs/Electronic.cfg new file mode 100644 index 0000000000..e70c911272 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Electronic.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 55 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 45 +eq band 1 cutoff: 200 +eq band 1 q: 
10 +eq band 1 gain: 5 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 25 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 15 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 55 diff --git a/lib/rbcodec/dsp/eqs/Hip-Hop.cfg b/lib/rbcodec/dsp/eqs/Hip-Hop.cfg new file mode 100644 index 0000000000..2d38425dc4 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Hip-Hop.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 65 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 65 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 25 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: -10 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 15 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 35 diff --git a/lib/rbcodec/dsp/eqs/Jazz.cfg b/lib/rbcodec/dsp/eqs/Jazz.cfg new file mode 100644 index 0000000000..f576f9fcc1 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Jazz.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 60 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 40 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 15 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: -25 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 5 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 60 diff --git a/lib/rbcodec/dsp/eqs/Lounge.cfg b/lib/rbcodec/dsp/eqs/Lounge.cfg new file mode 100644 index 0000000000..39ae23a7e7 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Lounge.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 20 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: -25 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 5 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 20 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: -15 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 15 diff --git a/lib/rbcodec/dsp/eqs/Pop.cfg b/lib/rbcodec/dsp/eqs/Pop.cfg new file mode 100644 index 0000000000..1d8cefe173 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Pop.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 50 
+eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: -10 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 5 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 50 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 15 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: -10 diff --git a/lib/rbcodec/dsp/eqs/R&B.cfg b/lib/rbcodec/dsp/eqs/R&B.cfg new file mode 100644 index 0000000000..a460b587f5 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/R&B.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 45 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 35 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 45 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 5 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 25 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 30 diff --git a/lib/rbcodec/dsp/eqs/Rock.cfg b/lib/rbcodec/dsp/eqs/Rock.cfg new file mode 100644 index 0000000000..ec4f0356a8 --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Rock.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 45 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: 25 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 10 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 0 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 20 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 45 diff --git a/lib/rbcodec/dsp/eqs/Vocal.cfg b/lib/rbcodec/dsp/eqs/Vocal.cfg new file mode 100644 index 0000000000..1de754f07c --- /dev/null +++ b/lib/rbcodec/dsp/eqs/Vocal.cfg @@ -0,0 +1,17 @@ +eq enabled: on +eq precut: 45 +eq band 0 cutoff: 60 +eq band 0 q: 7 +eq band 0 gain: -45 +eq band 1 cutoff: 200 +eq band 1 q: 10 +eq band 1 gain: 5 +eq band 2 cutoff: 800 +eq band 2 q: 10 +eq band 2 gain: 45 +eq band 3 cutoff: 4000 +eq band 3 q: 10 +eq band 3 gain: 20 +eq band 4 cutoff: 12000 +eq band 4 q: 7 +eq band 4 gain: 0 diff --git a/lib/rbcodec/dsp/tdspeed.c b/lib/rbcodec/dsp/tdspeed.c new file mode 100644 index 0000000000..731be12621 --- /dev/null +++ 
b/lib/rbcodec/dsp/tdspeed.c @@ -0,0 +1,450 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 by Nicolas Pitre + * Copyright (C) 2006-2007 by Stéphane Doyon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include +#include +#include +#include +#include "sound.h" +#include "core_alloc.h" +#include "system.h" +#include "tdspeed.h" +#include "settings.h" + +#define assert(cond) + +#define MIN_RATE 8000 +#define MAX_RATE 48000 /* double buffer for double rate */ +#define MINFREQ 100 + +#define FIXED_BUFSIZE 3072 /* 48KHz factor 3.0 */ + +static int32_t** dsp_src; +static int handles[4]; +static int32_t *overlap_buffer[2] = { NULL, NULL }; +static int32_t *outbuf[2] = { NULL, NULL }; + +static int move_callback(int handle, void* current, void* new) +{ + /* TODO */ + (void)handle; + if (dsp_src) + { + int ch = (current == outbuf[0]) ? 0 : 1; + dsp_src[ch] = outbuf[ch] = new; + } + return BUFLIB_CB_OK; +} + +static struct buflib_callbacks ops = { + .move_callback = move_callback, + .shrink_callback = NULL, +}; + +static int ovl_move_callback(int handle, void* current, void* new) +{ + /* TODO */ + (void)handle; + if (dsp_src) + { + int ch = (current == overlap_buffer[0]) ? 
0 : 1; + overlap_buffer[ch] = new; + } + return BUFLIB_CB_OK; +} + +static struct buflib_callbacks ovl_ops = { + .move_callback = ovl_move_callback, + .shrink_callback = NULL, +}; + + +static struct tdspeed_state_s +{ + bool stereo; + int32_t shift_max; /* maximum displacement on a frame */ + int32_t src_step; /* source window pace */ + int32_t dst_step; /* destination window pace */ + int32_t dst_order; /* power of two for dst_step */ + int32_t ovl_shift; /* overlap buffer frame shift */ + int32_t ovl_size; /* overlap buffer used size */ + int32_t ovl_space; /* overlap buffer size */ + int32_t *ovl_buff[2]; /* overlap buffer */ +} tdspeed_state; + +void tdspeed_init(void) +{ + if (!global_settings.timestretch_enabled) + return; + + /* Allocate buffers */ + if (overlap_buffer[0] == NULL) + { + handles[0] = core_alloc_ex("tdspeed ovl left", FIXED_BUFSIZE * sizeof(int32_t), &ovl_ops); + overlap_buffer[0] = core_get_data(handles[0]); + } + if (overlap_buffer[1] == NULL) + { + handles[1] = core_alloc_ex("tdspeed ovl right", FIXED_BUFSIZE * sizeof(int32_t), &ovl_ops); + overlap_buffer[1] = core_get_data(handles[1]); + } + if (outbuf[0] == NULL) + { + handles[2] = core_alloc_ex("tdspeed left", TDSPEED_OUTBUFSIZE * sizeof(int32_t), &ops); + outbuf[0] = core_get_data(handles[2]); + } + if (outbuf[1] == NULL) + { + handles[3] = core_alloc_ex("tdspeed right", TDSPEED_OUTBUFSIZE * sizeof(int32_t), &ops); + outbuf[1] = core_get_data(handles[3]); + } +} + +void tdspeed_finish(void) +{ + for(unsigned i = 0; i < ARRAYLEN(handles); i++) + { + if (handles[i] > 0) + { + core_free(handles[i]); + handles[i] = 0; + } + } + overlap_buffer[0] = overlap_buffer[1] = NULL; + outbuf[0] = outbuf[1] = NULL; +} + +bool tdspeed_config(int samplerate, bool stereo, int32_t factor) +{ + struct tdspeed_state_s *st = &tdspeed_state; + int src_frame_sz; + + /* Check buffers were allocated ok */ + if (overlap_buffer[0] == NULL || overlap_buffer[1] == NULL) + return false; + + if (outbuf[0] == NULL || 
outbuf[1] == NULL) + return false; + + /* Check parameters */ + if (factor == PITCH_SPEED_100) + return false; + + if (samplerate < MIN_RATE || samplerate > MAX_RATE) + return false; + + if (factor < STRETCH_MIN || factor > STRETCH_MAX) + return false; + + st->stereo = stereo; + st->dst_step = samplerate / MINFREQ; + + if (factor > PITCH_SPEED_100) + st->dst_step = st->dst_step * PITCH_SPEED_100 / factor; + + st->dst_order = 1; + + while (st->dst_step >>= 1) + st->dst_order++; + + st->dst_step = (1 << st->dst_order); + st->src_step = st->dst_step * factor / PITCH_SPEED_100; + st->shift_max = (st->dst_step > st->src_step) ? st->dst_step : st->src_step; + + src_frame_sz = st->shift_max + st->dst_step; + + if (st->dst_step > st->src_step) + src_frame_sz += st->dst_step - st->src_step; + + st->ovl_space = ((src_frame_sz - 2) / st->src_step) * st->src_step + + src_frame_sz; + + if (st->src_step > st->dst_step) + st->ovl_space += 2*st->src_step - st->dst_step; + + if (st->ovl_space > FIXED_BUFSIZE) + st->ovl_space = FIXED_BUFSIZE; + + st->ovl_size = 0; + st->ovl_shift = 0; + + st->ovl_buff[0] = overlap_buffer[0]; + + if (stereo) + st->ovl_buff[1] = overlap_buffer[1]; + else + st->ovl_buff[1] = st->ovl_buff[0]; + + return true; +} + +static int tdspeed_apply(int32_t *buf_out[2], int32_t *buf_in[2], + int data_len, int last, int out_size) +/* data_len in samples */ +{ + struct tdspeed_state_s *st = &tdspeed_state; + int32_t *dest[2]; + int32_t next_frame, prev_frame, src_frame_sz; + bool stereo = buf_in[0] != buf_in[1]; + + assert(stereo == st->stereo); + + src_frame_sz = st->shift_max + st->dst_step; + + if (st->dst_step > st->src_step) + src_frame_sz += st->dst_step - st->src_step; + + /* deal with overlap data first, if any */ + if (st->ovl_size) + { + int32_t have, copy, steps; + have = st->ovl_size; + + if (st->ovl_shift > 0) + have -= st->ovl_shift; + + /* append just enough data to have all of the overlap buffer consumed */ + steps = (have - 1) / st->src_step; + 
copy = steps * st->src_step + src_frame_sz - have; + + if (copy < src_frame_sz - st->dst_step) + copy += st->src_step; /* one more step to allow for pregap data */ + + if (copy > data_len) + copy = data_len; + + assert(st->ovl_size + copy <= FIXED_BUFSIZE); + memcpy(st->ovl_buff[0] + st->ovl_size, buf_in[0], + copy * sizeof(int32_t)); + + if (stereo) + memcpy(st->ovl_buff[1] + st->ovl_size, buf_in[1], + copy * sizeof(int32_t)); + + if (!last && have + copy < src_frame_sz) + { + /* still not enough to process at least one frame */ + st->ovl_size += copy; + return 0; + } + + /* recursively call ourselves to process the overlap buffer */ + have = st->ovl_size; + st->ovl_size = 0; + + if (copy == data_len) + { + assert(have + copy <= FIXED_BUFSIZE); + return tdspeed_apply(buf_out, st->ovl_buff, have+copy, last, + out_size); + } + + assert(have + copy <= FIXED_BUFSIZE); + int i = tdspeed_apply(buf_out, st->ovl_buff, have+copy, -1, out_size); + + dest[0] = buf_out[0] + i; + dest[1] = buf_out[1] + i; + + /* readjust pointers to account for data already consumed */ + next_frame = copy - src_frame_sz + st->src_step; + prev_frame = next_frame - st->ovl_shift; + } + else + { + dest[0] = buf_out[0]; + dest[1] = buf_out[1]; + + next_frame = prev_frame = 0; + + if (st->ovl_shift > 0) + next_frame += st->ovl_shift; + else + prev_frame += -st->ovl_shift; + } + + st->ovl_shift = 0; + + /* process all complete frames */ + while (data_len - next_frame >= src_frame_sz) + { + /* find frame overlap by autocorelation */ + int const INC1 = 8; + int const INC2 = 32; + + int64_t min_delta = INT64_MAX; /* most positive */ + int shift = 0; + + /* Power of 2 of a 28bit number requires 56bits, can accumulate + 256times in a 64bit variable. 
*/ + assert(st->dst_step / INC2 <= 256); + assert(next_frame + st->shift_max - 1 + st->dst_step - 1 < data_len); + assert(prev_frame + st->dst_step - 1 < data_len); + + for (int i = 0; i < st->shift_max; i += INC1) + { + int64_t delta = 0; + + int32_t *curr = buf_in[0] + next_frame + i; + int32_t *prev = buf_in[0] + prev_frame; + + for (int j = 0; j < st->dst_step; j += INC2, curr += INC2, prev += INC2) + { + int32_t diff = *curr - *prev; + delta += abs(diff); + + if (delta >= min_delta) + goto skip; + } + + if (stereo) + { + curr = buf_in[1] + next_frame + i; + prev = buf_in[1] + prev_frame; + + for (int j = 0; j < st->dst_step; j += INC2, curr += INC2, prev += INC2) + { + int32_t diff = *curr - *prev; + delta += abs(diff); + + if (delta >= min_delta) + goto skip; + } + } + + min_delta = delta; + shift = i; +skip:; + } + + /* overlap fading-out previous frame with fading-in current frame */ + int32_t *curr = buf_in[0] + next_frame + shift; + int32_t *prev = buf_in[0] + prev_frame; + + int32_t *d = dest[0]; + + assert(next_frame + shift + st->dst_step - 1 < data_len); + assert(prev_frame + st->dst_step - 1 < data_len); + assert(dest[0] - buf_out[0] + st->dst_step - 1 < out_size); + + for (int i = 0, j = st->dst_step; j; i++, j--) + { + *d++ = (*curr++ * (int64_t)i + + *prev++ * (int64_t)j) >> st->dst_order; + } + + dest[0] = d; + + if (stereo) + { + curr = buf_in[1] + next_frame + shift; + prev = buf_in[1] + prev_frame; + + d = dest[1]; + + for (int i = 0, j = st->dst_step; j; i++, j--) + { + assert(d < buf_out[1] + out_size); + + *d++ = (*curr++ * (int64_t)i + + *prev++ * (int64_t)j) >> st->dst_order; + } + + dest[1] = d; + } + + /* adjust pointers for next frame */ + prev_frame = next_frame + shift + st->dst_step; + next_frame += st->src_step; + + /* here next_frame - prev_frame = src_step - dst_step - shift */ + assert(next_frame - prev_frame == st->src_step - st->dst_step - shift); + } + + /* now deal with remaining partial frames */ + if (last == -1) + { + /* 
special overlap buffer processing: remember frame shift only */ + st->ovl_shift = next_frame - prev_frame; + } + else if (last != 0) + { + /* last call: purge all remaining data to output buffer */ + int i = data_len - prev_frame; + + assert(dest[0] + i <= buf_out[0] + out_size); + memcpy(dest[0], buf_in[0] + prev_frame, i * sizeof(int32_t)); + + dest[0] += i; + + if (stereo) + { + assert(dest[1] + i <= buf_out[1] + out_size); + memcpy(dest[1], buf_in[1] + prev_frame, i * sizeof(int32_t)); + dest[1] += i; + } + } + else + { + /* preserve remaining data + needed overlap data for next call */ + st->ovl_shift = next_frame - prev_frame; + int i = (st->ovl_shift < 0) ? next_frame : prev_frame; + st->ovl_size = data_len - i; + + assert(st->ovl_size <= FIXED_BUFSIZE); + memcpy(st->ovl_buff[0], buf_in[0] + i, st->ovl_size * sizeof(int32_t)); + + if (stereo) + memcpy(st->ovl_buff[1], buf_in[1] + i, st->ovl_size * sizeof(int32_t)); + } + + return dest[0] - buf_out[0]; +} + +long tdspeed_est_output_size() +{ + return TDSPEED_OUTBUFSIZE; +} + +long tdspeed_est_input_size(long size) +{ + struct tdspeed_state_s *st = &tdspeed_state; + + size = (size - st->ovl_size) * st->src_step / st->dst_step; + + if (size < 0) + size = 0; + + return size; +} + +int tdspeed_doit(int32_t *src[], int count) +{ + dsp_src = src; + count = tdspeed_apply( (int32_t *[2]) { outbuf[0], outbuf[1] }, + src, count, 0, TDSPEED_OUTBUFSIZE); + + src[0] = outbuf[0]; + src[1] = outbuf[1]; + + return count; +} + diff --git a/lib/rbcodec/dsp/tdspeed.h b/lib/rbcodec/dsp/tdspeed.h new file mode 100644 index 0000000000..e91eeb1701 --- /dev/null +++ b/lib/rbcodec/dsp/tdspeed.h @@ -0,0 +1,49 @@ +/*************************************************************************** + * __________ __ ___. 
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 by Nicolas Pitre + * Copyright (C) 2006-2007 by Stéphane Doyon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#ifndef _TDSPEED_H +#define _TDSPEED_H + +#include "dsp.h" + +#define TDSPEED_OUTBUFSIZE 4096 + +/* some #define functions to get the pitch, stretch and speed values based on */ +/* two known values. Remember that params are alphabetical. */ +#define GET_SPEED(pitch, stretch) \ + ((pitch * stretch + PITCH_SPEED_100 / 2L) / PITCH_SPEED_100) +#define GET_PITCH(speed, stretch) \ + ((speed * PITCH_SPEED_100 + stretch / 2L) / stretch) +#define GET_STRETCH(pitch, speed) \ + ((speed * PITCH_SPEED_100 + pitch / 2L) / pitch) + +void tdspeed_init(void); +void tdspeed_finish(void); +bool tdspeed_config(int samplerate, bool stereo, int32_t factor); +long tdspeed_est_output_size(void); +long tdspeed_est_input_size(long size); +int tdspeed_doit(int32_t *src[], int count); + +#define STRETCH_MAX (250L * PITCH_SPEED_PRECISION) /* 250% */ +#define STRETCH_MIN (35L * PITCH_SPEED_PRECISION) /* 35% */ + +#endif -- cgit v1.2.3