From 51a8be1a0f11d329af99bf9c33a61af11d7a43ed Mon Sep 17 00:00:00 2001 From: Andree Buschmann Date: Mon, 15 Feb 2010 16:11:49 +0000 Subject: Further optimization of atrac3 codec. Refactor gainCompensateAndOverlap(), avoid multiplication if not needed, unroll loops. Speeds up codec by 1.1 MHz (+2%) on ARM. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24668 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libatrac/atrac3.c | 183 +++++++++++++++++++++++++++++++++--------- 1 file changed, 145 insertions(+), 38 deletions(-) (limited to 'apps/codecs') diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c index 18a4f120e1..ff6e639a3b 100644 --- a/apps/codecs/libatrac/atrac3.c +++ b/apps/codecs/libatrac/atrac3.c @@ -59,6 +59,7 @@ static int32_t qmf_window[48] IBSS_ATTR; static VLC spectral_coeff_tab[7]; static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; + /** * Matrixing within quadrature mirror synthesis filter. * @@ -91,6 +92,7 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; } #endif + /** * Matrixing within quadrature mirror synthesis filter. * @@ -195,6 +197,7 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; } #endif + /** * IMDCT windowing. * @@ -214,6 +217,7 @@ atrac3_imdct_windowing(int32_t *buffer, } } + /** * Quadrature mirror synthesis filter. * @@ -240,12 +244,13 @@ static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t)); } + /** * Regular 512 points IMDCT without overlapping, with the exception of the swapping of odd bands * caused by the reverse spectra of the QMF. 
* - * @param pInput float input - * @param pOutput float output + * @param pInput input + * @param pOutput output * @param odd_band 1 if the band is an odd band */ @@ -274,7 +279,7 @@ static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){ uint32_t* obuf = (uint32_t*) out; #if ((defined(TEST) || defined(SIMULATOR)) && !defined(CPU_ARM)) - off = 0; //no check for memory alignment of inbuffer + off = 0; /* no check for memory alignment of inbuffer */ #else off = (intptr_t)inbuffer & 3; #endif /* TEST */ @@ -306,6 +311,7 @@ static void init_atrac3_transforms(void) { } } + /** * Mantissa decoding * @@ -338,7 +344,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding } else { for (cnt = 0; cnt < numCodes; cnt++) { if (numBits) - code = get_bits(gb, numBits); //numBits is always 4 in this case + code = get_bits(gb, numBits); /* numBits is always 4 in this case */ else code = 0; mantissas[cnt*2] = seTab_0[code >> 2]; @@ -366,6 +372,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding } } + /** * Restore the quantized band spectrum coefficients * @@ -382,8 +389,8 @@ int decodeSpectrum (GetBitContext *gb, int32_t *pOut) int mantissas[128]; int32_t SF; - numSubbands = get_bits(gb, 5); // number of coded subbands - codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC + numSubbands = get_bits(gb, 5); /* number of coded subbands */ + codingMode = get_bits1(gb); /* coding Mode: 0 - VLC/ 1-CLC */ /* Get the VLC selector table for the subbands, 0 means not coded. 
*/ for (cnt = 0; cnt <= numSubbands; cnt++) @@ -437,6 +444,7 @@ int decodeSpectrum (GetBitContext *gb, int32_t *pOut) return numSubbands; } + /** * Restore the quantized tonal components * @@ -517,6 +525,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent return component_count; } + /** * Decode gain parameters for the coded bands * @@ -554,21 +563,125 @@ static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands) return 0; } + +/** + * Apply fix (constant) gain and overlap for sample[start...255]. + * + * @param pIn input buffer + * @param pPrev previous buffer to perform overlap against + * @param pOut output buffer + * @param start index to start with (always a multiple of 8) + * @param gain gain to apply + */ + +static void applyFixGain (int32_t *pIn, int32_t *pPrev, int32_t *pOut, + int32_t start, int32_t gain) +{ + int32_t i = start; + + /* start is always a multiple of 8 and therefore allows us to unroll the + * loop to 8 calculation per loop + */ + if (ONE_16 == gain) { + /* gain1 = 1.0 -> no multiplication needed, just adding */ + /* Remark: This path is called >90%. */ + do { + pOut[i] = pIn[i] + pPrev[i]; i++; + pOut[i] = pIn[i] + pPrev[i]; i++; + pOut[i] = pIn[i] + pPrev[i]; i++; + pOut[i] = pIn[i] + pPrev[i]; i++; + pOut[i] = pIn[i] + pPrev[i]; i++; + pOut[i] = pIn[i] + pPrev[i]; i++; + pOut[i] = pIn[i] + pPrev[i]; i++; + pOut[i] = pIn[i] + pPrev[i]; i++; + } while (i<256); + } else { + /* gain1 != 1.0 -> we need to do a multiplication */ + /* Remark: This path is called seldom. 
*/ + do { + pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++; + pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++; + pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++; + pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++; + pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++; + pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++; + pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++; + pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++; + } while (i<256); + } +} + + +/** + * Apply variable gain and overlap. Returns sample index after applying gain, + * resulting sample index is always a multiple of 8. + * + * @param pIn input buffer + * @param pPrev previous buffer to perform overlap against + * @param pOut output buffer + * @param start index to start with (always a multiple of 8) + * @param end end index for first loop (always a multiple of 8) + * @param gain1 current bands gain to apply + * @param gain2 next bands gain to apply + * @param gain_inc stepwise adaption from gain1 to gain2 + */ + +static int applyVariableGain (int32_t *pIn, int32_t *pPrev, int32_t *pOut, + int32_t start, int32_t end, + int32_t gain1, int32_t gain2, int32_t gain_inc) +{ + int32_t i = start; + + /* Apply fix gains until end index is reached */ + do { + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + } while (i < end); + + /* Interpolation is done over next eight samples */ + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + gain2 = fixmul16(gain2, gain_inc); + 
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + gain2 = fixmul16(gain2, gain_inc); + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + gain2 = fixmul16(gain2, gain_inc); + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + gain2 = fixmul16(gain2, gain_inc); + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + gain2 = fixmul16(gain2, gain_inc); + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + gain2 = fixmul16(gain2, gain_inc); + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + gain2 = fixmul16(gain2, gain_inc); + pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++; + gain2 = fixmul16(gain2, gain_inc); + + return i; +} + + /** * Apply gain parameters and perform the MDCT overlapping part * - * @param pIn input float buffer - * @param pPrev previous float buffer to perform overlap against - * @param pOut output float buffer + * @param pIn input buffer + * @param pPrev previous buffer to perform overlap against + * @param pOut output buffer * @param pGain1 current band gain info * @param pGain2 next band gain info */ -static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut, gain_info *pGain1, gain_info *pGain2) +static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut, + gain_info *pGain1, gain_info *pGain2) { /* gain compensation function */ int32_t gain1, gain2, gain_inc; - int cnt, numdata, nsample, startLoc, endLoc; + int cnt, numdata, nsample, startLoc; if (pGain2->num_gain_data == 0) gain1 = ONE_16; @@ -576,41 +689,35 @@ static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOu gain1 = gain_tab1[pGain2->levcode[0]]; if (pGain1->num_gain_data == 0) { - for (cnt = 0; cnt < 256; cnt++) - pOut[cnt] = fixmul16(pIn[cnt], gain1) + pPrev[cnt]; + /* Remark: This path is called >90%. 
*/ + /* Apply gain for all samples from 0...255 */ + applyFixGain(pIn, pPrev, pOut, 0, gain1); } else { + /* Remark: This path is called seldom. */ numdata = pGain1->num_gain_data; pGain1->loccode[numdata] = 32; pGain1->levcode[numdata] = 4; - - nsample = 0; // current sample = 0 + + nsample = 0; /* starting loop with =0 */ for (cnt = 0; cnt < numdata; cnt++) { startLoc = pGain1->loccode[cnt] * 8; - endLoc = startLoc + 8; - gain2 = gain_tab1[pGain1->levcode[cnt]]; + gain2 = gain_tab1[pGain1->levcode[cnt]]; gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15]; - /* interpolate */ - for (; nsample < startLoc; nsample++) - pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]), gain2); - - /* interpolation is done over eight samples */ - for (; nsample < endLoc; nsample++) { - pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]),gain2); - gain2 = fixmul16(gain2, gain_inc); - } + /* Apply variable gain (gain1 -> gain2) to samples */ + nsample = applyVariableGain(pIn, pPrev, pOut, nsample, startLoc, gain1, gain2, gain_inc); } - - for (; nsample < 256; nsample++) - pOut[nsample] = fixmul16(pIn[nsample], gain1) + pPrev[nsample]; + /* Apply gain for the residual samples from nsample...255 */ + applyFixGain(pIn, pPrev, pOut, nsample, gain1); } /* Delay for the overlapping part. */ memcpy(pPrev, &pIn[256], 256*sizeof(int32_t)); } + /** * Combine the tonal band spectrum and regular band spectrum * Return position of the last tonal coefficient @@ -639,6 +746,7 @@ static int addTonalComponents (int32_t *pSpectrum, int numComponents, tonal_comp return lastPos; } + /** * Linear equidistant interpolation between two points x and y. 7 interpolation * points can be calculated. Result is scaled by <<16. 
@@ -712,7 +820,7 @@ static void reverseMatrixing(int32_t *su1, int32_t *su2, int *pPrevCode, int *pC } break; default: - //assert(0); + /* assert(0) */; break; } } @@ -755,18 +863,16 @@ static void channelWeighting (int32_t *su1, int32_t *su2, int *p3) } } - /** * Decode a Sound Unit * * @param gb the GetBit context * @param pSnd the channel unit to be used - * @param pOut the decoded samples before IQMF in float representation + * @param pOut the decoded samples before IQMF * @param channelNum channel number * @param codingMode the coding mode (JOINT_STEREO or regular stereo/mono) */ - static int decodeChannelSoundUnit (GetBitContext *gb, channel_unit *pSnd, int32_t *pOut, int channelNum, int codingMode) { int band, result=0, numSubbands, lastTonal, numBands; @@ -807,8 +913,9 @@ static int decodeChannelSoundUnit (GetBitContext *gb, channel_unit *pSnd, int32_ /* Perform the IMDCT step without overlapping. */ if (band <= numBands) { IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf); - } else + } else { memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t)); + } /* gain compensation and overlapping */ gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]), @@ -982,12 +1089,12 @@ int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx) /* Take care of the codec-specific extradata. 
*/ if (rmctx->extradata_size == 14) { /* Parse the extradata, WAV format */ - DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0])); //Unknown value always 1 + DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0])); /* Unknown value always 1 */ q->samples_per_channel = rm_get_uint32le(&edata_ptr[2]); q->codingMode = rm_get_uint16le(&edata_ptr[6]); - DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8])); //Dupe of coding mode - q->frame_factor = rm_get_uint16le(&edata_ptr[10]); //Unknown always 1 - DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12])); //Unknown always 0 + DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8])); /* Dupe of coding mode */ + q->frame_factor = rm_get_uint16le(&edata_ptr[10]); /* Unknown always 1 */ + DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12])); /* Unknown always 0 */ /* setup */ q->samples_per_frame = 1024 * q->channels; -- cgit v1.2.3