Further optimization of the atrac3 codec: refactor gainCompensateAndOverlap(), avoid multiplication when it is not needed, and unroll loops. Speeds up the codec by 1.1 MHz (+2%) on ARM.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24668 a1c6a512-1295-4272-9138-f99709370657
author: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-02-15 16:11:49 +0000
committer: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-02-15 16:11:49 +0000
commit: 51a8be1a0f11d329af99bf9c33a61af11d7a43ed (patch)
tree: aab163bd31c1ffae00a30365a5e305089a13f21a
parent: ad1ba429b98b2f4803a5113a094812091d229c3f (diff)
download: rockbox-51a8be1a0f11d329af99bf9c33a61af11d7a43ed.tar.gz
rockbox-51a8be1a0f11d329af99bf9c33a61af11d7a43ed.zip
1 files changed, 145 insertions, 38 deletions
diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c
index 18a4f120e1..ff6e639a3b 100644
--- a/apps/codecs/libatrac/atrac3.c
+++ b/apps/codecs/libatrac/atrac3.c
@@ -59,6 +59,7 @@ static int32_t          qmf_window[48] IBSS_ATTR;
 static VLC              spectral_coeff_tab[7];
 static channel_unit     channel_units[2] IBSS_ATTR_LARGE_IRAM;
 /**
 * Matrixing within quadrature mirror synthesis filter.
 *
@@ -91,6 +92,7 @@ static channel_unit     channel_units[2] IBSS_ATTR_LARGE_IRAM;
    }
 #endif
 /**
 * Matrixing within quadrature mirror synthesis filter.
 *
@@ -195,6 +197,7 @@ static channel_unit     channel_units[2] IBSS_ATTR_LARGE_IRAM;
    }
 #endif
 /**
 * IMDCT windowing.
 *
@@ -214,6 +217,7 @@ atrac3_imdct_windowing(int32_t *buffer,
    }
 }
 /**
 * Quadrature mirror synthesis filter.
 *
@@ -240,12 +244,13 @@ static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut,
    memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t));
 }
 /**
 * Regular 512 points IMDCT without overlapping, with the exception of the swapping of odd bands
 * caused by the reverse spectra of the QMF.
 *
- * @param pInput    float input
+ * @param pInput    input
- * @param pOutput   float output
+ * @param pOutput   output
 * @param odd_band  1 if the band is an odd band
 */
@@ -274,7 +279,7 @@ static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){
    uint32_t* obuf = (uint32_t*) out;
 #if ((defined(TEST) || defined(SIMULATOR)) && !defined(CPU_ARM))
-    off = 0; //no check for memory alignment of inbuffer
+    off = 0; /* no check for memory alignment of inbuffer */
 #else
    off = (intptr_t)inbuffer & 3;
 #endif /* TEST */
@@ -306,6 +311,7 @@ static void init_atrac3_transforms(void) {
    }
 }
 /**
 * Mantissa decoding
 *
@@ -338,7 +344,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
        } else {
            for (cnt = 0; cnt < numCodes; cnt++) {
                if (numBits)
-                    code = get_bits(gb, numBits); //numBits is always 4 in this case
+                    code = get_bits(gb, numBits); /* numBits is always 4 in this case */
                else
                    code = 0;
                mantissas[cnt*2] = seTab_0[code >> 2];
@@ -366,6 +372,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
    }
 }
 /**
 * Restore the quantized band spectrum coefficients
 *
@@ -382,8 +389,8 @@ int decodeSpectrum (GetBitContext *gb, int32_t *pOut)
    int   mantissas[128];
    int32_t SF;
-    numSubbands = get_bits(gb, 5); // number of coded subbands
+    numSubbands = get_bits(gb, 5); /* number of coded subbands */
-    codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC
+    codingMode = get_bits1(gb); /* coding Mode: 0 - VLC/ 1-CLC */
    /* Get the VLC selector table for the subbands, 0 means not coded. */
    for (cnt = 0; cnt <= numSubbands; cnt++)
@@ -437,6 +444,7 @@ int decodeSpectrum (GetBitContext *gb, int32_t *pOut)
    return numSubbands;
 }
 /**
 * Restore the quantized tonal components
 *
@@ -517,6 +525,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
    return component_count;
 }
 /**
 * Decode gain parameters for the coded bands
 *
@@ -554,21 +563,125 @@ static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands)
    return 0;
 }
+/**
+ * Apply a fixed (constant) gain to pIn and add the overlap buffer pPrev,
+ * writing the result to pOut for sample[start...255].
+ *
+ * When gain equals ONE_16 the samples are only added; otherwise pIn[i] is
+ * first scaled with fixmul16(pIn[i], gain) and pPrev[i] is added afterwards.
+ *
+ * NOTE(review): both loops are do/while, so eight samples are processed
+ * before i<256 is tested. A call with start >= 256 would therefore still
+ * access indices start...start+7 -- confirm that callers never pass that.
+ *
+ * @param pIn           input buffer
+ * @param pPrev         previous buffer to perform overlap against
+ * @param pOut          output buffer
+ * @param start         index to start with (always a multiple of 8)
+ * @param gain          gain to apply
+ */
+ 
+static void applyFixGain (int32_t *pIn, int32_t *pPrev, int32_t *pOut, 
+                          int32_t start, int32_t gain)
+{
+    int32_t i = start;
+    
+    /* start is always a multiple of 8, which allows the loops below to be
+     * unrolled to 8 calculations per iteration without a remainder loop
+     */
+    if (ONE_16 == gain) {
+        /* gain == 1.0 -> no multiplication needed, just adding */
+        /* Remark: This path is called >90%. */
+        do {
+            pOut[i] = pIn[i] + pPrev[i]; i++;
+            pOut[i] = pIn[i] + pPrev[i]; i++;
+            pOut[i] = pIn[i] + pPrev[i]; i++;
+            pOut[i] = pIn[i] + pPrev[i]; i++;
+            pOut[i] = pIn[i] + pPrev[i]; i++;
+            pOut[i] = pIn[i] + pPrev[i]; i++;
+            pOut[i] = pIn[i] + pPrev[i]; i++;
+            pOut[i] = pIn[i] + pPrev[i]; i++;
+        } while (i<256);
+    } else {
+        /* gain != 1.0 -> we need to do a multiplication */
+        /* Remark: This path is called seldom. */
+        do {
+            pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
+            pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
+            pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
+            pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
+            pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
+            pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
+            pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
+            pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
+        } while (i<256);
+    }
+}
+/**
+ * Apply variable gain and overlap. Returns the sample index after applying
+ * gain; given a start that is a multiple of 8, the returned index is also a
+ * multiple of 8 because samples are always processed in groups of eight.
+ *
+ * Two phases are executed:
+ *  1) samples are written with the constant gains gain1/gain2 in groups of
+ *     eight until i >= end,
+ *  2) the next eight samples are written while gain2 is multiplied by
+ *     gain_inc (via fixmul16) after each sample.
+ *
+ * NOTE(review): phase 1 is a do/while loop, so it processes eight samples
+ * even when start >= end; the interpolation then begins at start+8. The
+ * loop this helper replaces ("for (; nsample < startLoc; nsample++)") ran
+ * zero iterations in that case -- confirm that this difference is intended.
+ *
+ * @param pIn           input buffer
+ * @param pPrev         previous buffer to perform overlap against
+ * @param pOut          output buffer
+ * @param start         index to start with (always a multiple of 8)
+ * @param end           end index for first loop (always a multiple of 8)
+ * @param gain1         gain multiplied into pIn[i] before pPrev[i] is added
+ * @param gain2         gain multiplied into the sum of scaled input and overlap
+ * @param gain_inc      factor multiplied into gain2 after each interpolated sample
+ */
+ 
+static int applyVariableGain (int32_t *pIn, int32_t *pPrev, int32_t *pOut, 
+                              int32_t start, int32_t end, 
+                              int32_t gain1, int32_t gain2, int32_t gain_inc)
+{
+    int32_t i = start;
+    
+    /* Apply the constant gains until the end index is reached; the loop body
+     * runs once before the condition is checked (see NOTE(review) above) */
+    do {
+        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+    } while (i < end);
+    /* Interpolation is done over the next eight samples: each sample uses the
+     * current gain2, which is then scaled by gain_inc for the following one */
+    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+    gain2 = fixmul16(gain2, gain_inc);
+    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+    gain2 = fixmul16(gain2, gain_inc);
+    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+    gain2 = fixmul16(gain2, gain_inc);
+    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+    gain2 = fixmul16(gain2, gain_inc);
+    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+    gain2 = fixmul16(gain2, gain_inc);
+    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+    gain2 = fixmul16(gain2, gain_inc);
+    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+    gain2 = fixmul16(gain2, gain_inc);
+    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
+    gain2 = fixmul16(gain2, gain_inc); /* result unused: gain2 is a by-value parameter and is not read again */
+    
+    return i;
+}
 /**
 * Apply gain parameters and perform the MDCT overlapping part
 *
- * @param pIn           input float buffer
+ * @param pIn           input buffer
- * @param pPrev         previous float buffer to perform overlap against
+ * @param pPrev         previous buffer to perform overlap against
- * @param pOut          output float buffer
+ * @param pOut          output buffer
 * @param pGain1        current band gain info
 * @param pGain2        next band gain info
 */
-static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut, gain_info *pGain1, gain_info *pGain2)
+static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut, 
+                                      gain_info *pGain1, gain_info *pGain2)
 {
    /* gain compensation function */
    int32_t  gain1, gain2, gain_inc;
-    int   cnt, numdata, nsample, startLoc, endLoc;
+    int   cnt, numdata, nsample, startLoc;
    if (pGain2->num_gain_data == 0)
        gain1 = ONE_16;
@@ -576,41 +689,35 @@ static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOu
        gain1 = gain_tab1[pGain2->levcode[0]];
    if (pGain1->num_gain_data == 0) {
-        for (cnt = 0; cnt < 256; cnt++)
+        /* Remark: This path is called >90%. */
-            pOut[cnt] = fixmul16(pIn[cnt], gain1) + pPrev[cnt];
+        /* Apply gain for all samples from 0...255 */
+        applyFixGain(pIn, pPrev, pOut, 0, gain1);
    } else {
+        /* Remark: This path is called seldom. */
        numdata = pGain1->num_gain_data;
        pGain1->loccode[numdata] = 32;
        pGain1->levcode[numdata] = 4;
+        
-        nsample = 0; // current sample = 0
+        nsample = 0; /* starting loop with =0 */
        for (cnt = 0; cnt < numdata; cnt++) {
            startLoc = pGain1->loccode[cnt] * 8;
-            endLoc = startLoc + 8;
-            gain2 = gain_tab1[pGain1->levcode[cnt]];
+            gain2    = gain_tab1[pGain1->levcode[cnt]];
            gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15];
-            /* interpolate */
+            /* Apply variable gain (gain1 -> gain2) to samples */
-            for (; nsample < startLoc; nsample++)
+            nsample  = applyVariableGain(pIn, pPrev, pOut, nsample, startLoc, gain1, gain2, gain_inc);
-                pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]), gain2);
-            /* interpolation is done over eight samples */
-            for (; nsample < endLoc; nsample++) {
-                pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]),gain2);
-                gain2 = fixmul16(gain2, gain_inc);
-            }
        }
+        /* Apply gain for the residual samples from nsample...255 */
-        for (; nsample < 256; nsample++)
+        applyFixGain(pIn, pPrev, pOut, nsample, gain1);
-            pOut[nsample] = fixmul16(pIn[nsample], gain1) + pPrev[nsample];
    }
    /* Delay for the overlapping part. */
    memcpy(pPrev, &pIn[256], 256*sizeof(int32_t));
 }
 /**
 * Combine the tonal band spectrum and regular band spectrum
 * Return position of the last tonal coefficient
@@ -639,6 +746,7 @@ static int addTonalComponents (int32_t *pSpectrum, int numComponents, tonal_comp
    return lastPos;
 }
 /**
 * Linear equidistant interpolation between two points x and y. 7 interpolation
 * points can be calculated. Result is scaled by <<16.
@@ -712,7 +820,7 @@ static void reverseMatrixing(int32_t *su1, int32_t *su2, int *pPrevCode, int *pC
                }
                break;
            default:
-                //assert(0);
+                /* assert(0) */;
                break;
        }
    }
@@ -755,18 +863,16 @@ static void channelWeighting (int32_t *su1, int32_t *su2, int *p3)
    }
 }
 /**
 * Decode a Sound Unit
 *
 * @param gb            the GetBit context
 * @param pSnd          the channel unit to be used
- * @param pOut          the decoded samples before IQMF in float representation
+ * @param pOut          the decoded samples before IQMF
 * @param channelNum    channel number
 * @param codingMode    the coding mode (JOINT_STEREO or regular stereo/mono)
 */
 static int decodeChannelSoundUnit (GetBitContext *gb, channel_unit *pSnd, int32_t *pOut, int channelNum, int codingMode)
 {
    int   band, result=0, numSubbands, lastTonal, numBands;
@@ -807,8 +913,9 @@ static int decodeChannelSoundUnit (GetBitContext *gb, channel_unit *pSnd, int32_
        /* Perform the IMDCT step without overlapping. */
        if (band <= numBands) {
            IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf);
-        } else
+        } else {
            memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t));
+        }
        /* gain compensation and overlapping */
        gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]),
@@ -982,12 +1089,12 @@ int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
    /* Take care of the codec-specific extradata. */
    if (rmctx->extradata_size == 14) {
        /* Parse the extradata, WAV format */
-        DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0]));  //Unknown value always 1
+        DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0]));    /* Unknown value always 1 */
        q->samples_per_channel = rm_get_uint32le(&edata_ptr[2]);
        q->codingMode = rm_get_uint16le(&edata_ptr[6]);
-        DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8]));  //Dupe of coding mode
+        DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8]));    /* Dupe of coding mode */
-        q->frame_factor = rm_get_uint16le(&edata_ptr[10]);  //Unknown always 1
+        q->frame_factor = rm_get_uint16le(&edata_ptr[10]);      /* Unknown always 1 */
-        DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12]));  //Unknown always 0
+        DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12])); /* Unknown always 0 */
        /* setup */
        q->samples_per_frame = 1024 * q->channels;
author	Andree Buschmann <AndreeBuschmann@t-online.de>	2010-02-15 16:11:49 +0000
committer	Andree Buschmann <AndreeBuschmann@t-online.de>	2010-02-15 16:11:49 +0000
commit	51a8be1a0f11d329af99bf9c33a61af11d7a43ed (patch)
tree	aab163bd31c1ffae00a30365a5e305089a13f21a
parent	ad1ba429b98b2f4803a5113a094812091d229c3f (diff)
download	rockbox-51a8be1a0f11d329af99bf9c33a61af11d7a43ed.tar.gz rockbox-51a8be1a0f11d329af99bf9c33a61af11d7a43ed.zip

diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c index 18a4f120e1..ff6e639a3b 100644 --- a/apps/codecs/libatrac/atrac3.c +++ b/apps/codecs/libatrac/atrac3.c
@@ -59,6 +59,7 @@ static int32_t qmf_window[48] IBSS_ATTR;
59	static VLC spectral_coeff_tab[7];	59	static VLC spectral_coeff_tab[7];
60	static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;	60	static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
61		61
		62
62	/**	63	/**
63	* Matrixing within quadrature mirror synthesis filter.	64	* Matrixing within quadrature mirror synthesis filter.
64	*	65	*
@@ -91,6 +92,7 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
91	}	92	}
92	#endif	93	#endif
93		94
		95
94	/**	96	/**
95	* Matrixing within quadrature mirror synthesis filter.	97	* Matrixing within quadrature mirror synthesis filter.
96	*	98	*
@@ -195,6 +197,7 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
195	}	197	}
196	#endif	198	#endif
197		199
		200
198	/**	201	/**
199	* IMDCT windowing.	202	* IMDCT windowing.
200	*	203	*
@@ -214,6 +217,7 @@ atrac3_imdct_windowing(int32_t *buffer,
214	}	217	}
215	}	218	}
216		219
		220
217	/**	221	/**
218	* Quadrature mirror synthesis filter.	222	* Quadrature mirror synthesis filter.
219	*	223	*
@@ -240,12 +244,13 @@ static void iqmf (int32_t inlo, int32_t inhi, unsigned int nIn, int32_t *pOut,
240	memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t));	244	memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t));
241	}	245	}
242		246
		247
243	/**	248	/**
244	* Regular 512 points IMDCT without overlapping, with the exception of the swapping of odd bands	249	* Regular 512 points IMDCT without overlapping, with the exception of the swapping of odd bands
245	* caused by the reverse spectra of the QMF.	250	* caused by the reverse spectra of the QMF.
246	*	251	*
247	* @param pInput float input	252	* @param pInput input
248	* @param pOutput float output	253	* @param pOutput output
249	* @param odd_band 1 if the band is an odd band	254	* @param odd_band 1 if the band is an odd band
250	*/	255	*/
251		256
@@ -274,7 +279,7 @@ static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){
274	uint32_t* obuf = (uint32_t*) out;	279	uint32_t* obuf = (uint32_t*) out;
275		280
276	#if ((defined(TEST) \|\| defined(SIMULATOR)) && !defined(CPU_ARM))	281	#if ((defined(TEST) \|\| defined(SIMULATOR)) && !defined(CPU_ARM))
277	off = 0; //no check for memory alignment of inbuffer	282	off = 0; /* no check for memory alignment of inbuffer */
278	#else	283	#else
279	off = (intptr_t)inbuffer & 3;	284	off = (intptr_t)inbuffer & 3;
280	#endif /* TEST */	285	#endif /* TEST */
@@ -306,6 +311,7 @@ static void init_atrac3_transforms(void) {
306	}	311	}
307	}	312	}
308		313
		314
309	/**	315	/**
310	* Mantissa decoding	316	* Mantissa decoding
311	*	317	*
@@ -338,7 +344,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
338	} else {	344	} else {
339	for (cnt = 0; cnt < numCodes; cnt++) {	345	for (cnt = 0; cnt < numCodes; cnt++) {
340	if (numBits)	346	if (numBits)
341	code = get_bits(gb, numBits); //numBits is always 4 in this case	347	code = get_bits(gb, numBits); /* numBits is always 4 in this case */
342	else	348	else
343	code = 0;	349	code = 0;
344	mantissas[cnt*2] = seTab_0[code >> 2];	350	mantissas[cnt*2] = seTab_0[code >> 2];
@@ -366,6 +372,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
366	}	372	}
367	}	373	}
368		374
		375
369	/**	376	/**
370	* Restore the quantized band spectrum coefficients	377	* Restore the quantized band spectrum coefficients
371	*	378	*
@@ -382,8 +389,8 @@ int decodeSpectrum (GetBitContext gb, int32_t pOut)
382	int mantissas[128];	389	int mantissas[128];
383	int32_t SF;	390	int32_t SF;
384		391
385	numSubbands = get_bits(gb, 5); // number of coded subbands	392	numSubbands = get_bits(gb, 5); /* number of coded subbands */
386	codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC	393	codingMode = get_bits1(gb); /* coding Mode: 0 - VLC/ 1-CLC */
387		394
388	/* Get the VLC selector table for the subbands, 0 means not coded. */	395	/* Get the VLC selector table for the subbands, 0 means not coded. */
389	for (cnt = 0; cnt <= numSubbands; cnt++)	396	for (cnt = 0; cnt <= numSubbands; cnt++)
@@ -437,6 +444,7 @@ int decodeSpectrum (GetBitContext gb, int32_t pOut)
437	return numSubbands;	444	return numSubbands;
438	}	445	}
439		446
		447
440	/**	448	/**
441	* Restore the quantized tonal components	449	* Restore the quantized tonal components
442	*	450	*
@@ -517,6 +525,7 @@ static int decodeTonalComponents (GetBitContext gb, tonal_component pComponent
517	return component_count;	525	return component_count;
518	}	526	}
519		527
		528
520	/**	529	/**
521	* Decode gain parameters for the coded bands	530	* Decode gain parameters for the coded bands
522	*	531	*
@@ -554,21 +563,125 @@ static int decodeGainControl (GetBitContext gb, gain_block pGb, int numBands)
554	return 0;	563	return 0;
555	}	564	}
556		565
		566
		567	/**
		568	* Apply fix (constant) gain and overlap for sample[start...255].
		569	*
		570	* @param pIn input buffer
		571	* @param pPrev previous buffer to perform overlap against
		572	* @param pOut output buffer
		573	* @param start index to start with (always a multiple of 8)
		574	* @param gain gain to apply
		575	*/
		576
		577	static void applyFixGain (int32_t pIn, int32_t pPrev, int32_t *pOut,
		578	int32_t start, int32_t gain)
		579	{
		580	int32_t i = start;
		581
		582	/* start is always a multiple of 8 and therefore allows us to unroll the
		583	* loop to 8 calculation per loop
		584	*/
		585	if (ONE_16 == gain) {
		586	/* gain1 = 1.0 -> no multiplication needed, just adding */
		587	/* Remark: This path is called >90%. */
		588	do {
		589	pOut[i] = pIn[i] + pPrev[i]; i++;
		590	pOut[i] = pIn[i] + pPrev[i]; i++;
		591	pOut[i] = pIn[i] + pPrev[i]; i++;
		592	pOut[i] = pIn[i] + pPrev[i]; i++;
		593	pOut[i] = pIn[i] + pPrev[i]; i++;
		594	pOut[i] = pIn[i] + pPrev[i]; i++;
		595	pOut[i] = pIn[i] + pPrev[i]; i++;
		596	pOut[i] = pIn[i] + pPrev[i]; i++;
		597	} while (i<256);
		598	} else {
		599	/* gain1 != 1.0 -> we need to do a multiplication */
		600	/* Remark: This path is called seldom. */
		601	do {
		602	pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
		603	pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
		604	pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
		605	pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
		606	pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
		607	pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
		608	pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
		609	pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
		610	} while (i<256);
		611	}
		612	}
		613
		614
		615	/**
		616	* Apply variable gain and overlap. Returns sample index after applying gain,
		617	* resulting sample index is always a multiple of 8.
		618	*
		619	* @param pIn input buffer
		620	* @param pPrev previous buffer to perform overlap against
		621	* @param pOut output buffer
		622	* @param start index to start with (always a multiple of 8)
		623	* @param end end index for first loop (always a multiple of 8)
		624	* @param gain1 current bands gain to apply
		625	* @param gain2 next bands gain to apply
		626	* @param gain_inc stepwise adaption from gain1 to gain2
		627	*/
		628
		629	static int applyVariableGain (int32_t pIn, int32_t pPrev, int32_t *pOut,
		630	int32_t start, int32_t end,
		631	int32_t gain1, int32_t gain2, int32_t gain_inc)
		632	{
		633	int32_t i = start;
		634
		635	/* Apply fix gains until end index is reached */
		636	do {
		637	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		638	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		639	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		640	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		641	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		642	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		643	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		644	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		645	} while (i < end);
		646
		647	/* Interpolation is done over next eight samples */
		648	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		649	gain2 = fixmul16(gain2, gain_inc);
		650	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		651	gain2 = fixmul16(gain2, gain_inc);
		652	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		653	gain2 = fixmul16(gain2, gain_inc);
		654	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		655	gain2 = fixmul16(gain2, gain_inc);
		656	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		657	gain2 = fixmul16(gain2, gain_inc);
		658	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		659	gain2 = fixmul16(gain2, gain_inc);
		660	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		661	gain2 = fixmul16(gain2, gain_inc);
		662	pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
		663	gain2 = fixmul16(gain2, gain_inc);
		664
		665	return i;
		666	}
		667
		668
557	/**	669	/**
558	* Apply gain parameters and perform the MDCT overlapping part	670	* Apply gain parameters and perform the MDCT overlapping part
559	*	671	*
560	* @param pIn input float buffer	672	* @param pIn input buffer
561	* @param pPrev previous float buffer to perform overlap against	673	* @param pPrev previous buffer to perform overlap against
562	* @param pOut output float buffer	674	* @param pOut output buffer
563	* @param pGain1 current band gain info	675	* @param pGain1 current band gain info
564	* @param pGain2 next band gain info	676	* @param pGain2 next band gain info
565	*/	677	*/
566		678
567	static void gainCompensateAndOverlap (int32_t pIn, int32_t pPrev, int32_t pOut, gain_info pGain1, gain_info *pGain2)	679	static void gainCompensateAndOverlap (int32_t pIn, int32_t pPrev, int32_t *pOut,
		680	gain_info pGain1, gain_info pGain2)
568	{	681	{
569	/* gain compensation function */	682	/* gain compensation function */
570	int32_t gain1, gain2, gain_inc;	683	int32_t gain1, gain2, gain_inc;
571	int cnt, numdata, nsample, startLoc, endLoc;	684	int cnt, numdata, nsample, startLoc;
572		685
573	if (pGain2->num_gain_data == 0)	686	if (pGain2->num_gain_data == 0)
574	gain1 = ONE_16;	687	gain1 = ONE_16;
@@ -576,41 +689,35 @@ static void gainCompensateAndOverlap (int32_t pIn, int32_t pPrev, int32_t *pOu
576	gain1 = gain_tab1[pGain2->levcode[0]];	689	gain1 = gain_tab1[pGain2->levcode[0]];
577		690
578	if (pGain1->num_gain_data == 0) {	691	if (pGain1->num_gain_data == 0) {
579	for (cnt = 0; cnt < 256; cnt++)	692	/* Remark: This path is called >90%. */
580	pOut[cnt] = fixmul16(pIn[cnt], gain1) + pPrev[cnt];	693	/* Apply gain for all samples from 0...255 */
		694	applyFixGain(pIn, pPrev, pOut, 0, gain1);
581	} else {	695	} else {
		696	/* Remark: This path is called seldom. */
582	numdata = pGain1->num_gain_data;	697	numdata = pGain1->num_gain_data;
583	pGain1->loccode[numdata] = 32;	698	pGain1->loccode[numdata] = 32;
584	pGain1->levcode[numdata] = 4;	699	pGain1->levcode[numdata] = 4;
585		700
586	nsample = 0; // current sample = 0	701	nsample = 0; /* starting loop with =0 */
587		702
588	for (cnt = 0; cnt < numdata; cnt++) {	703	for (cnt = 0; cnt < numdata; cnt++) {
589	startLoc = pGain1->loccode[cnt] * 8;	704	startLoc = pGain1->loccode[cnt] * 8;
590	endLoc = startLoc + 8;
591		705
592	gain2 = gain_tab1[pGain1->levcode[cnt]];	706	gain2 = gain_tab1[pGain1->levcode[cnt]];
593	gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15];	707	gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15];
594		708
595	/* interpolate */	709	/* Apply variable gain (gain1 -> gain2) to samples */
596	for (; nsample < startLoc; nsample++)	710	nsample = applyVariableGain(pIn, pPrev, pOut, nsample, startLoc, gain1, gain2, gain_inc);
597	pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]), gain2);
598
599	/* interpolation is done over eight samples */
600	for (; nsample < endLoc; nsample++) {
601	pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]),gain2);
602	gain2 = fixmul16(gain2, gain_inc);
603	}
604	}	711	}
605		712	/* Apply gain for the residual samples from nsample...255 */
606	for (; nsample < 256; nsample++)	713	applyFixGain(pIn, pPrev, pOut, nsample, gain1);
607	pOut[nsample] = fixmul16(pIn[nsample], gain1) + pPrev[nsample];
608	}	714	}
609		715
610	/* Delay for the overlapping part. */	716	/* Delay for the overlapping part. */
611	memcpy(pPrev, &pIn[256], 256*sizeof(int32_t));	717	memcpy(pPrev, &pIn[256], 256*sizeof(int32_t));
612	}	718	}
613		719
		720
614	/**	721	/**
615	* Combine the tonal band spectrum and regular band spectrum	722	* Combine the tonal band spectrum and regular band spectrum
616	* Return position of the last tonal coefficient	723	* Return position of the last tonal coefficient
@@ -639,6 +746,7 @@ static int addTonalComponents (int32_t *pSpectrum, int numComponents, tonal_comp
639	return lastPos;	746	return lastPos;
640	}	747	}
641		748
		749
642	/**	750	/**
643	* Linear equidistant interpolation between two points x and y. 7 interpolation	751	* Linear equidistant interpolation between two points x and y. 7 interpolation
644	* points can be calculated. Result is scaled by <<16.	752	* points can be calculated. Result is scaled by <<16.
@@ -712,7 +820,7 @@ static void reverseMatrixing(int32_t su1, int32_t su2, int pPrevCode, int pC
712	}	820	}
713	break;	821	break;
714	default:	822	default:
715	//assert(0);	823	/* assert(0) */;
716	break;	824	break;
717	}	825	}
718	}	826	}
@@ -755,18 +863,16 @@ static void channelWeighting (int32_t su1, int32_t su2, int *p3)
755	}	863	}
756	}	864	}
757		865
758
759	/**	866	/**
760	* Decode a Sound Unit	867	* Decode a Sound Unit
761	*	868	*
762	* @param gb the GetBit context	869	* @param gb the GetBit context
763	* @param pSnd the channel unit to be used	870	* @param pSnd the channel unit to be used
764	* @param pOut the decoded samples before IQMF in float representation	871	* @param pOut the decoded samples before IQMF
765	* @param channelNum channel number	872	* @param channelNum channel number
766	* @param codingMode the coding mode (JOINT_STEREO or regular stereo/mono)	873	* @param codingMode the coding mode (JOINT_STEREO or regular stereo/mono)
767	*/	874	*/
768		875
769
770	static int decodeChannelSoundUnit (GetBitContext gb, channel_unit pSnd, int32_t *pOut, int channelNum, int codingMode)	876	static int decodeChannelSoundUnit (GetBitContext gb, channel_unit pSnd, int32_t *pOut, int channelNum, int codingMode)
771	{	877	{
772	int band, result=0, numSubbands, lastTonal, numBands;	878	int band, result=0, numSubbands, lastTonal, numBands;
@@ -807,8 +913,9 @@ static int decodeChannelSoundUnit (GetBitContext gb, channel_unit pSnd, int32_
807	/* Perform the IMDCT step without overlapping. */	913	/* Perform the IMDCT step without overlapping. */
808	if (band <= numBands) {	914	if (band <= numBands) {
809	IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf);	915	IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf);
810	} else	916	} else {
811	memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t));	917	memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t));
		918	}
812		919
813	/* gain compensation and overlapping */	920	/* gain compensation and overlapping */
814	gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band256]), &(pOut[band256]),	921	gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band256]), &(pOut[band256]),
@@ -982,12 +1089,12 @@ int atrac3_decode_init(ATRAC3Context q, RMContext rmctx)
982	/* Take care of the codec-specific extradata. */	1089	/* Take care of the codec-specific extradata. */
983	if (rmctx->extradata_size == 14) {	1090	if (rmctx->extradata_size == 14) {
984	/* Parse the extradata, WAV format */	1091	/* Parse the extradata, WAV format */
985	DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0])); //Unknown value always 1	1092	DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0])); /* Unknown value always 1 */
986	q->samples_per_channel = rm_get_uint32le(&edata_ptr[2]);	1093	q->samples_per_channel = rm_get_uint32le(&edata_ptr[2]);
987	q->codingMode = rm_get_uint16le(&edata_ptr[6]);	1094	q->codingMode = rm_get_uint16le(&edata_ptr[6]);
988	DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8])); //Dupe of coding mode	1095	DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8])); /* Dupe of coding mode */
989	q->frame_factor = rm_get_uint16le(&edata_ptr[10]); //Unknown always 1	1096	q->frame_factor = rm_get_uint16le(&edata_ptr[10]); /* Unknown always 1 */
990	DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12])); //Unknown always 0	1097	DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12])); /* Unknown always 0 */
991		1098
992	/* setup */	1099	/* setup */
993	q->samples_per_frame = 1024 * q->channels;	1100	q->samples_per_frame = 1024 * q->channels;