summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andree Buschmann <AndreeBuschmann@t-online.de> 2010-02-15 16:11:49 +0000
committer Andree Buschmann <AndreeBuschmann@t-online.de> 2010-02-15 16:11:49 +0000
commit 51a8be1a0f11d329af99bf9c33a61af11d7a43ed (patch)
tree aab163bd31c1ffae00a30365a5e305089a13f21a
parent ad1ba429b98b2f4803a5113a094812091d229c3f (diff)
downloadrockbox-51a8be1a0f11d329af99bf9c33a61af11d7a43ed.tar.gz
rockbox-51a8be1a0f11d329af99bf9c33a61af11d7a43ed.zip
Further optimization of atrac3 codec. Refactor gainCompensateAndOverlap(), avoid multiplication if not needed, unroll loops. Speeds up codec by 1.1 MHz (+2%) on ARM.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24668 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libatrac/atrac3.c183
1 files changed, 145 insertions, 38 deletions
diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c
index 18a4f120e1..ff6e639a3b 100644
--- a/apps/codecs/libatrac/atrac3.c
+++ b/apps/codecs/libatrac/atrac3.c
@@ -59,6 +59,7 @@ static int32_t qmf_window[48] IBSS_ATTR;
59static VLC spectral_coeff_tab[7]; 59static VLC spectral_coeff_tab[7];
60static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; 60static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
61 61
62
62/** 63/**
63 * Matrixing within quadrature mirror synthesis filter. 64 * Matrixing within quadrature mirror synthesis filter.
64 * 65 *
@@ -91,6 +92,7 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
91 } 92 }
92#endif 93#endif
93 94
95
94/** 96/**
95 * Matrixing within quadrature mirror synthesis filter. 97 * Matrixing within quadrature mirror synthesis filter.
96 * 98 *
@@ -195,6 +197,7 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
195 } 197 }
196#endif 198#endif
197 199
200
198/** 201/**
199 * IMDCT windowing. 202 * IMDCT windowing.
200 * 203 *
@@ -214,6 +217,7 @@ atrac3_imdct_windowing(int32_t *buffer,
214 } 217 }
215} 218}
216 219
220
217/** 221/**
218 * Quadrature mirror synthesis filter. 222 * Quadrature mirror synthesis filter.
219 * 223 *
@@ -240,12 +244,13 @@ static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut,
240 memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t)); 244 memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t));
241} 245}
242 246
247
243/** 248/**
244 * Regular 512 points IMDCT without overlapping, with the exception of the swapping of odd bands 249 * Regular 512 points IMDCT without overlapping, with the exception of the swapping of odd bands
245 * caused by the reverse spectra of the QMF. 250 * caused by the reverse spectra of the QMF.
246 * 251 *
247 * @param pInput float input 252 * @param pInput input
248 * @param pOutput float output 253 * @param pOutput output
249 * @param odd_band 1 if the band is an odd band 254 * @param odd_band 1 if the band is an odd band
250 */ 255 */
251 256
@@ -274,7 +279,7 @@ static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){
274 uint32_t* obuf = (uint32_t*) out; 279 uint32_t* obuf = (uint32_t*) out;
275 280
276#if ((defined(TEST) || defined(SIMULATOR)) && !defined(CPU_ARM)) 281#if ((defined(TEST) || defined(SIMULATOR)) && !defined(CPU_ARM))
277 off = 0; //no check for memory alignment of inbuffer 282 off = 0; /* no check for memory alignment of inbuffer */
278#else 283#else
279 off = (intptr_t)inbuffer & 3; 284 off = (intptr_t)inbuffer & 3;
280#endif /* TEST */ 285#endif /* TEST */
@@ -306,6 +311,7 @@ static void init_atrac3_transforms(void) {
306 } 311 }
307} 312}
308 313
314
309/** 315/**
310 * Mantissa decoding 316 * Mantissa decoding
311 * 317 *
@@ -338,7 +344,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
338 } else { 344 } else {
339 for (cnt = 0; cnt < numCodes; cnt++) { 345 for (cnt = 0; cnt < numCodes; cnt++) {
340 if (numBits) 346 if (numBits)
341 code = get_bits(gb, numBits); //numBits is always 4 in this case 347 code = get_bits(gb, numBits); /* numBits is always 4 in this case */
342 else 348 else
343 code = 0; 349 code = 0;
344 mantissas[cnt*2] = seTab_0[code >> 2]; 350 mantissas[cnt*2] = seTab_0[code >> 2];
@@ -366,6 +372,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
366 } 372 }
367} 373}
368 374
375
369/** 376/**
370 * Restore the quantized band spectrum coefficients 377 * Restore the quantized band spectrum coefficients
371 * 378 *
@@ -382,8 +389,8 @@ int decodeSpectrum (GetBitContext *gb, int32_t *pOut)
382 int mantissas[128]; 389 int mantissas[128];
383 int32_t SF; 390 int32_t SF;
384 391
385 numSubbands = get_bits(gb, 5); // number of coded subbands 392 numSubbands = get_bits(gb, 5); /* number of coded subbands */
386 codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC 393 codingMode = get_bits1(gb); /* coding Mode: 0 - VLC/ 1-CLC */
387 394
388 /* Get the VLC selector table for the subbands, 0 means not coded. */ 395 /* Get the VLC selector table for the subbands, 0 means not coded. */
389 for (cnt = 0; cnt <= numSubbands; cnt++) 396 for (cnt = 0; cnt <= numSubbands; cnt++)
@@ -437,6 +444,7 @@ int decodeSpectrum (GetBitContext *gb, int32_t *pOut)
437 return numSubbands; 444 return numSubbands;
438} 445}
439 446
447
440/** 448/**
441 * Restore the quantized tonal components 449 * Restore the quantized tonal components
442 * 450 *
@@ -517,6 +525,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
517 return component_count; 525 return component_count;
518} 526}
519 527
528
520/** 529/**
521 * Decode gain parameters for the coded bands 530 * Decode gain parameters for the coded bands
522 * 531 *
@@ -554,21 +563,125 @@ static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands)
554 return 0; 563 return 0;
555} 564}
556 565
566
567/**
568 * Apply fix (constant) gain and overlap for sample[start...255].
569 *
570 * @param pIn input buffer
571 * @param pPrev previous buffer to perform overlap against
572 * @param pOut output buffer
573 * @param start index to start with (always a multiple of 8)
574 * @param gain gain to apply
575 */
576
577static void applyFixGain (int32_t *pIn, int32_t *pPrev, int32_t *pOut,
578 int32_t start, int32_t gain)
579{
580 int32_t i = start;
581
582 /* start is always a multiple of 8 and therefore allows us to unroll the
583 * loop to 8 calculation per loop
584 */
585 if (ONE_16 == gain) {
586 /* gain1 = 1.0 -> no multiplication needed, just adding */
587 /* Remark: This path is called >90%. */
588 do {
589 pOut[i] = pIn[i] + pPrev[i]; i++;
590 pOut[i] = pIn[i] + pPrev[i]; i++;
591 pOut[i] = pIn[i] + pPrev[i]; i++;
592 pOut[i] = pIn[i] + pPrev[i]; i++;
593 pOut[i] = pIn[i] + pPrev[i]; i++;
594 pOut[i] = pIn[i] + pPrev[i]; i++;
595 pOut[i] = pIn[i] + pPrev[i]; i++;
596 pOut[i] = pIn[i] + pPrev[i]; i++;
597 } while (i<256);
598 } else {
599 /* gain1 != 1.0 -> we need to do a multiplication */
600 /* Remark: This path is called seldom. */
601 do {
602 pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
603 pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
604 pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
605 pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
606 pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
607 pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
608 pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
609 pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
610 } while (i<256);
611 }
612}
613
614
/**
 * Apply variable gain and overlap. Returns the sample index after applying
 * gain; the resulting sample index is always a multiple of 8.
 *
 * Two stages are performed:
 *  1. For every index in [start, end) the constant gain2 is applied:
 *     pOut[i] = fixmul16(fixmul16(pIn[i], gain1) + pPrev[i], gain2).
 *     This stage is skipped completely when start >= end.
 *  2. For the following eight samples the same formula is used, but gain2
 *     is multiplied by gain_inc (via fixmul16) after each sample.
 *
 * @param pIn       input buffer
 * @param pPrev     previous buffer to perform overlap against
 * @param pOut      output buffer
 * @param start     index to start with (always a multiple of 8)
 * @param end       end index for first loop (always a multiple of 8)
 * @param gain1     current bands gain to apply
 * @param gain2     next bands gain to apply
 * @param gain_inc  stepwise adaption from gain1 to gain2
 * @return          index of the first sample not processed by this call
 */

static int applyVariableGain (int32_t *pIn, int32_t *pPrev, int32_t *pOut,
                              int32_t start, int32_t end,
                              int32_t gain1, int32_t gain2, int32_t gain_inc)
{
    int32_t i = start;

    /* Apply fix gains until end index is reached. start and end are
     * multiples of 8, which allows unrolling to 8 calculations per
     * iteration.
     *
     * The loop is pre-tested on purpose: when start == end there are no
     * samples to process before the interpolation. A post-tested (do-while)
     * loop would process eight samples anyway and thereby move the
     * interpolation window eight samples too far.
     */
    while (i < end) {
        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
        pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
    }

    /* Interpolation is done over next eight samples: gain2 is adapted by
     * gain_inc after each sample.
     */
    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
    gain2 = fixmul16(gain2, gain_inc);
    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
    gain2 = fixmul16(gain2, gain_inc);
    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
    gain2 = fixmul16(gain2, gain_inc);
    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
    gain2 = fixmul16(gain2, gain_inc);
    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
    gain2 = fixmul16(gain2, gain_inc);
    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
    gain2 = fixmul16(gain2, gain_inc);
    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
    gain2 = fixmul16(gain2, gain_inc);
    pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
    gain2 = fixmul16(gain2, gain_inc);

    return i;
}
667
668
557/** 669/**
558 * Apply gain parameters and perform the MDCT overlapping part 670 * Apply gain parameters and perform the MDCT overlapping part
559 * 671 *
560 * @param pIn input float buffer 672 * @param pIn input buffer
561 * @param pPrev previous float buffer to perform overlap against 673 * @param pPrev previous buffer to perform overlap against
562 * @param pOut output float buffer 674 * @param pOut output buffer
563 * @param pGain1 current band gain info 675 * @param pGain1 current band gain info
564 * @param pGain2 next band gain info 676 * @param pGain2 next band gain info
565 */ 677 */
566 678
567static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut, gain_info *pGain1, gain_info *pGain2) 679static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut,
680 gain_info *pGain1, gain_info *pGain2)
568{ 681{
569 /* gain compensation function */ 682 /* gain compensation function */
570 int32_t gain1, gain2, gain_inc; 683 int32_t gain1, gain2, gain_inc;
571 int cnt, numdata, nsample, startLoc, endLoc; 684 int cnt, numdata, nsample, startLoc;
572 685
573 if (pGain2->num_gain_data == 0) 686 if (pGain2->num_gain_data == 0)
574 gain1 = ONE_16; 687 gain1 = ONE_16;
@@ -576,41 +689,35 @@ static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOu
576 gain1 = gain_tab1[pGain2->levcode[0]]; 689 gain1 = gain_tab1[pGain2->levcode[0]];
577 690
578 if (pGain1->num_gain_data == 0) { 691 if (pGain1->num_gain_data == 0) {
579 for (cnt = 0; cnt < 256; cnt++) 692 /* Remark: This path is called >90%. */
580 pOut[cnt] = fixmul16(pIn[cnt], gain1) + pPrev[cnt]; 693 /* Apply gain for all samples from 0...255 */
694 applyFixGain(pIn, pPrev, pOut, 0, gain1);
581 } else { 695 } else {
696 /* Remark: This path is called seldom. */
582 numdata = pGain1->num_gain_data; 697 numdata = pGain1->num_gain_data;
583 pGain1->loccode[numdata] = 32; 698 pGain1->loccode[numdata] = 32;
584 pGain1->levcode[numdata] = 4; 699 pGain1->levcode[numdata] = 4;
585 700
586 nsample = 0; // current sample = 0 701 nsample = 0; /* starting loop with =0 */
587 702
588 for (cnt = 0; cnt < numdata; cnt++) { 703 for (cnt = 0; cnt < numdata; cnt++) {
589 startLoc = pGain1->loccode[cnt] * 8; 704 startLoc = pGain1->loccode[cnt] * 8;
590 endLoc = startLoc + 8;
591 705
592 gain2 = gain_tab1[pGain1->levcode[cnt]]; 706 gain2 = gain_tab1[pGain1->levcode[cnt]];
593 gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15]; 707 gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15];
594 708
595 /* interpolate */ 709 /* Apply variable gain (gain1 -> gain2) to samples */
596 for (; nsample < startLoc; nsample++) 710 nsample = applyVariableGain(pIn, pPrev, pOut, nsample, startLoc, gain1, gain2, gain_inc);
597 pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]), gain2);
598
599 /* interpolation is done over eight samples */
600 for (; nsample < endLoc; nsample++) {
601 pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]),gain2);
602 gain2 = fixmul16(gain2, gain_inc);
603 }
604 } 711 }
605 712 /* Apply gain for the residual samples from nsample...255 */
606 for (; nsample < 256; nsample++) 713 applyFixGain(pIn, pPrev, pOut, nsample, gain1);
607 pOut[nsample] = fixmul16(pIn[nsample], gain1) + pPrev[nsample];
608 } 714 }
609 715
610 /* Delay for the overlapping part. */ 716 /* Delay for the overlapping part. */
611 memcpy(pPrev, &pIn[256], 256*sizeof(int32_t)); 717 memcpy(pPrev, &pIn[256], 256*sizeof(int32_t));
612} 718}
613 719
720
614/** 721/**
615 * Combine the tonal band spectrum and regular band spectrum 722 * Combine the tonal band spectrum and regular band spectrum
616 * Return position of the last tonal coefficient 723 * Return position of the last tonal coefficient
@@ -639,6 +746,7 @@ static int addTonalComponents (int32_t *pSpectrum, int numComponents, tonal_comp
639 return lastPos; 746 return lastPos;
640} 747}
641 748
749
642/** 750/**
643 * Linear equidistant interpolation between two points x and y. 7 interpolation 751 * Linear equidistant interpolation between two points x and y. 7 interpolation
644 * points can be calculated. Result is scaled by <<16. 752 * points can be calculated. Result is scaled by <<16.
@@ -712,7 +820,7 @@ static void reverseMatrixing(int32_t *su1, int32_t *su2, int *pPrevCode, int *pC
712 } 820 }
713 break; 821 break;
714 default: 822 default:
715 //assert(0); 823 /* assert(0) */;
716 break; 824 break;
717 } 825 }
718 } 826 }
@@ -755,18 +863,16 @@ static void channelWeighting (int32_t *su1, int32_t *su2, int *p3)
755 } 863 }
756} 864}
757 865
758
759/** 866/**
760 * Decode a Sound Unit 867 * Decode a Sound Unit
761 * 868 *
762 * @param gb the GetBit context 869 * @param gb the GetBit context
763 * @param pSnd the channel unit to be used 870 * @param pSnd the channel unit to be used
764 * @param pOut the decoded samples before IQMF in float representation 871 * @param pOut the decoded samples before IQMF
765 * @param channelNum channel number 872 * @param channelNum channel number
766 * @param codingMode the coding mode (JOINT_STEREO or regular stereo/mono) 873 * @param codingMode the coding mode (JOINT_STEREO or regular stereo/mono)
767 */ 874 */
768 875
769
770static int decodeChannelSoundUnit (GetBitContext *gb, channel_unit *pSnd, int32_t *pOut, int channelNum, int codingMode) 876static int decodeChannelSoundUnit (GetBitContext *gb, channel_unit *pSnd, int32_t *pOut, int channelNum, int codingMode)
771{ 877{
772 int band, result=0, numSubbands, lastTonal, numBands; 878 int band, result=0, numSubbands, lastTonal, numBands;
@@ -807,8 +913,9 @@ static int decodeChannelSoundUnit (GetBitContext *gb, channel_unit *pSnd, int32_
807 /* Perform the IMDCT step without overlapping. */ 913 /* Perform the IMDCT step without overlapping. */
808 if (band <= numBands) { 914 if (band <= numBands) {
809 IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf); 915 IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf);
810 } else 916 } else {
811 memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t)); 917 memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t));
918 }
812 919
813 /* gain compensation and overlapping */ 920 /* gain compensation and overlapping */
814 gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]), 921 gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]),
@@ -982,12 +1089,12 @@ int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
982 /* Take care of the codec-specific extradata. */ 1089 /* Take care of the codec-specific extradata. */
983 if (rmctx->extradata_size == 14) { 1090 if (rmctx->extradata_size == 14) {
984 /* Parse the extradata, WAV format */ 1091 /* Parse the extradata, WAV format */
985 DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0])); //Unknown value always 1 1092 DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0])); /* Unknown value always 1 */
986 q->samples_per_channel = rm_get_uint32le(&edata_ptr[2]); 1093 q->samples_per_channel = rm_get_uint32le(&edata_ptr[2]);
987 q->codingMode = rm_get_uint16le(&edata_ptr[6]); 1094 q->codingMode = rm_get_uint16le(&edata_ptr[6]);
988 DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8])); //Dupe of coding mode 1095 DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8])); /* Dupe of coding mode */
989 q->frame_factor = rm_get_uint16le(&edata_ptr[10]); //Unknown always 1 1096 q->frame_factor = rm_get_uint16le(&edata_ptr[10]); /* Unknown always 1 */
990 DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12])); //Unknown always 0 1097 DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12])); /* Unknown always 0 */
991 1098
992 /* setup */ 1099 /* setup */
993 q->samples_per_frame = 1024 * q->channels; 1100 q->samples_per_frame = 1024 * q->channels;