summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Giacomelli <giac2000@hotmail.com> 2007-07-09 02:48:32 +0000
committer: Michael Giacomelli <giac2000@hotmail.com> 2007-07-09 02:48:32 +0000
commit: 4b1cf3ea44060cd4c6b9a867e2f706f9bb570d80 (patch)
tree: 082edd6cf481f0308cee8c681833d307dd34d6a7
parent: 5a1999eacd917e82a2725e1e0ee8f853dd14b93a (diff)
downloadrockbox-4b1cf3ea44060cd4c6b9a867e2f706f9bb570d80.tar.gz
rockbox-4b1cf3ea44060cd4c6b9a867e2f706f9bb570d80.zip
Compute the MDCT in place, and make that place within IRAM. This gives a nice speed boost on ARM and will probably make a huge difference on Coldfire.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13828 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libwma/wmadeci.c 36
1 files changed, 18 insertions, 18 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index a1326abbfe..dc45e64042 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -126,7 +126,7 @@ uint16_t *runtabarray[2], *levtabarray[2];
126 126
127uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336 127uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336
128 128
129FFTComplex mdct_tmp[BLOCK_MAX_SIZE] IBSS_ATTR; /* temporary storage for imdct */ 129FFTComplex mdct_tmp[1] ; /* dummy var */
130 130
131//may also be too large by ~ 1KB each? 131//may also be too large by ~ 1KB each?
132static VLC_TYPE vlcbuf1[6144][2]; 132static VLC_TYPE vlcbuf1[6144][2];
@@ -555,14 +555,14 @@ fail:
555 */ 555 */
556void ff_imdct_calc(MDCTContext *s, 556void ff_imdct_calc(MDCTContext *s,
557 fixed32 *output, 557 fixed32 *output,
558 const fixed32 *input, 558 fixed32 *input)
559 FFTComplex *tmp)
560{ 559{
561 int k, n8, n4, n2, n, j,scale; 560 int k, n8, n4, n2, n, j,scale;
562 const fixed32 *tcos = s->tcos; 561 const fixed32 *tcos = s->tcos;
563 const fixed32 *tsin = s->tsin; 562 const fixed32 *tsin = s->tsin;
564 const fixed32 *in1, *in2; 563 const fixed32 *in1, *in2;
565 FFTComplex *z = (FFTComplex *)tmp; 564 FFTComplex *z1 = (FFTComplex *)output;
565 FFTComplex *z2 = (FFTComplex *)input;
566 int revtabshift = 12 - s->nbits; 566 int revtabshift = 12 - s->nbits;
567 567
568 n = 1 << s->nbits; 568 n = 1 << s->nbits;
@@ -579,31 +579,31 @@ void ff_imdct_calc(MDCTContext *s,
579 for(k = 0; k < n4; k++) 579 for(k = 0; k < n4; k++)
580 { 580 {
581 j=revtab0[k<<revtabshift]; 581 j=revtab0[k<<revtabshift];
582 CMUL(&z[j].re, &z[j].im, *in2, *in1, tcos[k], tsin[k]); 582 CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]);
583 in1 += 2; 583 in1 += 2;
584 in2 -= 2; 584 in2 -= 2;
585 } 585 }
586 586
587 scale = fft_calc_unscaled(&s->fft, z); 587 scale = fft_calc_unscaled(&s->fft, z1);
588 588
589 /* post rotation + reordering */ 589 /* post rotation + reordering */
590 590
591 for(k = 0; k < n4; k++) 591 for(k = 0; k < n4; k++)
592 { 592 {
593 CMUL(&z[k].re, &z[k].im, (z[k].re), (z[k].im), tcos[k], tsin[k]); 593 CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]);
594 } 594 }
595 595
596 for(k = 0; k < n8; k++) 596 for(k = 0; k < n8; k++)
597 { 597 {
598 fixed32 r1,r2,r3,r4,r1n,r2n,r3n; 598 fixed32 r1,r2,r3,r4,r1n,r2n,r3n;
599 599
600 r1 = z[n8 + k].im; 600 r1 = z2[n8 + k].im;
601 r1n = r1 * -1; 601 r1n = r1 * -1;
602 r2 = z[n8-1-k].re; 602 r2 = z2[n8-1-k].re;
603 r2n = r2 * -1; 603 r2n = r2 * -1;
604 r3 = z[k+n8].re; 604 r3 = z2[k+n8].re;
605 r3n = r3 * -1; 605 r3n = r3 * -1;
606 r4 = z[n8-k-1].im; 606 r4 = z2[n8-k-1].im;
607 607
608 output[2*k] = r1n; 608 output[2*k] = r1n;
609 output[n2-1-2*k] = r1; 609 output[n2-1-2*k] = r1;
@@ -1748,6 +1748,7 @@ static int wma_decode_block(WMADecodeContext *s)
1748 { 1748 {
1749 fixed32 a, b; 1749 fixed32 a, b;
1750 int i; 1750 int i;
1751 fixed32 (*coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE] = (s->coefs);
1751 1752
1752 /* nominal case for ms stereo: we do it before mdct */ 1753 /* nominal case for ms stereo: we do it before mdct */
1753 /* no need to optimize this case because it should almost 1754 /* no need to optimize this case because it should almost
@@ -1760,10 +1761,10 @@ static int wma_decode_block(WMADecodeContext *s)
1760 1761
1761 for(i = 0; i < s->block_len; ++i) 1762 for(i = 0; i < s->block_len; ++i)
1762 { 1763 {
1763 a = (*s->coefs)[0][i]; 1764 a = (*coefs)[0][i];
1764 b = (*s->coefs)[1][i]; 1765 b = (*coefs)[1][i];
1765 (*s->coefs)[0][i] = a + b; 1766 (*coefs)[0][i] = a + b;
1766 (*s->coefs)[1][i] = a - b; 1767 (*coefs)[1][i] = a - b;
1767 } 1768 }
1768 } 1769 }
1769 1770
@@ -1771,7 +1772,7 @@ static int wma_decode_block(WMADecodeContext *s)
1771 { 1772 {
1772 if (s->channel_coded[ch]) 1773 if (s->channel_coded[ch])
1773 { 1774 {
1774 static fixed32 output[BLOCK_MAX_SIZE * 2]; 1775 static fixed32 output[BLOCK_MAX_SIZE * 2] IBSS_ATTR;
1775 1776
1776 int n4, index, n; 1777 int n4, index, n;
1777 1778
@@ -1780,8 +1781,7 @@ static int wma_decode_block(WMADecodeContext *s)
1780 1781
1781 ff_imdct_calc(&s->mdct_ctx[bsize], 1782 ff_imdct_calc(&s->mdct_ctx[bsize],
1782 output, 1783 output,
1783 (*(s->coefs))[ch], 1784 (*(s->coefs))[ch]);
1784 s->mdct_tmp);
1785 1785
1786 1786
1787 /* add in the frame */ 1787 /* add in the frame */