Clean up iMDCT coefficient calculations.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14451 a1c6a512-1295-4272-9138-f99709370657
author: Michael Giacomelli <giac2000@hotmail.com> 2007-08-24 20:02:50 +0000
committer: Michael Giacomelli <giac2000@hotmail.com> 2007-08-24 20:02:50 +0000
commit: 153d74443ae4f187fae0432346d0346bddfe46f3 (patch)
tree: df8d1f2776228bb02f93da6d179b93e961b44c45
parent: c5683b3c18cd64bff14d2213f827e966d9b7982a (diff)
download: rockbox-153d74443ae4f187fae0432346d0346bddfe46f3.tar.gz
rockbox-153d74443ae4f187fae0432346d0346bddfe46f3.zip
1 files changed, 30 insertions, 28 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index 2796815d72..3b81669d8f 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -30,7 +30,7 @@
 #include "bitstream.h"
-#define VLCBITS 7               /*7 is the lowest without glitching*/
+#define VLCBITS 7       /*7 is the lowest without glitching*/
 #define VLCMAX ((22+VLCBITS-1)/VLCBITS)
 #define EXPVLCBITS 7
@@ -390,7 +390,7 @@ void vector_fmul_add_add(fixed32 *dst, const fixed32 *data, const fixed32 *windo
                  "smull r8, r9, r0, r4;"
                  "ldmia %[dst], {r0, r4};"
-                                  "add   r0, r0, r9, lsl #1;"  /* *dst=*dst+(r9<<1)*/
+                  "add   r0, r0, r9, lsl #1;"  /* *dst=*dst+(r9<<1)*/
                  "smull r8, r9, r1, r5;"
                  "add   r1, r4, r9, lsl #1;"
                  "stmia %[dst]!, {r0, r1};"
@@ -433,7 +433,7 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
  * We ensure that when the windows overlap their squared sum
  * is always 1 (MDCT reconstruction rule).
  *
-  *     The Vorbis I spec has a great diagram explaining this process.
+  * The Vorbis I spec has a great diagram explaining this process.
  * See section 1.3.2.3 of http://xiph.org/vorbis/doc/Vorbis_I_spec.html
  */
 static void wma_window(WMADecodeContext *s, fixed32 *in, fixed32 *out)
@@ -450,7 +450,7 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
         vector_fmul_add_add(out, in, s->windows[bsize], block_len);
     } else {
-                 /*previous block was smaller or the same size, so use it's size to set the window length*/
+         /*previous block was smaller or the same size, so use it's size to set the window length*/
         block_len = 1 << s->prev_block_len_bits;
         /*find the middle of the two overlapped blocks, this will be the first overlapped sample*/
         n = (s->block_len - block_len) / 2;
@@ -460,10 +460,10 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
         memcpy(out+n+block_len, in+n+block_len, n*sizeof(fixed32));
     }
-        /* Advance to the end of the current block and prepare to window it for the next block.
+    /* Advance to the end of the current block and prepare to window it for the next block.
-         * Since the window function needs to be reversed, we do it backwards starting with the
+     * Since the window function needs to be reversed, we do it backwards starting with the
-         * last sample and moving towards the first
+     * last sample and moving towards the first
-         */
+     */
     out += s->block_len;
     in += s->block_len;
@@ -1169,7 +1169,7 @@ static int wma_decode_block(WMADecodeContext *s)
    int nb_coefs[MAX_CHANNELS];
    fixed32 mdct_norm;
-        DEBUGF("***decode_block: %d of (%d samples) (%d)\n",  s->block_num, s->frame_len, s->block_len);
+    DEBUGF("***decode_block: %d of (%d samples) (%d)\n",  s->block_num, s->frame_len, s->block_len);
   /* compute current block length */
    if (s->use_variable_block_len)
@@ -1456,14 +1456,22 @@ static int wma_decode_block(WMADecodeContext *s)
            coefs1 = s->coefs1[ch];
            exponents = s->exponents[ch];
            esize = s->exponents_bsize[ch];
-            mult = fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch]));
-            mult = fixmul64byfixed(mult, mdct_norm);        //what the hell?  This is actually fixed64*2^16!
            coefs = (*(s->coefs))[ch];
-               n=0;
+            n=0;
+          /*
+          *  Previously the IMDCT was run in 17.15 precision to avoid overflow. However rare files could
+          *  overflow here as well, so switch to 17.15 during coefs calculation.
+          */
            if (s->use_noise_coding)
            {
+                /*TODO:  mult should be converted to 32 bit to speed up noise coding*/
+                mult = fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch]));
+                mult = mult* mdct_norm;        //what the hell?  This is actually fixed64*2^16!
                mult1 = mult;
                /* very low freqs : noise */
@@ -1565,29 +1573,23 @@ static int wma_decode_block(WMADecodeContext *s)
            }
            else
            {
+                /*Noise coding not used, simply convert from exp to fixed representation*/
-                /* XXX: optimize more */
+                fixed32 mult3 = (fixed32)(fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch])));
+                mult3 = fixmul32(mult3, mdct_norm);
                n = nb_coefs[ch];
+                /* XXX: optimize more, unrolling this loop in asm might be a good idea */
                for(i = 0;i < n; ++i)
                {
-      /*
+                    atemp = (coefs1[i] * mult3)>>1;
-      *  Previously the IMDCT was run in 17.15 precision to avoid overflow. However rare files could
+                    *coefs++=fixmul32(atemp,exponents[i<<bsize>>esize]);
-      *  overflow here as well, so switch to 17.15 now.  As a bonus, this saves us a shift later on.
+                }
-      */
-                  atemp = (fixed32)(coefs1[i]*mult>>17);
-                //this "works" in the sense that the mdcts converge
-                 //atemp= ftofix32(coefs1[i] * fixtof64(exponents[i]) * fixtof64(mult>>16));
-                  *coefs++=fixmul32(atemp,exponents[i<<bsize>>esize]);
-               }
                n = s->block_len - s->coefs_end[bsize];
-                for(i = 0;i < n; ++i)
+                memset(coefs, 0, n*sizeof(fixed32));
-                    *coefs++ = 0;
            }
        }
    }
author	Michael Giacomelli <giac2000@hotmail.com>	2007-08-24 20:02:50 +0000
committer	Michael Giacomelli <giac2000@hotmail.com>	2007-08-24 20:02:50 +0000
commit	153d74443ae4f187fae0432346d0346bddfe46f3 (patch)
tree	df8d1f2776228bb02f93da6d179b93e961b44c45
parent	c5683b3c18cd64bff14d2213f827e966d9b7982a (diff)
download	rockbox-153d74443ae4f187fae0432346d0346bddfe46f3.tar.gz rockbox-153d74443ae4f187fae0432346d0346bddfe46f3.zip

diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c index 2796815d72..3b81669d8f 100644 --- a/apps/codecs/libwma/wmadeci.c +++ b/apps/codecs/libwma/wmadeci.c
@@ -30,7 +30,7 @@
30	#include "bitstream.h"	30	#include "bitstream.h"
31		31
32		32
33	#define VLCBITS 7 /*7 is the lowest without glitching*/	33	#define VLCBITS 7 /*7 is the lowest without glitching*/
34	#define VLCMAX ((22+VLCBITS-1)/VLCBITS)	34	#define VLCMAX ((22+VLCBITS-1)/VLCBITS)
35		35
36	#define EXPVLCBITS 7	36	#define EXPVLCBITS 7
@@ -390,7 +390,7 @@ void vector_fmul_add_add(fixed32 *dst, const fixed32 *data, const fixed32 *windo
390	"smull r8, r9, r0, r4;"	390	"smull r8, r9, r0, r4;"
391		391
392	"ldmia %[dst], {r0, r4};"	392	"ldmia %[dst], {r0, r4};"
393	"add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/	393	"add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
394	"smull r8, r9, r1, r5;"	394	"smull r8, r9, r1, r5;"
395	"add r1, r4, r9, lsl #1;"	395	"add r1, r4, r9, lsl #1;"
396	"stmia %[dst]!, {r0, r1};"	396	"stmia %[dst]!, {r0, r1};"
@@ -433,7 +433,7 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
433	* We ensure that when the windows overlap their squared sum	433	* We ensure that when the windows overlap their squared sum
434	* is always 1 (MDCT reconstruction rule).	434	* is always 1 (MDCT reconstruction rule).
435	*	435	*
436	* The Vorbis I spec has a great diagram explaining this process.	436	* The Vorbis I spec has a great diagram explaining this process.
437	* See section 1.3.2.3 of http://xiph.org/vorbis/doc/Vorbis_I_spec.html	437	* See section 1.3.2.3 of http://xiph.org/vorbis/doc/Vorbis_I_spec.html
438	*/	438	*/
439	static void wma_window(WMADecodeContext *s, fixed32 *in, fixed32 *out)	439	static void wma_window(WMADecodeContext *s, fixed32 *in, fixed32 *out)
@@ -450,7 +450,7 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
450	vector_fmul_add_add(out, in, s->windows[bsize], block_len);	450	vector_fmul_add_add(out, in, s->windows[bsize], block_len);
451		451
452	} else {	452	} else {
453	/*previous block was smaller or the same size, so use it's size to set the window length*/	453	/*previous block was smaller or the same size, so use it's size to set the window length*/
454	block_len = 1 << s->prev_block_len_bits;	454	block_len = 1 << s->prev_block_len_bits;
455	/*find the middle of the two overlapped blocks, this will be the first overlapped sample*/	455	/*find the middle of the two overlapped blocks, this will be the first overlapped sample*/
456	n = (s->block_len - block_len) / 2;	456	n = (s->block_len - block_len) / 2;
@@ -460,10 +460,10 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
460		460
461	memcpy(out+n+block_len, in+n+block_len, n*sizeof(fixed32));	461	memcpy(out+n+block_len, in+n+block_len, n*sizeof(fixed32));
462	}	462	}
463	/* Advance to the end of the current block and prepare to window it for the next block.	463	/* Advance to the end of the current block and prepare to window it for the next block.
464	* Since the window function needs to be reversed, we do it backwards starting with the	464	* Since the window function needs to be reversed, we do it backwards starting with the
465	* last sample and moving towards the first	465	* last sample and moving towards the first
466	*/	466	*/
467	out += s->block_len;	467	out += s->block_len;
468	in += s->block_len;	468	in += s->block_len;
469		469
@@ -1169,7 +1169,7 @@ static int wma_decode_block(WMADecodeContext *s)
1169	int nb_coefs[MAX_CHANNELS];	1169	int nb_coefs[MAX_CHANNELS];
1170	fixed32 mdct_norm;	1170	fixed32 mdct_norm;
1171		1171
1172	DEBUGF("***decode_block: %d of (%d samples) (%d)\n", s->block_num, s->frame_len, s->block_len);	1172	DEBUGF("***decode_block: %d of (%d samples) (%d)\n", s->block_num, s->frame_len, s->block_len);
1173		1173
1174	/* compute current block length */	1174	/* compute current block length */
1175	if (s->use_variable_block_len)	1175	if (s->use_variable_block_len)
@@ -1456,14 +1456,22 @@ static int wma_decode_block(WMADecodeContext *s)
1456	coefs1 = s->coefs1[ch];	1456	coefs1 = s->coefs1[ch];
1457	exponents = s->exponents[ch];	1457	exponents = s->exponents[ch];
1458	esize = s->exponents_bsize[ch];	1458	esize = s->exponents_bsize[ch];
1459	mult = fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch]));
1460	mult = fixmul64byfixed(mult, mdct_norm); //what the hell? This is actually fixed64*2^16!
1461	coefs = (*(s->coefs))[ch];	1459	coefs = (*(s->coefs))[ch];
1462		1460
1463	n=0;	1461	n=0;
		1462
		1463	/*
		1464	* Previously the IMDCT was run in 17.15 precision to avoid overflow. However rare files could
		1465	* overflow here as well, so switch to 17.15 during coefs calculation.
		1466	*/
		1467
1464		1468
1465	if (s->use_noise_coding)	1469	if (s->use_noise_coding)
1466	{	1470	{
		1471	/*TODO: mult should be converted to 32 bit to speed up noise coding*/
		1472
		1473	mult = fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch]));
		1474	mult = mult* mdct_norm; //what the hell? This is actually fixed64*2^16!
1467	mult1 = mult;	1475	mult1 = mult;
1468		1476
1469	/* very low freqs : noise */	1477	/* very low freqs : noise */
@@ -1565,29 +1573,23 @@ static int wma_decode_block(WMADecodeContext *s)
1565	}	1573	}
1566	else	1574	else
1567	{	1575	{
		1576	/*Noise coding not used, simply convert from exp to fixed representation*/
1568		1577
1569	/* XXX: optimize more */	1578
		1579	fixed32 mult3 = (fixed32)(fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch])));
		1580	mult3 = fixmul32(mult3, mdct_norm);
1570		1581
1571	n = nb_coefs[ch];	1582	n = nb_coefs[ch];
1572		1583
		1584	/* XXX: optimize more, unrolling this loop in asm might be a good idea */
		1585
1573	for(i = 0;i < n; ++i)	1586	for(i = 0;i < n; ++i)
1574	{	1587	{
1575	/*	1588	atemp = (coefs1[i] * mult3)>>1;
1576	* Previously the IMDCT was run in 17.15 precision to avoid overflow. However rare files could	1589	*coefs++=fixmul32(atemp,exponents[i<<bsize>>esize]);
1577	* overflow here as well, so switch to 17.15 now. As a bonus, this saves us a shift later on.	1590	}
1578	*/
1579
1580
1581	atemp = (fixed32)(coefs1[i]*mult>>17);
1582	//this "works" in the sense that the mdcts converge
1583	//atemp= ftofix32(coefs1[i] * fixtof64(exponents[i]) * fixtof64(mult>>16));
1584
1585	*coefs++=fixmul32(atemp,exponents[i<<bsize>>esize]);
1586
1587	}
1588	n = s->block_len - s->coefs_end[bsize];	1591	n = s->block_len - s->coefs_end[bsize];
1589	for(i = 0;i < n; ++i)	1592	memset(coefs, 0, n*sizeof(fixed32));
1590	*coefs++ = 0;
1591	}	1593	}
1592	}	1594	}
1593	}	1595	}