Move MDCT reconstruction window code over to new trig function. Improves accuracy significantly and slightly reduces code size. Codec SNR now appears to be limited by truncation to 16 bit. Comparison to MS decoder gives > 91 dB of agreement, and a lower RMS error versus the source wav than MS. Additionally, move one commonly accessed table into IRAM.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13813 a1c6a512-1295-4272-9138-f99709370657
author: Michael Giacomelli <giac2000@hotmail.com> 2007-07-08 05:16:24 +0000
committer: Michael Giacomelli <giac2000@hotmail.com> 2007-07-08 05:16:24 +0000
commit: f5114daa730c05c6967bb4d908c934d0e3a8f994 (patch)
tree: 441a631837abe3a603f82298e77968a861587ddd /apps/codecs/libwma/wmadeci.c
parent: 68d70b35d8b8208bf4894825e65bdb8f110c9150 (diff)
download: rockbox-f5114daa730c05c6967bb4d908c934d0e3a8f994.tar.gz
rockbox-f5114daa730c05c6967bb4d908c934d0e3a8f994.zip
1 files changed, 10 insertions, 41 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index 1857e6a70d..bb9b96abe0 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -114,7 +114,9 @@ fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64];        //the
 fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
 FFTComplex *exparray[5];                                    //these are the fft lookup tables
 uint16_t *revarray[5];
 FFTComplex  exptab0[512] IBSS_ATTR;//, exptab1[256], exptab2[128], exptab3[64], exptab4[32];    //folded these in!
 uint16_t revtab0[1024], revtab1[512], revtab2[256], revtab3[128], revtab4[64];
@@ -122,6 +124,7 @@ uint16_t *runtabarray[2], *levtabarray[2];
 uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336];                //these could be made smaller since only one can be 1336
+FFTComplex mdct_tmp[BLOCK_MAX_SIZE] IBSS_ATTR;                  /* temporary storage for imdct */
 //may also be too large by ~ 1KB each?
 static VLC_TYPE vlcbuf1[6144][2];
@@ -1080,6 +1083,7 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
    exparray[0] = exptab0; //exparray[1] = exptab1; exparray[2] = exptab2; exparray[3] = exptab3; exparray[4] = exptab4;
    revarray[0]=revtab0; revarray[1]=revtab1; revarray[2]=revtab2; revarray[3]=revtab3; revarray[4]=revtab4;
+        s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */
    for(i = 0; i < s->nb_block_sizes; ++i)
    {
        ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
@@ -1108,12 +1112,13 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
        //fixed32 n2 = itofix32(n<<1);        //2x the window length
        //alpha = fixdiv32(M_PI_F, n2);        //PI / (2x Window length) == PI<<(s->frame_len_bits - i+1)
-        //printf("two values of alpha %16.10lf %16.10lf\n", fixtof64(alpha), fixtof64(M_PI_F>>(s->frame_len_bits - i+1)));
-        alpha = M_PI_F>>(s->frame_len_bits - i+1);
+        //alpha = M_PI_F>>(s->frame_len_bits - i+1);
+        alpha = (1<<15)>>(s->frame_len_bits - i+1);     /* this calculates 0.5/(2*n) */
        for(j=0;j<n;++j)
        {
            fixed32 j2 = itofix32(j) + 0x8000;
-            window[j] = fixsin32(fixmul32(j2,alpha));        //alpha between 0 and pi/2
+             window[j] = fsincos(fixmul32(j2,alpha)<<16, 0);        //alpha between 0 and pi/2
        }
        //printf("created window\n");
@@ -1192,43 +1197,7 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
    return 0;
 }
-#if 0
-/* interpolate values for a bigger or smaller block. The block must
-   have multiple sizes */
-static void interpolate_array(fixed32 *scale, int old_size, int new_size)
-{
-    int i, j, jincr, k;
-    fixed32 v;
-    if (new_size > old_size)
-    {
-        jincr = new_size / old_size;
-        j = new_size;
-        for(i = old_size - 1; i >=0; --i)
-        {
-            v = scale[i];
-            k = jincr;
-            do
-            {
-                scale[--j] = v;
-            }
-            while (--k);
-        }
-    }
-    else if (new_size < old_size)
-    {
-        j = 0;
-        jincr = old_size / new_size;
-        for(i = 0; i < new_size; ++i)
-        {
-            scale[i] = scale[j];
-            j += jincr;
-        }
-    }
-}
-#endif
 /* compute x^-0.25 with an exponent and mantissa table. We use linear
   interpolation to reduce the mantissa table size at a small speed
   expense (linear interpolation approximately doubles the number of
@@ -1958,9 +1927,9 @@ static int wma_decode_frame(WMADecodeContext *s, int16_t *samples)
 }
 int wma_decode_superframe(WMADecodeContext* s,
-                                 void *data,
+                                 void *data,    /*output*/
                                 int *data_size,
-                                 uint8_t *buf,
+                                 uint8_t *buf,  /*input*/
                                 int buf_size)
 {
    //WMADecodeContext *s = avctx->priv_data;
author	Michael Giacomelli <giac2000@hotmail.com>	2007-07-08 05:16:24 +0000
committer	Michael Giacomelli <giac2000@hotmail.com>	2007-07-08 05:16:24 +0000
commit	f5114daa730c05c6967bb4d908c934d0e3a8f994 (patch)
tree	441a631837abe3a603f82298e77968a861587ddd /apps/codecs/libwma/wmadeci.c
parent	68d70b35d8b8208bf4894825e65bdb8f110c9150 (diff)
download	rockbox-f5114daa730c05c6967bb4d908c934d0e3a8f994.tar.gz rockbox-f5114daa730c05c6967bb4d908c934d0e3a8f994.zip

diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c index 1857e6a70d..bb9b96abe0 100644 --- a/apps/codecs/libwma/wmadeci.c +++ b/apps/codecs/libwma/wmadeci.c
@@ -114,7 +114,9 @@ fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64]; //the
114	fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];	114	fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
115		115
116	FFTComplex *exparray[5]; //these are the fft lookup tables	116	FFTComplex *exparray[5]; //these are the fft lookup tables
		117
117	uint16_t *revarray[5];	118	uint16_t *revarray[5];
		119
118	FFTComplex exptab0[512] IBSS_ATTR;//, exptab1[256], exptab2[128], exptab3[64], exptab4[32]; //folded these in!	120	FFTComplex exptab0[512] IBSS_ATTR;//, exptab1[256], exptab2[128], exptab3[64], exptab4[32]; //folded these in!
119	uint16_t revtab0[1024], revtab1[512], revtab2[256], revtab3[128], revtab4[64];	121	uint16_t revtab0[1024], revtab1[512], revtab2[256], revtab3[128], revtab4[64];
120		122
@@ -122,6 +124,7 @@ uint16_t *runtabarray[2], *levtabarray[2];
122		124
123	uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336	125	uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336
124		126
		127	FFTComplex mdct_tmp[BLOCK_MAX_SIZE] IBSS_ATTR; /* temporary storage for imdct */
125		128
126	//may also be too large by ~ 1KB each?	129	//may also be too large by ~ 1KB each?
127	static VLC_TYPE vlcbuf1[6144][2];	130	static VLC_TYPE vlcbuf1[6144][2];
@@ -1080,6 +1083,7 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
1080	exparray[0] = exptab0; //exparray[1] = exptab1; exparray[2] = exptab2; exparray[3] = exptab3; exparray[4] = exptab4;	1083	exparray[0] = exptab0; //exparray[1] = exptab1; exparray[2] = exptab2; exparray[3] = exptab3; exparray[4] = exptab4;
1081	revarray[0]=revtab0; revarray[1]=revtab1; revarray[2]=revtab2; revarray[3]=revtab3; revarray[4]=revtab4;	1084	revarray[0]=revtab0; revarray[1]=revtab1; revarray[2]=revtab2; revarray[3]=revtab3; revarray[4]=revtab4;
1082		1085
		1086	s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */
1083	for(i = 0; i < s->nb_block_sizes; ++i)	1087	for(i = 0; i < s->nb_block_sizes; ++i)
1084	{	1088	{
1085	ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);	1089	ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
@@ -1108,12 +1112,13 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
1108		1112
1109	//fixed32 n2 = itofix32(n<<1); //2x the window length	1113	//fixed32 n2 = itofix32(n<<1); //2x the window length
1110	//alpha = fixdiv32(M_PI_F, n2); //PI / (2x Window length) == PI<<(s->frame_len_bits - i+1)	1114	//alpha = fixdiv32(M_PI_F, n2); //PI / (2x Window length) == PI<<(s->frame_len_bits - i+1)
1111	//printf("two values of alpha %16.10lf %16.10lf\n", fixtof64(alpha), fixtof64(M_PI_F>>(s->frame_len_bits - i+1)));	1115
1112	alpha = M_PI_F>>(s->frame_len_bits - i+1);	1116	//alpha = M_PI_F>>(s->frame_len_bits - i+1);
		1117	alpha = (1<<15)>>(s->frame_len_bits - i+1); /* this calculates 0.5/(2*n) */
1113	for(j=0;j<n;++j)	1118	for(j=0;j<n;++j)
1114	{	1119	{
1115	fixed32 j2 = itofix32(j) + 0x8000;	1120	fixed32 j2 = itofix32(j) + 0x8000;
1116	window[j] = fixsin32(fixmul32(j2,alpha)); //alpha between 0 and pi/2	1121	window[j] = fsincos(fixmul32(j2,alpha)<<16, 0); //alpha between 0 and pi/2
1117		1122
1118	}	1123	}
1119	//printf("created window\n");	1124	//printf("created window\n");
@@ -1192,43 +1197,7 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
1192	return 0;	1197	return 0;
1193	}	1198	}
1194		1199
1195	#if 0
1196	/* interpolate values for a bigger or smaller block. The block must
1197	have multiple sizes */
1198	static void interpolate_array(fixed32 *scale, int old_size, int new_size)
1199	{
1200	int i, j, jincr, k;
1201	fixed32 v;
1202
1203		1200
1204
1205	if (new_size > old_size)
1206	{
1207	jincr = new_size / old_size;
1208	j = new_size;
1209	for(i = old_size - 1; i >=0; --i)
1210	{
1211	v = scale[i];
1212	k = jincr;
1213	do
1214	{
1215	scale[--j] = v;
1216	}
1217	while (--k);
1218	}
1219	}
1220	else if (new_size < old_size)
1221	{
1222	j = 0;
1223	jincr = old_size / new_size;
1224	for(i = 0; i < new_size; ++i)
1225	{
1226	scale[i] = scale[j];
1227	j += jincr;
1228	}
1229	}
1230	}
1231	#endif
1232	/* compute x^-0.25 with an exponent and mantissa table. We use linear	1201	/* compute x^-0.25 with an exponent and mantissa table. We use linear
1233	interpolation to reduce the mantissa table size at a small speed	1202	interpolation to reduce the mantissa table size at a small speed
1234	expense (linear interpolation approximately doubles the number of	1203	expense (linear interpolation approximately doubles the number of
@@ -1958,9 +1927,9 @@ static int wma_decode_frame(WMADecodeContext *s, int16_t *samples)
1958	}	1927	}
1959		1928
1960	int wma_decode_superframe(WMADecodeContext* s,	1929	int wma_decode_superframe(WMADecodeContext* s,
1961	void *data,	1930	void *data, /*output*/
1962	int *data_size,	1931	int *data_size,
1963	uint8_t *buf,	1932	uint8_t *buf, /*input*/
1964	int buf_size)	1933	int buf_size)
1965	{	1934	{
1966	//WMADecodeContext *s = avctx->priv_data;	1935	//WMADecodeContext *s = avctx->priv_data;