1 files changed, 33 insertions, 17 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index f018cfbc53..f296a516f8 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -118,7 +118,7 @@ FFTComplex *exparray[5];                                    //these are the fft
 uint16_t *revarray[5];
 FFTComplex  exptab0[512] IBSS_ATTR;//, exptab1[256], exptab2[128], exptab3[64], exptab4[32];    //folded these in!
-uint16_t revtab0[1024], revtab1[512], revtab2[256], revtab3[128], revtab4[64];
+uint16_t revtab0[1024];//, revtab1[512], revtab2[256], revtab3[128], revtab4[64];
 uint16_t *runtabarray[2], *levtabarray[2];                                        //these are VLC lookup tables
@@ -142,7 +142,7 @@ static VLC_TYPE vlcbuf3[1536][2] IBSS_ATTR;    //small so lets try iram
 */
 int fft_inits(FFTContext *s, int nbits, int inverse)
 {
-    int i, j, m, n;
+    int i, n;
    fixed32 c1, s1;
    int s2;
@@ -153,7 +153,9 @@ int fft_inits(FFTContext *s, int nbits, int inverse)
    //s->exptab = av_malloc((n >> 1) * sizeof(FFTComplex));
    //if (!s->exptab)
    //    goto fail;
-    s->revtab = revarray[10-nbits];
+    //s->revtab = revarray[10-nbits];
    //s->revtab = av_malloc(n * sizeof(uint16_t));
    //if (!s->revtab)
    //    goto fail;
@@ -196,7 +198,7 @@ int fft_inits(FFTContext *s, int nbits, int inverse)
    /* compute bit reverse table */
+/*
    for(i=0;i<n;i++)
    {
        m=0;
@@ -207,7 +209,7 @@ int fft_inits(FFTContext *s, int nbits, int inverse)
        }
        s->revtab[i]=m;
-    }
+    } */
    return 0;
 //fail:
 //   av_freep(&s->revtab);
@@ -586,6 +588,7 @@ int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
  }
    if (fft_inits(&s->fft, s->nbits - 2, inverse) < 0)
        goto fail;
    return 0;
 fail:
 //    av_freep(&s->tcos);
@@ -605,11 +608,11 @@ void ff_imdct_calc(MDCTContext *s,
                   FFTComplex *tmp)
 {
    int k, n8, n4, n2, n, j,scale;
-    const uint16_t *revtab = s->fft.revtab;
    const fixed32 *tcos = s->tcos;
    const fixed32 *tsin = s->tsin;
    const fixed32 *in1, *in2;
    FFTComplex *z = (FFTComplex *)tmp;
+    int revtabshift = 12 - s->nbits;
    n = 1 << s->nbits;
@@ -624,7 +627,7 @@ void ff_imdct_calc(MDCTContext *s,
    for(k = 0; k < n4; k++)
    {
-        j=revtab[k];
+        j=revtab0[k<<revtabshift];
        CMUL(&z[j].re, &z[j].im, *in2, *in1, tcos[k], tsin[k]);
        in1 += 2;
        in2 -= 2;
@@ -680,14 +683,14 @@ void ff_mdct_end(MDCTContext *s)
 /*
 * Helper functions for wma_window.
- * TODO:  Optimize these to work with 1.31 format trig functions
+ *
- *        as was done for the MDCT rotation code
+ *
 */
-static void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, const fixed32 *src2, int src3, int len, int step){
+static void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
    int i;
    for(i=0; i<len; i++)
-        dst[i*step] = fixmul32b(src0[i], src1[i]) + src2[i] + src3;
+        dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
 }
 static void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
@@ -713,16 +716,14 @@ static void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32
         block_len = s->block_len;
         bsize = s->frame_len_bits - s->block_len_bits;
-         vector_fmul_add_add(out, in, s->windows[bsize],
+         vector_fmul_add_add(out, in, s->windows[bsize], block_len);
-                                    out, 0, block_len, 1);
     } else {
         block_len = 1 << s->prev_block_len_bits;
         n = (s->block_len - block_len) / 2;
         bsize = s->frame_len_bits - s->prev_block_len_bits;
-         vector_fmul_add_add(out+n, in+n, s->windows[bsize],
+         vector_fmul_add_add(out+n, in+n, s->windows[bsize],  block_len);
-                                    out+n, 0, block_len, 1);
         memcpy(out+n+block_len, in+n+block_len, n*sizeof(fixed32));
     }
@@ -795,7 +796,7 @@ static void init_coef_vlc(VLC *vlc,
 int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
 {
    //WMADecodeContext *s = avctx->priv_data;
-    int i, flags1, flags2;
+    int i, m, j, flags1, flags2;
    fixed32 *window;
    uint8_t *extradata;
    fixed64 bps1;
@@ -1068,11 +1069,13 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
    }
    /* init MDCT */
+    /*TODO:  figure out how to fold this up into one array*/
    tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4;
    tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4;
+        /*these are folded up now*/
    exparray[0] = exptab0; //exparray[1] = exptab1; exparray[2] = exptab2; exparray[3] = exptab3; exparray[4] = exptab4;
-    revarray[0]=revtab0; revarray[1]=revtab1; revarray[2]=revtab2; revarray[3]=revtab3; revarray[4]=revtab4;
+    revarray[0]=revtab0; //revarray[1]=revtab1; revarray[2]=revtab2; revarray[3]=revtab3; revarray[4]=revtab4;
        s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */
    for(i = 0; i < s->nb_block_sizes; ++i)
@@ -1080,6 +1083,19 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
        ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
    }
+    /* init the MDCT bit reverse table here rather then in fft_init */
+        for(i=0;i<1024;i++)             /*hard coded to a 2048 bit rotation*/
+        {                                       /*smaller sizes can reuse the largest*/
+                 m=0;
+                 for(j=0;j<10;j++)
+                 {
+                     m |= ((i >> j) & 1) << (10-j-1);
+                 }
+         revtab0[i]=m;
+       }
    /*ffmpeg uses malloc to only allocate as many window sizes as needed.  However, we're really only interested in the worst case memory usage.
    * In the worst case you can have 5 window sizes, 128 doubling up 2048
    * Smaller windows are handled differently.

diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index f018cfbc53..f296a516f8 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -118,7 +118,7 @@ FFTComplex *exparray[5]; //these are the fft
118	uint16_t *revarray[5];	118	uint16_t *revarray[5];
119		119
120	FFTComplex exptab0[512] IBSS_ATTR;//, exptab1[256], exptab2[128], exptab3[64], exptab4[32]; //folded these in!	120	FFTComplex exptab0[512] IBSS_ATTR;//, exptab1[256], exptab2[128], exptab3[64], exptab4[32]; //folded these in!
121	uint16_t revtab0[1024], revtab1[512], revtab2[256], revtab3[128], revtab4[64];	121	uint16_t revtab0[1024];//, revtab1[512], revtab2[256], revtab3[128], revtab4[64];
122		122
123	uint16_t *runtabarray[2], *levtabarray[2]; //these are VLC lookup tables	123	uint16_t *runtabarray[2], *levtabarray[2]; //these are VLC lookup tables
124		124
@@ -142,7 +142,7 @@ static VLC_TYPE vlcbuf3[1536][2] IBSS_ATTR; //small so lets try iram
142	*/	142	*/
143	int fft_inits(FFTContext *s, int nbits, int inverse)	143	int fft_inits(FFTContext *s, int nbits, int inverse)
144	{	144	{
145	int i, j, m, n;	145	int i, n;
146	fixed32 c1, s1;	146	fixed32 c1, s1;
147	int s2;	147	int s2;
148		148
@@ -153,7 +153,9 @@ int fft_inits(FFTContext *s, int nbits, int inverse)
153	//s->exptab = av_malloc((n >> 1) * sizeof(FFTComplex));	153	//s->exptab = av_malloc((n >> 1) * sizeof(FFTComplex));
154	//if (!s->exptab)	154	//if (!s->exptab)
155	// goto fail;	155	// goto fail;
156	s->revtab = revarray[10-nbits];	156
		157	//s->revtab = revarray[10-nbits];
		158
157	//s->revtab = av_malloc(n * sizeof(uint16_t));	159	//s->revtab = av_malloc(n * sizeof(uint16_t));
158	//if (!s->revtab)	160	//if (!s->revtab)
159	// goto fail;	161	// goto fail;
@@ -196,7 +198,7 @@ int fft_inits(FFTContext *s, int nbits, int inverse)
196		198
197		199
198	/* compute bit reverse table */	200	/* compute bit reverse table */
199		201	/*
200	for(i=0;i<n;i++)	202	for(i=0;i<n;i++)
201	{	203	{
202	m=0;	204	m=0;
@@ -207,7 +209,7 @@ int fft_inits(FFTContext *s, int nbits, int inverse)
207	}	209	}
208		210
209	s->revtab[i]=m;	211	s->revtab[i]=m;
210	}	212	} */
211	return 0;	213	return 0;
212	//fail:	214	//fail:
213	// av_freep(&s->revtab);	215	// av_freep(&s->revtab);
@@ -586,6 +588,7 @@ int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
586	}	588	}
587	if (fft_inits(&s->fft, s->nbits - 2, inverse) < 0)	589	if (fft_inits(&s->fft, s->nbits - 2, inverse) < 0)
588	goto fail;	590	goto fail;
		591
589	return 0;	592	return 0;
590	fail:	593	fail:
591	// av_freep(&s->tcos);	594	// av_freep(&s->tcos);
@@ -605,11 +608,11 @@ void ff_imdct_calc(MDCTContext *s,
605	FFTComplex *tmp)	608	FFTComplex *tmp)
606	{	609	{
607	int k, n8, n4, n2, n, j,scale;	610	int k, n8, n4, n2, n, j,scale;
608	const uint16_t *revtab = s->fft.revtab;
609	const fixed32 *tcos = s->tcos;	611	const fixed32 *tcos = s->tcos;
610	const fixed32 *tsin = s->tsin;	612	const fixed32 *tsin = s->tsin;
611	const fixed32 *in1, *in2;	613	const fixed32 *in1, *in2;
612	FFTComplex *z = (FFTComplex *)tmp;	614	FFTComplex *z = (FFTComplex *)tmp;
		615	int revtabshift = 12 - s->nbits;
613		616
614	n = 1 << s->nbits;	617	n = 1 << s->nbits;
615		618
@@ -624,7 +627,7 @@ void ff_imdct_calc(MDCTContext *s,
624		627
625	for(k = 0; k < n4; k++)	628	for(k = 0; k < n4; k++)
626	{	629	{
627	j=revtab[k];	630	j=revtab0[k<<revtabshift];
628	CMUL(&z[j].re, &z[j].im, *in2, *in1, tcos[k], tsin[k]);	631	CMUL(&z[j].re, &z[j].im, *in2, *in1, tcos[k], tsin[k]);
629	in1 += 2;	632	in1 += 2;
630	in2 -= 2;	633	in2 -= 2;
@@ -680,14 +683,14 @@ void ff_mdct_end(MDCTContext *s)
680		683
681	/*	684	/*
682	* Helper functions for wma_window.	685	* Helper functions for wma_window.
683	* TODO: Optimize these to work with 1.31 format trig functions	686	*
684	* as was done for the MDCT rotation code	687	*
685	*/	688	*/
686		689
687	static void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, const fixed32 *src2, int src3, int len, int step){	690	static void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
688	int i;	691	int i;
689	for(i=0; i<len; i++)	692	for(i=0; i<len; i++)
690	dst[i*step] = fixmul32b(src0[i], src1[i]) + src2[i] + src3;	693	dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
691	}	694	}
692		695
693	static void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){	696	static void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
@@ -713,16 +716,14 @@ static void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32
713	block_len = s->block_len;	716	block_len = s->block_len;
714	bsize = s->frame_len_bits - s->block_len_bits;	717	bsize = s->frame_len_bits - s->block_len_bits;
715		718
716	vector_fmul_add_add(out, in, s->windows[bsize],	719	vector_fmul_add_add(out, in, s->windows[bsize], block_len);
717	out, 0, block_len, 1);
718		720
719	} else {	721	} else {
720	block_len = 1 << s->prev_block_len_bits;	722	block_len = 1 << s->prev_block_len_bits;
721	n = (s->block_len - block_len) / 2;	723	n = (s->block_len - block_len) / 2;
722	bsize = s->frame_len_bits - s->prev_block_len_bits;	724	bsize = s->frame_len_bits - s->prev_block_len_bits;
723		725
724	vector_fmul_add_add(out+n, in+n, s->windows[bsize],	726	vector_fmul_add_add(out+n, in+n, s->windows[bsize], block_len);
725	out+n, 0, block_len, 1);
726		727
727	memcpy(out+n+block_len, in+n+block_len, n*sizeof(fixed32));	728	memcpy(out+n+block_len, in+n+block_len, n*sizeof(fixed32));
728	}	729	}
@@ -795,7 +796,7 @@ static void init_coef_vlc(VLC *vlc,
795	int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)	796	int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
796	{	797	{
797	//WMADecodeContext *s = avctx->priv_data;	798	//WMADecodeContext *s = avctx->priv_data;
798	int i, flags1, flags2;	799	int i, m, j, flags1, flags2;
799	fixed32 *window;	800	fixed32 *window;
800	uint8_t *extradata;	801	uint8_t *extradata;
801	fixed64 bps1;	802	fixed64 bps1;
@@ -1068,11 +1069,13 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
1068	}	1069	}
1069		1070
1070	/* init MDCT */	1071	/* init MDCT */
		1072	/*TODO: figure out how to fold this up into one array*/
1071	tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4;	1073	tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4;
1072	tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4;	1074	tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4;
1073		1075
		1076	/*these are folded up now*/
1074	exparray[0] = exptab0; //exparray[1] = exptab1; exparray[2] = exptab2; exparray[3] = exptab3; exparray[4] = exptab4;	1077	exparray[0] = exptab0; //exparray[1] = exptab1; exparray[2] = exptab2; exparray[3] = exptab3; exparray[4] = exptab4;
1075	revarray[0]=revtab0; revarray[1]=revtab1; revarray[2]=revtab2; revarray[3]=revtab3; revarray[4]=revtab4;	1078	revarray[0]=revtab0; //revarray[1]=revtab1; revarray[2]=revtab2; revarray[3]=revtab3; revarray[4]=revtab4;
1076		1079
1077	s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */	1080	s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */
1078	for(i = 0; i < s->nb_block_sizes; ++i)	1081	for(i = 0; i < s->nb_block_sizes; ++i)
@@ -1080,6 +1083,19 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
1080	ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);	1083	ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
1081	}	1084	}
1082		1085
		1086	/* init the MDCT bit reverse table here rather then in fft_init */
		1087
		1088	for(i=0;i<1024;i++) /*hard coded to a 2048 bit rotation*/
		1089	{ /*smaller sizes can reuse the largest*/
		1090	m=0;
		1091	for(j=0;j<10;j++)
		1092	{
		1093	m |= ((i >> j) & 1) << (10-j-1);
		1094	}
		1095
		1096	revtab0[i]=m;
		1097	}
		1098
1083	/*ffmpeg uses malloc to only allocate as many window sizes as needed. However, we're really only interested in the worst case memory usage.	1099	/*ffmpeg uses malloc to only allocate as many window sizes as needed. However, we're really only interested in the worst case memory usage.
1084	* In the worst case you can have 5 window sizes, 128 doubling up 2048	1100	* In the worst case you can have 5 window sizes, 128 doubling up 2048
1085	* Smaller windows are handled differently.	1101	* Smaller windows are handled differently.