summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apps/codecs/libwma/SOURCES2
-rw-r--r--apps/codecs/libwma/fft.c152
-rw-r--r--apps/codecs/libwma/fft.h22
-rw-r--r--apps/codecs/libwma/mdct.c164
-rw-r--r--apps/codecs/libwma/mdct.h23
-rw-r--r--apps/codecs/libwma/wmadeci.c331
-rw-r--r--apps/codecs/libwma/wmafixed.h66
7 files changed, 432 insertions, 328 deletions
diff --git a/apps/codecs/libwma/SOURCES b/apps/codecs/libwma/SOURCES
index 967577d0db..b9d4cc1882 100644
--- a/apps/codecs/libwma/SOURCES
+++ b/apps/codecs/libwma/SOURCES
@@ -1,3 +1,5 @@
1wmadeci.c 1wmadeci.c
2wmafixed.c 2wmafixed.c
3bitstream.c 3bitstream.c
4fft.c
5mdct.c
diff --git a/apps/codecs/libwma/fft.c b/apps/codecs/libwma/fft.c
new file mode 100644
index 0000000000..3def74d92c
--- /dev/null
+++ b/apps/codecs/libwma/fft.c
@@ -0,0 +1,152 @@
1/*
2 * WMA compatible decoder
3 * Copyright (c) 2002 The FFmpeg Project.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include "wmadec.h"
21#include "wmafixed.h"
22
23FFTComplex exptab0[512] IBSS_ATTR;
24
/*
 * Butterfly op on two complex values.
 *
 * Reads (pre1, pim1) and (qre1, qim1), then stores their sum into
 * (pre, pim) and their difference into (qre, qim).  All four inputs are
 * copied into temporaries before any output is written, so the output
 * lvalues may be the same objects as the input expressions.
 *
 * Wrapped in do { } while (0) so that "BF(...);" is exactly one statement
 * and stays correct inside an unbraced if/else.
 */
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
do {\
    fixed32 ax, ay, bx, by;\
    bx = (pre1);\
    by = (pim1);\
    ax = (qre1);\
    ay = (qim1);\
    (pre) = (bx + ax);\
    (pim) = (by + ay);\
    (qre) = (bx - ax);\
    (qim) = (by - ay);\
} while (0)
38
39
40int fft_calc_unscaled(FFTContext *s, FFTComplex *z)
41{
42 int ln = s->nbits;
43 int j, np, np2;
44 int nblocks, nloops;
45 register FFTComplex *p, *q;
46 // FFTComplex *exptab = s->exptab;
47 int l;
48 fixed32 tmp_re, tmp_im;
49 int tabshift = 10-ln;
50
51 np = 1 << ln;
52
53
54 /* pass 0 */
55
56 p=&z[0];
57 j=(np >> 1);
58 do
59 {
60 BF(p[0].re, p[0].im, p[1].re, p[1].im,
61 p[0].re, p[0].im, p[1].re, p[1].im);
62 p+=2;
63 }
64 while (--j != 0);
65
66 /* pass 1 */
67
68
69 p=&z[0];
70 j=np >> 2;
71 if (s->inverse)
72 {
73 do
74 {
75 BF(p[0].re, p[0].im, p[2].re, p[2].im,
76 p[0].re, p[0].im, p[2].re, p[2].im);
77 BF(p[1].re, p[1].im, p[3].re, p[3].im,
78 p[1].re, p[1].im, -p[3].im, p[3].re);
79 p+=4;
80 }
81 while (--j != 0);
82 }
83 else
84 {
85 do
86 {
87 BF(p[0].re, p[0].im, p[2].re, p[2].im,
88 p[0].re, p[0].im, p[2].re, p[2].im);
89 BF(p[1].re, p[1].im, p[3].re, p[3].im,
90 p[1].re, p[1].im, p[3].im, -p[3].re);
91 p+=4;
92 }
93 while (--j != 0);
94 }
95 /* pass 2 .. ln-1 */
96
97 nblocks = np >> 3;
98 nloops = 1 << 2;
99 np2 = np >> 1;
100 do
101 {
102 p = z;
103 q = z + nloops;
104 for (j = 0; j < nblocks; ++j)
105 {
106 BF(p->re, p->im, q->re, q->im,
107 p->re, p->im, q->re, q->im);
108
109 p++;
110 q++;
111 for(l = nblocks; l < np2; l += nblocks)
112 {
113 CMUL(&tmp_re, &tmp_im, exptab0[(l<<tabshift)].re, exptab0[(l<<tabshift)].im, q->re, q->im);
114 //CMUL(&tmp_re, &tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
115 BF(p->re, p->im, q->re, q->im,
116 p->re, p->im, tmp_re, tmp_im);
117 p++;
118 q++;
119 }
120
121 p += nloops;
122 q += nloops;
123 }
124 nblocks = nblocks >> 1;
125 nloops = nloops << 1;
126 }
127 while (nblocks != 0);
128 return 0;
129}
130
131int fft_init_global()
132{
133 int i, n;
134 fixed32 c1, s1, s2;
135
136 n=1<<10;
137 s2 = 1 ? 1 : -1;
138 for(i=0;i<(n/2);++i)
139 {
140 fixed32 ifix = itofix32(i);
141 fixed32 nfix = itofix32(n);
142 fixed32 res = fixdiv32(ifix,nfix);
143
144 s1 = fsincos(res<<16, &c1);
145
146 exptab0[i].re = c1;
147 exptab0[i].im = s1*s2;
148 }
149
150 return 0;
151}
152
diff --git a/apps/codecs/libwma/fft.h b/apps/codecs/libwma/fft.h
new file mode 100644
index 0000000000..dd962cc171
--- /dev/null
+++ b/apps/codecs/libwma/fft.h
@@ -0,0 +1,22 @@
1/*
2 * WMA compatible decoder
3 * Copyright (c) 2002 The FFmpeg Project.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20int fft_calc_unscaled(FFTContext *s, FFTComplex *z);
21int fft_init_global();
22
diff --git a/apps/codecs/libwma/mdct.c b/apps/codecs/libwma/mdct.c
new file mode 100644
index 0000000000..00a160ecef
--- /dev/null
+++ b/apps/codecs/libwma/mdct.c
@@ -0,0 +1,164 @@
1/*
2 * WMA compatible decoder
3 * Copyright (c) 2002 The FFmpeg Project.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <codecs/lib/codeclib.h>
21#include "wmadec.h"
22#include "wmafixed.h"
23#include "fft.h"
24
25fixed32 *tcosarray[5], *tsinarray[5];
26fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64]; //these are the sin and cos rotations used by the MDCT
27fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
28
29uint16_t revtab0[1024];
30
31/**
32 * init MDCT or IMDCT computation.
33 */
34int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
35{
36 int n, n4, i;
37 // fixed32 alpha;
38
39
40 memset(s, 0, sizeof(*s));
41 n = 1 << nbits; //nbits ranges from 12 to 8 inclusive
42 s->nbits = nbits;
43 s->n = n;
44 n4 = n >> 2;
45 s->tcos = tcosarray[12-nbits];
46 s->tsin = tsinarray[12-nbits];
47 for(i=0;i<n4;i++)
48 {
49 //fixed32 pi2 = fixmul32(0x20000, M_PI_F);
50 fixed32 ip = itofix32(i) + 0x2000;
51 ip = ip >> nbits;
52 //ip = fixdiv32(ip,itofix32(n)); // PJJ optimize
53 //alpha = fixmul32(TWO_M_PI_F, ip);
54 //s->tcos[i] = -fixcos32(alpha); //alpha between 0 and pi/2
55 //s->tsin[i] = -fixsin32(alpha);
56
57 s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i])); //I can't remember why this works, but it seems to agree for ~24 bits, maybe more!
58 s->tcos[i] *=-1;
59 }
60 (&s->fft)->nbits = nbits-2;
61
62 (&s->fft)->inverse = inverse;
63
64 return 0;
65
66}
67
68/**
69 * Compute inverse MDCT of size N = 2^nbits
70 * @param output N samples
71 * @param input N/2 samples
72 * @param tmp N/2 samples
73 */
74void ff_imdct_calc(MDCTContext *s,
75 fixed32 *output,
76 fixed32 *input)
77{
78 int k, n8, n4, n2, n, j,scale;
79 const fixed32 *tcos = s->tcos;
80 const fixed32 *tsin = s->tsin;
81 const fixed32 *in1, *in2;
82 FFTComplex *z1 = (FFTComplex *)output;
83 FFTComplex *z2 = (FFTComplex *)input;
84 int revtabshift = 12 - s->nbits;
85
86 n = 1 << s->nbits;
87
88 n2 = n >> 1;
89 n4 = n >> 2;
90 n8 = n >> 3;
91
92
93 /* pre rotation */
94 in1 = input;
95 in2 = input + n2 - 1;
96
97 for(k = 0; k < n4; k++)
98 {
99 j=revtab0[k<<revtabshift];
100 CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]);
101 in1 += 2;
102 in2 -= 2;
103 }
104
105 scale = fft_calc_unscaled(&s->fft, z1);
106
107 /* post rotation + reordering */
108
109 for(k = 0; k < n4; k++)
110 {
111 CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]);
112 }
113
114 for(k = 0; k < n8; k++)
115 {
116 fixed32 r1,r2,r3,r4,r1n,r2n,r3n;
117
118 r1 = z2[n8 + k].im;
119 r1n = r1 * -1;
120 r2 = z2[n8-1-k].re;
121 r2n = r2 * -1;
122 r3 = z2[k+n8].re;
123 r3n = r3 * -1;
124 r4 = z2[n8-k-1].im;
125
126 output[2*k] = r1n;
127 output[n2-1-2*k] = r1;
128
129 output[2*k+1] = r2;
130 output[n2-1-2*k-1] = r2n;
131
132 output[n2 + 2*k]= r3n;
133 output[n-1- 2*k]= r3n;
134
135 output[n2 + 2*k+1]= r4;
136 output[n-2 - 2 * k] = r4;
137 }
138}
139
140int mdct_init_global()
141{
142 int i,j,m;
143 /* init MDCT */
144 /*TODO: figure out how to fold this up into one array*/
145 tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4;
146 tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4;
147 /* init the MDCT bit reverse table here rather then in fft_init */
148
149 for(i=0;i<1024;i++) /*hard coded to a 2048 bit rotation*/
150 { /*smaller sizes can reuse the largest*/
151 m=0;
152 for(j=0;j<10;j++)
153 {
154 m |= ((i >> j) & 1) << (10-j-1);
155 }
156
157 revtab0[i]=m;
158 }
159
160 fft_init_global();
161
162 return 0;
163}
164
diff --git a/apps/codecs/libwma/mdct.h b/apps/codecs/libwma/mdct.h
new file mode 100644
index 0000000000..1c050204bd
--- /dev/null
+++ b/apps/codecs/libwma/mdct.h
@@ -0,0 +1,23 @@
1/*
2 * WMA compatible decoder
3 * Copyright (c) 2002 The FFmpeg Project.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
21void ff_imdct_calc(MDCTContext *s, fixed32 *output, fixed32 *input);
22int mdct_init_global();
23
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index 99a098ea1f..bc3c11f9f2 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -28,6 +28,7 @@
28#include "wmadec.h" 28#include "wmadec.h"
29#include "wmafixed.h" 29#include "wmafixed.h"
30#include "bitstream.h" 30#include "bitstream.h"
31#include "mdct.h"
31 32
32 33
33#define VLCBITS 7 /*7 is the lowest without glitching*/ 34#define VLCBITS 7 /*7 is the lowest without glitching*/
@@ -40,71 +41,6 @@
40#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS) 41#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS)
41 42
42 43
43#ifdef CPU_ARM
44static inline
45void CMUL(fixed32 *x, fixed32 *y,
46 fixed32 a, fixed32 b,
47 fixed32 t, fixed32 v)
48{
49 /* This version loses one bit of precision. Could be solved at the cost
50 * of 2 extra cycles if it becomes an issue. */
51 int x1, y1, l;
52 asm(
53 "smull %[l], %[y1], %[b], %[t] \n"
54 "smlal %[l], %[y1], %[a], %[v] \n"
55 "rsb %[b], %[b], #0 \n"
56 "smull %[l], %[x1], %[a], %[t] \n"
57 "smlal %[l], %[x1], %[b], %[v] \n"
58 : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b)
59 : [a] "r" (a), [t] "r" (t), [v] "r" (v)
60 : "cc"
61 );
62 *x = x1 << 1;
63 *y = y1 << 1;
64}
65#elif defined CPU_COLDFIRE
66static inline
67void CMUL(fixed32 *x, fixed32 *y,
68 fixed32 a, fixed32 b,
69 fixed32 t, fixed32 v)
70{
71 asm volatile ("mac.l %[a], %[t], %%acc0;"
72 "msac.l %[b], %[v], %%acc0;"
73 "mac.l %[b], %[t], %%acc1;"
74 "mac.l %[a], %[v], %%acc1;"
75 "movclr.l %%acc0, %[a];"
76 "move.l %[a], (%[x]);"
77 "movclr.l %%acc1, %[a];"
78 "move.l %[a], (%[y]);"
79 : [a] "+&r" (a)
80 : [x] "a" (x), [y] "a" (y),
81 [b] "r" (b), [t] "r" (t), [v] "r" (v)
82 : "cc", "memory");
83}
84#else
85// PJJ : reinstate macro
86void CMUL(fixed32 *pre,
87 fixed32 *pim,
88 fixed32 are,
89 fixed32 aim,
90 fixed32 bre,
91 fixed32 bim)
92{
93 //int64_t x,y;
94 fixed32 _aref = are;
95 fixed32 _aimf = aim;
96 fixed32 _bref = bre;
97 fixed32 _bimf = bim;
98 fixed32 _r1 = fixmul32b(_bref, _aref);
99 fixed32 _r2 = fixmul32b(_bimf, _aimf);
100 fixed32 _r3 = fixmul32b(_bref, _aimf);
101 fixed32 _r4 = fixmul32b(_bimf, _aref);
102 *pre = _r1 - _r2;
103 *pim = _r3 + _r4;
104
105}
106#endif
107
108typedef struct CoefVLCTable 44typedef struct CoefVLCTable
109{ 45{
110 int n; /* total number of codes */ 46 int n; /* total number of codes */
@@ -121,13 +57,6 @@ fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR;
121//static variables that replace malloced stuff 57//static variables that replace malloced stuff
122fixed32 stat0[2048], stat1[1024], stat2[512], stat3[256], stat4[128]; //these are the MDCT reconstruction windows 58fixed32 stat0[2048], stat1[1024], stat2[512], stat3[256], stat4[128]; //these are the MDCT reconstruction windows
123 59
124fixed32 *tcosarray[5], *tsinarray[5];
125fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64]; //these are the sin and cos rotations used by the MDCT
126fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
127
128FFTComplex exptab0[512] IBSS_ATTR;
129uint16_t revtab0[1024];
130
131uint16_t *runtabarray[2], *levtabarray[2]; //these are VLC lookup tables 60uint16_t *runtabarray[2], *levtabarray[2]; //these are VLC lookup tables
132 61
133uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336 62uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336
@@ -146,225 +75,6 @@ VLC_TYPE vlcbuf4[540][2];
146#include "wmadata.h" // PJJ 75#include "wmadata.h" // PJJ
147 76
148 77
149/* butter fly op */
150#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
151{\
152 fixed32 ax, ay, bx, by;\
153 bx=pre1;\
154 by=pim1;\
155 ax=qre1;\
156 ay=qim1;\
157 pre = (bx + ax);\
158 pim = (by + ay);\
159 qre = (bx - ax);\
160 qim = (by - ay);\
161}
162
163
164int fft_calc_unscaled(FFTContext *s, FFTComplex *z)
165{
166 int ln = s->nbits;
167 int j, np, np2;
168 int nblocks, nloops;
169 register FFTComplex *p, *q;
170 // FFTComplex *exptab = s->exptab;
171 int l;
172 fixed32 tmp_re, tmp_im;
173 int tabshift = 10-ln;
174
175 np = 1 << ln;
176
177
178 /* pass 0 */
179
180 p=&z[0];
181 j=(np >> 1);
182 do
183 {
184 BF(p[0].re, p[0].im, p[1].re, p[1].im,
185 p[0].re, p[0].im, p[1].re, p[1].im);
186 p+=2;
187 }
188 while (--j != 0);
189
190 /* pass 1 */
191
192
193 p=&z[0];
194 j=np >> 2;
195 if (s->inverse)
196 {
197 do
198 {
199 BF(p[0].re, p[0].im, p[2].re, p[2].im,
200 p[0].re, p[0].im, p[2].re, p[2].im);
201 BF(p[1].re, p[1].im, p[3].re, p[3].im,
202 p[1].re, p[1].im, -p[3].im, p[3].re);
203 p+=4;
204 }
205 while (--j != 0);
206 }
207 else
208 {
209 do
210 {
211 BF(p[0].re, p[0].im, p[2].re, p[2].im,
212 p[0].re, p[0].im, p[2].re, p[2].im);
213 BF(p[1].re, p[1].im, p[3].re, p[3].im,
214 p[1].re, p[1].im, p[3].im, -p[3].re);
215 p+=4;
216 }
217 while (--j != 0);
218 }
219 /* pass 2 .. ln-1 */
220
221 nblocks = np >> 3;
222 nloops = 1 << 2;
223 np2 = np >> 1;
224 do
225 {
226 p = z;
227 q = z + nloops;
228 for (j = 0; j < nblocks; ++j)
229 {
230 BF(p->re, p->im, q->re, q->im,
231 p->re, p->im, q->re, q->im);
232
233 p++;
234 q++;
235 for(l = nblocks; l < np2; l += nblocks)
236 {
237 CMUL(&tmp_re, &tmp_im, exptab0[(l<<tabshift)].re, exptab0[(l<<tabshift)].im, q->re, q->im);
238 //CMUL(&tmp_re, &tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
239 BF(p->re, p->im, q->re, q->im,
240 p->re, p->im, tmp_re, tmp_im);
241 p++;
242 q++;
243 }
244
245 p += nloops;
246 q += nloops;
247 }
248 nblocks = nblocks >> 1;
249 nloops = nloops << 1;
250 }
251 while (nblocks != 0);
252 return 0;
253}
254
255/**
256 * init MDCT or IMDCT computation.
257 */
258int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
259{
260 int n, n4, i;
261 // fixed32 alpha;
262
263
264 memset(s, 0, sizeof(*s));
265 n = 1 << nbits; //nbits ranges from 12 to 8 inclusive
266 s->nbits = nbits;
267 s->n = n;
268 n4 = n >> 2;
269 s->tcos = tcosarray[12-nbits];
270 s->tsin = tsinarray[12-nbits];
271 for(i=0;i<n4;i++)
272 {
273 //fixed32 pi2 = fixmul32(0x20000, M_PI_F);
274 fixed32 ip = itofix32(i) + 0x2000;
275 ip = ip >> nbits;
276 //ip = fixdiv32(ip,itofix32(n)); // PJJ optimize
277 //alpha = fixmul32(TWO_M_PI_F, ip);
278 //s->tcos[i] = -fixcos32(alpha); //alpha between 0 and pi/2
279 //s->tsin[i] = -fixsin32(alpha);
280
281 s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i])); //I can't remember why this works, but it seems to agree for ~24 bits, maybe more!
282 s->tcos[i] *=-1;
283 }
284 (&s->fft)->nbits = nbits-2;
285
286 (&s->fft)->inverse = inverse;
287
288 return 0;
289
290}
291
292/**
293 * Compute inverse MDCT of size N = 2^nbits
294 * @param output N samples
295 * @param input N/2 samples
296 * @param tmp N/2 samples
297 */
298void ff_imdct_calc(MDCTContext *s,
299 fixed32 *output,
300 fixed32 *input)
301{
302 int k, n8, n4, n2, n, j,scale;
303 const fixed32 *tcos = s->tcos;
304 const fixed32 *tsin = s->tsin;
305 const fixed32 *in1, *in2;
306 FFTComplex *z1 = (FFTComplex *)output;
307 FFTComplex *z2 = (FFTComplex *)input;
308 int revtabshift = 12 - s->nbits;
309
310 n = 1 << s->nbits;
311
312 n2 = n >> 1;
313 n4 = n >> 2;
314 n8 = n >> 3;
315
316
317 /* pre rotation */
318 in1 = input;
319 in2 = input + n2 - 1;
320
321 for(k = 0; k < n4; k++)
322 {
323 j=revtab0[k<<revtabshift];
324 CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]);
325 in1 += 2;
326 in2 -= 2;
327 }
328
329 scale = fft_calc_unscaled(&s->fft, z1);
330
331 /* post rotation + reordering */
332
333 for(k = 0; k < n4; k++)
334 {
335 CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]);
336 }
337
338 for(k = 0; k < n8; k++)
339 {
340 fixed32 r1,r2,r3,r4,r1n,r2n,r3n;
341
342 r1 = z2[n8 + k].im;
343 r1n = r1 * -1;
344 r2 = z2[n8-1-k].re;
345 r2n = r2 * -1;
346 r3 = z2[k+n8].re;
347 r3n = r3 * -1;
348 r4 = z2[n8-k-1].im;
349
350 output[2*k] = r1n;
351 output[n2-1-2*k] = r1;
352
353 output[2*k+1] = r2;
354 output[n2-1-2*k-1] = r2n;
355
356 output[n2 + 2*k]= r3n;
357 output[n-1- 2*k]= r3n;
358
359 output[n2 + 2*k+1]= r4;
360 output[n-2 - 2 * k] = r4;
361 }
362
363
364
365
366}
367
368 78
369/* 79/*
370 * Helper functions for wma_window. 80 * Helper functions for wma_window.
@@ -524,7 +234,7 @@ static void init_coef_vlc(VLC *vlc,
524int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) 234int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
525{ 235{
526 //WMADecodeContext *s = avctx->priv_data; 236 //WMADecodeContext *s = avctx->priv_data;
527 int i, m, j, flags1, flags2; 237 int i, flags1, flags2;
528 fixed32 *window; 238 fixed32 *window;
529 uint8_t *extradata; 239 uint8_t *extradata;
530 fixed64 bps1; 240 fixed64 bps1;
@@ -800,10 +510,7 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
800 } 510 }
801 } 511 }
802 512
803 /* init MDCT */ 513 mdct_init_global();
804 /*TODO: figure out how to fold this up into one array*/
805 tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4;
806 tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4;
807 514
808 s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */ 515 s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */
809 for(i = 0; i < s->nb_block_sizes; ++i) 516 for(i = 0; i < s->nb_block_sizes; ++i)
@@ -811,38 +518,6 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
811 ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); 518 ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
812 } 519 }
813 520
814 {
815 int i, n;
816 fixed32 c1, s1, s2;
817
818 n=1<<10;
819 s2 = 1 ? 1 : -1;
820 for(i=0;i<(n/2);++i)
821 {
822 fixed32 ifix = itofix32(i);
823 fixed32 nfix = itofix32(n);
824 fixed32 res = fixdiv32(ifix,nfix);
825
826 s1 = fsincos(res<<16, &c1);
827
828 exptab0[i].re = c1;
829 exptab0[i].im = s1*s2;
830 }
831 }
832
833 /* init the MDCT bit reverse table here rather then in fft_init */
834
835 for(i=0;i<1024;i++) /*hard coded to a 2048 bit rotation*/
836 { /*smaller sizes can reuse the largest*/
837 m=0;
838 for(j=0;j<10;j++)
839 {
840 m |= ((i >> j) & 1) << (10-j-1);
841 }
842
843 revtab0[i]=m;
844 }
845
846 /*ffmpeg uses malloc to only allocate as many window sizes as needed. However, we're really only interested in the worst case memory usage. 521 /*ffmpeg uses malloc to only allocate as many window sizes as needed. However, we're really only interested in the worst case memory usage.
847 * In the worst case you can have 5 window sizes, 128 doubling up 2048 522 * In the worst case you can have 5 window sizes, 128 doubling up 2048
848 * Smaller windows are handled differently. 523 * Smaller windows are handled differently.
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h
index db7529f681..0a6e8f61e0 100644
--- a/apps/codecs/libwma/wmafixed.h
+++ b/apps/codecs/libwma/wmafixed.h
@@ -102,3 +102,69 @@ fixed32 fixmul32(fixed32 x, fixed32 y);
102fixed32 fixmul32b(fixed32 x, fixed32 y); 102fixed32 fixmul32b(fixed32 x, fixed32 y);
103#endif 103#endif
104 104
105
106#ifdef CPU_ARM
107static inline
108void CMUL(fixed32 *x, fixed32 *y,
109 fixed32 a, fixed32 b,
110 fixed32 t, fixed32 v)
111{
112 /* This version loses one bit of precision. Could be solved at the cost
113 * of 2 extra cycles if it becomes an issue. */
114 int x1, y1, l;
115 asm(
116 "smull %[l], %[y1], %[b], %[t] \n"
117 "smlal %[l], %[y1], %[a], %[v] \n"
118 "rsb %[b], %[b], #0 \n"
119 "smull %[l], %[x1], %[a], %[t] \n"
120 "smlal %[l], %[x1], %[b], %[v] \n"
121 : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b)
122 : [a] "r" (a), [t] "r" (t), [v] "r" (v)
123 : "cc"
124 );
125 *x = x1 << 1;
126 *y = y1 << 1;
127}
128#elif defined CPU_COLDFIRE
/*
 * Complex multiply, ColdFire version, using the two MAC accumulators:
 *   acc0 = a*t - b*v   -> stored to *x
 *   acc1 = b*t + a*v   -> stored to *y
 * movclr.l moves each accumulator into the register holding a and clears
 * the accumulator; the result is then stored through x / y (hence the
 * "memory" clobber, and a being a read-write "+&r" operand).
 * NOTE(review): this assumes acc0/acc1 are zero on entry and that the MAC
 * unit has already been put into the intended mode by the caller - confirm.
 */
129static inline
130void CMUL(fixed32 *x, fixed32 *y,
131 fixed32 a, fixed32 b,
132 fixed32 t, fixed32 v)
133{
134 asm volatile ("mac.l %[a], %[t], %%acc0;"
135 "msac.l %[b], %[v], %%acc0;"
136 "mac.l %[b], %[t], %%acc1;"
137 "mac.l %[a], %[v], %%acc1;"
138 "movclr.l %%acc0, %[a];"
139 "move.l %[a], (%[x]);"
140 "movclr.l %%acc1, %[a];"
141 "move.l %[a], (%[y]);"
142 : [a] "+&r" (a)
143 : [x] "a" (x), [y] "a" (y),
144 [b] "r" (b), [t] "r" (t), [v] "r" (v)
145 : "cc", "memory");
146}
147#else
148// PJJ : reinstate macro
149static inline
150void CMUL(fixed32 *pre,
151 fixed32 *pim,
152 fixed32 are,
153 fixed32 aim,
154 fixed32 bre,
155 fixed32 bim)
156{
157 //int64_t x,y;
158 fixed32 _aref = are;
159 fixed32 _aimf = aim;
160 fixed32 _bref = bre;
161 fixed32 _bimf = bim;
162 fixed32 _r1 = fixmul32b(_bref, _aref);
163 fixed32 _r2 = fixmul32b(_bimf, _aimf);
164 fixed32 _r3 = fixmul32b(_bref, _aimf);
165 fixed32 _r4 = fixmul32b(_bimf, _aref);
166 *pre = _r1 - _r2;
167 *pim = _r3 + _r4;
168
169}
170#endif