diff options
Diffstat (limited to 'apps/codecs/libwma')
-rw-r--r-- | apps/codecs/libwma/SOURCES | 2 | ||||
-rw-r--r-- | apps/codecs/libwma/fft.c | 152 | ||||
-rw-r--r-- | apps/codecs/libwma/fft.h | 22 | ||||
-rw-r--r-- | apps/codecs/libwma/mdct.c | 164 | ||||
-rw-r--r-- | apps/codecs/libwma/mdct.h | 23 | ||||
-rw-r--r-- | apps/codecs/libwma/wmadeci.c | 331 | ||||
-rw-r--r-- | apps/codecs/libwma/wmafixed.h | 66 |
7 files changed, 432 insertions, 328 deletions
diff --git a/apps/codecs/libwma/SOURCES b/apps/codecs/libwma/SOURCES index 967577d0db..b9d4cc1882 100644 --- a/apps/codecs/libwma/SOURCES +++ b/apps/codecs/libwma/SOURCES | |||
@@ -1,3 +1,5 @@ | |||
1 | wmadeci.c | 1 | wmadeci.c |
2 | wmafixed.c | 2 | wmafixed.c |
3 | bitstream.c | 3 | bitstream.c |
4 | fft.c | ||
5 | mdct.c | ||
diff --git a/apps/codecs/libwma/fft.c b/apps/codecs/libwma/fft.c new file mode 100644 index 0000000000..3def74d92c --- /dev/null +++ b/apps/codecs/libwma/fft.c | |||
@@ -0,0 +1,152 @@ | |||
1 | /* | ||
2 | * WMA compatible decoder | ||
3 | * Copyright (c) 2002 The FFmpeg Project. | ||
4 | * | ||
5 | * This library is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU Lesser General Public | ||
7 | * License as published by the Free Software Foundation; either | ||
8 | * version 2 of the License, or (at your option) any later version. | ||
9 | * | ||
10 | * This library is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * Lesser General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU Lesser General Public | ||
16 | * License along with this library; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #include "wmadec.h" | ||
21 | #include "wmafixed.h" | ||
22 | |||
23 | FFTComplex exptab0[512] IBSS_ATTR; /* FFT twiddle table: filled by fft_init_global(), read by fft_calc_unscaled() */ | ||
24 | |||
/*
 * Radix-2 butterfly.
 *
 * The four input expressions (pre1, pim1) and (qre1, qim1) are each
 * evaluated once, in that order, into temporaries before anything is
 * stored, so the outputs may alias the inputs (in-place use):
 *
 *     (pre, pim) = (pre1 + qre1, pim1 + qim1)
 *     (qre, qim) = (pre1 - qre1, pim1 - qim1)
 *
 * The body is wrapped in do { } while (0) so that "BF(...);" is exactly one
 * statement and is safe inside an unbraced if/else.  The temporaries carry a
 * bf_ prefix so they cannot capture identifiers used in the arguments.
 */
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
    do {\
        fixed32 bf_bx = (pre1);\
        fixed32 bf_by = (pim1);\
        fixed32 bf_ax = (qre1);\
        fixed32 bf_ay = (qim1);\
        (pre) = (bf_bx + bf_ax);\
        (pim) = (bf_by + bf_ay);\
        (qre) = (bf_bx - bf_ax);\
        (qim) = (bf_by - bf_ay);\
    } while (0)
38 | |||
39 | |||
40 | int fft_calc_unscaled(FFTContext *s, FFTComplex *z) | ||
41 | { | ||
42 | int ln = s->nbits; | ||
43 | int j, np, np2; | ||
44 | int nblocks, nloops; | ||
45 | register FFTComplex *p, *q; | ||
46 | // FFTComplex *exptab = s->exptab; | ||
47 | int l; | ||
48 | fixed32 tmp_re, tmp_im; | ||
49 | int tabshift = 10-ln; | ||
50 | |||
51 | np = 1 << ln; | ||
52 | |||
53 | |||
54 | /* pass 0 */ | ||
55 | |||
56 | p=&z[0]; | ||
57 | j=(np >> 1); | ||
58 | do | ||
59 | { | ||
60 | BF(p[0].re, p[0].im, p[1].re, p[1].im, | ||
61 | p[0].re, p[0].im, p[1].re, p[1].im); | ||
62 | p+=2; | ||
63 | } | ||
64 | while (--j != 0); | ||
65 | |||
66 | /* pass 1 */ | ||
67 | |||
68 | |||
69 | p=&z[0]; | ||
70 | j=np >> 2; | ||
71 | if (s->inverse) | ||
72 | { | ||
73 | do | ||
74 | { | ||
75 | BF(p[0].re, p[0].im, p[2].re, p[2].im, | ||
76 | p[0].re, p[0].im, p[2].re, p[2].im); | ||
77 | BF(p[1].re, p[1].im, p[3].re, p[3].im, | ||
78 | p[1].re, p[1].im, -p[3].im, p[3].re); | ||
79 | p+=4; | ||
80 | } | ||
81 | while (--j != 0); | ||
82 | } | ||
83 | else | ||
84 | { | ||
85 | do | ||
86 | { | ||
87 | BF(p[0].re, p[0].im, p[2].re, p[2].im, | ||
88 | p[0].re, p[0].im, p[2].re, p[2].im); | ||
89 | BF(p[1].re, p[1].im, p[3].re, p[3].im, | ||
90 | p[1].re, p[1].im, p[3].im, -p[3].re); | ||
91 | p+=4; | ||
92 | } | ||
93 | while (--j != 0); | ||
94 | } | ||
95 | /* pass 2 .. ln-1 */ | ||
96 | |||
97 | nblocks = np >> 3; | ||
98 | nloops = 1 << 2; | ||
99 | np2 = np >> 1; | ||
100 | do | ||
101 | { | ||
102 | p = z; | ||
103 | q = z + nloops; | ||
104 | for (j = 0; j < nblocks; ++j) | ||
105 | { | ||
106 | BF(p->re, p->im, q->re, q->im, | ||
107 | p->re, p->im, q->re, q->im); | ||
108 | |||
109 | p++; | ||
110 | q++; | ||
111 | for(l = nblocks; l < np2; l += nblocks) | ||
112 | { | ||
113 | CMUL(&tmp_re, &tmp_im, exptab0[(l<<tabshift)].re, exptab0[(l<<tabshift)].im, q->re, q->im); | ||
114 | //CMUL(&tmp_re, &tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); | ||
115 | BF(p->re, p->im, q->re, q->im, | ||
116 | p->re, p->im, tmp_re, tmp_im); | ||
117 | p++; | ||
118 | q++; | ||
119 | } | ||
120 | |||
121 | p += nloops; | ||
122 | q += nloops; | ||
123 | } | ||
124 | nblocks = nblocks >> 1; | ||
125 | nloops = nloops << 1; | ||
126 | } | ||
127 | while (nblocks != 0); | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | int fft_init_global() | ||
132 | { | ||
133 | int i, n; | ||
134 | fixed32 c1, s1, s2; | ||
135 | |||
136 | n=1<<10; | ||
137 | s2 = 1 ? 1 : -1; | ||
138 | for(i=0;i<(n/2);++i) | ||
139 | { | ||
140 | fixed32 ifix = itofix32(i); | ||
141 | fixed32 nfix = itofix32(n); | ||
142 | fixed32 res = fixdiv32(ifix,nfix); | ||
143 | |||
144 | s1 = fsincos(res<<16, &c1); | ||
145 | |||
146 | exptab0[i].re = c1; | ||
147 | exptab0[i].im = s1*s2; | ||
148 | } | ||
149 | |||
150 | return 0; | ||
151 | } | ||
152 | |||
diff --git a/apps/codecs/libwma/fft.h b/apps/codecs/libwma/fft.h new file mode 100644 index 0000000000..dd962cc171 --- /dev/null +++ b/apps/codecs/libwma/fft.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* | ||
2 | * WMA compatible decoder | ||
3 | * Copyright (c) 2002 The FFmpeg Project. | ||
4 | * | ||
5 | * This library is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU Lesser General Public | ||
7 | * License as published by the Free Software Foundation; either | ||
8 | * version 2 of the License, or (at your option) any later version. | ||
9 | * | ||
10 | * This library is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * Lesser General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU Lesser General Public | ||
16 | * License along with this library; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | int fft_calc_unscaled(FFTContext *s, FFTComplex *z); | ||
21 | int fft_init_global(); | ||
22 | |||
diff --git a/apps/codecs/libwma/mdct.c b/apps/codecs/libwma/mdct.c new file mode 100644 index 0000000000..00a160ecef --- /dev/null +++ b/apps/codecs/libwma/mdct.c | |||
@@ -0,0 +1,164 @@ | |||
1 | /* | ||
2 | * WMA compatible decoder | ||
3 | * Copyright (c) 2002 The FFmpeg Project. | ||
4 | * | ||
5 | * This library is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU Lesser General Public | ||
7 | * License as published by the Free Software Foundation; either | ||
8 | * version 2 of the License, or (at your option) any later version. | ||
9 | * | ||
10 | * This library is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * Lesser General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU Lesser General Public | ||
16 | * License along with this library; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #include <codecs/lib/codeclib.h> | ||
21 | #include "wmadec.h" | ||
22 | #include "wmafixed.h" | ||
23 | #include "fft.h" | ||
24 | |||
25 | fixed32 *tcosarray[5], *tsinarray[5]; /* table pointers, assigned in mdct_init_global(); ff_mdct_init() picks entry 12-nbits */ | ||
26 | fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64]; //sin and cos rotation tables used by the MDCT, one pair per transform size; filled by ff_mdct_init() | ||
27 | fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64]; | ||
28 | |||
29 | uint16_t revtab0[1024]; /* 10-bit bit-reversal table, filled by mdct_init_global() */ | ||
30 | |||
31 | /** | ||
32 | * init MDCT or IMDCT computation. | ||
33 | */ | ||
34 | int ff_mdct_init(MDCTContext *s, int nbits, int inverse) | ||
35 | { | ||
36 | int n, n4, i; | ||
37 | // fixed32 alpha; | ||
38 | |||
39 | |||
40 | memset(s, 0, sizeof(*s)); | ||
41 | n = 1 << nbits; //nbits ranges from 12 to 8 inclusive | ||
42 | s->nbits = nbits; | ||
43 | s->n = n; | ||
44 | n4 = n >> 2; | ||
45 | s->tcos = tcosarray[12-nbits]; | ||
46 | s->tsin = tsinarray[12-nbits]; | ||
47 | for(i=0;i<n4;i++) | ||
48 | { | ||
49 | //fixed32 pi2 = fixmul32(0x20000, M_PI_F); | ||
50 | fixed32 ip = itofix32(i) + 0x2000; | ||
51 | ip = ip >> nbits; | ||
52 | //ip = fixdiv32(ip,itofix32(n)); // PJJ optimize | ||
53 | //alpha = fixmul32(TWO_M_PI_F, ip); | ||
54 | //s->tcos[i] = -fixcos32(alpha); //alpha between 0 and pi/2 | ||
55 | //s->tsin[i] = -fixsin32(alpha); | ||
56 | |||
57 | s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i])); //I can't remember why this works, but it seems to agree for ~24 bits, maybe more! | ||
58 | s->tcos[i] *=-1; | ||
59 | } | ||
60 | (&s->fft)->nbits = nbits-2; | ||
61 | |||
62 | (&s->fft)->inverse = inverse; | ||
63 | |||
64 | return 0; | ||
65 | |||
66 | } | ||
67 | |||
68 | /** | ||
69 | * Compute inverse MDCT of size N = 2^nbits | ||
70 | * @param output N samples | ||
71 | * @param input N/2 samples | ||
72 | * @param tmp N/2 samples | ||
73 | */ | ||
74 | void ff_imdct_calc(MDCTContext *s, | ||
75 | fixed32 *output, | ||
76 | fixed32 *input) | ||
77 | { | ||
78 | int k, n8, n4, n2, n, j,scale; | ||
79 | const fixed32 *tcos = s->tcos; | ||
80 | const fixed32 *tsin = s->tsin; | ||
81 | const fixed32 *in1, *in2; | ||
82 | FFTComplex *z1 = (FFTComplex *)output; | ||
83 | FFTComplex *z2 = (FFTComplex *)input; | ||
84 | int revtabshift = 12 - s->nbits; | ||
85 | |||
86 | n = 1 << s->nbits; | ||
87 | |||
88 | n2 = n >> 1; | ||
89 | n4 = n >> 2; | ||
90 | n8 = n >> 3; | ||
91 | |||
92 | |||
93 | /* pre rotation */ | ||
94 | in1 = input; | ||
95 | in2 = input + n2 - 1; | ||
96 | |||
97 | for(k = 0; k < n4; k++) | ||
98 | { | ||
99 | j=revtab0[k<<revtabshift]; | ||
100 | CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]); | ||
101 | in1 += 2; | ||
102 | in2 -= 2; | ||
103 | } | ||
104 | |||
105 | scale = fft_calc_unscaled(&s->fft, z1); | ||
106 | |||
107 | /* post rotation + reordering */ | ||
108 | |||
109 | for(k = 0; k < n4; k++) | ||
110 | { | ||
111 | CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]); | ||
112 | } | ||
113 | |||
114 | for(k = 0; k < n8; k++) | ||
115 | { | ||
116 | fixed32 r1,r2,r3,r4,r1n,r2n,r3n; | ||
117 | |||
118 | r1 = z2[n8 + k].im; | ||
119 | r1n = r1 * -1; | ||
120 | r2 = z2[n8-1-k].re; | ||
121 | r2n = r2 * -1; | ||
122 | r3 = z2[k+n8].re; | ||
123 | r3n = r3 * -1; | ||
124 | r4 = z2[n8-k-1].im; | ||
125 | |||
126 | output[2*k] = r1n; | ||
127 | output[n2-1-2*k] = r1; | ||
128 | |||
129 | output[2*k+1] = r2; | ||
130 | output[n2-1-2*k-1] = r2n; | ||
131 | |||
132 | output[n2 + 2*k]= r3n; | ||
133 | output[n-1- 2*k]= r3n; | ||
134 | |||
135 | output[n2 + 2*k+1]= r4; | ||
136 | output[n-2 - 2 * k] = r4; | ||
137 | } | ||
138 | } | ||
139 | |||
140 | int mdct_init_global() | ||
141 | { | ||
142 | int i,j,m; | ||
143 | /* init MDCT */ | ||
144 | /*TODO: figure out how to fold this up into one array*/ | ||
145 | tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4; | ||
146 | tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4; | ||
147 | /* init the MDCT bit reverse table here rather then in fft_init */ | ||
148 | |||
149 | for(i=0;i<1024;i++) /*hard coded to a 2048 bit rotation*/ | ||
150 | { /*smaller sizes can reuse the largest*/ | ||
151 | m=0; | ||
152 | for(j=0;j<10;j++) | ||
153 | { | ||
154 | m |= ((i >> j) & 1) << (10-j-1); | ||
155 | } | ||
156 | |||
157 | revtab0[i]=m; | ||
158 | } | ||
159 | |||
160 | fft_init_global(); | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
diff --git a/apps/codecs/libwma/mdct.h b/apps/codecs/libwma/mdct.h new file mode 100644 index 0000000000..1c050204bd --- /dev/null +++ b/apps/codecs/libwma/mdct.h | |||
@@ -0,0 +1,23 @@ | |||
1 | /* | ||
2 | * WMA compatible decoder | ||
3 | * Copyright (c) 2002 The FFmpeg Project. | ||
4 | * | ||
5 | * This library is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU Lesser General Public | ||
7 | * License as published by the Free Software Foundation; either | ||
8 | * version 2 of the License, or (at your option) any later version. | ||
9 | * | ||
10 | * This library is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * Lesser General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU Lesser General Public | ||
16 | * License along with this library; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | int ff_mdct_init(MDCTContext *s, int nbits, int inverse); | ||
21 | void ff_imdct_calc(MDCTContext *s, fixed32 *output, fixed32 *input); | ||
22 | int mdct_init_global(); | ||
23 | |||
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c index 99a098ea1f..bc3c11f9f2 100644 --- a/apps/codecs/libwma/wmadeci.c +++ b/apps/codecs/libwma/wmadeci.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include "wmadec.h" | 28 | #include "wmadec.h" |
29 | #include "wmafixed.h" | 29 | #include "wmafixed.h" |
30 | #include "bitstream.h" | 30 | #include "bitstream.h" |
31 | #include "mdct.h" | ||
31 | 32 | ||
32 | 33 | ||
33 | #define VLCBITS 7 /*7 is the lowest without glitching*/ | 34 | #define VLCBITS 7 /*7 is the lowest without glitching*/ |
@@ -40,71 +41,6 @@ | |||
40 | #define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS) | 41 | #define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS) |
41 | 42 | ||
42 | 43 | ||
43 | #ifdef CPU_ARM | ||
44 | static inline | ||
45 | void CMUL(fixed32 *x, fixed32 *y, | ||
46 | fixed32 a, fixed32 b, | ||
47 | fixed32 t, fixed32 v) | ||
48 | { | ||
49 | /* This version loses one bit of precision. Could be solved at the cost | ||
50 | * of 2 extra cycles if it becomes an issue. */ | ||
51 | int x1, y1, l; | ||
52 | asm( | ||
53 | "smull %[l], %[y1], %[b], %[t] \n" | ||
54 | "smlal %[l], %[y1], %[a], %[v] \n" | ||
55 | "rsb %[b], %[b], #0 \n" | ||
56 | "smull %[l], %[x1], %[a], %[t] \n" | ||
57 | "smlal %[l], %[x1], %[b], %[v] \n" | ||
58 | : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b) | ||
59 | : [a] "r" (a), [t] "r" (t), [v] "r" (v) | ||
60 | : "cc" | ||
61 | ); | ||
62 | *x = x1 << 1; | ||
63 | *y = y1 << 1; | ||
64 | } | ||
65 | #elif defined CPU_COLDFIRE | ||
66 | static inline | ||
67 | void CMUL(fixed32 *x, fixed32 *y, | ||
68 | fixed32 a, fixed32 b, | ||
69 | fixed32 t, fixed32 v) | ||
70 | { | ||
71 | asm volatile ("mac.l %[a], %[t], %%acc0;" | ||
72 | "msac.l %[b], %[v], %%acc0;" | ||
73 | "mac.l %[b], %[t], %%acc1;" | ||
74 | "mac.l %[a], %[v], %%acc1;" | ||
75 | "movclr.l %%acc0, %[a];" | ||
76 | "move.l %[a], (%[x]);" | ||
77 | "movclr.l %%acc1, %[a];" | ||
78 | "move.l %[a], (%[y]);" | ||
79 | : [a] "+&r" (a) | ||
80 | : [x] "a" (x), [y] "a" (y), | ||
81 | [b] "r" (b), [t] "r" (t), [v] "r" (v) | ||
82 | : "cc", "memory"); | ||
83 | } | ||
84 | #else | ||
85 | // PJJ : reinstate macro | ||
86 | void CMUL(fixed32 *pre, | ||
87 | fixed32 *pim, | ||
88 | fixed32 are, | ||
89 | fixed32 aim, | ||
90 | fixed32 bre, | ||
91 | fixed32 bim) | ||
92 | { | ||
93 | //int64_t x,y; | ||
94 | fixed32 _aref = are; | ||
95 | fixed32 _aimf = aim; | ||
96 | fixed32 _bref = bre; | ||
97 | fixed32 _bimf = bim; | ||
98 | fixed32 _r1 = fixmul32b(_bref, _aref); | ||
99 | fixed32 _r2 = fixmul32b(_bimf, _aimf); | ||
100 | fixed32 _r3 = fixmul32b(_bref, _aimf); | ||
101 | fixed32 _r4 = fixmul32b(_bimf, _aref); | ||
102 | *pre = _r1 - _r2; | ||
103 | *pim = _r3 + _r4; | ||
104 | |||
105 | } | ||
106 | #endif | ||
107 | |||
108 | typedef struct CoefVLCTable | 44 | typedef struct CoefVLCTable |
109 | { | 45 | { |
110 | int n; /* total number of codes */ | 46 | int n; /* total number of codes */ |
@@ -121,13 +57,6 @@ fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR; | |||
121 | //static variables that replace malloced stuff | 57 | //static variables that replace malloced stuff |
122 | fixed32 stat0[2048], stat1[1024], stat2[512], stat3[256], stat4[128]; //these are the MDCT reconstruction windows | 58 | fixed32 stat0[2048], stat1[1024], stat2[512], stat3[256], stat4[128]; //these are the MDCT reconstruction windows |
123 | 59 | ||
124 | fixed32 *tcosarray[5], *tsinarray[5]; | ||
125 | fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64]; //these are the sin and cos rotations used by the MDCT | ||
126 | fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64]; | ||
127 | |||
128 | FFTComplex exptab0[512] IBSS_ATTR; | ||
129 | uint16_t revtab0[1024]; | ||
130 | |||
131 | uint16_t *runtabarray[2], *levtabarray[2]; //these are VLC lookup tables | 60 | uint16_t *runtabarray[2], *levtabarray[2]; //these are VLC lookup tables |
132 | 61 | ||
133 | uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336 | 62 | uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336]; //these could be made smaller since only one can be 1336 |
@@ -146,225 +75,6 @@ VLC_TYPE vlcbuf4[540][2]; | |||
146 | #include "wmadata.h" // PJJ | 75 | #include "wmadata.h" // PJJ |
147 | 76 | ||
148 | 77 | ||
149 | /* butter fly op */ | ||
150 | #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ | ||
151 | {\ | ||
152 | fixed32 ax, ay, bx, by;\ | ||
153 | bx=pre1;\ | ||
154 | by=pim1;\ | ||
155 | ax=qre1;\ | ||
156 | ay=qim1;\ | ||
157 | pre = (bx + ax);\ | ||
158 | pim = (by + ay);\ | ||
159 | qre = (bx - ax);\ | ||
160 | qim = (by - ay);\ | ||
161 | } | ||
162 | |||
163 | |||
164 | int fft_calc_unscaled(FFTContext *s, FFTComplex *z) | ||
165 | { | ||
166 | int ln = s->nbits; | ||
167 | int j, np, np2; | ||
168 | int nblocks, nloops; | ||
169 | register FFTComplex *p, *q; | ||
170 | // FFTComplex *exptab = s->exptab; | ||
171 | int l; | ||
172 | fixed32 tmp_re, tmp_im; | ||
173 | int tabshift = 10-ln; | ||
174 | |||
175 | np = 1 << ln; | ||
176 | |||
177 | |||
178 | /* pass 0 */ | ||
179 | |||
180 | p=&z[0]; | ||
181 | j=(np >> 1); | ||
182 | do | ||
183 | { | ||
184 | BF(p[0].re, p[0].im, p[1].re, p[1].im, | ||
185 | p[0].re, p[0].im, p[1].re, p[1].im); | ||
186 | p+=2; | ||
187 | } | ||
188 | while (--j != 0); | ||
189 | |||
190 | /* pass 1 */ | ||
191 | |||
192 | |||
193 | p=&z[0]; | ||
194 | j=np >> 2; | ||
195 | if (s->inverse) | ||
196 | { | ||
197 | do | ||
198 | { | ||
199 | BF(p[0].re, p[0].im, p[2].re, p[2].im, | ||
200 | p[0].re, p[0].im, p[2].re, p[2].im); | ||
201 | BF(p[1].re, p[1].im, p[3].re, p[3].im, | ||
202 | p[1].re, p[1].im, -p[3].im, p[3].re); | ||
203 | p+=4; | ||
204 | } | ||
205 | while (--j != 0); | ||
206 | } | ||
207 | else | ||
208 | { | ||
209 | do | ||
210 | { | ||
211 | BF(p[0].re, p[0].im, p[2].re, p[2].im, | ||
212 | p[0].re, p[0].im, p[2].re, p[2].im); | ||
213 | BF(p[1].re, p[1].im, p[3].re, p[3].im, | ||
214 | p[1].re, p[1].im, p[3].im, -p[3].re); | ||
215 | p+=4; | ||
216 | } | ||
217 | while (--j != 0); | ||
218 | } | ||
219 | /* pass 2 .. ln-1 */ | ||
220 | |||
221 | nblocks = np >> 3; | ||
222 | nloops = 1 << 2; | ||
223 | np2 = np >> 1; | ||
224 | do | ||
225 | { | ||
226 | p = z; | ||
227 | q = z + nloops; | ||
228 | for (j = 0; j < nblocks; ++j) | ||
229 | { | ||
230 | BF(p->re, p->im, q->re, q->im, | ||
231 | p->re, p->im, q->re, q->im); | ||
232 | |||
233 | p++; | ||
234 | q++; | ||
235 | for(l = nblocks; l < np2; l += nblocks) | ||
236 | { | ||
237 | CMUL(&tmp_re, &tmp_im, exptab0[(l<<tabshift)].re, exptab0[(l<<tabshift)].im, q->re, q->im); | ||
238 | //CMUL(&tmp_re, &tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); | ||
239 | BF(p->re, p->im, q->re, q->im, | ||
240 | p->re, p->im, tmp_re, tmp_im); | ||
241 | p++; | ||
242 | q++; | ||
243 | } | ||
244 | |||
245 | p += nloops; | ||
246 | q += nloops; | ||
247 | } | ||
248 | nblocks = nblocks >> 1; | ||
249 | nloops = nloops << 1; | ||
250 | } | ||
251 | while (nblocks != 0); | ||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | /** | ||
256 | * init MDCT or IMDCT computation. | ||
257 | */ | ||
258 | int ff_mdct_init(MDCTContext *s, int nbits, int inverse) | ||
259 | { | ||
260 | int n, n4, i; | ||
261 | // fixed32 alpha; | ||
262 | |||
263 | |||
264 | memset(s, 0, sizeof(*s)); | ||
265 | n = 1 << nbits; //nbits ranges from 12 to 8 inclusive | ||
266 | s->nbits = nbits; | ||
267 | s->n = n; | ||
268 | n4 = n >> 2; | ||
269 | s->tcos = tcosarray[12-nbits]; | ||
270 | s->tsin = tsinarray[12-nbits]; | ||
271 | for(i=0;i<n4;i++) | ||
272 | { | ||
273 | //fixed32 pi2 = fixmul32(0x20000, M_PI_F); | ||
274 | fixed32 ip = itofix32(i) + 0x2000; | ||
275 | ip = ip >> nbits; | ||
276 | //ip = fixdiv32(ip,itofix32(n)); // PJJ optimize | ||
277 | //alpha = fixmul32(TWO_M_PI_F, ip); | ||
278 | //s->tcos[i] = -fixcos32(alpha); //alpha between 0 and pi/2 | ||
279 | //s->tsin[i] = -fixsin32(alpha); | ||
280 | |||
281 | s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i])); //I can't remember why this works, but it seems to agree for ~24 bits, maybe more! | ||
282 | s->tcos[i] *=-1; | ||
283 | } | ||
284 | (&s->fft)->nbits = nbits-2; | ||
285 | |||
286 | (&s->fft)->inverse = inverse; | ||
287 | |||
288 | return 0; | ||
289 | |||
290 | } | ||
291 | |||
292 | /** | ||
293 | * Compute inverse MDCT of size N = 2^nbits | ||
294 | * @param output N samples | ||
295 | * @param input N/2 samples | ||
296 | * @param tmp N/2 samples | ||
297 | */ | ||
298 | void ff_imdct_calc(MDCTContext *s, | ||
299 | fixed32 *output, | ||
300 | fixed32 *input) | ||
301 | { | ||
302 | int k, n8, n4, n2, n, j,scale; | ||
303 | const fixed32 *tcos = s->tcos; | ||
304 | const fixed32 *tsin = s->tsin; | ||
305 | const fixed32 *in1, *in2; | ||
306 | FFTComplex *z1 = (FFTComplex *)output; | ||
307 | FFTComplex *z2 = (FFTComplex *)input; | ||
308 | int revtabshift = 12 - s->nbits; | ||
309 | |||
310 | n = 1 << s->nbits; | ||
311 | |||
312 | n2 = n >> 1; | ||
313 | n4 = n >> 2; | ||
314 | n8 = n >> 3; | ||
315 | |||
316 | |||
317 | /* pre rotation */ | ||
318 | in1 = input; | ||
319 | in2 = input + n2 - 1; | ||
320 | |||
321 | for(k = 0; k < n4; k++) | ||
322 | { | ||
323 | j=revtab0[k<<revtabshift]; | ||
324 | CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]); | ||
325 | in1 += 2; | ||
326 | in2 -= 2; | ||
327 | } | ||
328 | |||
329 | scale = fft_calc_unscaled(&s->fft, z1); | ||
330 | |||
331 | /* post rotation + reordering */ | ||
332 | |||
333 | for(k = 0; k < n4; k++) | ||
334 | { | ||
335 | CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]); | ||
336 | } | ||
337 | |||
338 | for(k = 0; k < n8; k++) | ||
339 | { | ||
340 | fixed32 r1,r2,r3,r4,r1n,r2n,r3n; | ||
341 | |||
342 | r1 = z2[n8 + k].im; | ||
343 | r1n = r1 * -1; | ||
344 | r2 = z2[n8-1-k].re; | ||
345 | r2n = r2 * -1; | ||
346 | r3 = z2[k+n8].re; | ||
347 | r3n = r3 * -1; | ||
348 | r4 = z2[n8-k-1].im; | ||
349 | |||
350 | output[2*k] = r1n; | ||
351 | output[n2-1-2*k] = r1; | ||
352 | |||
353 | output[2*k+1] = r2; | ||
354 | output[n2-1-2*k-1] = r2n; | ||
355 | |||
356 | output[n2 + 2*k]= r3n; | ||
357 | output[n-1- 2*k]= r3n; | ||
358 | |||
359 | output[n2 + 2*k+1]= r4; | ||
360 | output[n-2 - 2 * k] = r4; | ||
361 | } | ||
362 | |||
363 | |||
364 | |||
365 | |||
366 | } | ||
367 | |||
368 | 78 | ||
369 | /* | 79 | /* |
370 | * Helper functions for wma_window. | 80 | * Helper functions for wma_window. |
@@ -524,7 +234,7 @@ static void init_coef_vlc(VLC *vlc, | |||
524 | int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) | 234 | int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) |
525 | { | 235 | { |
526 | //WMADecodeContext *s = avctx->priv_data; | 236 | //WMADecodeContext *s = avctx->priv_data; |
527 | int i, m, j, flags1, flags2; | 237 | int i, flags1, flags2; |
528 | fixed32 *window; | 238 | fixed32 *window; |
529 | uint8_t *extradata; | 239 | uint8_t *extradata; |
530 | fixed64 bps1; | 240 | fixed64 bps1; |
@@ -800,10 +510,7 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) | |||
800 | } | 510 | } |
801 | } | 511 | } |
802 | 512 | ||
803 | /* init MDCT */ | 513 | mdct_init_global(); |
804 | /*TODO: figure out how to fold this up into one array*/ | ||
805 | tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4; | ||
806 | tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4; | ||
807 | 514 | ||
808 | s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */ | 515 | s->mdct_tmp = mdct_tmp; /* temporary storage for imdct */ |
809 | for(i = 0; i < s->nb_block_sizes; ++i) | 516 | for(i = 0; i < s->nb_block_sizes; ++i) |
@@ -811,38 +518,6 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) | |||
811 | ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); | 518 | ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); |
812 | } | 519 | } |
813 | 520 | ||
814 | { | ||
815 | int i, n; | ||
816 | fixed32 c1, s1, s2; | ||
817 | |||
818 | n=1<<10; | ||
819 | s2 = 1 ? 1 : -1; | ||
820 | for(i=0;i<(n/2);++i) | ||
821 | { | ||
822 | fixed32 ifix = itofix32(i); | ||
823 | fixed32 nfix = itofix32(n); | ||
824 | fixed32 res = fixdiv32(ifix,nfix); | ||
825 | |||
826 | s1 = fsincos(res<<16, &c1); | ||
827 | |||
828 | exptab0[i].re = c1; | ||
829 | exptab0[i].im = s1*s2; | ||
830 | } | ||
831 | } | ||
832 | |||
833 | /* init the MDCT bit reverse table here rather then in fft_init */ | ||
834 | |||
835 | for(i=0;i<1024;i++) /*hard coded to a 2048 bit rotation*/ | ||
836 | { /*smaller sizes can reuse the largest*/ | ||
837 | m=0; | ||
838 | for(j=0;j<10;j++) | ||
839 | { | ||
840 | m |= ((i >> j) & 1) << (10-j-1); | ||
841 | } | ||
842 | |||
843 | revtab0[i]=m; | ||
844 | } | ||
845 | |||
846 | /*ffmpeg uses malloc to only allocate as many window sizes as needed. However, we're really only interested in the worst case memory usage. | 521 | /*ffmpeg uses malloc to only allocate as many window sizes as needed. However, we're really only interested in the worst case memory usage. |
847 | * In the worst case you can have 5 window sizes, 128 doubling up 2048 | 522 | * In the worst case you can have 5 window sizes, 128 doubling up 2048 |
848 | * Smaller windows are handled differently. | 523 | * Smaller windows are handled differently. |
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h index db7529f681..0a6e8f61e0 100644 --- a/apps/codecs/libwma/wmafixed.h +++ b/apps/codecs/libwma/wmafixed.h | |||
@@ -102,3 +102,69 @@ fixed32 fixmul32(fixed32 x, fixed32 y); | |||
102 | fixed32 fixmul32b(fixed32 x, fixed32 y); | 102 | fixed32 fixmul32b(fixed32 x, fixed32 y); |
103 | #endif | 103 | #endif |
104 | 104 | ||
105 | |||
106 | #ifdef CPU_ARM | ||
107 | static inline | ||
108 | void CMUL(fixed32 *x, fixed32 *y, | ||
109 | fixed32 a, fixed32 b, | ||
110 | fixed32 t, fixed32 v) | ||
111 | { | ||
112 | /* Complex multiply (a + i*b) * (t + i*v): *x receives a*t - b*v and *y receives b*t + a*v, each taken from the high word of the 64-bit sum and then doubled. This version loses one bit of precision. Could be solved at the cost | ||
113 | * of 2 extra cycles if it becomes an issue. */ | ||
114 | int x1, y1, l; | ||
115 | asm( | ||
116 | "smull %[l], %[y1], %[b], %[t] \n" /* y1:l = b*t */ | ||
117 | "smlal %[l], %[y1], %[a], %[v] \n" /* y1:l += a*v */ | ||
118 | "rsb %[b], %[b], #0 \n" /* b = -b, hence the "+r" constraint on b */ | ||
119 | "smull %[l], %[x1], %[a], %[t] \n" /* x1:l = a*t */ | ||
120 | "smlal %[l], %[x1], %[b], %[v] \n" /* x1:l += (-b)*v */ | ||
121 | : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b) | ||
122 | : [a] "r" (a), [t] "r" (t), [v] "r" (v) | ||
123 | : "cc" | ||
124 | ); | ||
125 | *x = x1 << 1; /* high word only; the low word l is discarded */ | ||
126 | *y = y1 << 1; | ||
127 | } | ||
128 | #elif defined CPU_COLDFIRE | ||
129 | static inline | ||
130 | void CMUL(fixed32 *x, fixed32 *y, | ||
131 | fixed32 a, fixed32 b, | ||
132 | fixed32 t, fixed32 v) | ||
133 | { /* Complex multiply (a + i*b) * (t + i*v) using accumulators acc0/acc1; assumes both are clear on entry and that the MAC unit's mode gives the intended scaling - TODO confirm */ | ||
134 | asm volatile ("mac.l %[a], %[t], %%acc0;" /* acc0 += a*t */ | ||
135 | "msac.l %[b], %[v], %%acc0;" /* acc0 -= b*v */ | ||
136 | "mac.l %[b], %[t], %%acc1;" /* acc1 += b*t */ | ||
137 | "mac.l %[a], %[v], %%acc1;" /* acc1 += a*v */ | ||
138 | "movclr.l %%acc0, %[a];" /* fetch and clear acc0; a is reused as scratch from here on */ | ||
139 | "move.l %[a], (%[x]);" /* *x = a*t - b*v */ | ||
140 | "movclr.l %%acc1, %[a];" /* fetch and clear acc1 */ | ||
141 | "move.l %[a], (%[y]);" /* *y = b*t + a*v */ | ||
142 | : [a] "+&r" (a) | ||
143 | : [x] "a" (x), [y] "a" (y), | ||
144 | [b] "r" (b), [t] "r" (t), [v] "r" (v) | ||
145 | : "cc", "memory"); | ||
146 | } | ||
147 | #else | ||
148 | // PJJ : reinstate macro | ||
149 | static inline | ||
150 | void CMUL(fixed32 *pre, | ||
151 | fixed32 *pim, | ||
152 | fixed32 are, | ||
153 | fixed32 aim, | ||
154 | fixed32 bre, | ||
155 | fixed32 bim) | ||
156 | { | ||
157 | //int64_t x,y; | ||
158 | fixed32 _aref = are; | ||
159 | fixed32 _aimf = aim; | ||
160 | fixed32 _bref = bre; | ||
161 | fixed32 _bimf = bim; | ||
162 | fixed32 _r1 = fixmul32b(_bref, _aref); | ||
163 | fixed32 _r2 = fixmul32b(_bimf, _aimf); | ||
164 | fixed32 _r3 = fixmul32b(_bref, _aimf); | ||
165 | fixed32 _r4 = fixmul32b(_bimf, _aref); | ||
166 | *pre = _r1 - _r2; | ||
167 | *pim = _r3 + _r4; | ||
168 | |||
169 | } | ||
170 | #endif | ||