summaryrefslogtreecommitdiff
path: root/apps/codecs/libwmapro/fft.c
diff options
context:
space:
mode:
authorMohamed Tarek <mt@rockbox.org>2010-04-30 11:11:56 +0000
committerMohamed Tarek <mt@rockbox.org>2010-04-30 11:11:56 +0000
commitcf43e5083b9e0f87de262ea31fd8067225ebfcda (patch)
tree073e6f4cd9561564d85e410a35432e1f4ead5b11 /apps/codecs/libwmapro/fft.c
parentbc3c5c16571487bf71fed8c22b30ee40481e156e (diff)
downloadrockbox-cf43e5083b9e0f87de262ea31fd8067225ebfcda.tar.gz
rockbox-cf43e5083b9e0f87de262ea31fd8067225ebfcda.zip
Add libwmapro to apps/codecs. These files comprise a set of unmodified files needed from ffmpeg's libavcodec and libavutil to compile and use the wma pro decoder standalone. The files were taken from ffmpeg's svn r22886 dated 15 April 2010.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25763 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libwmapro/fft.c')
-rw-r--r--apps/codecs/libwmapro/fft.c368
1 files changed, 368 insertions, 0 deletions
diff --git a/apps/codecs/libwmapro/fft.c b/apps/codecs/libwmapro/fft.c
new file mode 100644
index 0000000000..7275d98e9f
--- /dev/null
+++ b/apps/codecs/libwmapro/fft.c
@@ -0,0 +1,368 @@
1/*
2 * FFT/IFFT transforms
3 * Copyright (c) 2008 Loren Merritt
4 * Copyright (c) 2002 Fabrice Bellard
5 * Partly based on libdjbfft by D. J. Bernstein
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/**
25 * @file libavcodec/fft.c
26 * FFT/IFFT transforms.
27 */
28
29#include <stdlib.h>
30#include <string.h>
31#include "libavutil/mathematics.h"
32#include "fft.h"
33
34/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
35#if !CONFIG_HARDCODED_TABLES
36COSTABLE(16);
37COSTABLE(32);
38COSTABLE(64);
39COSTABLE(128);
40COSTABLE(256);
41COSTABLE(512);
42COSTABLE(1024);
43COSTABLE(2048);
44COSTABLE(4096);
45COSTABLE(8192);
46COSTABLE(16384);
47COSTABLE(32768);
48COSTABLE(65536);
49#endif
50COSTABLE_CONST FFTSample * const ff_cos_tabs[] = {
51 NULL, NULL, NULL, NULL,
52 ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
53 ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
54};
55
56static int split_radix_permutation(int i, int n, int inverse)
57{
58 int m;
59 if(n <= 2) return i&1;
60 m = n >> 1;
61 if(!(i&m)) return split_radix_permutation(i, m, inverse)*2;
62 m >>= 1;
63 if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
64 else return split_radix_permutation(i, m, inverse)*4 - 1;
65}
66
67av_cold void ff_init_ff_cos_tabs(int index)
68{
69#if !CONFIG_HARDCODED_TABLES
70 int i;
71 int m = 1<<index;
72 double freq = 2*M_PI/m;
73 FFTSample *tab = ff_cos_tabs[index];
74 for(i=0; i<=m/4; i++)
75 tab[i] = cos(i*freq);
76 for(i=1; i<m/4; i++)
77 tab[m/2-i] = tab[i];
78#endif
79}
80
81av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
82{
83 int i, j, m, n;
84 float alpha, c1, s1, s2;
85 int av_unused has_vectors;
86
87 if (nbits < 2 || nbits > 16)
88 goto fail;
89 s->nbits = nbits;
90 n = 1 << nbits;
91
92 s->tmp_buf = NULL;
93 s->exptab = av_malloc((n / 2) * sizeof(FFTComplex));
94 if (!s->exptab)
95 goto fail;
96 s->revtab = av_malloc(n * sizeof(uint16_t));
97 if (!s->revtab)
98 goto fail;
99 s->inverse = inverse;
100
101 s2 = inverse ? 1.0 : -1.0;
102
103 s->fft_permute = ff_fft_permute_c;
104 s->fft_calc = ff_fft_calc_c;
105#if CONFIG_MDCT
106 s->imdct_calc = ff_imdct_calc_c;
107 s->imdct_half = ff_imdct_half_c;
108 s->mdct_calc = ff_mdct_calc_c;
109#endif
110 s->exptab1 = NULL;
111 s->split_radix = 1;
112
113 if (ARCH_ARM) ff_fft_init_arm(s);
114 if (HAVE_ALTIVEC) ff_fft_init_altivec(s);
115 if (HAVE_MMX) ff_fft_init_mmx(s);
116
117 if (s->split_radix) {
118 for(j=4; j<=nbits; j++) {
119 ff_init_ff_cos_tabs(j);
120 }
121 for(i=0; i<n; i++)
122 s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
123 s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
124 } else {
125 int np, nblocks, np2, l;
126 FFTComplex *q;
127
128 for(i=0; i<(n/2); i++) {
129 alpha = 2 * M_PI * (float)i / (float)n;
130 c1 = cos(alpha);
131 s1 = sin(alpha) * s2;
132 s->exptab[i].re = c1;
133 s->exptab[i].im = s1;
134 }
135
136 np = 1 << nbits;
137 nblocks = np >> 3;
138 np2 = np >> 1;
139 s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
140 if (!s->exptab1)
141 goto fail;
142 q = s->exptab1;
143 do {
144 for(l = 0; l < np2; l += 2 * nblocks) {
145 *q++ = s->exptab[l];
146 *q++ = s->exptab[l + nblocks];
147
148 q->re = -s->exptab[l].im;
149 q->im = s->exptab[l].re;
150 q++;
151 q->re = -s->exptab[l + nblocks].im;
152 q->im = s->exptab[l + nblocks].re;
153 q++;
154 }
155 nblocks = nblocks >> 1;
156 } while (nblocks != 0);
157 av_freep(&s->exptab);
158
159 /* compute bit reverse table */
160 for(i=0;i<n;i++) {
161 m=0;
162 for(j=0;j<nbits;j++) {
163 m |= ((i >> j) & 1) << (nbits-j-1);
164 }
165 s->revtab[i]=m;
166 }
167 }
168
169 return 0;
170 fail:
171 av_freep(&s->revtab);
172 av_freep(&s->exptab);
173 av_freep(&s->exptab1);
174 av_freep(&s->tmp_buf);
175 return -1;
176}
177
178void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
179{
180 int j, k, np;
181 FFTComplex tmp;
182 const uint16_t *revtab = s->revtab;
183 np = 1 << s->nbits;
184
185 if (s->tmp_buf) {
186 /* TODO: handle split-radix permute in a more optimal way, probably in-place */
187 for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
188 memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
189 return;
190 }
191
192 /* reverse */
193 for(j=0;j<np;j++) {
194 k = revtab[j];
195 if (k < j) {
196 tmp = z[k];
197 z[k] = z[j];
198 z[j] = tmp;
199 }
200 }
201}
202
203av_cold void ff_fft_end(FFTContext *s)
204{
205 av_freep(&s->revtab);
206 av_freep(&s->exptab);
207 av_freep(&s->exptab1);
208 av_freep(&s->tmp_buf);
209}
210
211#define sqrthalf (float)M_SQRT1_2
212
213#define BF(x,y,a,b) {\
214 x = a - b;\
215 y = a + b;\
216}
217
218#define BUTTERFLIES(a0,a1,a2,a3) {\
219 BF(t3, t5, t5, t1);\
220 BF(a2.re, a0.re, a0.re, t5);\
221 BF(a3.im, a1.im, a1.im, t3);\
222 BF(t4, t6, t2, t6);\
223 BF(a3.re, a1.re, a1.re, t4);\
224 BF(a2.im, a0.im, a0.im, t6);\
225}
226
227// force loading all the inputs before storing any.
228// this is slightly slower for small data, but avoids store->load aliasing
229// for addresses separated by large powers of 2.
230#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
231 FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
232 BF(t3, t5, t5, t1);\
233 BF(a2.re, a0.re, r0, t5);\
234 BF(a3.im, a1.im, i1, t3);\
235 BF(t4, t6, t2, t6);\
236 BF(a3.re, a1.re, r1, t4);\
237 BF(a2.im, a0.im, i0, t6);\
238}
239
240#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
241 t1 = a2.re * wre + a2.im * wim;\
242 t2 = a2.im * wre - a2.re * wim;\
243 t5 = a3.re * wre - a3.im * wim;\
244 t6 = a3.im * wre + a3.re * wim;\
245 BUTTERFLIES(a0,a1,a2,a3)\
246}
247
248#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
249 t1 = a2.re;\
250 t2 = a2.im;\
251 t5 = a3.re;\
252 t6 = a3.im;\
253 BUTTERFLIES(a0,a1,a2,a3)\
254}
255
256/* z[0...8n-1], w[1...2n-1] */
257#define PASS(name)\
258static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
259{\
260 FFTSample t1, t2, t3, t4, t5, t6;\
261 int o1 = 2*n;\
262 int o2 = 4*n;\
263 int o3 = 6*n;\
264 const FFTSample *wim = wre+o1;\
265 n--;\
266\
267 TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
268 TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
269 do {\
270 z += 2;\
271 wre += 2;\
272 wim -= 2;\
273 TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
274 TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
275 } while(--n);\
276}
277
278PASS(pass)
279#undef BUTTERFLIES
280#define BUTTERFLIES BUTTERFLIES_BIG
281PASS(pass_big)
282
283#define DECL_FFT(n,n2,n4)\
284static void fft##n(FFTComplex *z)\
285{\
286 fft##n2(z);\
287 fft##n4(z+n4*2);\
288 fft##n4(z+n4*3);\
289 pass(z,ff_cos_##n,n4/2);\
290}
291
292static void fft4(FFTComplex *z)
293{
294 FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
295
296 BF(t3, t1, z[0].re, z[1].re);
297 BF(t8, t6, z[3].re, z[2].re);
298 BF(z[2].re, z[0].re, t1, t6);
299 BF(t4, t2, z[0].im, z[1].im);
300 BF(t7, t5, z[2].im, z[3].im);
301 BF(z[3].im, z[1].im, t4, t8);
302 BF(z[3].re, z[1].re, t3, t7);
303 BF(z[2].im, z[0].im, t2, t5);
304}
305
306static void fft8(FFTComplex *z)
307{
308 FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
309
310 fft4(z);
311
312 BF(t1, z[5].re, z[4].re, -z[5].re);
313 BF(t2, z[5].im, z[4].im, -z[5].im);
314 BF(t3, z[7].re, z[6].re, -z[7].re);
315 BF(t4, z[7].im, z[6].im, -z[7].im);
316 BF(t8, t1, t3, t1);
317 BF(t7, t2, t2, t4);
318 BF(z[4].re, z[0].re, z[0].re, t1);
319 BF(z[4].im, z[0].im, z[0].im, t2);
320 BF(z[6].re, z[2].re, z[2].re, t7);
321 BF(z[6].im, z[2].im, z[2].im, t8);
322
323 TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
324}
325
326#if !CONFIG_SMALL
327static void fft16(FFTComplex *z)
328{
329 FFTSample t1, t2, t3, t4, t5, t6;
330
331 fft8(z);
332 fft4(z+8);
333 fft4(z+12);
334
335 TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
336 TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
337 TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
338 TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
339}
340#else
341DECL_FFT(16,8,4)
342#endif
343DECL_FFT(32,16,8)
344DECL_FFT(64,32,16)
345DECL_FFT(128,64,32)
346DECL_FFT(256,128,64)
347DECL_FFT(512,256,128)
348#if !CONFIG_SMALL
349#define pass pass_big
350#endif
351DECL_FFT(1024,512,256)
352DECL_FFT(2048,1024,512)
353DECL_FFT(4096,2048,1024)
354DECL_FFT(8192,4096,2048)
355DECL_FFT(16384,8192,4096)
356DECL_FFT(32768,16384,8192)
357DECL_FFT(65536,32768,16384)
358
359static void (* const fft_dispatch[])(FFTComplex*) = {
360 fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
361 fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
362};
363
364void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
365{
366 fft_dispatch[s->nbits-2](z);
367}
368