Add libwmapro to apps/codecs. These files comprise a set of unmodified files needed from ffmpeg's libavcodec and libavutil to compile and use the wma pro decoder standalone. The files were taken from ffmpeg's svn r22886 dated 15 April 2010.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25763 a1c6a512-1295-4272-9138-f99709370657
author: Mohamed Tarek <mt@rockbox.org> 2010-04-30 11:11:56 +0000
committer: Mohamed Tarek <mt@rockbox.org> 2010-04-30 11:11:56 +0000
commit: cf43e5083b9e0f87de262ea31fd8067225ebfcda (patch)
tree: 073e6f4cd9561564d85e410a35432e1f4ead5b11 /apps/codecs/libwmapro/fft.c
parent: bc3c5c16571487bf71fed8c22b30ee40481e156e (diff)
download: rockbox-cf43e5083b9e0f87de262ea31fd8067225ebfcda.tar.gz
rockbox-cf43e5083b9e0f87de262ea31fd8067225ebfcda.zip
1 files changed, 368 insertions, 0 deletions
diff --git a/apps/codecs/libwmapro/fft.c b/apps/codecs/libwmapro/fft.c
new file mode 100644
index 0000000000..7275d98e9f
--- /dev/null
+++ b/apps/codecs/libwmapro/fft.c
@@ -0,0 +1,368 @@
+/*
+ * FFT/IFFT transforms
+ * Copyright (c) 2008 Loren Merritt
+ * Copyright (c) 2002 Fabrice Bellard
+ * Partly based on libdjbfft by D. J. Bernstein
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file libavcodec/fft.c
+ * FFT/IFFT transforms.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
+/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
+#if !CONFIG_HARDCODED_TABLES
+COSTABLE(16);
+COSTABLE(32);
+COSTABLE(64);
+COSTABLE(128);
+COSTABLE(256);
+COSTABLE(512);
+COSTABLE(1024);
+COSTABLE(2048);
+COSTABLE(4096);
+COSTABLE(8192);
+COSTABLE(16384);
+COSTABLE(32768);
+COSTABLE(65536);
+#endif
+COSTABLE_CONST FFTSample * const ff_cos_tabs[] = {
+    NULL, NULL, NULL, NULL,
+    ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
+    ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
+};
+static int split_radix_permutation(int i, int n, int inverse)
+{
+    int m;
+    if(n <= 2) return i&1;
+    m = n >> 1;
+    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2;
+    m >>= 1;
+    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
+    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
+}
+av_cold void ff_init_ff_cos_tabs(int index)
+{
+#if !CONFIG_HARDCODED_TABLES
+    int i;
+    int m = 1<<index;
+    double freq = 2*M_PI/m;
+    FFTSample *tab = ff_cos_tabs[index];
+    for(i=0; i<=m/4; i++)
+        tab[i] = cos(i*freq);
+    for(i=1; i<m/4; i++)
+        tab[m/2-i] = tab[i];
+#endif
+}
+av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
+{
+    int i, j, m, n;
+    float alpha, c1, s1, s2;
+    int av_unused has_vectors;
+    if (nbits < 2 || nbits > 16)
+        goto fail;
+    s->nbits = nbits;
+    n = 1 << nbits;
+    s->tmp_buf = NULL;
+    s->exptab  = av_malloc((n / 2) * sizeof(FFTComplex));
+    if (!s->exptab)
+        goto fail;
+    s->revtab = av_malloc(n * sizeof(uint16_t));
+    if (!s->revtab)
+        goto fail;
+    s->inverse = inverse;
+    s2 = inverse ? 1.0 : -1.0;
+    s->fft_permute = ff_fft_permute_c;
+    s->fft_calc    = ff_fft_calc_c;
+#if CONFIG_MDCT
+    s->imdct_calc  = ff_imdct_calc_c;
+    s->imdct_half  = ff_imdct_half_c;
+    s->mdct_calc   = ff_mdct_calc_c;
+#endif
+    s->exptab1     = NULL;
+    s->split_radix = 1;
+    if (ARCH_ARM)     ff_fft_init_arm(s);
+    if (HAVE_ALTIVEC) ff_fft_init_altivec(s);
+    if (HAVE_MMX)     ff_fft_init_mmx(s);
+    if (s->split_radix) {
+        for(j=4; j<=nbits; j++) {
+            ff_init_ff_cos_tabs(j);
+        }
+        for(i=0; i<n; i++)
+            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
+        s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
+    } else {
+        int np, nblocks, np2, l;
+        FFTComplex *q;
+        for(i=0; i<(n/2); i++) {
+            alpha = 2 * M_PI * (float)i / (float)n;
+            c1 = cos(alpha);
+            s1 = sin(alpha) * s2;
+            s->exptab[i].re = c1;
+            s->exptab[i].im = s1;
+        }
+        np = 1 << nbits;
+        nblocks = np >> 3;
+        np2 = np >> 1;
+        s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
+        if (!s->exptab1)
+            goto fail;
+        q = s->exptab1;
+        do {
+            for(l = 0; l < np2; l += 2 * nblocks) {
+                *q++ = s->exptab[l];
+                *q++ = s->exptab[l + nblocks];
+                q->re = -s->exptab[l].im;
+                q->im = s->exptab[l].re;
+                q++;
+                q->re = -s->exptab[l + nblocks].im;
+                q->im = s->exptab[l + nblocks].re;
+                q++;
+            }
+            nblocks = nblocks >> 1;
+        } while (nblocks != 0);
+        av_freep(&s->exptab);
+        /* compute bit reverse table */
+        for(i=0;i<n;i++) {
+            m=0;
+            for(j=0;j<nbits;j++) {
+                m |= ((i >> j) & 1) << (nbits-j-1);
+            }
+            s->revtab[i]=m;
+        }
+    }
+    return 0;
+ fail:
+    av_freep(&s->revtab);
+    av_freep(&s->exptab);
+    av_freep(&s->exptab1);
+    av_freep(&s->tmp_buf);
+    return -1;
+}
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
+{
+    int j, k, np;
+    FFTComplex tmp;
+    const uint16_t *revtab = s->revtab;
+    np = 1 << s->nbits;
+    if (s->tmp_buf) {
+        /* TODO: handle split-radix permute in a more optimal way, probably in-place */
+        for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+        memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
+        return;
+    }
+    /* reverse */
+    for(j=0;j<np;j++) {
+        k = revtab[j];
+        if (k < j) {
+            tmp = z[k];
+            z[k] = z[j];
+            z[j] = tmp;
+        }
+    }
+}
+av_cold void ff_fft_end(FFTContext *s)
+{
+    av_freep(&s->revtab);
+    av_freep(&s->exptab);
+    av_freep(&s->exptab1);
+    av_freep(&s->tmp_buf);
+}
+#define sqrthalf (float)M_SQRT1_2
+#define BF(x,y,a,b) {\
+    x = a - b;\
+    y = a + b;\
+}
+#define BUTTERFLIES(a0,a1,a2,a3) {\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, a0.re, t5);\
+    BF(a3.im, a1.im, a1.im, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, a1.re, t4);\
+    BF(a2.im, a0.im, a0.im, t6);\
+}
+// force loading all the inputs before storing any.
+// this is slightly slower for small data, but avoids store->load aliasing
+// for addresses separated by large powers of 2.
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
+    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, r0, t5);\
+    BF(a3.im, a1.im, i1, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, r1, t4);\
+    BF(a2.im, a0.im, i0, t6);\
+}
+#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
+    t1 = a2.re * wre + a2.im * wim;\
+    t2 = a2.im * wre - a2.re * wim;\
+    t5 = a3.re * wre - a3.im * wim;\
+    t6 = a3.im * wre + a3.re * wim;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
+    t1 = a2.re;\
+    t2 = a2.im;\
+    t5 = a3.re;\
+    t6 = a3.im;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+/* z[0...8n-1], w[1...2n-1] */
+#define PASS(name)\
+static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
+{\
+    FFTSample t1, t2, t3, t4, t5, t6;\
+    int o1 = 2*n;\
+    int o2 = 4*n;\
+    int o3 = 6*n;\
+    const FFTSample *wim = wre+o1;\
+    n--;\
+\
+    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
+    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    do {\
+        z += 2;\
+        wre += 2;\
+        wim -= 2;\
+        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
+        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    } while(--n);\
+}
+PASS(pass)
+#undef BUTTERFLIES
+#define BUTTERFLIES BUTTERFLIES_BIG
+PASS(pass_big)
+#define DECL_FFT(n,n2,n4)\
+static void fft##n(FFTComplex *z)\
+{\
+    fft##n2(z);\
+    fft##n4(z+n4*2);\
+    fft##n4(z+n4*3);\
+    pass(z,ff_cos_##n,n4/2);\
+}
+static void fft4(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+    BF(t3, t1, z[0].re, z[1].re);
+    BF(t8, t6, z[3].re, z[2].re);
+    BF(z[2].re, z[0].re, t1, t6);
+    BF(t4, t2, z[0].im, z[1].im);
+    BF(t7, t5, z[2].im, z[3].im);
+    BF(z[3].im, z[1].im, t4, t8);
+    BF(z[3].re, z[1].re, t3, t7);
+    BF(z[2].im, z[0].im, t2, t5);
+}
+static void fft8(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+    fft4(z);
+    BF(t1, z[5].re, z[4].re, -z[5].re);
+    BF(t2, z[5].im, z[4].im, -z[5].im);
+    BF(t3, z[7].re, z[6].re, -z[7].re);
+    BF(t4, z[7].im, z[6].im, -z[7].im);
+    BF(t8, t1, t3, t1);
+    BF(t7, t2, t2, t4);
+    BF(z[4].re, z[0].re, z[0].re, t1);
+    BF(z[4].im, z[0].im, z[0].im, t2);
+    BF(z[6].re, z[2].re, z[2].re, t7);
+    BF(z[6].im, z[2].im, z[2].im, t8);
+    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
+}
+#if !CONFIG_SMALL
+static void fft16(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6;
+    fft8(z);
+    fft4(z+8);
+    fft4(z+12);
+    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
+    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
+    TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
+    TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
+}
+#else
+DECL_FFT(16,8,4)
+#endif
+DECL_FFT(32,16,8)
+DECL_FFT(64,32,16)
+DECL_FFT(128,64,32)
+DECL_FFT(256,128,64)
+DECL_FFT(512,256,128)
+#if !CONFIG_SMALL
+#define pass pass_big
+#endif
+DECL_FFT(1024,512,256)
+DECL_FFT(2048,1024,512)
+DECL_FFT(4096,2048,1024)
+DECL_FFT(8192,4096,2048)
+DECL_FFT(16384,8192,4096)
+DECL_FFT(32768,16384,8192)
+DECL_FFT(65536,32768,16384)
+static void (* const fft_dispatch[])(FFTComplex*) = {
+    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
+    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
+};
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
+{
+    fft_dispatch[s->nbits-2](z);
+}
author	Mohamed Tarek <mt@rockbox.org>	2010-04-30 11:11:56 +0000
committer	Mohamed Tarek <mt@rockbox.org>	2010-04-30 11:11:56 +0000
commit	cf43e5083b9e0f87de262ea31fd8067225ebfcda (patch)
tree	073e6f4cd9561564d85e410a35432e1f4ead5b11 /apps/codecs/libwmapro/fft.c
parent	bc3c5c16571487bf71fed8c22b30ee40481e156e (diff)
download	rockbox-cf43e5083b9e0f87de262ea31fd8067225ebfcda.tar.gz rockbox-cf43e5083b9e0f87de262ea31fd8067225ebfcda.zip

diff --git a/apps/codecs/libwmapro/fft.c b/apps/codecs/libwmapro/fft.c new file mode 100644 index 0000000000..7275d98e9f --- /dev/null +++ b/apps/codecs/libwmapro/fft.c
@@ -0,0 +1,368 @@
	1	/*
	2	* FFT/IFFT transforms
	3	* Copyright (c) 2008 Loren Merritt
	4	* Copyright (c) 2002 Fabrice Bellard
	5	* Partly based on libdjbfft by D. J. Bernstein
	6	*
	7	* This file is part of FFmpeg.
	8	*
	9	* FFmpeg is free software; you can redistribute it and/or
	10	* modify it under the terms of the GNU Lesser General Public
	11	* License as published by the Free Software Foundation; either
	12	* version 2.1 of the License, or (at your option) any later version.
	13	*
	14	* FFmpeg is distributed in the hope that it will be useful,
	15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	17	* Lesser General Public License for more details.
	18	*
	19	* You should have received a copy of the GNU Lesser General Public
	20	* License along with FFmpeg; if not, write to the Free Software
	21	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	22	*/
	23
	24	/**
	25	* @file libavcodec/fft.c
	26	* FFT/IFFT transforms.
	27	*/
	28
	29	#include <stdlib.h>
	30	#include <string.h>
	31	#include "libavutil/mathematics.h"
	32	#include "fft.h"
	33
	34	/* cos(2pix/n) for 0<=x<=n/4, followed by its reverse */
	35	#if !CONFIG_HARDCODED_TABLES
	36	COSTABLE(16);
	37	COSTABLE(32);
	38	COSTABLE(64);
	39	COSTABLE(128);
	40	COSTABLE(256);
	41	COSTABLE(512);
	42	COSTABLE(1024);
	43	COSTABLE(2048);
	44	COSTABLE(4096);
	45	COSTABLE(8192);
	46	COSTABLE(16384);
	47	COSTABLE(32768);
	48	COSTABLE(65536);
	49	#endif
	50	COSTABLE_CONST FFTSample * const ff_cos_tabs[] = {
	51	NULL, NULL, NULL, NULL,
	52	ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
	53	ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
	54	};
	55
	56	static int split_radix_permutation(int i, int n, int inverse)
	57	{
	58	int m;
	59	if(n <= 2) return i&1;
	60	m = n >> 1;
	61	if(!(i&m)) return split_radix_permutation(i, m, inverse)*2;
	62	m >>= 1;
	63	if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
	64	else return split_radix_permutation(i, m, inverse)*4 - 1;
	65	}
	66
	67	av_cold void ff_init_ff_cos_tabs(int index)
	68	{
	69	#if !CONFIG_HARDCODED_TABLES
	70	int i;
	71	int m = 1<<index;
	72	double freq = 2*M_PI/m;
	73	FFTSample *tab = ff_cos_tabs[index];
	74	for(i=0; i<=m/4; i++)
	75	tab[i] = cos(i*freq);
	76	for(i=1; i<m/4; i++)
	77	tab[m/2-i] = tab[i];
	78	#endif
	79	}
	80
	81	av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
	82	{
	83	int i, j, m, n;
	84	float alpha, c1, s1, s2;
	85	int av_unused has_vectors;
	86
	87	if (nbits < 2 \|\| nbits > 16)
	88	goto fail;
	89	s->nbits = nbits;
	90	n = 1 << nbits;
	91
	92	s->tmp_buf = NULL;
	93	s->exptab = av_malloc((n / 2) * sizeof(FFTComplex));
	94	if (!s->exptab)
	95	goto fail;
	96	s->revtab = av_malloc(n * sizeof(uint16_t));
	97	if (!s->revtab)
	98	goto fail;
	99	s->inverse = inverse;
	100
	101	s2 = inverse ? 1.0 : -1.0;
	102
	103	s->fft_permute = ff_fft_permute_c;
	104	s->fft_calc = ff_fft_calc_c;
	105	#if CONFIG_MDCT
	106	s->imdct_calc = ff_imdct_calc_c;
	107	s->imdct_half = ff_imdct_half_c;
	108	s->mdct_calc = ff_mdct_calc_c;
	109	#endif
	110	s->exptab1 = NULL;
	111	s->split_radix = 1;
	112
	113	if (ARCH_ARM) ff_fft_init_arm(s);
	114	if (HAVE_ALTIVEC) ff_fft_init_altivec(s);
	115	if (HAVE_MMX) ff_fft_init_mmx(s);
	116
	117	if (s->split_radix) {
	118	for(j=4; j<=nbits; j++) {
	119	ff_init_ff_cos_tabs(j);
	120	}
	121	for(i=0; i<n; i++)
	122	s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
	123	s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
	124	} else {
	125	int np, nblocks, np2, l;
	126	FFTComplex *q;
	127
	128	for(i=0; i<(n/2); i++) {
	129	alpha = 2 * M_PI * (float)i / (float)n;
	130	c1 = cos(alpha);
	131	s1 = sin(alpha) * s2;
	132	s->exptab[i].re = c1;
	133	s->exptab[i].im = s1;
	134	}
	135
	136	np = 1 << nbits;
	137	nblocks = np >> 3;
	138	np2 = np >> 1;
	139	s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
	140	if (!s->exptab1)
	141	goto fail;
	142	q = s->exptab1;
	143	do {
	144	for(l = 0; l < np2; l += 2 * nblocks) {
	145	*q++ = s->exptab[l];
	146	*q++ = s->exptab[l + nblocks];
	147
	148	q->re = -s->exptab[l].im;
	149	q->im = s->exptab[l].re;
	150	q++;
	151	q->re = -s->exptab[l + nblocks].im;
	152	q->im = s->exptab[l + nblocks].re;
	153	q++;
	154	}
	155	nblocks = nblocks >> 1;
	156	} while (nblocks != 0);
	157	av_freep(&s->exptab);
	158
	159	/* compute bit reverse table */
	160	for(i=0;i<n;i++) {
	161	m=0;
	162	for(j=0;j<nbits;j++) {
	163	m \|= ((i >> j) & 1) << (nbits-j-1);
	164	}
	165	s->revtab[i]=m;
	166	}
	167	}
	168
	169	return 0;
	170	fail:
	171	av_freep(&s->revtab);
	172	av_freep(&s->exptab);
	173	av_freep(&s->exptab1);
	174	av_freep(&s->tmp_buf);
	175	return -1;
	176	}
	177
	178	void ff_fft_permute_c(FFTContext s, FFTComplex z)
	179	{
	180	int j, k, np;
	181	FFTComplex tmp;
	182	const uint16_t *revtab = s->revtab;
	183	np = 1 << s->nbits;
	184
	185	if (s->tmp_buf) {
	186	/* TODO: handle split-radix permute in a more optimal way, probably in-place */
	187	for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
	188	memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
	189	return;
	190	}
	191
	192	/* reverse */
	193	for(j=0;j<np;j++) {
	194	k = revtab[j];
	195	if (k < j) {
	196	tmp = z[k];
	197	z[k] = z[j];
	198	z[j] = tmp;
	199	}
	200	}
	201	}
	202
	203	av_cold void ff_fft_end(FFTContext *s)
	204	{
	205	av_freep(&s->revtab);
	206	av_freep(&s->exptab);
	207	av_freep(&s->exptab1);
	208	av_freep(&s->tmp_buf);
	209	}
	210
	211	#define sqrthalf (float)M_SQRT1_2
	212
	213	#define BF(x,y,a,b) {\
	214	x = a - b;\
	215	y = a + b;\
	216	}
	217
	218	#define BUTTERFLIES(a0,a1,a2,a3) {\
	219	BF(t3, t5, t5, t1);\
	220	BF(a2.re, a0.re, a0.re, t5);\
	221	BF(a3.im, a1.im, a1.im, t3);\
	222	BF(t4, t6, t2, t6);\
	223	BF(a3.re, a1.re, a1.re, t4);\
	224	BF(a2.im, a0.im, a0.im, t6);\
	225	}
	226
	227	// force loading all the inputs before storing any.
	228	// this is slightly slower for small data, but avoids store->load aliasing
	229	// for addresses separated by large powers of 2.
	230	#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
	231	FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
	232	BF(t3, t5, t5, t1);\
	233	BF(a2.re, a0.re, r0, t5);\
	234	BF(a3.im, a1.im, i1, t3);\
	235	BF(t4, t6, t2, t6);\
	236	BF(a3.re, a1.re, r1, t4);\
	237	BF(a2.im, a0.im, i0, t6);\
	238	}
	239
	240	#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
	241	t1 = a2.re * wre + a2.im * wim;\
	242	t2 = a2.im * wre - a2.re * wim;\
	243	t5 = a3.re * wre - a3.im * wim;\
	244	t6 = a3.im * wre + a3.re * wim;\
	245	BUTTERFLIES(a0,a1,a2,a3)\
	246	}
	247
	248	#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
	249	t1 = a2.re;\
	250	t2 = a2.im;\
	251	t5 = a3.re;\
	252	t6 = a3.im;\
	253	BUTTERFLIES(a0,a1,a2,a3)\
	254	}
	255
	256	/* z[0...8n-1], w[1...2n-1] */
	257	#define PASS(name)\
	258	static void name(FFTComplex z, const FFTSample wre, unsigned int n)\
	259	{\
	260	FFTSample t1, t2, t3, t4, t5, t6;\
	261	int o1 = 2*n;\
	262	int o2 = 4*n;\
	263	int o3 = 6*n;\
	264	const FFTSample *wim = wre+o1;\
	265	n--;\
	266	\
	267	TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
	268	TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
	269	do {\
	270	z += 2;\
	271	wre += 2;\
	272	wim -= 2;\
	273	TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
	274	TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
	275	} while(--n);\
	276	}
	277
	278	PASS(pass)
	279	#undef BUTTERFLIES
	280	#define BUTTERFLIES BUTTERFLIES_BIG
	281	PASS(pass_big)
	282
	283	#define DECL_FFT(n,n2,n4)\
	284	static void fft##n(FFTComplex *z)\
	285	{\
	286	fft##n2(z);\
	287	fft##n4(z+n4*2);\
	288	fft##n4(z+n4*3);\
	289	pass(z,ff_cos_##n,n4/2);\
	290	}
	291
	292	static void fft4(FFTComplex *z)
	293	{
	294	FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
	295
	296	BF(t3, t1, z[0].re, z[1].re);
	297	BF(t8, t6, z[3].re, z[2].re);
	298	BF(z[2].re, z[0].re, t1, t6);
	299	BF(t4, t2, z[0].im, z[1].im);
	300	BF(t7, t5, z[2].im, z[3].im);
	301	BF(z[3].im, z[1].im, t4, t8);
	302	BF(z[3].re, z[1].re, t3, t7);
	303	BF(z[2].im, z[0].im, t2, t5);
	304	}
	305
	306	static void fft8(FFTComplex *z)
	307	{
	308	FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
	309
	310	fft4(z);
	311
	312	BF(t1, z[5].re, z[4].re, -z[5].re);
	313	BF(t2, z[5].im, z[4].im, -z[5].im);
	314	BF(t3, z[7].re, z[6].re, -z[7].re);
	315	BF(t4, z[7].im, z[6].im, -z[7].im);
	316	BF(t8, t1, t3, t1);
	317	BF(t7, t2, t2, t4);
	318	BF(z[4].re, z[0].re, z[0].re, t1);
	319	BF(z[4].im, z[0].im, z[0].im, t2);
	320	BF(z[6].re, z[2].re, z[2].re, t7);
	321	BF(z[6].im, z[2].im, z[2].im, t8);
	322
	323	TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
	324	}
	325
	326	#if !CONFIG_SMALL
	327	static void fft16(FFTComplex *z)
	328	{
	329	FFTSample t1, t2, t3, t4, t5, t6;
	330
	331	fft8(z);
	332	fft4(z+8);
	333	fft4(z+12);
	334
	335	TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
	336	TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
	337	TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
	338	TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
	339	}
	340	#else
	341	DECL_FFT(16,8,4)
	342	#endif
	343	DECL_FFT(32,16,8)
	344	DECL_FFT(64,32,16)
	345	DECL_FFT(128,64,32)
	346	DECL_FFT(256,128,64)
	347	DECL_FFT(512,256,128)
	348	#if !CONFIG_SMALL
	349	#define pass pass_big
	350	#endif
	351	DECL_FFT(1024,512,256)
	352	DECL_FFT(2048,1024,512)
	353	DECL_FFT(4096,2048,1024)
	354	DECL_FFT(8192,4096,2048)
	355	DECL_FFT(16384,8192,4096)
	356	DECL_FFT(32768,16384,8192)
	357	DECL_FFT(65536,32768,16384)
	358
	359	static void (* const fft_dispatch[])(FFTComplex*) = {
	360	fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
	361	fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
	362	};
	363
	364	void ff_fft_calc_c(FFTContext s, FFTComplex z)
	365	{
	366	fft_dispatch[s->nbits-2](z);
	367	}
	368