1 files changed, 332 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c
new file mode 100644
index 0000000000..abf4e79d8d
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.c
@@ -0,0 +1,332 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2008 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* This is a simple MDCT implementation that uses a N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much and FFT here.
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and that was ending up duplicating code
+   (might as well use the same FFT everywhere).
+   The algorithm is similar to (and inspired from) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#endif
+#include "mdct.h"
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include <math.h>
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+#ifdef CUSTOM_MODES
+int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
+{
+   int i;
+   int N4;
+   kiss_twiddle_scalar *trig;
+#if defined(FIXED_POINT)
+   int N2=N>>1;
+#endif
+   l->n = N;
+   N4 = N>>2;
+   l->maxshift = maxshift;
+   for (i=0;i<=maxshift;i++)
+   {
+      if (i==0)
+         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
+      else
+         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
+#ifndef ENABLE_TI_DSPLIB55
+      if (l->kfft[i]==NULL)
+         return 0;
+#endif
+   }
+   l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
+   if (l->trig==NULL)
+     return 0;
+   /* We have enough points that sine isn't necessary */
+#if defined(FIXED_POINT)
+   for (i=0;i<=N4;i++)
+      trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
+#else
+   for (i=0;i<=N4;i++)
+      trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
+#endif
+   return 1;
+}
+void clt_mdct_clear(mdct_lookup *l)
+{
+   int i;
+   for (i=0;i<=l->maxshift;i++)
+      opus_fft_free(l->kfft[i]);
+   opus_free((kiss_twiddle_scalar*)l->trig);
+}
+#endif /* CUSTOM_MODES */
+/* Forward MDCT trashes the input array */
+void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 *window, int overlap, int shift, int stride)
+{
+   int i;
+   int N, N2, N4;
+   kiss_twiddle_scalar sine;
+   VARDECL(kiss_fft_scalar, f);
+   SAVE_STACK;
+   N = l->n;
+   N >>= shift;
+   N2 = N>>1;
+   N4 = N>>2;
+   ALLOC(f, N2, kiss_fft_scalar);
+   /* sin(x) ~= x here */
+#ifdef FIXED_POINT
+   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
+#else
+   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
+#endif
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<(overlap>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-(overlap>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         re = yp[0];
+         im = yp[1];
+         yr = -S_MUL(re,t[i<<shift])  -  S_MUL(im,t[(N4-i)<<shift]);
+         yi = -S_MUL(im,t[i<<shift])  +  S_MUL(re,t[(N4-i)<<shift]);
+         /* works because the cos is nearly one */
+         *yp++ = yr + S_MUL(yi,sine);
+         *yp++ = yi - S_MUL(yr,sine);
+      }
+   }
+   /* N/4 complex FFT, down-scales by 4/N */
+   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in);
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT fp = in;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
+         yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
+         /* works because the cos is nearly one */
+         *yp1 = yr - S_MUL(yi,sine);
+         *yp2 = yi + S_MUL(yr,sine);;
+         fp += 2;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
+{
+   int i;
+   int N, N2, N4;
+   kiss_twiddle_scalar sine;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_scalar, f2);
+   SAVE_STACK;
+   N = l->n;
+   N >>= shift;
+   N2 = N>>1;
+   N4 = N>>2;
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N2, kiss_fft_scalar);
+   /* sin(x) ~= x here */
+#ifdef FIXED_POINT
+   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
+#else
+   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
+#endif
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
+         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
+         /* works because the cos is nearly one */
+         *yp++ = yr - S_MUL(yi,sine);
+         *yp++ = yi + S_MUL(yr,sine);
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+   /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
+   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f);
+   /* Post-rotate */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT fp = f;
+      const kiss_twiddle_scalar *t = &l->trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         re = fp[0];
+         im = fp[1];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
+         yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
+         /* works because the cos is nearly one */
+         *fp++ = yr - S_MUL(yi,sine);
+         *fp++ = yi + S_MUL(yr,sine);
+      }
+   }
+   /* De-shuffle the components for the middle of the window only */
+   {
+      const kiss_fft_scalar * OPUS_RESTRICT fp1 = f;
+      const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
+      for(i = 0; i < N4; i++)
+      {
+         *yp++ =-*fp1;
+         *yp++ = *fp2;
+         fp1 += 2;
+         fp2 -= 2;
+      }
+   }
+   out -= (N2-overlap)>>1;
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1;
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+      for(i = 0; i< N4-overlap/2; i++)
+      {
+         *xp1 = *fp1;
+         xp1--;
+         fp1--;
+      }
+      for(; i < N4; i++)
+      {
+         kiss_fft_scalar x1;
+         x1 = *fp1--;
+         *yp1++ +=-MULT16_32_Q15(*wp1, x1);
+         *xp1-- += MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+   {
+      kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4;
+      kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2);
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+      for(i = 0; i< N4-overlap/2; i++)
+      {
+         *xp2 = *fp2;
+         xp2++;
+         fp2++;
+      }
+      for(; i < N4; i++)
+      {
+         kiss_fft_scalar x2;
+         x2 = *fp2++;
+         *yp2--  = MULT16_32_Q15(*wp1, x2);
+         *xp2++  = MULT16_32_Q15(*wp2, x2);
+         wp1++;
+         wp2--;
+      }
+   }
+   RESTORE_STACK;
+}

diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c new file mode 100644 index 0000000000..abf4e79d8d --- /dev/null +++ b/lib/rbcodec/codecs/libopus/celt/mdct.c
@@ -0,0 +1,332 @@
	1	/* Copyright (c) 2007-2008 CSIRO
	2	Copyright (c) 2007-2008 Xiph.Org Foundation
	3	Written by Jean-Marc Valin */
	4	/*
	5	Redistribution and use in source and binary forms, with or without
	6	modification, are permitted provided that the following conditions
	7	are met:
	8
	9	- Redistributions of source code must retain the above copyright
	10	notice, this list of conditions and the following disclaimer.
	11
	12	- Redistributions in binary form must reproduce the above copyright
	13	notice, this list of conditions and the following disclaimer in the
	14	documentation and/or other materials provided with the distribution.
	15
	16	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	17	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	18	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	19	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
	20	OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	21	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	22	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	23	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	24	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	25	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	26	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	27	*/
	28
	29	/* This is a simple MDCT implementation that uses a N/4 complex FFT
	30	to do most of the work. It should be relatively straightforward to
	31	plug in pretty much and FFT here.
	32
	33	This replaces the Vorbis FFT (and uses the exact same API), which
	34	was a bit too messy and that was ending up duplicating code
	35	(might as well use the same FFT everywhere).
	36
	37	The algorithm is similar to (and inspired from) Fabrice Bellard's
	38	MDCT implementation in FFMPEG, but has differences in signs, ordering
	39	and scaling in many places.
	40	*/
	41
	42	#ifndef SKIP_CONFIG_H
	43	#ifdef HAVE_CONFIG_H
	44	#include "opus_config.h"
	45	#endif
	46	#endif
	47
	48	#include "mdct.h"
	49	#include "kiss_fft.h"
	50	#include "_kiss_fft_guts.h"
	51	#include <math.h>
	52	#include "os_support.h"
	53	#include "mathops.h"
	54	#include "stack_alloc.h"
	55
	56	#ifdef CUSTOM_MODES
	57
	58	int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
	59	{
	60	int i;
	61	int N4;
	62	kiss_twiddle_scalar *trig;
	63	#if defined(FIXED_POINT)
	64	int N2=N>>1;
	65	#endif
	66	l->n = N;
	67	N4 = N>>2;
	68	l->maxshift = maxshift;
	69	for (i=0;i<=maxshift;i++)
	70	{
	71	if (i==0)
	72	l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
	73	else
	74	l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
	75	#ifndef ENABLE_TI_DSPLIB55
	76	if (l->kfft[i]==NULL)
	77	return 0;
	78	#endif
	79	}
	80	l->trig = trig = (kiss_twiddle_scalar)opus_alloc((N4+1)sizeof(kiss_twiddle_scalar));
	81	if (l->trig==NULL)
	82	return 0;
	83	/* We have enough points that sine isn't necessary */
	84	#if defined(FIXED_POINT)
	85	for (i=0;i<=N4;i++)
	86	trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
	87	#else
	88	for (i=0;i<=N4;i++)
	89	trig[i] = (kiss_twiddle_scalar)cos(2PIi/N);
	90	#endif
	91	return 1;
	92	}
	93
	94	void clt_mdct_clear(mdct_lookup *l)
	95	{
	96	int i;
	97	for (i=0;i<=l->maxshift;i++)
	98	opus_fft_free(l->kfft[i]);
	99	opus_free((kiss_twiddle_scalar*)l->trig);
	100	}
	101
	102	#endif /* CUSTOM_MODES */
	103
	104	/* Forward MDCT trashes the input array */
	105	void clt_mdct_forward(const mdct_lookup l, kiss_fft_scalar in, kiss_fft_scalar * OPUS_RESTRICT out,
	106	const opus_val16 *window, int overlap, int shift, int stride)
	107	{
	108	int i;
	109	int N, N2, N4;
	110	kiss_twiddle_scalar sine;
	111	VARDECL(kiss_fft_scalar, f);
	112	SAVE_STACK;
	113	N = l->n;
	114	N >>= shift;
	115	N2 = N>>1;
	116	N4 = N>>2;
	117	ALLOC(f, N2, kiss_fft_scalar);
	118	/* sin(x) ~= x here */
	119	#ifdef FIXED_POINT
	120	sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
	121	#else
	122	sine = (kiss_twiddle_scalar)2PI(.125f)/N;
	123	#endif
	124
	125	/* Consider the input to be composed of four blocks: [a, b, c, d] */
	126	/* Window, shuffle, fold */
	127	{
	128	/* Temp pointers to make it really clear to the compiler what we're doing */
	129	const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
	130	const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
	131	kiss_fft_scalar * OPUS_RESTRICT yp = f;
	132	const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
	133	const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
	134	for(i=0;i<(overlap>>2);i++)
	135	{
	136	/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
	137	yp++ = MULT16_32_Q15(wp2, xp1[N2]) + MULT16_32_Q15(wp1,xp2);
	138	yp++ = MULT16_32_Q15(wp1, xp1) - MULT16_32_Q15(wp2, xp2[-N2]);
	139	xp1+=2;
	140	xp2-=2;
	141	wp1+=2;
	142	wp2-=2;
	143	}
	144	wp1 = window;
	145	wp2 = window+overlap-1;
	146	for(;i<N4-(overlap>>2);i++)
	147	{
	148	/* Real part arranged as a-bR, Imag part arranged as -c-dR */
	149	yp++ = xp2;
	150	yp++ = xp1;
	151	xp1+=2;
	152	xp2-=2;
	153	}
	154	for(;i<N4;i++)
	155	{
	156	/* Real part arranged as a-bR, Imag part arranged as -c-dR */
	157	yp++ = -MULT16_32_Q15(wp1, xp1[-N2]) + MULT16_32_Q15(wp2, xp2);
	158	yp++ = MULT16_32_Q15(wp2, xp1) + MULT16_32_Q15(wp1, xp2[N2]);
	159	xp1+=2;
	160	xp2-=2;
	161	wp1+=2;
	162	wp2-=2;
	163	}
	164	}
	165	/* Pre-rotation */
	166	{
	167	kiss_fft_scalar * OPUS_RESTRICT yp = f;
	168	const kiss_twiddle_scalar *t = &l->trig[0];
	169	for(i=0;i<N4;i++)
	170	{
	171	kiss_fft_scalar re, im, yr, yi;
	172	re = yp[0];
	173	im = yp[1];
	174	yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
	175	yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
	176	/* works because the cos is nearly one */
	177	*yp++ = yr + S_MUL(yi,sine);
	178	*yp++ = yi - S_MUL(yr,sine);
	179	}
	180	}
	181
	182	/* N/4 complex FFT, down-scales by 4/N */
	183	opus_fft(l->kfft[shift], (kiss_fft_cpx )f, (kiss_fft_cpx )in);
	184
	185	/* Post-rotate */
	186	{
	187	/* Temp pointers to make it really clear to the compiler what we're doing */
	188	const kiss_fft_scalar * OPUS_RESTRICT fp = in;
	189	kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
	190	kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
	191	const kiss_twiddle_scalar *t = &l->trig[0];
	192	/* Temp pointers to make it really clear to the compiler what we're doing */
	193	for(i=0;i<N4;i++)
	194	{
	195	kiss_fft_scalar yr, yi;
	196	yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
	197	yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
	198	/* works because the cos is nearly one */
	199	*yp1 = yr - S_MUL(yi,sine);
	200	*yp2 = yi + S_MUL(yr,sine);;
	201	fp += 2;
	202	yp1 += 2*stride;
	203	yp2 -= 2*stride;
	204	}
	205	}
	206	RESTORE_STACK;
	207	}
	208
	209	void clt_mdct_backward(const mdct_lookup l, kiss_fft_scalar in, kiss_fft_scalar * OPUS_RESTRICT out,
	210	const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
	211	{
	212	int i;
	213	int N, N2, N4;
	214	kiss_twiddle_scalar sine;
	215	VARDECL(kiss_fft_scalar, f);
	216	VARDECL(kiss_fft_scalar, f2);
	217	SAVE_STACK;
	218	N = l->n;
	219	N >>= shift;
	220	N2 = N>>1;
	221	N4 = N>>2;
	222	ALLOC(f, N2, kiss_fft_scalar);
	223	ALLOC(f2, N2, kiss_fft_scalar);
	224	/* sin(x) ~= x here */
	225	#ifdef FIXED_POINT
	226	sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
	227	#else
	228	sine = (kiss_twiddle_scalar)2PI(.125f)/N;
	229	#endif
	230
	231	/* Pre-rotate */
	232	{
	233	/* Temp pointers to make it really clear to the compiler what we're doing */
	234	const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
	235	const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
	236	kiss_fft_scalar * OPUS_RESTRICT yp = f2;
	237	const kiss_twiddle_scalar *t = &l->trig[0];
	238	for(i=0;i<N4;i++)
	239	{
	240	kiss_fft_scalar yr, yi;
	241	yr = -S_MUL(xp2, t[i<<shift]) + S_MUL(xp1,t[(N4-i)<<shift]);
	242	yi = -S_MUL(xp2, t[(N4-i)<<shift]) - S_MUL(xp1,t[i<<shift]);
	243	/* works because the cos is nearly one */
	244	*yp++ = yr - S_MUL(yi,sine);
	245	*yp++ = yi + S_MUL(yr,sine);
	246	xp1+=2*stride;
	247	xp2-=2*stride;
	248	}
	249	}
	250
	251	/* Inverse N/4 complex FFT. This one should not downscale even in fixed-point */
	252	opus_ifft(l->kfft[shift], (kiss_fft_cpx )f2, (kiss_fft_cpx )f);
	253
	254	/* Post-rotate */
	255	{
	256	kiss_fft_scalar * OPUS_RESTRICT fp = f;
	257	const kiss_twiddle_scalar *t = &l->trig[0];
	258
	259	for(i=0;i<N4;i++)
	260	{
	261	kiss_fft_scalar re, im, yr, yi;
	262	re = fp[0];
	263	im = fp[1];
	264	/* We'd scale up by 2 here, but instead it's done when mixing the windows */
	265	yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
	266	yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
	267	/* works because the cos is nearly one */
	268	*fp++ = yr - S_MUL(yi,sine);
	269	*fp++ = yi + S_MUL(yr,sine);
	270	}
	271	}
	272	/* De-shuffle the components for the middle of the window only */
	273	{
	274	const kiss_fft_scalar * OPUS_RESTRICT fp1 = f;
	275	const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1;
	276	kiss_fft_scalar * OPUS_RESTRICT yp = f2;
	277	for(i = 0; i < N4; i++)
	278	{
	279	yp++ =-fp1;
	280	yp++ = fp2;
	281	fp1 += 2;
	282	fp2 -= 2;
	283	}
	284	}
	285	out -= (N2-overlap)>>1;
	286	/* Mirror on both sides for TDAC */
	287	{
	288	kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1;
	289	kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1;
	290	kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2;
	291	const opus_val16 * OPUS_RESTRICT wp1 = window;
	292	const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
	293	for(i = 0; i< N4-overlap/2; i++)
	294	{
	295	xp1 = fp1;
	296	xp1--;
	297	fp1--;
	298	}
	299	for(; i < N4; i++)
	300	{
	301	kiss_fft_scalar x1;
	302	x1 = *fp1--;
	303	yp1++ +=-MULT16_32_Q15(wp1, x1);
	304	xp1-- += MULT16_32_Q15(wp2, x1);
	305	wp1++;
	306	wp2--;
	307	}
	308	}
	309	{
	310	kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4;
	311	kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2;
	312	kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2);
	313	const opus_val16 * OPUS_RESTRICT wp1 = window;
	314	const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
	315	for(i = 0; i< N4-overlap/2; i++)
	316	{
	317	xp2 = fp2;
	318	xp2++;
	319	fp2++;
	320	}
	321	for(; i < N4; i++)
	322	{
	323	kiss_fft_scalar x2;
	324	x2 = *fp2++;
	325	yp2-- = MULT16_32_Q15(wp1, x2);
	326	xp2++ = MULT16_32_Q15(wp2, x2);
	327	wp1++;
	328	wp2--;
	329	}
	330	}
	331	RESTORE_STACK;
	332	}