1 files changed, 258 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c b/lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c
new file mode 100644
index 0000000000..3531d02d10
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2015 Xiph.Org Foundation
+   Written by Viswanath Puttagunta */
+/**
+   @file celt_mdct_ne10.c
+   @brief ARM Neon optimizations for mdct using NE10 library
+ */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include "mdct.h"
+#include "stack_alloc.h"
+void clt_mdct_forward_neon(const mdct_lookup *l,
+                           kiss_fft_scalar *in,
+                           kiss_fft_scalar * OPUS_RESTRICT out,
+                           const opus_val16 *window,
+                           int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_cpx, f2);
+   const kiss_fft_state *st = l->kfft[shift];
+   const kiss_twiddle_scalar *trig;
+   SAVE_STACK;
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N4, kiss_fft_cpx);
+   /* Consider the input to be composed of four blocks: [a, b, c, d] */
+   /* Window, shuffle, fold */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+      for(i=0;i<((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
+         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+      wp1 = window;
+      wp2 = window+overlap-1;
+      for(;i<N4-((overlap+3)>>2);i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ = *xp2;
+         *yp++ = *xp1;
+         xp1+=2;
+         xp2-=2;
+      }
+      for(;i<N4;i++)
+      {
+         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
+         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
+         xp1+=2;
+         xp2-=2;
+         wp1+=2;
+         wp2-=2;
+      }
+   }
+   /* Pre-rotation */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar *t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_cpx yc;
+         kiss_twiddle_scalar t0, t1;
+         kiss_fft_scalar re, im, yr, yi;
+         t0 = t[i];
+         t1 = t[N4+i];
+         re = *yp++;
+         im = *yp++;
+         yr = S_MUL(re,t0)  -  S_MUL(im,t1);
+         yi = S_MUL(im,t0)  +  S_MUL(re,t1);
+         yc.r = yr;
+         yc.i = yi;
+         f2[i] = yc;
+      }
+   }
+   opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
+   /* Post-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
+         yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
+         *yp1 = yr;
+         *yp2 = yi;
+         fp++;
+         yp1 += 2*stride;
+         yp2 -= 2*stride;
+      }
+   }
+   RESTORE_STACK;
+}
+void clt_mdct_backward_neon(const mdct_lookup *l,
+                            kiss_fft_scalar *in,
+                            kiss_fft_scalar * OPUS_RESTRICT out,
+                            const opus_val16 * OPUS_RESTRICT window,
+                            int overlap, int shift, int stride, int arch)
+{
+   int i;
+   int N, N2, N4;
+   VARDECL(kiss_fft_scalar, f);
+   const kiss_twiddle_scalar *trig;
+   const kiss_fft_state *st = l->kfft[shift];
+   N = l->n;
+   trig = l->trig;
+   for (i=0;i<shift;i++)
+   {
+      N >>= 1;
+      trig += N;
+   }
+   N2 = N>>1;
+   N4 = N>>2;
+   ALLOC(f, N2, kiss_fft_scalar);
+   /* Pre-rotate */
+   {
+      /* Temp pointers to make it really clear to the compiler what we're doing */
+      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+      kiss_fft_scalar * OPUS_RESTRICT yp = f;
+      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+      for(i=0;i<N4;i++)
+      {
+         kiss_fft_scalar yr, yi;
+         yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
+         yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
+         yp[2*i] = yr;
+         yp[2*i+1] = yi;
+         xp1+=2*stride;
+         xp2-=2*stride;
+      }
+   }
+   opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
+   {
+      kiss_fft_scalar * yp0 = out+(overlap>>1);
+      kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
+      const kiss_twiddle_scalar *t = &trig[0];
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
+      {
+         kiss_fft_scalar re, im, yr, yi;
+         kiss_twiddle_scalar t0, t1;
+         re = yp0[0];
+         im = yp0[1];
+         t0 = t[i];
+         t1 = t[N4+i];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         re = yp1[0];
+         im = yp1[1];
+         yp0[0] = yr;
+         yp1[1] = yi;
+         t0 = t[(N4-i-1)];
+         t1 = t[(N2-i-1)];
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+         yr = S_MUL(re,t0) + S_MUL(im,t1);
+         yi = S_MUL(re,t1) - S_MUL(im,t0);
+         yp1[0] = yr;
+         yp0[1] = yi;
+         yp0 += 2;
+         yp1 -= 2;
+      }
+   }
+   /* Mirror on both sides for TDAC */
+   {
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+      const opus_val16 * OPUS_RESTRICT wp1 = window;
+      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+      for(i = 0; i < overlap/2; i++)
+      {
+         kiss_fft_scalar x1, x2;
+         x1 = *xp1;
+         x2 = *yp1;
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+         wp1++;
+         wp2--;
+      }
+   }
+   RESTORE_STACK;
+}

diff --git a/lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c b/lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c new file mode 100644 index 0000000000..3531d02d10 --- /dev/null +++ b/lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c
@@ -0,0 +1,258 @@
	1	/* Copyright (c) 2015 Xiph.Org Foundation
	2	Written by Viswanath Puttagunta */
	3	/**
	4	@file celt_mdct_ne10.c
	5	@brief ARM Neon optimizations for mdct using NE10 library
	6	*/
	7
	8	/*
	9	Redistribution and use in source and binary forms, with or without
	10	modification, are permitted provided that the following conditions
	11	are met:
	12
	13	- Redistributions of source code must retain the above copyright
	14	notice, this list of conditions and the following disclaimer.
	15
	16	- Redistributions in binary form must reproduce the above copyright
	17	notice, this list of conditions and the following disclaimer in the
	18	documentation and/or other materials provided with the distribution.
	19
	20	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	21	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	22	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	23	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
	24	OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	25	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	26	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	27	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	28	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	29	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	30	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	31	*/
	32
	33	#ifndef SKIP_CONFIG_H
	34	#ifdef HAVE_CONFIG_H
	35	#include "config.h"
	36	#endif
	37	#endif
	38
	39	#include "kiss_fft.h"
	40	#include "_kiss_fft_guts.h"
	41	#include "mdct.h"
	42	#include "stack_alloc.h"
	43
	44	void clt_mdct_forward_neon(const mdct_lookup *l,
	45	kiss_fft_scalar *in,
	46	kiss_fft_scalar * OPUS_RESTRICT out,
	47	const opus_val16 *window,
	48	int overlap, int shift, int stride, int arch)
	49	{
	50	int i;
	51	int N, N2, N4;
	52	VARDECL(kiss_fft_scalar, f);
	53	VARDECL(kiss_fft_cpx, f2);
	54	const kiss_fft_state *st = l->kfft[shift];
	55	const kiss_twiddle_scalar *trig;
	56
	57	SAVE_STACK;
	58
	59	N = l->n;
	60	trig = l->trig;
	61	for (i=0;i<shift;i++)
	62	{
	63	N >>= 1;
	64	trig += N;
	65	}
	66	N2 = N>>1;
	67	N4 = N>>2;
	68
	69	ALLOC(f, N2, kiss_fft_scalar);
	70	ALLOC(f2, N4, kiss_fft_cpx);
	71
	72	/* Consider the input to be composed of four blocks: [a, b, c, d] */
	73	/* Window, shuffle, fold */
	74	{
	75	/* Temp pointers to make it really clear to the compiler what we're doing */
	76	const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
	77	const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
	78	kiss_fft_scalar * OPUS_RESTRICT yp = f;
	79	const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
	80	const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
	81	for(i=0;i<((overlap+3)>>2);i++)
	82	{
	83	/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
	84	yp++ = MULT16_32_Q15(wp2, xp1[N2]) + MULT16_32_Q15(wp1,xp2);
	85	yp++ = MULT16_32_Q15(wp1, xp1) - MULT16_32_Q15(wp2, xp2[-N2]);
	86	xp1+=2;
	87	xp2-=2;
	88	wp1+=2;
	89	wp2-=2;
	90	}
	91	wp1 = window;
	92	wp2 = window+overlap-1;
	93	for(;i<N4-((overlap+3)>>2);i++)
	94	{
	95	/* Real part arranged as a-bR, Imag part arranged as -c-dR */
	96	yp++ = xp2;
	97	yp++ = xp1;
	98	xp1+=2;
	99	xp2-=2;
	100	}
	101	for(;i<N4;i++)
	102	{
	103	/* Real part arranged as a-bR, Imag part arranged as -c-dR */
	104	yp++ = -MULT16_32_Q15(wp1, xp1[-N2]) + MULT16_32_Q15(wp2, xp2);
	105	yp++ = MULT16_32_Q15(wp2, xp1) + MULT16_32_Q15(wp1, xp2[N2]);
	106	xp1+=2;
	107	xp2-=2;
	108	wp1+=2;
	109	wp2-=2;
	110	}
	111	}
	112	/* Pre-rotation */
	113	{
	114	kiss_fft_scalar * OPUS_RESTRICT yp = f;
	115	const kiss_twiddle_scalar *t = &trig[0];
	116	for(i=0;i<N4;i++)
	117	{
	118	kiss_fft_cpx yc;
	119	kiss_twiddle_scalar t0, t1;
	120	kiss_fft_scalar re, im, yr, yi;
	121	t0 = t[i];
	122	t1 = t[N4+i];
	123	re = *yp++;
	124	im = *yp++;
	125	yr = S_MUL(re,t0) - S_MUL(im,t1);
	126	yi = S_MUL(im,t0) + S_MUL(re,t1);
	127	yc.r = yr;
	128	yc.i = yi;
	129	f2[i] = yc;
	130	}
	131	}
	132
	133	opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
	134
	135	/* Post-rotate */
	136	{
	137	/* Temp pointers to make it really clear to the compiler what we're doing */
	138	const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
	139	kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
	140	kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
	141	const kiss_twiddle_scalar *t = &trig[0];
	142	/* Temp pointers to make it really clear to the compiler what we're doing */
	143	for(i=0;i<N4;i++)
	144	{
	145	kiss_fft_scalar yr, yi;
	146	yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
	147	yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
	148	*yp1 = yr;
	149	*yp2 = yi;
	150	fp++;
	151	yp1 += 2*stride;
	152	yp2 -= 2*stride;
	153	}
	154	}
	155	RESTORE_STACK;
	156	}
	157
	158	void clt_mdct_backward_neon(const mdct_lookup *l,
	159	kiss_fft_scalar *in,
	160	kiss_fft_scalar * OPUS_RESTRICT out,
	161	const opus_val16 * OPUS_RESTRICT window,
	162	int overlap, int shift, int stride, int arch)
	163	{
	164	int i;
	165	int N, N2, N4;
	166	VARDECL(kiss_fft_scalar, f);
	167	const kiss_twiddle_scalar *trig;
	168	const kiss_fft_state *st = l->kfft[shift];
	169
	170	N = l->n;
	171	trig = l->trig;
	172	for (i=0;i<shift;i++)
	173	{
	174	N >>= 1;
	175	trig += N;
	176	}
	177	N2 = N>>1;
	178	N4 = N>>2;
	179
	180	ALLOC(f, N2, kiss_fft_scalar);
	181
	182	/* Pre-rotate */
	183	{
	184	/* Temp pointers to make it really clear to the compiler what we're doing */
	185	const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
	186	const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
	187	kiss_fft_scalar * OPUS_RESTRICT yp = f;
	188	const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
	189	for(i=0;i<N4;i++)
	190	{
	191	kiss_fft_scalar yr, yi;
	192	yr = S_MUL(xp2, t[i]) + S_MUL(xp1, t[N4+i]);
	193	yi = S_MUL(xp1, t[i]) - S_MUL(xp2, t[N4+i]);
	194	yp[2*i] = yr;
	195	yp[2*i+1] = yi;
	196	xp1+=2*stride;
	197	xp2-=2*stride;
	198	}
	199	}
	200
	201	opus_ifft(st, (kiss_fft_cpx )f, (kiss_fft_cpx)(out+(overlap>>1)), arch);
	202
	203	/* Post-rotate and de-shuffle from both ends of the buffer at once to make
	204	it in-place. */
	205	{
	206	kiss_fft_scalar * yp0 = out+(overlap>>1);
	207	kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
	208	const kiss_twiddle_scalar *t = &trig[0];
	209	/* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
	210	middle pair will be computed twice. */
	211	for(i=0;i<(N4+1)>>1;i++)
	212	{
	213	kiss_fft_scalar re, im, yr, yi;
	214	kiss_twiddle_scalar t0, t1;
	215	re = yp0[0];
	216	im = yp0[1];
	217	t0 = t[i];
	218	t1 = t[N4+i];
	219	/* We'd scale up by 2 here, but instead it's done when mixing the windows */
	220	yr = S_MUL(re,t0) + S_MUL(im,t1);
	221	yi = S_MUL(re,t1) - S_MUL(im,t0);
	222	re = yp1[0];
	223	im = yp1[1];
	224	yp0[0] = yr;
	225	yp1[1] = yi;
	226
	227	t0 = t[(N4-i-1)];
	228	t1 = t[(N2-i-1)];
	229	/* We'd scale up by 2 here, but instead it's done when mixing the windows */
	230	yr = S_MUL(re,t0) + S_MUL(im,t1);
	231	yi = S_MUL(re,t1) - S_MUL(im,t0);
	232	yp1[0] = yr;
	233	yp0[1] = yi;
	234	yp0 += 2;
	235	yp1 -= 2;
	236	}
	237	}
	238
	239	/* Mirror on both sides for TDAC */
	240	{
	241	kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
	242	kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
	243	const opus_val16 * OPUS_RESTRICT wp1 = window;
	244	const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
	245
	246	for(i = 0; i < overlap/2; i++)
	247	{
	248	kiss_fft_scalar x1, x2;
	249	x1 = *xp1;
	250	x2 = *yp1;
	251	yp1++ = MULT16_32_Q15(wp2, x2) - MULT16_32_Q15(*wp1, x1);
	252	xp1-- = MULT16_32_Q15(wp1, x2) + MULT16_32_Q15(*wp2, x1);
	253	wp1++;
	254	wp2--;
	255	}
	256	}
	257	RESTORE_STACK;
	258	}