1 files changed, 158 insertions, 38 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
index 66aed1de09..7ffe90a357 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
@@ -26,12 +26,13 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "celt_lpc.h"
 #include "stack_alloc.h"
 #include "mathops.h"
+#include "pitch.h"
 void _celt_lpc(
      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
@@ -87,42 +88,71 @@ int          p
 #endif
 }
-void celt_fir(const opus_val16 *x,
+void celt_fir(const opus_val16 *_x,
         const opus_val16 *num,
-         opus_val16 *y,
+         opus_val16 *_y,
         int N,
         int ord,
         opus_val16 *mem)
 {
   int i,j;
+   VARDECL(opus_val16, rnum);
+   VARDECL(opus_val16, x);
+   SAVE_STACK;
+   ALLOC(rnum, ord, opus_val16);
+   ALLOC(x, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   for(i=0;i<ord;i++)
+      x[i] = mem[ord-i-1];
+   for (i=0;i<N;i++)
+      x[i+ord]=_x[i];
+   for(i=0;i<ord;i++)
+      mem[i] = _x[N-i-1];
+#ifdef SMALL_FOOTPRINT
   for (i=0;i<N;i++)
   {
-      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
      for (j=0;j<ord;j++)
      {
-         sum += MULT16_16(num[j],mem[j]);
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
-      }
-      for (j=ord-1;j>=1;j--)
-      {
-         mem[j]=mem[j-1];
      }
-      mem[0] = x[i];
+      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
-      y[i] = ROUND16(sum, SIG_SHIFT);
   }
+#else
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(rnum, x+i, sum, ord);
+      _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
+      _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
+      _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
+      _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
+      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
+   }
+#endif
+   RESTORE_STACK;
 }
-void celt_iir(const opus_val32 *x,
+void celt_iir(const opus_val32 *_x,
         const opus_val16 *den,
-         opus_val32 *y,
+         opus_val32 *_y,
         int N,
         int ord,
         opus_val16 *mem)
 {
+#ifdef SMALL_FOOTPRINT
   int i,j;
   for (i=0;i<N;i++)
   {
-      opus_val32 sum = x[i];
+      opus_val32 sum = _x[i];
      for (j=0;j<ord;j++)
      {
         sum -= MULT16_16(den[j],mem[j]);
@@ -132,11 +162,65 @@ void celt_iir(const opus_val32 *x,
         mem[j]=mem[j-1];
      }
      mem[0] = ROUND16(sum,SIG_SHIFT);
-      y[i] = sum;
+      _y[i] = sum;
   }
+#else
+   int i,j;
+   VARDECL(opus_val16, rden);
+   VARDECL(opus_val16, y);
+   SAVE_STACK;
+   celt_assert((ord&3)==0);
+   ALLOC(rden, ord, opus_val16);
+   ALLOC(y, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rden[i] = den[ord-i-1];
+   for(i=0;i<ord;i++)
+      y[i] = -mem[ord-i-1];
+   for(;i<N+ord;i++)
+      y[i]=0;
+   for (i=0;i<N-3;i+=4)
+   {
+      /* Unroll by 4 as if it were an FIR filter */
+      opus_val32 sum[4];
+      sum[0]=_x[i];
+      sum[1]=_x[i+1];
+      sum[2]=_x[i+2];
+      sum[3]=_x[i+3];
+      xcorr_kernel(rden, y+i, sum, ord);
+      /* Patch up the result to compensate for the fact that this is an IIR */
+      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
+      _y[i  ] = sum[0];
+      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
+      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
+      _y[i+1] = sum[1];
+      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
+      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
+      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
+      _y[i+2] = sum[2];
+      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
+      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
+      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
+      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
+      _y[i+3] = sum[3];
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+         sum -= MULT16_16(rden[j],y[i+j]);
+      y[i+ord] = ROUND16(sum,SIG_SHIFT);
+      _y[i] = sum;
+   }
+   for(i=0;i<ord;i++)
+      mem[i] = _y[N-i-1];
+   RESTORE_STACK;
+#endif
 }
-void _celt_autocorr(
+int _celt_autocorr(
                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
                   const opus_val16       *window,
@@ -146,43 +230,79 @@ void _celt_autocorr(
                  )
 {
   opus_val32 d;
-   int i;
+   int i, k;
+   int fastN=n-lag;
+   int shift;
+   const opus_val16 *xptr;
   VARDECL(opus_val16, xx);
   SAVE_STACK;
   ALLOC(xx, n, opus_val16);
   celt_assert(n>0);
   celt_assert(overlap>=0);
-   for (i=0;i<n;i++)
+   if (overlap == 0)
-      xx[i] = x[i];
-   for (i=0;i<overlap;i++)
   {
-      xx[i] = MULT16_16_Q15(x[i],window[i]);
+      xptr = x;
-      xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+   } else {
+      for (i=0;i<n;i++)
+         xx[i] = x[i];
+      for (i=0;i<overlap;i++)
+      {
+         xx[i] = MULT16_16_Q15(x[i],window[i]);
+         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+      }
+      xptr = xx;
   }
+   shift=0;
 #ifdef FIXED_POINT
   {
-      opus_val32 ac0=0;
+      opus_val32 ac0;
-      int shift;
+      ac0 = 1+(n<<7);
-      for(i=0;i<n;i++)
+      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
-         ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);
+      for(i=(n&1);i<n;i+=2)
-      ac0 += 1+n;
+      {
+         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
+         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
+      }
      shift = celt_ilog2(ac0)-30+10;
-      shift = (shift+1)/2;
+      shift = (shift)/2;
-      for(i=0;i<n;i++)
+      if (shift>0)
-         xx[i] = VSHR32(xx[i], shift);
+      {
+         for(i=0;i<n;i++)
+            xx[i] = PSHR32(xptr[i], shift);
+         xptr = xx;
+      } else
+         shift = 0;
   }
 #endif
-   while (lag>=0)
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
+   for (k=0;k<=lag;k++)
   {
-      for (i = lag, d = 0; i < n; i++)
+      for (i = k+fastN, d = 0; i < n; i++)
-         d += xx[i] * xx[i-lag];
+         d = MAC16_16(d, xptr[i], xptr[i-k]);
-      ac[lag] = d;
+      ac[k] += d;
-      /*printf ("%f ", ac[lag]);*/
-      lag--;
   }
-   /*printf ("\n");*/
+#ifdef FIXED_POINT
-   ac[0] += 10;
+   shift = 2*shift;
+   if (shift<=0)
+      ac[0] += SHL32((opus_int32)1, -shift);
+   if (ac[0] < 268435456)
+   {
+      int shift2 = 29 - EC_ILOG(ac[0]);
+      for (i=0;i<=lag;i++)
+         ac[i] = SHL32(ac[i], shift2);
+      shift -= shift2;
+   } else if (ac[0] >= 536870912)
+   {
+      int shift2=1;
+      if (ac[0] >= 1073741824)
+         shift2++;
+      for (i=0;i<=lag;i++)
+         ac[i] = SHR32(ac[i], shift2);
+      shift += shift2;
+   }
+#endif
   RESTORE_STACK;
+   return shift;
 }

diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c index 66aed1de09..7ffe90a357 100644 --- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c +++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
@@ -26,12 +26,13 @@
26	*/	26	*/
27		27
28	#ifdef HAVE_CONFIG_H	28	#ifdef HAVE_CONFIG_H
29	#include "opus_config.h"	29	#include "config.h"
30	#endif	30	#endif
31		31
32	#include "celt_lpc.h"	32	#include "celt_lpc.h"
33	#include "stack_alloc.h"	33	#include "stack_alloc.h"
34	#include "mathops.h"	34	#include "mathops.h"
		35	#include "pitch.h"
35		36
36	void _celt_lpc(	37	void _celt_lpc(
37	opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */	38	opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */
@@ -87,42 +88,71 @@ int p
87	#endif	88	#endif
88	}	89	}
89		90
90	void celt_fir(const opus_val16 *x,	91	void celt_fir(const opus_val16 *_x,
91	const opus_val16 *num,	92	const opus_val16 *num,
92	opus_val16 *y,	93	opus_val16 *_y,
93	int N,	94	int N,
94	int ord,	95	int ord,
95	opus_val16 *mem)	96	opus_val16 *mem)
96	{	97	{
97	int i,j;	98	int i,j;
		99	VARDECL(opus_val16, rnum);
		100	VARDECL(opus_val16, x);
		101	SAVE_STACK;
98		102
		103	ALLOC(rnum, ord, opus_val16);
		104	ALLOC(x, N+ord, opus_val16);
		105	for(i=0;i<ord;i++)
		106	rnum[i] = num[ord-i-1];
		107	for(i=0;i<ord;i++)
		108	x[i] = mem[ord-i-1];
		109	for (i=0;i<N;i++)
		110	x[i+ord]=_x[i];
		111	for(i=0;i<ord;i++)
		112	mem[i] = _x[N-i-1];
		113	#ifdef SMALL_FOOTPRINT
99	for (i=0;i<N;i++)	114	for (i=0;i<N;i++)
100	{	115	{
101	opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);	116	opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
102	for (j=0;j<ord;j++)	117	for (j=0;j<ord;j++)
103	{	118	{
104	sum += MULT16_16(num[j],mem[j]);	119	sum = MAC16_16(sum,rnum[j],x[i+j]);
105	}
106	for (j=ord-1;j>=1;j--)
107	{
108	mem[j]=mem[j-1];
109	}	120	}
110	mem[0] = x[i];	121	_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
111	y[i] = ROUND16(sum, SIG_SHIFT);
112	}	122	}
		123	#else
		124	for (i=0;i<N-3;i+=4)
		125	{
		126	opus_val32 sum[4]={0,0,0,0};
		127	xcorr_kernel(rnum, x+i, sum, ord);
		128	_y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
		129	_y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
		130	_y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
		131	_y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
		132	}
		133	for (;i<N;i++)
		134	{
		135	opus_val32 sum = 0;
		136	for (j=0;j<ord;j++)
		137	sum = MAC16_16(sum,rnum[j],x[i+j]);
		138	_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
		139	}
		140	#endif
		141	RESTORE_STACK;
113	}	142	}
114		143
115	void celt_iir(const opus_val32 *x,	144	void celt_iir(const opus_val32 *_x,
116	const opus_val16 *den,	145	const opus_val16 *den,
117	opus_val32 *y,	146	opus_val32 *_y,
118	int N,	147	int N,
119	int ord,	148	int ord,
120	opus_val16 *mem)	149	opus_val16 *mem)
121	{	150	{
		151	#ifdef SMALL_FOOTPRINT
122	int i,j;	152	int i,j;
123	for (i=0;i<N;i++)	153	for (i=0;i<N;i++)
124	{	154	{
125	opus_val32 sum = x[i];	155	opus_val32 sum = _x[i];
126	for (j=0;j<ord;j++)	156	for (j=0;j<ord;j++)
127	{	157	{
128	sum -= MULT16_16(den[j],mem[j]);	158	sum -= MULT16_16(den[j],mem[j]);
@@ -132,11 +162,65 @@ void celt_iir(const opus_val32 *x,
132	mem[j]=mem[j-1];	162	mem[j]=mem[j-1];
133	}	163	}
134	mem[0] = ROUND16(sum,SIG_SHIFT);	164	mem[0] = ROUND16(sum,SIG_SHIFT);
135	y[i] = sum;	165	_y[i] = sum;
136	}	166	}
		167	#else
		168	int i,j;
		169	VARDECL(opus_val16, rden);
		170	VARDECL(opus_val16, y);
		171	SAVE_STACK;
		172
		173	celt_assert((ord&3)==0);
		174	ALLOC(rden, ord, opus_val16);
		175	ALLOC(y, N+ord, opus_val16);
		176	for(i=0;i<ord;i++)
		177	rden[i] = den[ord-i-1];
		178	for(i=0;i<ord;i++)
		179	y[i] = -mem[ord-i-1];
		180	for(;i<N+ord;i++)
		181	y[i]=0;
		182	for (i=0;i<N-3;i+=4)
		183	{
		184	/* Unroll by 4 as if it were an FIR filter */
		185	opus_val32 sum[4];
		186	sum[0]=_x[i];
		187	sum[1]=_x[i+1];
		188	sum[2]=_x[i+2];
		189	sum[3]=_x[i+3];
		190	xcorr_kernel(rden, y+i, sum, ord);
		191
		192	/* Patch up the result to compensate for the fact that this is an IIR */
		193	y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT);
		194	_y[i ] = sum[0];
		195	sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
		196	y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
		197	_y[i+1] = sum[1];
		198	sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
		199	sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
		200	y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
		201	_y[i+2] = sum[2];
		202
		203	sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
		204	sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
		205	sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
		206	y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
		207	_y[i+3] = sum[3];
		208	}
		209	for (;i<N;i++)
		210	{
		211	opus_val32 sum = _x[i];
		212	for (j=0;j<ord;j++)
		213	sum -= MULT16_16(rden[j],y[i+j]);
		214	y[i+ord] = ROUND16(sum,SIG_SHIFT);
		215	_y[i] = sum;
		216	}
		217	for(i=0;i<ord;i++)
		218	mem[i] = _y[N-i-1];
		219	RESTORE_STACK;
		220	#endif
137	}	221	}
138		222
139	void _celt_autocorr(	223	int _celt_autocorr(
140	const opus_val16 *x, /* in: [0...n-1] samples x */	224	const opus_val16 *x, /* in: [0...n-1] samples x */
141	opus_val32 *ac, /* out: [0...lag-1] ac values */	225	opus_val32 *ac, /* out: [0...lag-1] ac values */
142	const opus_val16 *window,	226	const opus_val16 *window,
@@ -146,43 +230,79 @@ void _celt_autocorr(
146	)	230	)
147	{	231	{
148	opus_val32 d;	232	opus_val32 d;
149	int i;	233	int i, k;
		234	int fastN=n-lag;
		235	int shift;
		236	const opus_val16 *xptr;
150	VARDECL(opus_val16, xx);	237	VARDECL(opus_val16, xx);
151	SAVE_STACK;	238	SAVE_STACK;
152	ALLOC(xx, n, opus_val16);	239	ALLOC(xx, n, opus_val16);
153	celt_assert(n>0);	240	celt_assert(n>0);
154	celt_assert(overlap>=0);	241	celt_assert(overlap>=0);
155	for (i=0;i<n;i++)	242	if (overlap == 0)
156	xx[i] = x[i];
157	for (i=0;i<overlap;i++)
158	{	243	{
159	xx[i] = MULT16_16_Q15(x[i],window[i]);	244	xptr = x;
160	xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);	245	} else {
		246	for (i=0;i<n;i++)
		247	xx[i] = x[i];
		248	for (i=0;i<overlap;i++)
		249	{
		250	xx[i] = MULT16_16_Q15(x[i],window[i]);
		251	xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
		252	}
		253	xptr = xx;
161	}	254	}
		255	shift=0;
162	#ifdef FIXED_POINT	256	#ifdef FIXED_POINT
163	{	257	{
164	opus_val32 ac0=0;	258	opus_val32 ac0;
165	int shift;	259	ac0 = 1+(n<<7);
166	for(i=0;i<n;i++)	260	if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
167	ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);	261	for(i=(n&1);i<n;i+=2)
168	ac0 += 1+n;	262	{
		263	ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
		264	ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
		265	}
169		266
170	shift = celt_ilog2(ac0)-30+10;	267	shift = celt_ilog2(ac0)-30+10;
171	shift = (shift+1)/2;	268	shift = (shift)/2;
172	for(i=0;i<n;i++)	269	if (shift>0)
173	xx[i] = VSHR32(xx[i], shift);	270	{
		271	for(i=0;i<n;i++)
		272	xx[i] = PSHR32(xptr[i], shift);
		273	xptr = xx;
		274	} else
		275	shift = 0;
174	}	276	}
175	#endif	277	#endif
176	while (lag>=0)	278	celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
		279	for (k=0;k<=lag;k++)
177	{	280	{
178	for (i = lag, d = 0; i < n; i++)	281	for (i = k+fastN, d = 0; i < n; i++)
179	d += xx[i] * xx[i-lag];	282	d = MAC16_16(d, xptr[i], xptr[i-k]);
180	ac[lag] = d;	283	ac[k] += d;
181	/*printf ("%f ", ac[lag]);*/
182	lag--;
183	}	284	}
184	/*printf ("\n");*/	285	#ifdef FIXED_POINT
185	ac[0] += 10;	286	shift = 2*shift;
		287	if (shift<=0)
		288	ac[0] += SHL32((opus_int32)1, -shift);
		289	if (ac[0] < 268435456)
		290	{
		291	int shift2 = 29 - EC_ILOG(ac[0]);
		292	for (i=0;i<=lag;i++)
		293	ac[i] = SHL32(ac[i], shift2);
		294	shift -= shift2;
		295	} else if (ac[0] >= 536870912)
		296	{
		297	int shift2=1;
		298	if (ac[0] >= 1073741824)
		299	shift2++;
		300	for (i=0;i<=lag;i++)
		301	ac[i] = SHR32(ac[i], shift2);
		302	shift += shift2;
		303	}
		304	#endif
186		305
187	RESTORE_STACK;	306	RESTORE_STACK;
		307	return shift;
188	}	308	}