Sync opus codec to upstream git

Sync opus codec to upstream commit 02fed471a4568852d6618e041c4f2af0d7730ee2 (August 30 2013).

This brings in a lot of optimizations but also makes the diff between our codec and the upstream much smaller, as most of our optimizations have been upstreamed or superseded.

Speedups across the board for CELT mode files:

          64kbps     128kbps
H300      9.82MHz    15.48MHz
c200      4.86MHz     9.63MHz
fuze v1  10.32MHz    15.92MHz

For the silk mode test file (16kbps), arm targets get a speedup of about 2MHz while the H300 is 7.8MHz slower, likely because it's now using the pseudostack more rather than the real stack, which is in iram. Patches to get around that are upcoming.

Change-Id: Ifecf963e461c51ac42e09dac1e91bc4bc3b12fa3
author: Nils Wallménius <nils@rockbox.org> 2013-05-20 22:25:57 +0200
committer: Nils Wallménius <nils@rockbox.org> 2013-08-31 08:30:51 +0200
commit: 580b307fd791c0997a8831bc800bba87797bfb7e (patch)
tree: 807846056f06fd944a750ce41217a877910ebd59 /lib/rbcodec/codecs/libopus/celt/pitch.c
parent: 74761b70acd96cecc0d35450dd56a98ad9ee7d3d (diff)
download: rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.tar.gz
rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.zip
1 files changed, 156 insertions, 34 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c
index 1b7efd945d..0d8be13025 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.c
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.c
@@ -32,7 +32,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "pitch.h"
@@ -77,7 +77,7 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
 #ifndef FIXED_POINT
         /* Considering the range of xcorr16, this should avoid both underflows
            and overflows (inf) when squaring xcorr16 */
-         xcorr16 *= 1e-12;
+         xcorr16 *= 1e-12f;
 #endif
         num = MULT16_16_Q15(xcorr16,xcorr16);
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
@@ -102,13 +102,57 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
   }
 }
+static void celt_fir5(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         opus_val16 *mem)
+{
+   int i;
+   opus_val16 num0, num1, num2, num3, num4;
+   opus_val32 mem0, mem1, mem2, mem3, mem4;
+   num0=num[0];
+   num1=num[1];
+   num2=num[2];
+   num3=num[3];
+   num4=num[4];
+   mem0=mem[0];
+   mem1=mem[1];
+   mem2=mem[2];
+   mem3=mem[3];
+   mem4=mem[4];
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      sum = MAC16_16(sum,num0,mem0);
+      sum = MAC16_16(sum,num1,mem1);
+      sum = MAC16_16(sum,num2,mem2);
+      sum = MAC16_16(sum,num3,mem3);
+      sum = MAC16_16(sum,num4,mem4);
+      mem4 = mem3;
+      mem3 = mem2;
+      mem2 = mem1;
+      mem1 = mem0;
+      mem0 = x[i];
+      y[i] = ROUND16(sum, SIG_SHIFT);
+   }
+   mem[0]=mem0;
+   mem[1]=mem1;
+   mem[2]=mem2;
+   mem[3]=mem3;
+   mem[4]=mem4;
+}
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
      int len, int C)
 {
   int i;
   opus_val32 ac[5];
   opus_val16 tmp=Q15ONE;
-   opus_val16 lpc[4], mem[4]={0,0,0,0};
+   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
+   opus_val16 lpc2[5];
+   opus_val16 c1 = QCONST16(.8f,15);
 #ifdef FIXED_POINT
   int shift;
   opus_val32 maxabs = celt_maxabs32(x[0], len);
@@ -161,14 +205,89 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
   }
-   celt_fir(x_lp, lpc, x_lp, len>>1, 4, mem);
+   /* Add a zero */
+   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
+   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
+   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
+   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
+   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
+   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
+}
-   mem[0]=0;
+#if 0 /* This is a simple version of the pitch correlation that should work
-   lpc[0]=QCONST16(.8f,12);
+         well on DSPs like Blackfin and TI C5x/C6x */
-   celt_fir(x_lp, lpc, x_lp, len>>1, 1, mem);
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i, j;
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, x[j],y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
 }
+#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i,j;
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(_x, _y+i, sum, len);
+      xcorr[i]=sum[0];
+      xcorr[i+1]=sum[1];
+      xcorr[i+2]=sum[2];
+      xcorr[i+3]=sum[3];
+#ifdef FIXED_POINT
+      sum[0] = MAX32(sum[0], sum[1]);
+      sum[2] = MAX32(sum[2], sum[3]);
+      sum[0] = MAX32(sum[0], sum[2]);
+      maxcorr = MAX32(maxcorr, sum[0]);
+#endif
+   }
+   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   for (;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, _x[j],_y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
+}
+#endif
 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
                  int len, int max_pitch, int *pitch)
 {
@@ -179,8 +298,8 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
   VARDECL(opus_val16, y_lp4);
   VARDECL(opus_val32, xcorr);
 #ifdef FIXED_POINT
-   opus_val32 maxcorr=1;
+   opus_val32 maxcorr;
-   opus_val16 xmax, ymax;
+   opus_val32 xmax, ymax;
   int shift=0;
 #endif
   int offset;
@@ -204,7 +323,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
 #ifdef FIXED_POINT
   xmax = celt_maxabs16(x_lp4, len>>2);
   ymax = celt_maxabs16(y_lp4, lag>>2);
-   shift = celt_ilog2(MAX16(1, MAX16(xmax, ymax)))-11;
+   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
   if (shift>0)
   {
      for (j=0;j<len>>2;j++)
@@ -220,16 +339,11 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
   /* Coarse search with 4x decimation */
-   for (i=0;i<max_pitch>>2;i++)
-   {
-      opus_val32 sum = 0;
-      for (j=0;j<len>>2;j++)
-         sum = MAC16_16(sum, x_lp4[j],y_lp4[i+j]);
-      xcorr[i] = MAX32(-1, sum);
 #ifdef FIXED_POINT
-      maxcorr = MAX32(maxcorr, sum);
+   maxcorr =
 #endif
-   }
+   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
 #ifdef FIXED_POINT
                   , 0, maxcorr
@@ -288,11 +402,13 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
   int k, i, T, T0;
   opus_val16 g, g0;
   opus_val16 pg;
-   opus_val32 xy,xx,yy;
+   opus_val32 xy,xx,yy,xy2;
   opus_val32 xcorr[3];
   opus_val32 best_xy, best_yy;
   int offset;
   int minperiod0;
+   VARDECL(opus_val32, yy_lookup);
+   SAVE_STACK;
   minperiod0 = minperiod;
   maxperiod /= 2;
@@ -305,13 +421,16 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      *T0_=maxperiod-1;
   T = T0 = *T0_;
-   xx=xy=yy=0;
+   ALLOC(yy_lookup, maxperiod+1, opus_val32);
-   for (i=0;i<N;i++)
+   dual_inner_prod(x, x, x-T0, N, &xx, &xy);
+   yy_lookup[0] = xx;
+   yy=xx;
+   for (i=1;i<=maxperiod;i++)
   {
-      xy = MAC16_16(xy, x[i], x[i-T0]);
+      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
-      xx = MAC16_16(xx, x[i], x[i]);
+      yy_lookup[i] = MAX32(0, yy);
-      yy = MAC16_16(yy, x[i-T0],x[i-T0]);
   }
+   yy = yy_lookup[T0];
   best_xy = xy;
   best_yy = yy;
 #ifdef FIXED_POINT
@@ -332,6 +451,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      int T1, T1b;
      opus_val16 g1;
      opus_val16 cont=0;
+      opus_val16 thresh;
      T1 = (2*T0+k)/(2*k);
      if (T1 < minperiod)
         break;
@@ -346,15 +466,9 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      {
         T1b = (2*second_check[k]*T0+k)/(2*k);
      }
-      xy=yy=0;
+      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
-      for (i=0;i<N;i++)
+      xy += xy2;
-      {
+      yy = yy_lookup[T1] + yy_lookup[T1b];
-         xy = MAC16_16(xy, x[i], x[i-T1]);
-         yy = MAC16_16(yy, x[i-T1], x[i-T1]);
-         xy = MAC16_16(xy, x[i], x[i-T1b]);
-         yy = MAC16_16(yy, x[i-T1b], x[i-T1b]);
-      }
 #ifdef FIXED_POINT
      {
         opus_val32 x2y2;
@@ -373,7 +487,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
         cont = HALF32(prev_gain);
      else
         cont = 0;
-      if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation */
+      if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
+      else if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
+      if (g1 > thresh)
      {
         best_xy = xy;
         best_yy = yy;
@@ -407,6 +528,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
   if (*T0_<minperiod0)
      *T0_=minperiod0;
+   RESTORE_STACK;
   return pg;
 }
 #endif
author	Nils Wallménius <nils@rockbox.org>	2013-05-20 22:25:57 +0200
committer	Nils Wallménius <nils@rockbox.org>	2013-08-31 08:30:51 +0200
commit	580b307fd791c0997a8831bc800bba87797bfb7e (patch)
tree	807846056f06fd944a750ce41217a877910ebd59 /lib/rbcodec/codecs/libopus/celt/pitch.c
parent	74761b70acd96cecc0d35450dd56a98ad9ee7d3d (diff)
download	rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.tar.gz rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.zip

diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c index 1b7efd945d..0d8be13025 100644 --- a/lib/rbcodec/codecs/libopus/celt/pitch.c +++ b/lib/rbcodec/codecs/libopus/celt/pitch.c
@@ -32,7 +32,7 @@
32	*/	32	*/
33		33
34	#ifdef HAVE_CONFIG_H	34	#ifdef HAVE_CONFIG_H
35	#include "opus_config.h"	35	#include "config.h"
36	#endif	36	#endif
37		37
38	#include "pitch.h"	38	#include "pitch.h"
@@ -77,7 +77,7 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
77	#ifndef FIXED_POINT	77	#ifndef FIXED_POINT
78	/* Considering the range of xcorr16, this should avoid both underflows	78	/* Considering the range of xcorr16, this should avoid both underflows
79	and overflows (inf) when squaring xcorr16 */	79	and overflows (inf) when squaring xcorr16 */
80	xcorr16 *= 1e-12;	80	xcorr16 *= 1e-12f;
81	#endif	81	#endif
82	num = MULT16_16_Q15(xcorr16,xcorr16);	82	num = MULT16_16_Q15(xcorr16,xcorr16);
83	if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))	83	if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
@@ -102,13 +102,57 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
102	}	102	}
103	}	103	}
104		104
		105	static void celt_fir5(const opus_val16 *x,
		106	const opus_val16 *num,
		107	opus_val16 *y,
		108	int N,
		109	opus_val16 *mem)
		110	{
		111	int i;
		112	opus_val16 num0, num1, num2, num3, num4;
		113	opus_val32 mem0, mem1, mem2, mem3, mem4;
		114	num0=num[0];
		115	num1=num[1];
		116	num2=num[2];
		117	num3=num[3];
		118	num4=num[4];
		119	mem0=mem[0];
		120	mem1=mem[1];
		121	mem2=mem[2];
		122	mem3=mem[3];
		123	mem4=mem[4];
		124	for (i=0;i<N;i++)
		125	{
		126	opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
		127	sum = MAC16_16(sum,num0,mem0);
		128	sum = MAC16_16(sum,num1,mem1);
		129	sum = MAC16_16(sum,num2,mem2);
		130	sum = MAC16_16(sum,num3,mem3);
		131	sum = MAC16_16(sum,num4,mem4);
		132	mem4 = mem3;
		133	mem3 = mem2;
		134	mem2 = mem1;
		135	mem1 = mem0;
		136	mem0 = x[i];
		137	y[i] = ROUND16(sum, SIG_SHIFT);
		138	}
		139	mem[0]=mem0;
		140	mem[1]=mem1;
		141	mem[2]=mem2;
		142	mem[3]=mem3;
		143	mem[4]=mem4;
		144	}
		145
		146
105	void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,	147	void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
106	int len, int C)	148	int len, int C)
107	{	149	{
108	int i;	150	int i;
109	opus_val32 ac[5];	151	opus_val32 ac[5];
110	opus_val16 tmp=Q15ONE;	152	opus_val16 tmp=Q15ONE;
111	opus_val16 lpc[4], mem[4]={0,0,0,0};	153	opus_val16 lpc[4], mem[5]={0,0,0,0,0};
		154	opus_val16 lpc2[5];
		155	opus_val16 c1 = QCONST16(.8f,15);
112	#ifdef FIXED_POINT	156	#ifdef FIXED_POINT
113	int shift;	157	int shift;
114	opus_val32 maxabs = celt_maxabs32(x[0], len);	158	opus_val32 maxabs = celt_maxabs32(x[0], len);
@@ -161,14 +205,89 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
161	tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);	205	tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
162	lpc[i] = MULT16_16_Q15(lpc[i], tmp);	206	lpc[i] = MULT16_16_Q15(lpc[i], tmp);
163	}	207	}
164	celt_fir(x_lp, lpc, x_lp, len>>1, 4, mem);	208	/* Add a zero */
		209	lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
		210	lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
		211	lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
		212	lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
		213	lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
		214	celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
		215	}
165		216
166	mem[0]=0;	217	#if 0 /* This is a simple version of the pitch correlation that should work
167	lpc[0]=QCONST16(.8f,12);	218	well on DSPs like Blackfin and TI C5x/C6x */
168	celt_fir(x_lp, lpc, x_lp, len>>1, 1, mem);
169		219
		220	#ifdef FIXED_POINT
		221	opus_val32
		222	#else
		223	void
		224	#endif
		225	celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
		226	{
		227	int i, j;
		228	#ifdef FIXED_POINT
		229	opus_val32 maxcorr=1;
		230	#endif
		231	for (i=0;i<max_pitch;i++)
		232	{
		233	opus_val32 sum = 0;
		234	for (j=0;j<len;j++)
		235	sum = MAC16_16(sum, x[j],y[i+j]);
		236	xcorr[i] = sum;
		237	#ifdef FIXED_POINT
		238	maxcorr = MAX32(maxcorr, sum);
		239	#endif
		240	}
		241	#ifdef FIXED_POINT
		242	return maxcorr;
		243	#endif
170	}	244	}
171		245
		246	#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
		247
		248	#ifdef FIXED_POINT
		249	opus_val32
		250	#else
		251	void
		252	#endif
		253	celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
		254	{
		255	int i,j;
		256	#ifdef FIXED_POINT
		257	opus_val32 maxcorr=1;
		258	#endif
		259	for (i=0;i<max_pitch-3;i+=4)
		260	{
		261	opus_val32 sum[4]={0,0,0,0};
		262	xcorr_kernel(_x, _y+i, sum, len);
		263	xcorr[i]=sum[0];
		264	xcorr[i+1]=sum[1];
		265	xcorr[i+2]=sum[2];
		266	xcorr[i+3]=sum[3];
		267	#ifdef FIXED_POINT
		268	sum[0] = MAX32(sum[0], sum[1]);
		269	sum[2] = MAX32(sum[2], sum[3]);
		270	sum[0] = MAX32(sum[0], sum[2]);
		271	maxcorr = MAX32(maxcorr, sum[0]);
		272	#endif
		273	}
		274	/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
		275	for (;i<max_pitch;i++)
		276	{
		277	opus_val32 sum = 0;
		278	for (j=0;j<len;j++)
		279	sum = MAC16_16(sum, _x[j],_y[i+j]);
		280	xcorr[i] = sum;
		281	#ifdef FIXED_POINT
		282	maxcorr = MAX32(maxcorr, sum);
		283	#endif
		284	}
		285	#ifdef FIXED_POINT
		286	return maxcorr;
		287	#endif
		288	}
		289
		290	#endif
172	void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,	291	void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
173	int len, int max_pitch, int *pitch)	292	int len, int max_pitch, int *pitch)
174	{	293	{
@@ -179,8 +298,8 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
179	VARDECL(opus_val16, y_lp4);	298	VARDECL(opus_val16, y_lp4);
180	VARDECL(opus_val32, xcorr);	299	VARDECL(opus_val32, xcorr);
181	#ifdef FIXED_POINT	300	#ifdef FIXED_POINT
182	opus_val32 maxcorr=1;	301	opus_val32 maxcorr;
183	opus_val16 xmax, ymax;	302	opus_val32 xmax, ymax;
184	int shift=0;	303	int shift=0;
185	#endif	304	#endif
186	int offset;	305	int offset;
@@ -204,7 +323,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
204	#ifdef FIXED_POINT	323	#ifdef FIXED_POINT
205	xmax = celt_maxabs16(x_lp4, len>>2);	324	xmax = celt_maxabs16(x_lp4, len>>2);
206	ymax = celt_maxabs16(y_lp4, lag>>2);	325	ymax = celt_maxabs16(y_lp4, lag>>2);
207	shift = celt_ilog2(MAX16(1, MAX16(xmax, ymax)))-11;	326	shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
208	if (shift>0)	327	if (shift>0)
209	{	328	{
210	for (j=0;j<len>>2;j++)	329	for (j=0;j<len>>2;j++)
@@ -220,16 +339,11 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
220		339
221	/* Coarse search with 4x decimation */	340	/* Coarse search with 4x decimation */
222		341
223	for (i=0;i<max_pitch>>2;i++)
224	{
225	opus_val32 sum = 0;
226	for (j=0;j<len>>2;j++)
227	sum = MAC16_16(sum, x_lp4[j],y_lp4[i+j]);
228	xcorr[i] = MAX32(-1, sum);
229	#ifdef FIXED_POINT	342	#ifdef FIXED_POINT
230	maxcorr = MAX32(maxcorr, sum);	343	maxcorr =
231	#endif	344	#endif
232	}	345	celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
		346
233	find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch	347	find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
234	#ifdef FIXED_POINT	348	#ifdef FIXED_POINT
235	, 0, maxcorr	349	, 0, maxcorr
@@ -288,11 +402,13 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
288	int k, i, T, T0;	402	int k, i, T, T0;
289	opus_val16 g, g0;	403	opus_val16 g, g0;
290	opus_val16 pg;	404	opus_val16 pg;
291	opus_val32 xy,xx,yy;	405	opus_val32 xy,xx,yy,xy2;
292	opus_val32 xcorr[3];	406	opus_val32 xcorr[3];
293	opus_val32 best_xy, best_yy;	407	opus_val32 best_xy, best_yy;
294	int offset;	408	int offset;
295	int minperiod0;	409	int minperiod0;
		410	VARDECL(opus_val32, yy_lookup);
		411	SAVE_STACK;
296		412
297	minperiod0 = minperiod;	413	minperiod0 = minperiod;
298	maxperiod /= 2;	414	maxperiod /= 2;
@@ -305,13 +421,16 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
305	*T0_=maxperiod-1;	421	*T0_=maxperiod-1;
306		422
307	T = T0 = *T0_;	423	T = T0 = *T0_;
308	xx=xy=yy=0;	424	ALLOC(yy_lookup, maxperiod+1, opus_val32);
309	for (i=0;i<N;i++)	425	dual_inner_prod(x, x, x-T0, N, &xx, &xy);
		426	yy_lookup[0] = xx;
		427	yy=xx;
		428	for (i=1;i<=maxperiod;i++)
310	{	429	{
311	xy = MAC16_16(xy, x[i], x[i-T0]);	430	yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
312	xx = MAC16_16(xx, x[i], x[i]);	431	yy_lookup[i] = MAX32(0, yy);
313	yy = MAC16_16(yy, x[i-T0],x[i-T0]);
314	}	432	}
		433	yy = yy_lookup[T0];
315	best_xy = xy;	434	best_xy = xy;
316	best_yy = yy;	435	best_yy = yy;
317	#ifdef FIXED_POINT	436	#ifdef FIXED_POINT
@@ -332,6 +451,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
332	int T1, T1b;	451	int T1, T1b;
333	opus_val16 g1;	452	opus_val16 g1;
334	opus_val16 cont=0;	453	opus_val16 cont=0;
		454	opus_val16 thresh;
335	T1 = (2*T0+k)/(2*k);	455	T1 = (2*T0+k)/(2*k);
336	if (T1 < minperiod)	456	if (T1 < minperiod)
337	break;	457	break;
@@ -346,15 +466,9 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
346	{	466	{
347	T1b = (2*second_check[k]*T0+k)/(2*k);	467	T1b = (2*second_check[k]*T0+k)/(2*k);
348	}	468	}
349	xy=yy=0;	469	dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
350	for (i=0;i<N;i++)	470	xy += xy2;
351	{	471	yy = yy_lookup[T1] + yy_lookup[T1b];
352	xy = MAC16_16(xy, x[i], x[i-T1]);
353	yy = MAC16_16(yy, x[i-T1], x[i-T1]);
354
355	xy = MAC16_16(xy, x[i], x[i-T1b]);
356	yy = MAC16_16(yy, x[i-T1b], x[i-T1b]);
357	}
358	#ifdef FIXED_POINT	472	#ifdef FIXED_POINT
359	{	473	{
360	opus_val32 x2y2;	474	opus_val32 x2y2;
@@ -373,7 +487,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
373	cont = HALF32(prev_gain);	487	cont = HALF32(prev_gain);
374	else	488	else
375	cont = 0;	489	cont = 0;
376	if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)	490	thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
		491	/* Bias against very high pitch (very short period) to avoid false-positives
		492	due to short-term correlation */
		493	if (T1<3*minperiod)
		494	thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
		495	else if (T1<2*minperiod)
		496	thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
		497	if (g1 > thresh)
377	{	498	{
378	best_xy = xy;	499	best_xy = xy;
379	best_yy = yy;	500	best_yy = yy;
@@ -407,6 +528,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
407		528
408	if (*T0_<minperiod0)	529	if (*T0_<minperiod0)
409	*T0_=minperiod0;	530	*T0_=minperiod0;
		531	RESTORE_STACK;
410	return pg;	532	return pg;
411	}	533	}
412	#endif	534	#endif