From 9b7ec42403073ee887efc531c153e6b1b6c15bab Mon Sep 17 00:00:00 2001
From: Nils Wallménius <nils@rockbox.org>
Date: Sun, 19 Jan 2014 16:31:59 +0100
Subject: Sync to upstream libopus

Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c

This brings in a bunch of optimizations to decode speed
and memory usage. Allocations are switched from using
the pseudostack to using the real stack. Enabled hacks
to reduce stack usage.

This should fix crashes on sansa clip, although some
files will not play due to failing allocations in the
codec buffer.

Speeds up decoding of the following test files:

                 H300 (cf)   C200 (arm7tdmi)  ipod classic (arm9e)
16 kbps (silk)   14.28 MHz   4.00 MHz         2.61 MHz
64 kbps (celt)   4.09 MHz    8.08 MHz         6.24 MHz
128 kbps (celt)  1.93 MHz    8.83 MHz         6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
---
 lib/rbcodec/codecs/libopus/celt/pitch.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

(limited to 'lib/rbcodec/codecs/libopus/celt/pitch.c')
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c
index c28857297a..ee56a434f0 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.c
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.c
@@ -252,15 +252,15 @@ void
 #endif
 celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
 {
-   int i,j;
+   int i;
    /*The EDSP version requires that max_pitch is at least 1, and that _x is
       32-bit aligned.
      Since it's hard to put asserts in assembly, put them here.*/
-   celt_assert(max_pitch>0);
-   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
 #ifdef FIXED_POINT
    opus_val32 maxcorr=1;
 #endif
+   celt_assert(max_pitch>0);
+   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
    for (i=0;i<max_pitch-3;i+=4)
    {
       opus_val32 sum[4]={0,0,0,0};
@@ -279,9 +279,8 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
    /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
    for (;i<max_pitch;i++)
    {
-      opus_val32 sum = 0;
-      for (j=0;j<len;j++)
-         sum = MAC16_16(sum, _x[j],_y[i+j]);
+      opus_val32 sum;
+      sum = celt_inner_prod(_x, _y+i, len);
       xcorr[i] = sum;
 #ifdef FIXED_POINT
       maxcorr = MAX32(maxcorr, sum);
@@ -361,12 +360,17 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
 #endif
    for (i=0;i<max_pitch>>1;i++)
    {
-      opus_val32 sum=0;
+      opus_val32 sum;
       xcorr[i] = 0;
       if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
          continue;
+#ifdef FIXED_POINT
+      sum = 0;
       for (j=0;j<len>>1;j++)
          sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
+#else
+      sum = celt_inner_prod(x_lp, y+i, len>>1);
+#endif
       xcorr[i] = MAX32(-1, sum);
 #ifdef FIXED_POINT
       maxcorr = MAX32(maxcorr, sum);
@@ -457,7 +461,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       opus_val16 g1;
       opus_val16 cont=0;
       opus_val16 thresh;
-      T1 = (2*T0+k)/(2*k);
+      T1 = celt_udiv(2*T0+k, 2*k);
       if (T1 < minperiod)
          break;
       /* Look for another strong correlation at T1b */
@@ -469,7 +473,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
             T1b = T0+T1;
       } else
       {
-         T1b = (2*second_check[k]*T0+k)/(2*k);
+         T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
       }
       dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
       xy += xy2;
@@ -514,13 +518,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       pg = SHR32(frac_div32(best_xy,best_yy+1),16);
 
    for (k=0;k<3;k++)
-   {
-      int T1 = T+k-1;
-      xy = 0;
-      for (i=0;i<N;i++)
-         xy = MAC16_16(xy, x[i], x[i-T1]);
-      xcorr[k] = xy;
-   }
+      xcorr[k] = celt_inner_prod(x, x-(T+k-1), N);
    if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
       offset = 1;
    else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
-- 
cgit v1.2.3