Sync to upstream libopus

Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c.

This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer.

Speeds up decoding of the following test files:

                    H300 (cf)    C200 (arm7tdmi)    ipod classic (arm9e)
16 kbps (silk)      14.28 MHz    4.00 MHz           2.61 MHz
64 kbps (celt)       4.09 MHz    8.08 MHz           6.24 MHz
128 kbps (celt)      1.93 MHz    8.83 MHz           6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
author: Nils Wallménius <nils@rockbox.org> 2014-01-19 16:31:59 +0100
committer: Nils Wallménius <nils@rockbox.org> 2014-07-13 11:12:40 +0200
commit: 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree: 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/vq.c
parent: e557951c94c1efa769900257e466900f0ffeb53b (diff)
download: rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
1 files changed, 31 insertions, 38 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c
index af991bb052..b047b22774 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.c
+++ b/lib/rbcodec/codecs/libopus/celt/vq.c
@@ -37,19 +37,27 @@
 #include "os_support.h"
 #include "bands.h"
 #include "rate.h"
+#include "pitch.h"
+#if defined(MIPSr1_ASM)
+#include "mips/vq_mipsr1.h"
+#endif
+#ifndef OVERRIDE_vq_exp_rotation1
 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
 {
   int i;
+   opus_val16 ms;
   celt_norm *Xptr;
   Xptr = X;
+   ms = NEG16(s);
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
-      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
-      *Xptr++      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
@@ -57,10 +65,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
-      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
-      *Xptr--      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
 }
+#endif /* OVERRIDE_vq_exp_rotation1 */
 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
 {
@@ -91,7 +100,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int
   }
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      extract_collapse_mask().*/
-   len /= stride;
+   len = celt_udiv(len, stride);
   for (i=0;i<stride;i++)
   {
      if (dir < 0)
@@ -140,13 +149,15 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
-   N0 = N/B;
+   N0 = celt_udiv(N, B);
   collapse_mask = 0;
   i=0; do {
      int j;
+      unsigned tmp=0;
      j=0; do {
-         collapse_mask |= (iy[i*N0+j]!=0)<<i;
+         tmp |= iy[i*N0+j];
      } while (++j<N0);
+      collapse_mask |= (tmp!=0)<<i;
   } while (++i<B);
   return collapse_mask;
 }
@@ -322,47 +333,34 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val16 gain)
 {
-   int i;
   opus_val32 Ryy;
   unsigned collapse_mask;
-/*   VARDECL(int, iy);
+   VARDECL(int, iy);
-   SAVE_STACK; */
+   SAVE_STACK;
-   /* the difference between the last two values of eband5ms shifted by maxLM
-      which is 22 << 3 with the static mode */
-   int iy[176];
   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
-/*   ALLOC(iy, N, int); */
+   ALLOC(iy, N, int);
-   decode_pulses(iy, N, K, dec);
+   Ryy = decode_pulses(iy, N, K, dec);
-   Ryy = 0;
-   i=0;
-   do {
-      Ryy = MAC16_16(Ryy, iy[i], iy[i]);
-   } while (++i < N);
   normalise_residual(iy, X, N, Ryy, gain);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
-/*   RESTORE_STACK; */
+   RESTORE_STACK;
   return collapse_mask;
 }
+#ifndef OVERRIDE_renormalise_vector
 void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
 {
   int i;
 #ifdef FIXED_POINT
   int k;
 #endif
-   opus_val32 E = EPSILON;
+   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
-   celt_norm *xptr = X;
+   celt_norm *xptr;
-   for (i=0;i<N;i++)
+   E = EPSILON + celt_inner_prod(X, X, N);
-   {
-      E = MAC16_16(E, *xptr, *xptr);
-      xptr++;
-   }
 #ifdef FIXED_POINT
   k = celt_ilog2(E)>>1;
 #endif
@@ -377,8 +375,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
   }
   /*return celt_sqrt(E);*/
 }
+#endif /* OVERRIDE_renormalise_vector */
-int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
+int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
 {
   int i;
   int itheta;
@@ -397,14 +396,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
         Eside = MAC16_16(Eside, s, s);
      }
   } else {
-      for (i=0;i<N;i++)
+      Emid += celt_inner_prod(X, X, N);
-      {
+      Eside += celt_inner_prod(Y, Y, N);
-         celt_norm m, s;
-         m = X[i];
-         s = Y[i];
-         Emid = MAC16_16(Emid, m, m);
-         Eside = MAC16_16(Eside, s, s);
-      }
   }
   mid = celt_sqrt(Emid);
   side = celt_sqrt(Eside);
author	Nils Wallménius <nils@rockbox.org>	2014-01-19 16:31:59 +0100
committer	Nils Wallménius <nils@rockbox.org>	2014-07-13 11:12:40 +0200
commit	9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree	07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/vq.c
parent	e557951c94c1efa769900257e466900f0ffeb53b (diff)
download	rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip