Sync to upstream libopus

Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c.

This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer.

Speeds up decoding of the following test files:

                    H300 (cf)    C200 (arm7tdmi)    ipod classic (arm9e)
16 kbps (silk)      14.28 MHz    4.00 MHz           2.61 MHz
64 kbps (celt)       4.09 MHz    8.08 MHz           6.24 MHz
128 kbps (celt)      1.93 MHz    8.83 MHz           6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
author: Nils Wallménius <nils@rockbox.org> 2014-01-19 16:31:59 +0100
committer: Nils Wallménius <nils@rockbox.org> 2014-07-13 11:12:40 +0200
commit: 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree: 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/bands.c
parent: e557951c94c1efa769900257e466900f0ffeb53b (diff)
download: rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
1 files changed, 111 insertions, 104 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c
index 1ad786d795..caa70163b4 100644
--- a/lib/rbcodec/codecs/libopus/celt/bands.c
+++ b/lib/rbcodec/codecs/libopus/celt/bands.c
@@ -93,11 +93,11 @@ static int bitexact_log2tan(int isin,int icos)
 #if 0
 #ifdef FIXED_POINT
 /* Compute the amplitude (sqrt energy) in each of the bands */
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
 {
   int i, c, N;
   const opus_int16 *eBands = m->eBands;
-   N = M*m->shortMdctSize;
+   N = m->shortMdctSize<<LM;
   c=0; do {
      for (i=0;i<end;i++)
      {
@@ -105,18 +105,23 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
         opus_val32 maxval=0;
         opus_val32 sum = 0;
-         j=M*eBands[i]; do {
+         maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
-            maxval = MAX32(maxval, X[j+c*N]);
-            maxval = MAX32(maxval, -X[j+c*N]);
-         } while (++j<M*eBands[i+1]);
         if (maxval > 0)
         {
-            int shift = celt_ilog2(maxval)-10;
+            int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
-            j=M*eBands[i]; do {
+            j=eBands[i]<<LM;
-               sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
+            if (shift>0)
-                                   EXTRACT16(VSHR32(X[j+c*N],shift)));
+            {
-            } while (++j<M*eBands[i+1]);
+               do {
+                  sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
+                        EXTRACT16(SHR32(X[j+c*N],shift)));
+               } while (++j<eBands[i+1]<<LM);
+            } else {
+               do {
+                  sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
+                        EXTRACT16(SHL32(X[j+c*N],-shift)));
+               } while (++j<eBands[i+1]<<LM);
+            }
            /* We're adding one here to ensure the normalized band isn't larger than unity norm */
            bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
         } else {
@@ -151,18 +156,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
 #else /* FIXED_POINT */
 /* Compute the amplitude (sqrt energy) in each of the bands */
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M)
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
 {
   int i, c, N;
   const opus_int16 *eBands = m->eBands;
-   N = M*m->shortMdctSize;
+   N = m->shortMdctSize<<LM;
   c=0; do {
      for (i=0;i<end;i++)
      {
-         int j;
+         opus_val32 sum;
-         opus_val32 sum = 1e-27f;
+         sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
-         for (j=M*eBands[i];j<M*eBands[i+1];j++)
-            sum += X[j+c*N]*X[j+c*N];
         bandE[i+c*m->nbEBands] = celt_sqrt(sum);
         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
      }
@@ -192,74 +195,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
-      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
+      int end, int M, int downsample, int silence)
 {
-   int i, c, N;
+   int i, N;
+   int bound;
+   celt_sig * OPUS_RESTRICT f;
+   const celt_norm * OPUS_RESTRICT x;
   const opus_int16 *eBands = m->eBands;
   N = M*m->shortMdctSize;
-   celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels");
+   bound = M*eBands[end];
-   c=0; do {
+   if (downsample!=1)
-      celt_sig * OPUS_RESTRICT f;
+      bound = IMIN(bound, N/downsample);
-      const celt_norm * OPUS_RESTRICT x;
+   if (silence)
-      f = freq+c*N;
+   {
-      x = X+c*N+M*eBands[start];
+      bound = 0;
-      for (i=0;i<M*eBands[start];i++)
+      start = end = 0;
-         *f++ = 0;
+   }
-      for (i=start;i<end;i++)
+   f = freq;
-      {
+   x = X+M*eBands[start];
-         int j, band_end;
+   for (i=0;i<M*eBands[start];i++)
-         opus_val16 g;
+      *f++ = 0;
-         opus_val16 lg;
+   for (i=start;i<end;i++)
+   {
+      int j, band_end;
+      opus_val16 g;
+      opus_val16 lg;
 #ifdef FIXED_POINT
-         int shift;
+      int shift;
 #endif
-         j=M*eBands[i];
+      j=M*eBands[i];
-         band_end = M*eBands[i+1];
+      band_end = M*eBands[i+1];
-         lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
+      lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
 #ifndef FIXED_POINT
-         g = celt_exp2(lg);
+      g = celt_exp2(lg);
 #else
-         /* Handle the integer part of the log energy */
+      /* Handle the integer part of the log energy */
-         shift = 16-(lg>>DB_SHIFT);
+      shift = 16-(lg>>DB_SHIFT);
-         if (shift>31)
+      if (shift>31)
-         {
+      {
-            shift=0;
+         shift=0;
-            g=0;
+         g=0;
-         } else {
+      } else {
-            /* Handle the fractional part. */
+         /* Handle the fractional part. */
-            g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+         g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
-         }
+      }
-         /* Handle extreme gains with negative shift. */
+      /* Handle extreme gains with negative shift. */
-         if (shift<0)
+      if (shift<0)
-         {
+      {
-            /* For shift < -2 we'd be likely to overflow, so we're capping
+         /* For shift < -2 we'd be likely to overflow, so we're capping
               the gain here. This shouldn't happen unless the bitstream is
               already corrupted. */
-            if (shift < -2)
+         if (shift < -2)
-            {
+         {
-               g = 32767;
+            g = 32767;
-               shift = -2;
+            shift = -2;
-            }
+         }
-            do {
+         do {
-               *f++ = SHL32(MULT16_16(*x++, g), -shift);
+            *f++ = SHL32(MULT16_16(*x++, g), -shift);
-            } while (++j<band_end);
+         } while (++j<band_end);
-         } else
+      } else
 #endif
         /* Be careful of the fixed-point "else" just above when changing this code */
         do {
            *f++ = SHR32(MULT16_16(*x++, g), shift);
         } while (++j<band_end);
-      }
+   }
-      celt_assert(start <= end);
+   celt_assert(start <= end);
-      for (i=M*eBands[end];i<N;i++)
+   OPUS_CLEAR(&freq[bound], N-bound);
-         *f++ = 0;
-   } while (++c<C);
 }
 /* This prevents energy collapse for transients with multiple short MDCTs */
 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
-      int start, int end, opus_val16 *logE, opus_val16 *prev1logE,
+      int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
-      opus_val16 *prev2logE, int *pulses, opus_uint32 seed)
+      const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed)
 {
   int c, i, j, k;
   for (i=start;i<end;i++)
@@ -274,7 +283,8 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
      N0 = m->eBands[i+1]-m->eBands[i];
      /* depth in 1/8 bits */
-      depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM);
+      celt_assert(pulses[i]>=0);
+      depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
 #ifdef FIXED_POINT
      thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
@@ -352,7 +362,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
   }
 }
-static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N)
+static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
 {
   int i = bandID;
   int j;
@@ -372,25 +382,25 @@ static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, cons
      celt_norm r, l;
      l = X[j];
      r = Y[j];
-      X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r);
+      X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
      /* Side is not encoded, no need to calculate */
   }
 }
-static void stereo_split(celt_norm *X, celt_norm *Y, int N)
+static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
 {
   int j;
   for (j=0;j<N;j++)
   {
-      celt_norm r, l;
+      opus_val32 r, l;
-      l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]);
+      l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
-      r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]);
+      r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
-      X[j] = l+r;
+      X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
-      Y[j] = r-l;
+      Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
   }
 }
-static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
+static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N)
 {
   int j;
   opus_val32 xp=0, side=0;
@@ -411,8 +421,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
   Er = MULT16_16(mid2, mid2) + side + 2*xp;
   if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
   {
-      for (j=0;j<N;j++)
+      OPUS_COPY(Y, X, N);
-         Y[j] = X[j];
      return;
   }
@@ -436,7 +445,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
   {
      celt_norm r, l;
      /* Apply mid scaling (side is already scaled) */
-      l = MULT16_16_Q15(mid, X[j]);
+      l = MULT16_16_P15(mid, X[j]);
      r = Y[j];
      X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
      Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
@@ -445,7 +454,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
 #if 0
 /* Decide whether we should spread the pulses in the current frame */
-int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
+int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
      int end, int C, int M)
 {
@@ -466,7 +475,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
      {
         int j, N, tmp=0;
         int tcount[3] = {0,0,0};
-         celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
+         const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
         N = M*(eBands[i+1]-eBands[i]);
         if (N<=8)
            continue;
@@ -486,7 +495,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
         /* Only include four last bands (8 kHz and up) */
         if (i>m->nbEBands-4)
-            hf_sum += 32*(tcount[1]+tcount[0])/N;
+            hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
         tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
         sum += tmp*256;
         nbBands++;
@@ -496,7 +505,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
   if (update_hf)
   {
      if (hf_sum)
-         hf_sum /= C*(4-m->nbEBands+end);
+         hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
      *hf_average = (*hf_average+hf_sum)>>1;
      hf_sum = *hf_average;
      if (*tapset_decision==2)
@@ -512,7 +521,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
   }
   /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
   celt_assert(nbBands>0); /* end has to be non-zero */
-   sum /= nbBands;
+   celt_assert(sum>=0);
+   sum = celt_udiv(sum, nbBands);
   /* Recursive averaging */
   sum = (sum+*average)>>1;
   *average = sum;
@@ -571,8 +581,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard
         for (j=0;j<N0;j++)
            tmp[i*N0+j] = X[j*stride+i];
   }
-   for (j=0;j<N;j++)
+   OPUS_COPY(X, tmp, N);
-      X[j] = tmp[j];
   RESTORE_STACK;
 }
@@ -595,8 +604,7 @@ static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
         for (j=0;j<N0;j++)
            tmp[j*stride+i] = X[i*N0+j];
   }
-   for (j=0;j<N;j++)
+   OPUS_COPY(X, tmp, N);
-      X[j] = tmp[j];
   RESTORE_STACK;
 }
@@ -607,11 +615,11 @@ void haar1(celt_norm *X, int N0, int stride)
   for (i=0;i<stride;i++)
      for (j=0;j<N0;j++)
      {
-         celt_norm tmp1, tmp2;
+         opus_val32 tmp1, tmp2;
-         tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]);
+         tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
-         tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
+         tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
-         X[stride*2*j+i] = tmp1 + tmp2;
+         X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
-         X[stride*(2*j+1)+i] = tmp1 - tmp2;
+         X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
      }
 }
@@ -626,7 +634,8 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
   /* The upper limit ensures that in a stereo split with itheta==16384, we'll
       always have enough bits left over to code at least one pulse in the
       side; otherwise it would collapse, since it doesn't get folded. */
-   qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2);
+   qb = celt_sudiv(b+N2*offset, N2);
+   qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
   qb = IMIN(8<<BITRES, qb);
@@ -773,7 +782,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
            ec_dec_update(ec, fl, fl+fs, ft);
         }
      }
-      itheta = (opus_int32)itheta*16384/qn;
+      celt_assert(itheta>=0);
+      itheta = celt_udiv((opus_int32)itheta*16384, qn);
      if (encode && stereo)
      {
         if (itheta==0)
@@ -1025,8 +1035,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
            fill &= cm_mask;
            if (!fill)
            {
-               for (j=0;j<N;j++)
+               OPUS_CLEAR(X, N);
-                  X[j] = 0;
            } else {
               if (lowband == NULL)
               {
@@ -1088,7 +1097,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
   longBlocks = B0==1;
-   N_B /= B;
+   N_B = celt_udiv(N_B, B);
   /* Special case for one sample */
   if (N==1)
@@ -1102,9 +1111,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
   if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
   {
-      int j;
+      OPUS_COPY(lowband_scratch, lowband, N);
-      for (j=0;j<N;j++)
-         lowband_scratch[j] = lowband[j];
      lowband = lowband_scratch;
   }
@@ -1432,7 +1439,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      ctx.remaining_bits = remaining_bits;
      if (i <= codedBands-1)
      {
-         curr_balance = balance / IMIN(3, codedBands-i);
+         curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
         b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
      } else {
         b = 0;
author	Nils Wallménius <nils@rockbox.org>	2014-01-19 16:31:59 +0100
committer	Nils Wallménius <nils@rockbox.org>	2014-07-13 11:12:40 +0200
commit	9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree	07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/bands.c
parent	e557951c94c1efa769900257e466900f0ffeb53b (diff)
download	rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip