Sync to upstream libopus

Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer. Speeds up decoding of the following test files: H300 (cf) C200 (arm7tdmi) ipod classic (arm9e) 16 kbps (silk) 14.28 MHz 4.00 MHz 2.61 MHz 64 kbps (celt) 4.09 MHz 8.08 MHz 6.24 MHz 128 kbps (celt) 1.93 MHz 8.83 MHz 6.53 MHz Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
author: Nils Wallménius <nils@rockbox.org> 2014-01-19 16:31:59 +0100
committer: Nils Wallménius <nils@rockbox.org> 2014-07-13 11:12:40 +0200
commit: 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree: 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/kiss_fft.c
parent: e557951c94c1efa769900257e466900f0ffeb53b (diff)
download: rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
1 files changed, 173 insertions, 309 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
index e2b8f3b3da..833ef5a71f 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
@@ -45,73 +45,62 @@
   complex numbers.  It also delares the kf_ internal functions.
 */
-#if 0
 static void kf_bfly2(
                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
                     int m,
-                     int N,
+                     int N
-                     int mm
                    )
 {
   kiss_fft_cpx * Fout2;
-   const kiss_twiddle_cpx * tw1;
+   int i;
-   int i,j;
+   (void)m;
-   kiss_fft_cpx * Fout_beg = Fout;
+#ifdef CUSTOM_MODES
-   for (i=0;i<N;i++)
+   if (m==1)
   {
-      Fout = Fout_beg + i*mm;
+      celt_assert(m==1);
-      Fout2 = Fout + m;
+      for (i=0;i<N;i++)
-      tw1 = st->twiddles;
-      for(j=0;j<m;j++)
      {
         kiss_fft_cpx t;
-         Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
+         Fout2 = Fout + 1;
-         Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
+         t = *Fout2;
-         C_MUL (t,  *Fout2 , *tw1);
-         tw1 += fstride;
         C_SUB( *Fout2 ,  *Fout , t );
         C_ADDTO( *Fout ,  t );
-         ++Fout2;
+         Fout += 2;
-         ++Fout;
      }
-   }
+   } else
-}
 #endif
-static void ki_bfly2(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   kiss_fft_cpx * Fout2;
-   const kiss_twiddle_cpx * tw1;
-   kiss_fft_cpx t;
-   int i,j;
-   kiss_fft_cpx * Fout_beg = Fout;
-   for (i=0;i<N;i++)
   {
-      Fout = Fout_beg + i*mm;
+      opus_val16 tw;
-      Fout2 = Fout + m;
+      tw = QCONST16(0.7071067812f, 15);
-      tw1 = st->twiddles;
+      /* We know that m==4 here because the radix-2 is just after a radix-4 */
-      for(j=0;j<m;j++)
+      celt_assert(m==4);
+      for (i=0;i<N;i++)
      {
-         C_MULC (t,  *Fout2 , *tw1);
+         kiss_fft_cpx t;
-         tw1 += fstride;
+         Fout2 = Fout + 4;
-         C_SUB( *Fout2 ,  *Fout , t );
+         t = Fout2[0];
-         C_ADDTO( *Fout ,  t );
+         C_SUB( Fout2[0] ,  Fout[0] , t );
-         ++Fout2;
+         C_ADDTO( Fout[0] ,  t );
-         ++Fout;
+         t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
+         t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
+         C_SUB( Fout2[1] ,  Fout[1] , t );
+         C_ADDTO( Fout[1] ,  t );
+         t.r = Fout2[2].i;
+         t.i = -Fout2[2].r;
+         C_SUB( Fout2[2] ,  Fout[2] , t );
+         C_ADDTO( Fout[2] ,  t );
+         t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
+         t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
+         C_SUB( Fout2[3] ,  Fout[3] , t );
+         C_ADDTO( Fout[3] ,  t );
+         Fout += 8;
      }
   }
 }
-#if 0
 static void kf_bfly4(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
@@ -121,93 +110,69 @@ static void kf_bfly4(
                     int mm
                    )
 {
-   const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+   int i;
-   kiss_fft_cpx scratch[6];
-   const size_t m2=2*m;
-   const size_t m3=3*m;
-   int i, j;
-   kiss_fft_cpx * Fout_beg = Fout;
+   if (m==1)
-   for (i=0;i<N;i++)
   {
-      Fout = Fout_beg + i*mm;
+      /* Degenerate case where all the twiddles are 1. */
-      tw3 = tw2 = tw1 = st->twiddles;
+      for (i=0;i<N;i++)
-      for (j=0;j<m;j++)
      {
-         C_MUL4(scratch[0],Fout[m] , *tw1 );
+         kiss_fft_cpx scratch0, scratch1;
-         C_MUL4(scratch[1],Fout[m2] , *tw2 );
-         C_MUL4(scratch[2],Fout[m3] , *tw3 );
+         C_SUB( scratch0 , *Fout, Fout[2] );
+         C_ADDTO(*Fout, Fout[2]);
-         Fout->r = PSHR32(Fout->r, 2);
+         C_ADD( scratch1 , Fout[1] , Fout[3] );
-         Fout->i = PSHR32(Fout->i, 2);
+         C_SUB( Fout[2], *Fout, scratch1 );
-         C_SUB( scratch[5] , *Fout, scratch[1] );
+         C_ADDTO( *Fout , scratch1 );
-         C_ADDTO(*Fout, scratch[1]);
+         C_SUB( scratch1 , Fout[1] , Fout[3] );
-         C_ADD( scratch[3] , scratch[0] , scratch[2] );
-         C_SUB( scratch[4] , scratch[0] , scratch[2] );
+         Fout[1].r = scratch0.r + scratch1.i;
-         C_SUB( Fout[m2], *Fout, scratch[3] );
+         Fout[1].i = scratch0.i - scratch1.r;
-         tw1 += fstride;
+         Fout[3].r = scratch0.r - scratch1.i;
-         tw2 += fstride*2;
+         Fout[3].i = scratch0.i + scratch1.r;
-         tw3 += fstride*3;
+         Fout+=4;
-         C_ADDTO( *Fout , scratch[3] );
-         Fout[m].r = scratch[5].r + scratch[4].i;
-         Fout[m].i = scratch[5].i - scratch[4].r;
-         Fout[m3].r = scratch[5].r - scratch[4].i;
-         Fout[m3].i = scratch[5].i + scratch[4].r;
-         ++Fout;
      }
-   }
+   } else {
-}
+      int j;
-#endif
+      kiss_fft_cpx scratch[6];
+      const kiss_twiddle_cpx *tw1,*tw2,*tw3;
-static void ki_bfly4(
+      const int m2=2*m;
-                     kiss_fft_cpx * Fout,
+      const int m3=3*m;
-                     const size_t fstride,
+      kiss_fft_cpx * Fout_beg = Fout;
-                     const kiss_fft_state *st,
+      for (i=0;i<N;i++)
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   const kiss_twiddle_cpx *tw1,*tw2,*tw3;
-   kiss_fft_cpx scratch[6];
-   const size_t m2=2*m;
-   const size_t m3=3*m;
-   int i, j;
-   kiss_fft_cpx * Fout_beg = Fout;
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      tw3 = tw2 = tw1 = st->twiddles;
-      for (j=0;j<m;j++)
      {
-         C_MULC(scratch[0],Fout[m] , *tw1 );
+         Fout = Fout_beg + i*mm;
-         C_MULC(scratch[1],Fout[m2] , *tw2 );
+         tw3 = tw2 = tw1 = st->twiddles;
-         C_MULC(scratch[2],Fout[m3] , *tw3 );
+         /* m is guaranteed to be a multiple of 4. */
+         for (j=0;j<m;j++)
-         C_SUB( scratch[5] , *Fout, scratch[1] );
+         {
-         C_ADDTO(*Fout, scratch[1]);
+            C_MUL(scratch[0],Fout[m] , *tw1 );
-         C_ADD( scratch[3] , scratch[0] , scratch[2] );
+            C_MUL(scratch[1],Fout[m2] , *tw2 );
-         C_SUB( scratch[4] , scratch[0] , scratch[2] );
+            C_MUL(scratch[2],Fout[m3] , *tw3 );
-         C_SUB( Fout[m2], *Fout, scratch[3] );
-         tw1 += fstride;
+            C_SUB( scratch[5] , *Fout, scratch[1] );
-         tw2 += fstride*2;
+            C_ADDTO(*Fout, scratch[1]);
-         tw3 += fstride*3;
+            C_ADD( scratch[3] , scratch[0] , scratch[2] );
-         C_ADDTO( *Fout , scratch[3] );
+            C_SUB( scratch[4] , scratch[0] , scratch[2] );
+            C_SUB( Fout[m2], *Fout, scratch[3] );
-         Fout[m].r = scratch[5].r - scratch[4].i;
+            tw1 += fstride;
-         Fout[m].i = scratch[5].i + scratch[4].r;
+            tw2 += fstride*2;
-         Fout[m3].r = scratch[5].r + scratch[4].i;
+            tw3 += fstride*3;
-         Fout[m3].i = scratch[5].i - scratch[4].r;
+            C_ADDTO( *Fout , scratch[3] );
-         ++Fout;
+            Fout[m].r = scratch[5].r + scratch[4].i;
+            Fout[m].i = scratch[5].i - scratch[4].r;
+            Fout[m3].r = scratch[5].r - scratch[4].i;
+            Fout[m3].i = scratch[5].i + scratch[4].r;
+            ++Fout;
+         }
      }
   }
 }
 #ifndef RADIX_TWO_ONLY
-#if 0
 static void kf_bfly3(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
@@ -225,14 +190,19 @@ static void kf_bfly3(
   kiss_twiddle_cpx epi3;
   kiss_fft_cpx * Fout_beg = Fout;
+#ifdef FIXED_POINT
+   epi3.r = -16384;
+   epi3.i = -28378;
+#else
   epi3 = st->twiddles[fstride*m];
+#endif
   for (i=0;i<N;i++)
   {
      Fout = Fout_beg + i*mm;
      tw1=tw2=st->twiddles;
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
      k=m;
      do {
-         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
         C_MUL(scratch[1],Fout[m] , *tw1);
         C_MUL(scratch[2],Fout[m2] , *tw2);
@@ -259,59 +229,9 @@ static void kf_bfly3(
      } while(--k);
   }
 }
-#endif
-static void ki_bfly3(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   int i, k;
-   const size_t m2 = 2*m;
-   const kiss_twiddle_cpx *tw1,*tw2;
-   kiss_fft_cpx scratch[5];
-   kiss_twiddle_cpx epi3;
-   kiss_fft_cpx * Fout_beg = Fout;
-   epi3 = st->twiddles[fstride*m];
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      tw1=tw2=st->twiddles;
-      k=m;
-      do{
-         C_MULC(scratch[1],Fout[m] , *tw1);
-         C_MULC(scratch[2],Fout[m2] , *tw2);
-         C_ADD(scratch[3],scratch[1],scratch[2]);
-         C_SUB(scratch[0],scratch[1],scratch[2]);
-         tw1 += fstride;
-         tw2 += fstride*2;
-         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
-         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
-         C_MULBYSCALAR( scratch[0] , -epi3.i );
-         C_ADDTO(*Fout,scratch[3]);
-         Fout[m2].r = Fout[m].r + scratch[0].i;
-         Fout[m2].i = Fout[m].i - scratch[0].r;
-         Fout[m].r -= scratch[0].i;
-         Fout[m].i += scratch[0].r;
-         ++Fout;
-      }while(--k);
-   }
-}
-#if 0
+#ifndef OVERRIDE_kf_bfly5
 static void kf_bfly5(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
@@ -324,13 +244,19 @@ static void kf_bfly5(
   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
   int i, u;
   kiss_fft_cpx scratch[13];
-   const kiss_twiddle_cpx * twiddles = st->twiddles;
   const kiss_twiddle_cpx *tw;
   kiss_twiddle_cpx ya,yb;
   kiss_fft_cpx * Fout_beg = Fout;
-   ya = twiddles[fstride*m];
+#ifdef FIXED_POINT
-   yb = twiddles[fstride*2*m];
+   ya.r = 10126;
+   ya.i = -31164;
+   yb.r = -26510;
+   yb.i = -19261;
+#else
+   ya = st->twiddles[fstride*m];
+   yb = st->twiddles[fstride*2*m];
+#endif
   tw=st->twiddles;
   for (i=0;i<N;i++)
@@ -342,8 +268,8 @@ static void kf_bfly5(
      Fout3=Fout0+3*m;
      Fout4=Fout0+4*m;
+      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
      for ( u=0; u<m; ++u ) {
-         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
         scratch[0] = *Fout0;
         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
@@ -380,75 +306,8 @@ static void kf_bfly5(
      }
   }
 }
-#endif
+#endif /* OVERRIDE_kf_bfly5 */
-static void ki_bfly5(
-                     kiss_fft_cpx * Fout,
-                     const size_t fstride,
-                     const kiss_fft_state *st,
-                     int m,
-                     int N,
-                     int mm
-                    )
-{
-   kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
-   int i, u;
-   kiss_fft_cpx scratch[13];
-   const kiss_twiddle_cpx * twiddles = st->twiddles;
-   const kiss_twiddle_cpx *tw;
-   kiss_twiddle_cpx ya,yb;
-   kiss_fft_cpx * Fout_beg = Fout;
-   ya = twiddles[fstride*m];
-   yb = twiddles[fstride*2*m];
-   tw=st->twiddles;
-   for (i=0;i<N;i++)
-   {
-      Fout = Fout_beg + i*mm;
-      Fout0=Fout;
-      Fout1=Fout0+m;
-      Fout2=Fout0+2*m;
-      Fout3=Fout0+3*m;
-      Fout4=Fout0+4*m;
-      for ( u=0; u<m; ++u ) {
-         scratch[0] = *Fout0;
-         C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
-         C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
-         C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
-         C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
-         C_ADD( scratch[7],scratch[1],scratch[4]);
-         C_SUB( scratch[10],scratch[1],scratch[4]);
-         C_ADD( scratch[8],scratch[2],scratch[3]);
-         C_SUB( scratch[9],scratch[2],scratch[3]);
-         Fout0->r += scratch[7].r + scratch[8].r;
-         Fout0->i += scratch[7].i + scratch[8].i;
-         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
-         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
-         scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
-         scratch[6].i =  S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
-         C_SUB(*Fout1,scratch[5],scratch[6]);
-         C_ADD(*Fout4,scratch[5],scratch[6]);
-         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
-         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
-         scratch[12].r =  S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
-         scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
-         C_ADD(*Fout2,scratch[11],scratch[12]);
-         C_SUB(*Fout3,scratch[11],scratch[12]);
-         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
-      }
-   }
-}
 #endif
@@ -496,6 +355,9 @@ static
 int kf_factor(int n,opus_int16 * facbuf)
 {
    int p=4;
+    int i;
+    int stages=0;
+    int nbak = n;
    /*factor out powers of 4, powers of 2, then any remaining primes */
    do {
@@ -517,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf)
        {
           return 0;
        }
-        *facbuf++ = p;
+        facbuf[2*stages] = p;
-        *facbuf++ = n;
+        if (p==2 && stages > 1)
+        {
+           facbuf[2*stages] = 4;
+           facbuf[2] = 2;
+        }
+        stages++;
    } while (n > 1);
+    n = nbak;
+    /* Reverse the order to get the radix 4 at the end, so we can use the
+       fast degenerate case. It turns out that reversing the order also
+       improves the noise behaviour. */
+    for (i=0;i<stages/2;i++)
+    {
+       int tmp;
+       tmp = facbuf[2*i];
+       facbuf[2*i] = facbuf[2*(stages-i-1)];
+       facbuf[2*(stages-i-1)] = tmp;
+    }
+    for (i=0;i<stages;i++)
+    {
+        n /= facbuf[2*i];
+        facbuf[2*i+1] = n;
+    }
    return 1;
 }
@@ -563,14 +446,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,  co
        kiss_twiddle_cpx *twiddles;
        st->nfft=nfft;
-#ifndef FIXED_POINT
+#ifdef FIXED_POINT
+        st->scale_shift = celt_ilog2(st->nfft);
+        if (st->nfft == 1<<st->scale_shift)
+           st->scale = Q15ONE;
+        else
+           st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
+#else
        st->scale = 1.f/nfft;
 #endif
        if (base != NULL)
        {
           st->twiddles = base->twiddles;
           st->shift = 0;
-           while (nfft<<st->shift != base->nfft && st->shift < 32)
+           while (st->shift < 32 && nfft<<st->shift != base->nfft)
              st->shift++;
           if (st->shift>=32)
              goto fail;
@@ -614,8 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg)
 #endif /* CUSTOM_MODES */
-#if 0
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
-void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
 {
    int m2, m;
    int p;
@@ -627,17 +515,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
    /* st->shift can be -1 */
    shift = st->shift>0 ? st->shift : 0;
-    celt_assert2 (fin != fout, "In-place FFT not supported");
-    /* Bit-reverse the input */
-    for (i=0;i<st->nfft;i++)
-    {
-       fout[st->bitrev[i]] = fin[i];
-#ifndef FIXED_POINT
-       fout[st->bitrev[i]].r *= st->scale;
-       fout[st->bitrev[i]].i *= st->scale;
-#endif
-    }
    fstride[0] = 1;
    L=0;
    do {
@@ -656,7 +533,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
       switch (st->factors[2*i])
       {
       case 2:
-          kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+          kf_bfly2(fout, m, fstride[i]);
          break;
       case 4:
          kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
@@ -673,57 +550,44 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
       m = m2;
    }
 }
-#endif
-void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+#if 0
+void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
 {
-   int m2, m;
-   int p;
-   int L;
-   int fstride[MAXFACTORS];
   int i;
-   int shift;
+   opus_val16 scale;
+#ifdef FIXED_POINT
+   /* Allows us to scale with MULT16_32_Q16(), which is faster than
+      MULT16_32_Q15() on ARM. */
+   int scale_shift = st->scale_shift-1;
+#endif
+   scale = st->scale;
-   /* st->shift can be -1 */
-   shift = st->shift>0 ? st->shift : 0;
   celt_assert2 (fin != fout, "In-place FFT not supported");
   /* Bit-reverse the input */
   for (i=0;i<st->nfft;i++)
-      fout[st->bitrev[i]] = fin[i];
-   fstride[0] = 1;
-   L=0;
-   do {
-      p = st->factors[2*L];
-      m = st->factors[2*L+1];
-      fstride[L+1] = fstride[L]*p;
-      L++;
-   } while(m!=1);
-   m = st->factors[2*L-1];
-   for (i=L-1;i>=0;i--)
   {
-      if (i!=0)
+      kiss_fft_cpx x = fin[i];
-         m2 = st->factors[2*i-1];
+      fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
-      else
+      fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
-         m2 = 1;
-      switch (st->factors[2*i])
-      {
-      case 2:
-         ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-      case 4:
-         ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-#ifndef RADIX_TWO_ONLY
-      case 3:
-         ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-      case 5:
-         ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
-         break;
-#endif
-      }
-      m = m2;
   }
+   opus_fft_impl(st, fout);
 }
+#endif
+#ifdef TEST_UNIT_DFT_C
+void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+   int i;
+   celt_assert2 (fin != fout, "In-place FFT not supported");
+   /* Bit-reverse the input */
+   for (i=0;i<st->nfft;i++)
+      fout[st->bitrev[i]] = fin[i];
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
+   opus_fft_impl(st, fout);
+   for (i=0;i<st->nfft;i++)
+      fout[i].i = -fout[i].i;
+}
+#endif
author	Nils Wallménius <nils@rockbox.org>	2014-01-19 16:31:59 +0100
committer	Nils Wallménius <nils@rockbox.org>	2014-07-13 11:12:40 +0200
commit	9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree	07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/kiss_fft.c
parent	e557951c94c1efa769900257e466900f0ffeb53b (diff)
download	rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip

diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c index e2b8f3b3da..833ef5a71f 100644 --- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c +++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
@@ -45,73 +45,62 @@
45	complex numbers. It also delares the kf_ internal functions.	45	complex numbers. It also delares the kf_ internal functions.
46	*/	46	*/
47		47
48	#if 0
49	static void kf_bfly2(	48	static void kf_bfly2(
50	kiss_fft_cpx * Fout,	49	kiss_fft_cpx * Fout,
51	const size_t fstride,
52	const kiss_fft_state *st,
53	int m,	50	int m,
54	int N,	51	int N
55	int mm
56	)	52	)
57	{	53	{
58	kiss_fft_cpx * Fout2;	54	kiss_fft_cpx * Fout2;
59	const kiss_twiddle_cpx * tw1;	55	int i;
60	int i,j;	56	(void)m;
61	kiss_fft_cpx * Fout_beg = Fout;	57	#ifdef CUSTOM_MODES
62	for (i=0;i<N;i++)	58	if (m==1)
63	{	59	{
64	Fout = Fout_beg + i*mm;	60	celt_assert(m==1);
65	Fout2 = Fout + m;	61	for (i=0;i<N;i++)
66	tw1 = st->twiddles;
67	for(j=0;j<m;j++)
68	{	62	{
69	kiss_fft_cpx t;	63	kiss_fft_cpx t;
70	Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);	64	Fout2 = Fout + 1;
71	Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);	65	t = *Fout2;
72	C_MUL (t, Fout2 , tw1);
73	tw1 += fstride;
74	C_SUB( Fout2 , Fout , t );	66	C_SUB( Fout2 , Fout , t );
75	C_ADDTO( *Fout , t );	67	C_ADDTO( *Fout , t );
76	++Fout2;	68	Fout += 2;
77	++Fout;
78	}	69	}
79	}	70	} else
80	}
81	#endif	71	#endif
82
83	static void ki_bfly2(
84	kiss_fft_cpx * Fout,
85	const size_t fstride,
86	const kiss_fft_state *st,
87	int m,
88	int N,
89	int mm
90	)
91	{
92	kiss_fft_cpx * Fout2;
93	const kiss_twiddle_cpx * tw1;
94	kiss_fft_cpx t;
95	int i,j;
96	kiss_fft_cpx * Fout_beg = Fout;
97	for (i=0;i<N;i++)
98	{	72	{
99	Fout = Fout_beg + i*mm;	73	opus_val16 tw;
100	Fout2 = Fout + m;	74	tw = QCONST16(0.7071067812f, 15);
101	tw1 = st->twiddles;	75	/* We know that m==4 here because the radix-2 is just after a radix-4 */
102	for(j=0;j<m;j++)	76	celt_assert(m==4);
		77	for (i=0;i<N;i++)
103	{	78	{
104	C_MULC (t, Fout2 , tw1);	79	kiss_fft_cpx t;
105	tw1 += fstride;	80	Fout2 = Fout + 4;
106	C_SUB( Fout2 , Fout , t );	81	t = Fout2[0];
107	C_ADDTO( *Fout , t );	82	C_SUB( Fout2[0] , Fout[0] , t );
108	++Fout2;	83	C_ADDTO( Fout[0] , t );
109	++Fout;	84
		85	t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
		86	t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
		87	C_SUB( Fout2[1] , Fout[1] , t );
		88	C_ADDTO( Fout[1] , t );
		89
		90	t.r = Fout2[2].i;
		91	t.i = -Fout2[2].r;
		92	C_SUB( Fout2[2] , Fout[2] , t );
		93	C_ADDTO( Fout[2] , t );
		94
		95	t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
		96	t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
		97	C_SUB( Fout2[3] , Fout[3] , t );
		98	C_ADDTO( Fout[3] , t );
		99	Fout += 8;
110	}	100	}
111	}	101	}
112	}	102	}
113		103
114	#if 0
115	static void kf_bfly4(	104	static void kf_bfly4(
116	kiss_fft_cpx * Fout,	105	kiss_fft_cpx * Fout,
117	const size_t fstride,	106	const size_t fstride,
@@ -121,93 +110,69 @@ static void kf_bfly4(
121	int mm	110	int mm
122	)	111	)
123	{	112	{
124	const kiss_twiddle_cpx tw1,tw2,*tw3;	113	int i;
125	kiss_fft_cpx scratch[6];
126	const size_t m2=2*m;
127	const size_t m3=3*m;
128	int i, j;
129		114
130	kiss_fft_cpx * Fout_beg = Fout;	115	if (m==1)
131	for (i=0;i<N;i++)
132	{	116	{
133	Fout = Fout_beg + i*mm;	117	/* Degenerate case where all the twiddles are 1. */
134	tw3 = tw2 = tw1 = st->twiddles;	118	for (i=0;i<N;i++)
135	for (j=0;j<m;j++)
136	{	119	{
137	C_MUL4(scratch[0],Fout[m] , *tw1 );	120	kiss_fft_cpx scratch0, scratch1;
138	C_MUL4(scratch[1],Fout[m2] , *tw2 );	121
139	C_MUL4(scratch[2],Fout[m3] , *tw3 );	122	C_SUB( scratch0 , *Fout, Fout[2] );
140		123	C_ADDTO(*Fout, Fout[2]);
141	Fout->r = PSHR32(Fout->r, 2);	124	C_ADD( scratch1 , Fout[1] , Fout[3] );
142	Fout->i = PSHR32(Fout->i, 2);	125	C_SUB( Fout[2], *Fout, scratch1 );
143	C_SUB( scratch[5] , *Fout, scratch[1] );	126	C_ADDTO( *Fout , scratch1 );
144	C_ADDTO(*Fout, scratch[1]);	127	C_SUB( scratch1 , Fout[1] , Fout[3] );
145	C_ADD( scratch[3] , scratch[0] , scratch[2] );	128
146	C_SUB( scratch[4] , scratch[0] , scratch[2] );	129	Fout[1].r = scratch0.r + scratch1.i;
147	C_SUB( Fout[m2], *Fout, scratch[3] );	130	Fout[1].i = scratch0.i - scratch1.r;
148	tw1 += fstride;	131	Fout[3].r = scratch0.r - scratch1.i;
149	tw2 += fstride*2;	132	Fout[3].i = scratch0.i + scratch1.r;
150	tw3 += fstride*3;	133	Fout+=4;
151	C_ADDTO( *Fout , scratch[3] );
152
153	Fout[m].r = scratch[5].r + scratch[4].i;
154	Fout[m].i = scratch[5].i - scratch[4].r;
155	Fout[m3].r = scratch[5].r - scratch[4].i;
156	Fout[m3].i = scratch[5].i + scratch[4].r;
157	++Fout;
158	}	134	}
159	}	135	} else {
160	}	136	int j;
161	#endif	137	kiss_fft_cpx scratch[6];
162		138	const kiss_twiddle_cpx tw1,tw2,*tw3;
163	static void ki_bfly4(	139	const int m2=2*m;
164	kiss_fft_cpx * Fout,	140	const int m3=3*m;
165	const size_t fstride,	141	kiss_fft_cpx * Fout_beg = Fout;
166	const kiss_fft_state *st,	142	for (i=0;i<N;i++)
167	int m,
168	int N,
169	int mm
170	)
171	{
172	const kiss_twiddle_cpx tw1,tw2,*tw3;
173	kiss_fft_cpx scratch[6];
174	const size_t m2=2*m;
175	const size_t m3=3*m;
176	int i, j;
177
178	kiss_fft_cpx * Fout_beg = Fout;
179	for (i=0;i<N;i++)
180	{
181	Fout = Fout_beg + i*mm;
182	tw3 = tw2 = tw1 = st->twiddles;
183	for (j=0;j<m;j++)
184	{	143	{
185	C_MULC(scratch[0],Fout[m] , *tw1 );	144	Fout = Fout_beg + i*mm;
186	C_MULC(scratch[1],Fout[m2] , *tw2 );	145	tw3 = tw2 = tw1 = st->twiddles;
187	C_MULC(scratch[2],Fout[m3] , *tw3 );	146	/* m is guaranteed to be a multiple of 4. */
188		147	for (j=0;j<m;j++)
189	C_SUB( scratch[5] , *Fout, scratch[1] );	148	{
190	C_ADDTO(*Fout, scratch[1]);	149	C_MUL(scratch[0],Fout[m] , *tw1 );
191	C_ADD( scratch[3] , scratch[0] , scratch[2] );	150	C_MUL(scratch[1],Fout[m2] , *tw2 );
192	C_SUB( scratch[4] , scratch[0] , scratch[2] );	151	C_MUL(scratch[2],Fout[m3] , *tw3 );
193	C_SUB( Fout[m2], *Fout, scratch[3] );	152
194	tw1 += fstride;	153	C_SUB( scratch[5] , *Fout, scratch[1] );
195	tw2 += fstride*2;	154	C_ADDTO(*Fout, scratch[1]);
196	tw3 += fstride*3;	155	C_ADD( scratch[3] , scratch[0] , scratch[2] );
197	C_ADDTO( *Fout , scratch[3] );	156	C_SUB( scratch[4] , scratch[0] , scratch[2] );
198		157	C_SUB( Fout[m2], *Fout, scratch[3] );
199	Fout[m].r = scratch[5].r - scratch[4].i;	158	tw1 += fstride;
200	Fout[m].i = scratch[5].i + scratch[4].r;	159	tw2 += fstride*2;
201	Fout[m3].r = scratch[5].r + scratch[4].i;	160	tw3 += fstride*3;
202	Fout[m3].i = scratch[5].i - scratch[4].r;	161	C_ADDTO( *Fout , scratch[3] );
203	++Fout;	162
		163	Fout[m].r = scratch[5].r + scratch[4].i;
		164	Fout[m].i = scratch[5].i - scratch[4].r;
		165	Fout[m3].r = scratch[5].r - scratch[4].i;
		166	Fout[m3].i = scratch[5].i + scratch[4].r;
		167	++Fout;
		168	}
204	}	169	}
205	}	170	}
206	}	171	}
207		172
		173
208	#ifndef RADIX_TWO_ONLY	174	#ifndef RADIX_TWO_ONLY
209		175
210	#if 0
211	static void kf_bfly3(	176	static void kf_bfly3(
212	kiss_fft_cpx * Fout,	177	kiss_fft_cpx * Fout,
213	const size_t fstride,	178	const size_t fstride,
@@ -225,14 +190,19 @@ static void kf_bfly3(
225	kiss_twiddle_cpx epi3;	190	kiss_twiddle_cpx epi3;
226		191
227	kiss_fft_cpx * Fout_beg = Fout;	192	kiss_fft_cpx * Fout_beg = Fout;
		193	#ifdef FIXED_POINT
		194	epi3.r = -16384;
		195	epi3.i = -28378;
		196	#else
228	epi3 = st->twiddles[fstride*m];	197	epi3 = st->twiddles[fstride*m];
		198	#endif
229	for (i=0;i<N;i++)	199	for (i=0;i<N;i++)
230	{	200	{
231	Fout = Fout_beg + i*mm;	201	Fout = Fout_beg + i*mm;
232	tw1=tw2=st->twiddles;	202	tw1=tw2=st->twiddles;
		203	/* For non-custom modes, m is guaranteed to be a multiple of 4. */
233	k=m;	204	k=m;
234	do {	205	do {
235	C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
236		206
237	C_MUL(scratch[1],Fout[m] , *tw1);	207	C_MUL(scratch[1],Fout[m] , *tw1);
238	C_MUL(scratch[2],Fout[m2] , *tw2);	208	C_MUL(scratch[2],Fout[m2] , *tw2);
@@ -259,59 +229,9 @@ static void kf_bfly3(
259	} while(--k);	229	} while(--k);
260	}	230	}
261	}	231	}
262	#endif
263
264	static void ki_bfly3(
265	kiss_fft_cpx * Fout,
266	const size_t fstride,
267	const kiss_fft_state *st,
268	int m,
269	int N,
270	int mm
271	)
272	{
273	int i, k;
274	const size_t m2 = 2*m;
275	const kiss_twiddle_cpx tw1,tw2;
276	kiss_fft_cpx scratch[5];
277	kiss_twiddle_cpx epi3;
278
279	kiss_fft_cpx * Fout_beg = Fout;
280	epi3 = st->twiddles[fstride*m];
281	for (i=0;i<N;i++)
282	{
283	Fout = Fout_beg + i*mm;
284	tw1=tw2=st->twiddles;
285	k=m;
286	do{
287
288	C_MULC(scratch[1],Fout[m] , *tw1);
289	C_MULC(scratch[2],Fout[m2] , *tw2);
290
291	C_ADD(scratch[3],scratch[1],scratch[2]);
292	C_SUB(scratch[0],scratch[1],scratch[2]);
293	tw1 += fstride;
294	tw2 += fstride*2;
295
296	Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
297	Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
298
299	C_MULBYSCALAR( scratch[0] , -epi3.i );
300
301	C_ADDTO(*Fout,scratch[3]);
302
303	Fout[m2].r = Fout[m].r + scratch[0].i;
304	Fout[m2].i = Fout[m].i - scratch[0].r;
305
306	Fout[m].r -= scratch[0].i;
307	Fout[m].i += scratch[0].r;
308		232
309	++Fout;
310	}while(--k);
311	}
312	}
313		233
314	#if 0	234	#ifndef OVERRIDE_kf_bfly5
315	static void kf_bfly5(	235	static void kf_bfly5(
316	kiss_fft_cpx * Fout,	236	kiss_fft_cpx * Fout,
317	const size_t fstride,	237	const size_t fstride,
@@ -324,13 +244,19 @@ static void kf_bfly5(
324	kiss_fft_cpx Fout0,Fout1,Fout2,Fout3,*Fout4;	244	kiss_fft_cpx Fout0,Fout1,Fout2,Fout3,*Fout4;
325	int i, u;	245	int i, u;
326	kiss_fft_cpx scratch[13];	246	kiss_fft_cpx scratch[13];
327	const kiss_twiddle_cpx * twiddles = st->twiddles;
328	const kiss_twiddle_cpx *tw;	247	const kiss_twiddle_cpx *tw;
329	kiss_twiddle_cpx ya,yb;	248	kiss_twiddle_cpx ya,yb;
330	kiss_fft_cpx * Fout_beg = Fout;	249	kiss_fft_cpx * Fout_beg = Fout;
331		250
332	ya = twiddles[fstride*m];	251	#ifdef FIXED_POINT
333	yb = twiddles[fstride2m];	252	ya.r = 10126;
		253	ya.i = -31164;
		254	yb.r = -26510;
		255	yb.i = -19261;
		256	#else
		257	ya = st->twiddles[fstride*m];
		258	yb = st->twiddles[fstride2m];
		259	#endif
334	tw=st->twiddles;	260	tw=st->twiddles;
335		261
336	for (i=0;i<N;i++)	262	for (i=0;i<N;i++)
@@ -342,8 +268,8 @@ static void kf_bfly5(
342	Fout3=Fout0+3*m;	268	Fout3=Fout0+3*m;
343	Fout4=Fout0+4*m;	269	Fout4=Fout0+4*m;
344		270
		271	/* For non-custom modes, m is guaranteed to be a multiple of 4. */
345	for ( u=0; u<m; ++u ) {	272	for ( u=0; u<m; ++u ) {
346	C_FIXDIV( Fout0,5); C_FIXDIV( Fout1,5); C_FIXDIV( Fout2,5); C_FIXDIV( Fout3,5); C_FIXDIV( *Fout4,5);
347	scratch[0] = *Fout0;	273	scratch[0] = *Fout0;
348		274
349	C_MUL(scratch[1] ,Fout1, tw[ufstride]);	275	C_MUL(scratch[1] ,Fout1, tw[ufstride]);
@@ -380,75 +306,8 @@ static void kf_bfly5(
380	}	306	}
381	}	307	}
382	}	308	}
383	#endif	309	#endif /* OVERRIDE_kf_bfly5 */
384
385	static void ki_bfly5(
386	kiss_fft_cpx * Fout,
387	const size_t fstride,
388	const kiss_fft_state *st,
389	int m,
390	int N,
391	int mm
392	)
393	{
394	kiss_fft_cpx Fout0,Fout1,Fout2,Fout3,*Fout4;
395	int i, u;
396	kiss_fft_cpx scratch[13];
397	const kiss_twiddle_cpx * twiddles = st->twiddles;
398	const kiss_twiddle_cpx *tw;
399	kiss_twiddle_cpx ya,yb;
400	kiss_fft_cpx * Fout_beg = Fout;
401		310
402	ya = twiddles[fstride*m];
403	yb = twiddles[fstride2m];
404	tw=st->twiddles;
405
406	for (i=0;i<N;i++)
407	{
408	Fout = Fout_beg + i*mm;
409	Fout0=Fout;
410	Fout1=Fout0+m;
411	Fout2=Fout0+2*m;
412	Fout3=Fout0+3*m;
413	Fout4=Fout0+4*m;
414
415	for ( u=0; u<m; ++u ) {
416	scratch[0] = *Fout0;
417
418	C_MULC(scratch[1] ,Fout1, tw[ufstride]);
419	C_MULC(scratch[2] ,Fout2, tw[2u*fstride]);
420	C_MULC(scratch[3] ,Fout3, tw[3u*fstride]);
421	C_MULC(scratch[4] ,Fout4, tw[4u*fstride]);
422
423	C_ADD( scratch[7],scratch[1],scratch[4]);
424	C_SUB( scratch[10],scratch[1],scratch[4]);
425	C_ADD( scratch[8],scratch[2],scratch[3]);
426	C_SUB( scratch[9],scratch[2],scratch[3]);
427
428	Fout0->r += scratch[7].r + scratch[8].r;
429	Fout0->i += scratch[7].i + scratch[8].i;
430
431	scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
432	scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
433
434	scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
435	scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
436
437	C_SUB(*Fout1,scratch[5],scratch[6]);
438	C_ADD(*Fout4,scratch[5],scratch[6]);
439
440	scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
441	scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
442	scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
443	scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
444
445	C_ADD(*Fout2,scratch[11],scratch[12]);
446	C_SUB(*Fout3,scratch[11],scratch[12]);
447
448	++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
449	}
450	}
451	}
452		311
453	#endif	312	#endif
454		313
@@ -496,6 +355,9 @@ static
496	int kf_factor(int n,opus_int16 * facbuf)	355	int kf_factor(int n,opus_int16 * facbuf)
497	{	356	{
498	int p=4;	357	int p=4;
		358	int i;
		359	int stages=0;
		360	int nbak = n;
499		361
500	/factor out powers of 4, powers of 2, then any remaining primes /	362	/factor out powers of 4, powers of 2, then any remaining primes /
501	do {	363	do {
@@ -517,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf)
517	{	379	{
518	return 0;	380	return 0;
519	}	381	}
520	*facbuf++ = p;	382	facbuf[2*stages] = p;
521	*facbuf++ = n;	383	if (p==2 && stages > 1)
		384	{
		385	facbuf[2*stages] = 4;
		386	facbuf[2] = 2;
		387	}
		388	stages++;
522	} while (n > 1);	389	} while (n > 1);
		390	n = nbak;
		391	/* Reverse the order to get the radix 4 at the end, so we can use the
		392	fast degenerate case. It turns out that reversing the order also
		393	improves the noise behaviour. */
		394	for (i=0;i<stages/2;i++)
		395	{
		396	int tmp;
		397	tmp = facbuf[2*i];
		398	facbuf[2i] = facbuf[2(stages-i-1)];
		399	facbuf[2*(stages-i-1)] = tmp;
		400	}
		401	for (i=0;i<stages;i++)
		402	{
		403	n /= facbuf[2*i];
		404	facbuf[2*i+1] = n;
		405	}
523	return 1;	406	return 1;
524	}	407	}
525		408
@@ -563,14 +446,20 @@ kiss_fft_state opus_fft_alloc_twiddles(int nfft,void mem,size_t * lenmem, co
563	kiss_twiddle_cpx *twiddles;	446	kiss_twiddle_cpx *twiddles;
564		447
565	st->nfft=nfft;	448	st->nfft=nfft;
566	#ifndef FIXED_POINT	449	#ifdef FIXED_POINT
		450	st->scale_shift = celt_ilog2(st->nfft);
		451	if (st->nfft == 1<<st->scale_shift)
		452	st->scale = Q15ONE;
		453	else
		454	st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
		455	#else
567	st->scale = 1.f/nfft;	456	st->scale = 1.f/nfft;
568	#endif	457	#endif
569	if (base != NULL)	458	if (base != NULL)
570	{	459	{
571	st->twiddles = base->twiddles;	460	st->twiddles = base->twiddles;
572	st->shift = 0;	461	st->shift = 0;
573	while (nfft<<st->shift != base->nfft && st->shift < 32)	462	while (st->shift < 32 && nfft<<st->shift != base->nfft)
574	st->shift++;	463	st->shift++;
575	if (st->shift>=32)	464	if (st->shift>=32)
576	goto fail;	465	goto fail;
@@ -614,8 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg)
614		503
615	#endif /* CUSTOM_MODES */	504	#endif /* CUSTOM_MODES */
616		505
617	#if 0	506	void opus_fft_impl(const kiss_fft_state st,kiss_fft_cpx fout)
618	void opus_fft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)
619	{	507	{
620	int m2, m;	508	int m2, m;
621	int p;	509	int p;
@@ -627,17 +515,6 @@ void opus_fft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fou
627	/* st->shift can be -1 */	515	/* st->shift can be -1 */
628	shift = st->shift>0 ? st->shift : 0;	516	shift = st->shift>0 ? st->shift : 0;
629		517
630	celt_assert2 (fin != fout, "In-place FFT not supported");
631	/* Bit-reverse the input */
632	for (i=0;i<st->nfft;i++)
633	{
634	fout[st->bitrev[i]] = fin[i];
635	#ifndef FIXED_POINT
636	fout[st->bitrev[i]].r *= st->scale;
637	fout[st->bitrev[i]].i *= st->scale;
638	#endif
639	}
640
641	fstride[0] = 1;	518	fstride[0] = 1;
642	L=0;	519	L=0;
643	do {	520	do {
@@ -656,7 +533,7 @@ void opus_fft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fou
656	switch (st->factors[2*i])	533	switch (st->factors[2*i])
657	{	534	{
658	case 2:	535	case 2:
659	kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);	536	kf_bfly2(fout, m, fstride[i]);
660	break;	537	break;
661	case 4:	538	case 4:
662	kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);	539	kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
@@ -673,57 +550,44 @@ void opus_fft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fou
673	m = m2;	550	m = m2;
674	}	551	}
675	}	552	}
676	#endif
677		553
678	void opus_ifft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)	554	#if 0
		555	void opus_fft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)
679	{	556	{
680	int m2, m;
681	int p;
682	int L;
683	int fstride[MAXFACTORS];
684	int i;	557	int i;
685	int shift;	558	opus_val16 scale;
		559	#ifdef FIXED_POINT
		560	/* Allows us to scale with MULT16_32_Q16(), which is faster than
		561	MULT16_32_Q15() on ARM. */
		562	int scale_shift = st->scale_shift-1;
		563	#endif
		564	scale = st->scale;
686		565
687	/* st->shift can be -1 */
688	shift = st->shift>0 ? st->shift : 0;
689	celt_assert2 (fin != fout, "In-place FFT not supported");	566	celt_assert2 (fin != fout, "In-place FFT not supported");
690	/* Bit-reverse the input */	567	/* Bit-reverse the input */
691	for (i=0;i<st->nfft;i++)	568	for (i=0;i<st->nfft;i++)
692	fout[st->bitrev[i]] = fin[i];
693
694	fstride[0] = 1;
695	L=0;
696	do {
697	p = st->factors[2*L];
698	m = st->factors[2*L+1];
699	fstride[L+1] = fstride[L]*p;
700	L++;
701	} while(m!=1);
702	m = st->factors[2*L-1];
703	for (i=L-1;i>=0;i--)
704	{	569	{
705	if (i!=0)	570	kiss_fft_cpx x = fin[i];
706	m2 = st->factors[2*i-1];	571	fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
707	else	572	fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
708	m2 = 1;
709	switch (st->factors[2*i])
710	{
711	case 2:
712	ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
713	break;
714	case 4:
715	ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
716	break;
717	#ifndef RADIX_TWO_ONLY
718	case 3:
719	ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
720	break;
721	case 5:
722	ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
723	break;
724	#endif
725	}
726	m = m2;
727	}	573	}
		574	opus_fft_impl(st, fout);
728	}	575	}
		576	#endif
		577
729		578
		579	#ifdef TEST_UNIT_DFT_C
		580	void opus_ifft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fout)
		581	{
		582	int i;
		583	celt_assert2 (fin != fout, "In-place FFT not supported");
		584	/* Bit-reverse the input */
		585	for (i=0;i<st->nfft;i++)
		586	fout[st->bitrev[i]] = fin[i];
		587	for (i=0;i<st->nfft;i++)
		588	fout[i].i = -fout[i].i;
		589	opus_fft_impl(st, fout);
		590	for (i=0;i<st->nfft;i++)
		591	fout[i].i = -fout[i].i;
		592	}
		593	#endif