39 files changed, 3681 insertions, 3968 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
index 63e2548843..866a6520ca 100644
--- a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
+++ b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
@@ -61,76 +61,9 @@
      do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
          (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
-#if defined (CPU_COLDFIRE)
-#   define C_MULC(m,a,b) \
-    { \
-      asm volatile("move.l (%[bp]), %%d2;" \
-                   "clr.l %%d3;" \
-                   "move.w %%d2, %%d3;" \
-                   "swap %%d3;" \
-                   "clr.w %%d2;" \
-                   "movem.l (%[ap]), %%d0-%%d1;" \
-                   "mac.l %%d0, %%d2, %%acc0;" \
-                   "mac.l %%d1, %%d3, %%acc0;" \
-                   "mac.l %%d1, %%d2, %%acc1;" \
-                   "msac.l %%d0, %%d3, %%acc1;" \
-                   "movclr.l %%acc0, %[mr];" \
-                   "movclr.l %%acc1, %[mi];" \
-                   : [mr] "=r" ((m).r), [mi] "=r" ((m).i) \
-                   : [ap] "a" (&(a)), [bp] "a" (&(b)) \
-                   : "d0", "d1", "d2", "d3", "cc"); \
-    }
-#elif defined(CPU_ARM)
-#if (ARM_ARCH < 5)
-#   define C_MULC(m,a,b) \
-    { \
-      asm volatile( \
-                   "ldm %[ap], {r0,r1}                  \n\t" \
-                   "ldrsh r2, [%[bp], #0]                 \n\t" \
-                   "ldrsh r3, [%[bp], #2]                 \n\t" \
-                   \
-                   "smull r4, %[mr], r0, r2               \n\t" \
-                   "smlal r4, %[mr], r1, r3               \n\t" \
-                   "mov   r4, r4, lsr #15                 \n\t" \
-                   "orr   %[mr], r4, %[mr], lsl #17       \n\t" \
-                   \
-                   "smull r4, %[mi], r1, r2               \n\t" \
-                   "rsb   r3, r3, #0                      \n\t" \
-                   "smlal r4, %[mi], r0, r3               \n\t" \
-                   "mov   r4, r4, lsr #15                 \n\t" \
-                   "orr   %[mi], r4, %[mi], lsl #17       \n\t" \
-                   : [mr] "=r" ((m).r), [mi] "=r" ((m).i) \
-                   : [ap] "r" (&(a)), [bp] "r" (&(b)) \
-                   : "r0", "r1", "r2", "r3", "r4"); \
-}
-#else
-/*same as above but using armv5 packed multiplies*/
-#   define C_MULC(m,a,b) \
-    { \
-      asm volatile( \
-                   "ldm %[ap], {r0,r1}            \n\t" \
-                   "ldr r2, [%[bp], #0]                 \n\t" \
-                                \
-                   "smulwb r4, r0, r2               \n\t"  /*r4=a.r*b.r*/    \
-                   "smlawt %[mr], r1, r2, r4        \n\t"  /*m.r=r4+a.i*b.i*/\
-                   "mov   %[mr], %[mr], lsl #1      \n\t"  /*Q15 not Q16*/   \
-                    \
-                   "smulwb r1, r1, r2               \n\t"  /*r1=a.i*b.r*/    \
-                   "smulwt r4, r0, r2               \n\t"  /*r4=a.r*b.i*/    \
-                   "sub %[mi], r1, r4               \n\t" \
-                   "mov   %[mi], %[mi], lsl #1      \n\t" \
-                   : [mr] "=r" ((m).r), [mi] "=r" ((m).i) \
-                   : [ap] "r" (&(a)), [bp] "r" (&(b)) \
-                   : "r0", "r1", "r2", "r4"); \
-}
-#endif /*ARMv5 code*/
-#else
 #   define C_MULC(m,a,b) \
      do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
          (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
-#endif
 #   define C_MUL4(m,a,b) \
      do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
@@ -161,6 +94,18 @@
    do {(res).r = ADD32((res).r,(a).r);  (res).i = SUB32((res).i,(a).i); \
    }while(0)
+#if defined(ARMv4_ASM)
+#include "arm/kiss_fft_armv4.h"
+#endif
+#if defined(ARMv5E_ASM)
+#include "arm/kiss_fft_armv5e.h"
+#endif
+#if defined(CF_ASM)
+#include "cf/kiss_fft_cf.h"
+#endif
 #else  /* not FIXED_POINT*/
 #   define S_MUL(a,b) ( (a)*(b) )
diff --git a/lib/rbcodec/codecs/libopus/celt/arch.h b/lib/rbcodec/codecs/libopus/celt/arch.h
index 03cda40f69..c0f9413d00 100644
--- a/lib/rbcodec/codecs/libopus/celt/arch.h
+++ b/lib/rbcodec/codecs/libopus/celt/arch.h
@@ -100,6 +100,7 @@ typedef opus_val32 celt_ener;
 #define DB_SHIFT 10
 #define EPSILON 1
+#define VERY_SMALL 0
 #define VERY_LARGE16 ((opus_val16)32767)
 #define Q15_ONE ((opus_val16)32767)
@@ -112,16 +113,18 @@ typedef opus_val32 celt_ener;
 #include "fixed_generic.h"
-#ifdef ARM5E_ASM
+#ifdef ARMv5E_ASM
-#include "fixed_arm5e.h"
+#include "arm/fixed_armv5e.h"
-#elif defined (ARM4_ASM)
+#elif defined (ARMv4_ASM)
-#include "fixed_arm4.h"
+#include "arm/fixed_armv4.h"
 #elif defined (BFIN_ASM)
 #include "fixed_bfin.h"
 #elif defined (TI_C5X_ASM)
 #include "fixed_c5x.h"
 #elif defined (TI_C6X_ASM)
 #include "fixed_c6x.h"
+#elif defined (CF_ASM)
+#include "cf/fixed_cf.h"
 #endif
 #endif
@@ -140,6 +143,7 @@ typedef float celt_ener;
 #define NORM_SCALING 1.f
 #define EPSILON 1e-15f
+#define VERY_SMALL 1e-30f
 #define VERY_LARGE16 1e15f
 #define Q15_ONE ((opus_val16)1.f)
@@ -161,6 +165,7 @@ typedef float celt_ener;
 #define SHR(a,shift)    (a)
 #define SHL(a,shift)    (a)
 #define SATURATE(x,a)   (x)
+#define SATURATE16(x)   (x)
 #define ROUND16(a,shift)  (a)
 #define HALF16(x)       (.5f*(x))
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
new file mode 100644
index 0000000000..bcacc343e8
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 2013 Xiph.Org Foundation and contributors */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef FIXED_ARMv4_H
+#define FIXED_ARMv4_H
+/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits.
+    a is moved into the top halfword first, so the high word of the 64-bit
+    smull product (rd_hi) is the value returned; the low word (rd_lo) is
+    discarded. */
+#undef MULT16_32_Q16
+static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  /*Shift through unsigned: left-shifting a negative int is undefined in C.*/
+  int a_hi = (int)((unsigned)a<<16);
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b),"r"(a_hi)
+  );
+  return rd_hi;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b))
+/** 16x32 multiplication, followed by a 15-bit shift right. Result fits in 32 bits.
+    Only the high word of the 64-bit smull product is kept and then doubled,
+    so bit 0 of the result is always zero (see the note before the return). */
+#undef MULT16_32_Q15
+static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  /*Shift through unsigned: left-shifting a negative int is undefined in C.*/
+  int a_hi = (int)((unsigned)a<<16);
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b), "r"(a_hi)
+  );
+  /*We intentionally don't OR in the high bit of rd_lo for speed.*/
+  /*The doubling also goes through unsigned, as rd_hi may be negative.*/
+  return (int)((unsigned)rd_hi<<1);
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits.
+    Built from MULT16_32_Q15 as redefined above, so it uses the ARMv4
+    routine for the product. */
+#undef MAC16_32_Q15
+#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+/** 32x32 multiplication, followed by a 31-bit shift right. Result fits in 32 bits.
+    Written as a plain 64-bit C multiply; there is no inline asm here. */
+#undef MULT32_32_Q31
+#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31)
+#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
new file mode 100644
index 0000000000..80632c4a94
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
@@ -0,0 +1,116 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO
+   Copyright (C) 2013      Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef FIXED_ARMv5E_H
+#define FIXED_ARMv5E_H
+#include "fixed_armv4.h"
+/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits.
+    Done with a single smulwb: b is the 32-bit operand, a the 16-bit one. */
+#undef MULT16_32_Q16
+static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+{
+  int prod;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smulwb %[prod], %[wide], %[half]\n\t"
+      : [prod]"=r"(prod)
+      : [wide]"r"(b), [half]"r"(a)
+  );
+  return prod;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
+/** 16x32 multiplication, followed by a 15-bit shift right. Result fits in 32 bits.
+    smulwb yields the product shifted right by 16; it is then doubled, so
+    bit 0 of the result is always zero. */
+#undef MULT16_32_Q15
+static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(a)
+  );
+  /*Shift through unsigned: left-shifting a negative int is undefined in C.*/
+  return (int)((unsigned)res<<1);
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits.
+    b is doubled before the smlawb so that the instruction's own 16-bit
+    shift leaves a net 15-bit shift; c is the accumulator input. */
+#undef MAC16_32_Q15
+static inline opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+  int res;
+  /*Shift through unsigned: left-shifting a negative int is undefined in C.*/
+  int b2 = (int)((unsigned)b<<1);
+  __asm__(
+      "#MAC16_32_Q15\n\t"
+      "smlawb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(b2), "r"(a), "r"(c)
+  );
+  return res;
+}
+#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+/** 16x16 multiply-add where the result fits in 32 bits.
+    Returns c plus the product of a and b, computed by one smlabb. */
+#undef MAC16_16
+static inline opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val16 b)
+{
+  int sum;
+  __asm__(
+      "#MAC16_16\n\t"
+      "smlabb %[sum], %[x], %[y], %[acc];\n"
+      : [sum]"=r"(sum)
+      : [x]"r"(a), [y]"r"(b), [acc]"r"(c)
+  );
+  return sum;
+}
+#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
+/** 16x16 multiplication where the result fits in 32 bits.
+    Returns the product of a and b, computed by one smulbb. */
+#undef MULT16_16
+static inline opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+{
+  int prod;
+  __asm__(
+      "#MULT16_16\n\t"
+      "smulbb %[prod], %[x], %[y];\n"
+      : [prod]"=r"(prod)
+      : [x]"r"(a), [y]"r"(b)
+  );
+  return prod;
+}
+#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
+#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv4.h
new file mode 100644
index 0000000000..e4faad6f2b
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv4.h
@@ -0,0 +1,121 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+#ifndef KISS_FFT_ARMv4_H
+#define KISS_FFT_ARMv4_H
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+#ifdef FIXED_POINT
+/* ARMv4 complex multiply m = a*b.
+   The asm loads two 32-bit words from &a into r0/r1 (a.r, a.i) and two signed
+   16-bit values from &b at offsets 0 and 2 (b.r, b.i), then forms
+     m.i = (a.i*b.r + a.r*b.i) >> 15
+     m.r = (a.r*b.r - a.i*b.i) >> 15
+   using 64-bit smull/smlal sums whose halves are recombined with
+   lsr #15 / lsl #17.
+   a and b must be addressable lvalues.  They are also passed as "m" inputs
+   (unused in the template) so the compiler knows the asm reads the memory
+   behind the ap/bp pointers and does not defer or drop earlier stores to it. */
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)), \
+              "m"(a), "m"(b) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+/* ARMv4 complex multiply with an extra divide by four: m = (a*b)/4.
+   Same instruction sequence as C_MUL, but the 64-bit sums are recombined
+   with lsr #17 / lsl #15, i.e.
+     m.i = (a.i*b.r + a.r*b.i) >> 17
+     m.r = (a.r*b.r - a.i*b.i) >> 17
+   a is read as two 32-bit words from &a, b as two signed 16-bit values
+   from &b.
+   a and b must be addressable lvalues.  They are also passed as "m" inputs
+   (unused in the template) so the compiler knows the asm reads the memory
+   behind the ap/bp pointers and does not defer or drop earlier stores to it. */
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL4\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #17\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #15\n\t" \
+            "mov %[br], %[br], lsr #17\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #15\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)), \
+              "m"(a), "m"(b) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+/* ARMv4 complex multiply by the conjugate of b: m = a*conj(b).
+   The asm loads two 32-bit words from &a into r0/r1 (a.r, a.i) and two signed
+   16-bit values from &b at offsets 0 and 2 (b.r, b.i), then forms
+     m.r = (a.r*b.r + a.i*b.i) >> 15
+     m.i = (a.i*b.r - a.r*b.i) >> 15
+   using 64-bit smull/smlal sums whose halves are recombined with
+   lsr #15 / lsl #17.
+   a and b must be addressable lvalues.  They are also passed as "m" inputs
+   (unused in the template) so the compiler knows the asm reads the memory
+   behind the ap/bp pointers and does not defer or drop earlier stores to it. */
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MULC\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mr], r0, %[br]\n\t" \
+            "smlal %[tt], %[mr], r1, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mi], r1, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mi], r0, %[bi]\n\t" \
+            "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mi], %[br], %[mi], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)), \
+              "m"(a), "m"(b) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+#endif /* FIXED_POINT */
+#endif /* KISS_FFT_ARMv4_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv5e.h
new file mode 100644
index 0000000000..9eca183d77
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv5e.h
@@ -0,0 +1,118 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+#ifndef KISS_FFT_ARMv5E_H
+#define KISS_FFT_ARMv5E_H
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+#ifdef FIXED_POINT
+/* Constraint string for the memory operand that the ldrd instructions in the
+   macros below load from: "Q" when compiling Thumb code, "Uq" otherwise.
+   NOTE(review): both are GCC ARM machine constraints; confirm against the GCC
+   documentation that each limits the address to a form ldrd accepts. */
+#if defined(__thumb__)||defined(__thumb2__)
+#define LDRD_CONS "Q"
+#else
+#define LDRD_CONS "Uq"
+#endif
+/* ARMv5E complex multiply m = a*b.
+   a is fetched with one ldrd into a register pair (low word and %H high
+   word) and b with one 32-bit ldr; smulwb/smulwt/smlawt then multiply a
+   32-bit word by the bottom or top halfword of b.  The asm produces
+     mr1 = lo(a)*bot(b),  mr2 = hi(a)*top(b),
+     mi  = hi(a)*bot(b) + lo(a)*top(b),
+   and the C statements after it store m.r = (mr1-mr2)<<1 and m.i = mi<<1.
+   Presumably lo/hi(a) are a.r/a.i and bot/top(b) are b.r/b.i (little-endian,
+   .r first) -- confirm against the kiss_fft types.
+   aval is early-clobber ("=&r") because it is written before bp is read. */
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHL32(mi__, 1); \
+    } \
+    while(0)
+/* ARMv5E complex multiply with an extra divide by four: m = (a*b)/4.
+   The asm is the same as in C_MUL: it produces
+     mr1 = lo(a)*bot(b),  mr2 = hi(a)*top(b),
+     mi  = hi(a)*bot(b) + lo(a)*top(b).
+   Only the final scaling differs: the results are shifted right by one
+   (SHR32) instead of left by one, giving m.r = (mr1-mr2)>>1, m.i = mi>>1.
+   Presumably lo/hi(a) are a.r/a.i and bot/top(b) are b.r/b.i (little-endian,
+   .r first) -- confirm against the kiss_fft types. */
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL4\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHR32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHR32(mi__, 1); \
+    } \
+    while(0)
+/* ARMv5E complex multiply by the conjugate of b: m = a*conj(b).
+   a is fetched with one ldrd into a register pair and b with one 32-bit ldr.
+   The asm produces
+     mr  = lo(a)*bot(b) + hi(a)*top(b),
+     mi1 = hi(a)*bot(b),  mi2 = lo(a)*top(b),
+   and the C statements after it store m.r = mr<<1 and m.i = (mi1-mi2)<<1.
+   Presumably lo/hi(a) are a.r/a.i and bot/top(b) are b.r/b.i (little-endian,
+   .r first) -- confirm against the kiss_fft types. */
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+        int mr__; \
+        int mi1__; \
+        int mi2__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MULC\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mr], %[aval], %[bval]\n\t" \
+            "smulwb %[mi1], %H[aval], %[bval]\n\t" \
+            "smulwt %[mi2], %[aval], %[bval]\n\t" \
+            "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \
+            : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(mr__, 1); \
+        (m).i = SHL32(SUB32(mi1__, mi2__), 1); \
+    } \
+    while(0)
+#endif /* FIXED_POINT */
+#endif /* KISS_FFT_ARMv5E_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c
index c7cb0d5500..5c715aff53 100644
--- a/lib/rbcodec/codecs/libopus/celt/bands.c
+++ b/lib/rbcodec/codecs/libopus/celt/bands.c
@@ -28,7 +28,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include <math.h>
@@ -40,6 +40,23 @@
 #include "os_support.h"
 #include "mathops.h"
 #include "rate.h"
+#include "quant_bands.h"
+#include "pitch.h"
+/* Choose an interval index for val, with hysteresis around the previous one.
+   The raw index is the position of the first of the N thresholds that val is
+   below (N if there is none).  If that differs from prev, the previous index
+   is kept while val is still within the hysteresis margin of the threshold
+   it would have to cross: thresholds[prev]+hysteresis[prev] when moving up,
+   thresholds[prev-1]-hysteresis[prev-1] when moving down. */
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
+{
+   int level = 0;
+   /* Skip every leading threshold that val is not below. */
+   while (level < N && !(val < thresholds[level]))
+      level++;
+   if (level > prev)
+   {
+      /* Moving up: val must clear the upper margin of prev. */
+      if (val < thresholds[prev]+hysteresis[prev])
+         return prev;
+   } else if (level < prev) {
+      /* Moving down: val must drop below the lower margin of prev. */
+      if (val > thresholds[prev-1]-hysteresis[prev-1])
+         return prev;
+   }
+   return level;
+}
 opus_uint32 celt_lcg_rand(opus_uint32 seed)
 {
@@ -174,7 +191,8 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
 #endif
 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
-void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, celt_sig * OPUS_RESTRICT freq, const celt_ener *bandE, int end, int C, int M)
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
 {
   int i, c, N;
   const opus_int16 *eBands = m->eBands;
@@ -184,18 +202,39 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, cel
      celt_sig * OPUS_RESTRICT f;
      const celt_norm * OPUS_RESTRICT x;
      f = freq+c*N;
-      x = X+c*N;
+      x = X+c*N+M*eBands[start];
-      for (i=0;i<end;i++)
+      for (i=0;i<M*eBands[start];i++)
+         *f++ = 0;
+      for (i=start;i<end;i++)
      {
         int j, band_end;
-         opus_val32 g = SHR32(bandE[i+c*m->nbEBands],1);
+         opus_val16 g;
+         opus_val16 lg;
+#ifdef FIXED_POINT
+         int shift;
+#endif
         j=M*eBands[i];
         band_end = M*eBands[i+1];
+         lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
+#ifdef FIXED_POINT
+         /* Handle the integer part of the log energy */
+         shift = 16-(lg>>DB_SHIFT);
+         if (shift>31)
+         {
+            shift=0;
+            g=0;
+         } else {
+            /* Handle the fractional part. */
+            g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+         }
+#else
+         g = celt_exp2(lg);
+#endif
         do {
-            *f++ = SHL32(MULT16_32_Q15(*x, g),2);
+            *f++ = SHR32(MULT16_16(*x++, g), shift);
-            x++;
         } while (++j<band_end);
      }
+      celt_assert(start <= end);
      for (i=M*eBands[end];i<N;i++)
         *f++ = 0;
   } while (++c<C);
@@ -347,11 +386,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
   opus_val32 t, lgain, rgain;
   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
-   for (j=0;j<N;j++)
+   dual_inner_prod(Y, X, Y, N, &xp, &side);
-   {
-      xp = MAC16_16(xp, X[j], Y[j]);
-      side = MAC16_16(side, Y[j], Y[j]);
-   }
   /* Compensating for the mid normalization */
   xp = MULT16_32_Q15(mid, xp);
   /* mid and side are in Q15, not Q14 like X and Y */
@@ -487,50 +522,6 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
 }
 #endif
-#ifdef MEASURE_NORM_MSE
-float MSE[30] = {0};
-int nbMSEBands = 0;
-int MSECount[30] = {0};
-void dump_norm_mse(void)
-{
-   int i;
-   for (i=0;i<nbMSEBands;i++)
-   {
-      printf ("%g ", MSE[i]/MSECount[i]);
-   }
-   printf ("\n");
-}
-void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C)
-{
-   static int init = 0;
-   int i;
-   if (!init)
-   {
-      atexit(dump_norm_mse);
-      init = 1;
-   }
-   for (i=0;i<m->nbEBands;i++)
-   {
-      int j;
-      int c;
-      float g;
-      if (bandE0[i]<10 || (C==2 && bandE0[i+m->nbEBands]<1))
-         continue;
-      c=0; do {
-         g = bandE[i+c*m->nbEBands]/(1e-15+bandE0[i+c*m->nbEBands]);
-         for (j=M*m->eBands[i];j<M*m->eBands[i+1];j++)
-            MSE[i] += (g*X[j+c*N]-X0[j+c*N])*(g*X[j+c*N]-X0[j+c*N]);
-      } while (++c<C);
-      MSECount[i]+=C;
-   }
-   nbMSEBands = m->nbEBands;
-}
-#endif
 /* Indexing table for converting from natural Hadamard to ordery Hadamard
   This is essentially a bit-reversed Gray, on top of which we've added
   an inversion of the order because we want the DC at the end rather than
@@ -633,289 +624,304 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
   return qn;
 }
-/* This function is responsible for encoding and decoding a band for both
+struct band_ctx {
-   the mono and stereo case. Even in the mono case, it can split the band
+   int encode;
-   in two and transmit the energy difference with the two half-bands. It
+   const CELTMode *m;
-   can be called recursively so bands can end up being split in 8 parts. */
+   int i;
-static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_norm *Y,
+   int intensity;
-      int N, int b, int spread, int B, int intensity, int tf_change, celt_norm *lowband, ec_ctx *ec,
+   int spread;
-      opus_int32 *remaining_bits, int LM, celt_norm *lowband_out, const celt_ener *bandE, int level,
+   int tf_change;
-      opus_uint32 *seed, opus_val16 gain, celt_norm *lowband_scratch, int fill)
+   ec_ctx *ec;
+   opus_int32 remaining_bits;
+   const celt_ener *bandE;
+   opus_uint32 seed;
+};
+/* Values describing one band split.  compute_theta() takes a pointer to this
+   struct and keeps locals with the same names as its fields.
+   NOTE(review): the code that stores those locals into the struct lies past
+   the end of the visible hunk, so the field notes below are read off the
+   locals -- confirm against the full function. */
+struct split_ctx {
+   int inv;    /* presumably the "Y was negated" flag (itheta > 8192 when encoding) */
+   int imid;
+   int iside;
+   int delta;
+   int itheta; /* presumably the split angle after quantisation */
+   int qalloc;
+};
+static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
+      celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
+      int LM,
+      int stereo, int *fill)
 {
-   const unsigned char *cache;
+   int qn;
-   int q;
+   int itheta=0;
-   int curr_bits;
+   int delta;
-   int stereo, split;
+   int imid, iside;
-   int imid=0, iside=0;
+   int qalloc;
-   int N0=N;
+   int pulse_cap;
-   int N_B=N;
+   int offset;
-   int N_B0;
+   opus_int32 tell;
-   int B0=B;
+   int inv=0;
-   int time_divide=0;
+   int encode;
-   int recombine=0;
+   const CELTMode *m;
-   int inv = 0;
+   int i;
-   opus_val16 mid=0, side=0;
+   int intensity;
-   int longBlocks;
+   ec_ctx *ec;
-   unsigned cm=0;
+   const celt_ener *bandE;
-#ifdef RESYNTH
-   int resynth = 1;
+   encode = ctx->encode;
-#else
+   m = ctx->m;
-   int resynth = !encode;
+   i = ctx->i;
-#endif
+   intensity = ctx->intensity;
+   ec = ctx->ec;
+   bandE = ctx->bandE;
+   /* Decide on the resolution to give to the split parameter theta */
+   pulse_cap = m->logN[i]+LM*(1<<BITRES);
+   offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
+   qn = compute_qn(N, *b, offset, pulse_cap, stereo);
+   if (stereo && i>=intensity)
+      qn = 1;
+   if (encode)
+   {
+      /* theta is the atan() of the ratio between the (normalized)
+         side and mid. With just that parameter, we can re-scale both
+         mid and side because we know that 1) they have unit norm and
+         2) they are orthogonal. */
+      itheta = stereo_itheta(X, Y, stereo, N);
+   }
+   tell = ec_tell_frac(ec);
+   if (qn!=1)
+   {
+      if (encode)
+         itheta = (itheta*qn+8192)>>14;
-   longBlocks = B0==1;
+      /* Entropy coding of the angle. We use a uniform pdf for the
+         time split, a step for stereo, and a triangular one for the rest. */
+      if (stereo && N>2)
+      {
+         int p0 = 3;
+         int x = itheta;
+         int x0 = qn/2;
+         int ft = p0*(x0+1) + x0;
+         /* Use a probability of p0 up to itheta=8192 and then use 1 after */
+         if (encode)
+         {
+            ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+         } else {
+            int fs;
+            fs=ec_decode(ec,ft);
+            if (fs<(x0+1)*p0)
+               x=fs/p0;
+            else
+               x=x0+1+(fs-(x0+1)*p0);
+            ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+            itheta = x;
+         }
+      } else if (B0>1 || stereo) {
+         /* Uniform pdf */
+         if (encode)
+            ec_enc_uint(ec, itheta, qn+1);
+         else
+            itheta = ec_dec_uint(ec, qn+1);
+      } else {
+         int fs=1, ft;
+         ft = ((qn>>1)+1)*((qn>>1)+1);
+         if (encode)
+         {
+            int fl;
-   N_B /= B;
+            fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
-   N_B0 = N_B;
+            fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
+             ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
-   split = stereo = Y != NULL;
+            ec_encode(ec, fl, fl+fs, ft);
+         } else {
+            /* Triangular pdf */
+            int fl=0;
+            int fm;
+            fm = ec_decode(ec, ft);
-   /* Special case for one sample */
+            if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
-   if (N==1)
-   {
-      int c;
-      celt_norm *x = X;
-      c=0; do {
-         int sign=0;
-         if (*remaining_bits>=1<<BITRES)
-         {
-            if (encode)
            {
-               sign = x[0]<0;
+               itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
-               ec_enc_bits(ec, sign, 1);
+               fs = itheta + 1;
-            } else {
+               fl = itheta*(itheta + 1)>>1;
-               sign = ec_dec_bits(ec, 1);
+            }
+            else
+            {
+               itheta = (2*(qn + 1)
+                - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
+               fs = qn + 1 - itheta;
+               fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
            }
-            *remaining_bits -= 1<<BITRES;
-            b-=1<<BITRES;
-         }
-         if (resynth)
-            x[0] = sign ? -NORM_SCALING : NORM_SCALING;
-         x = Y;
-      } while (++c<1+stereo);
-      if (lowband_out)
-         lowband_out[0] = SHR16(X[0],4);
-      return 1;
-   }
-   if (!stereo && level == 0)
-   {
-      int k;
-      if (tf_change>0)
-         recombine = tf_change;
-      /* Band recombining to increase frequency resolution */
-      if (lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
+            ec_dec_update(ec, fl, fl+fs, ft);
-      {
+         }
-         int j;
-         for (j=0;j<N;j++)
-            lowband_scratch[j] = lowband[j];
-         lowband = lowband_scratch;
      }
+      itheta = (opus_int32)itheta*16384/qn;
-      for (k=0;k<recombine;k++)
+      if (encode && stereo)
      {
-         static const unsigned char bit_interleave_table[16]={
+         if (itheta==0)
-           0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
+            intensity_stereo(m, X, Y, bandE, i, N);
-         };
+         else
-         if (encode)
+            stereo_split(X, Y, N);
-            haar1(X, N>>k, 1<<k);
-         if (lowband)
-            haar1(lowband, N>>k, 1<<k);
-         fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
      }
-      B>>=recombine;
+      /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
-      N_B<<=recombine;
+               Let's do that at higher complexity */
+   } else if (stereo) {
-      /* Increasing the time resolution */
+      if (encode)
-      while ((N_B&1) == 0 && tf_change<0)
      {
-         if (encode)
+         inv = itheta > 8192;
-            haar1(X, N_B, B);
+         if (inv)
-         if (lowband)
+         {
-            haar1(lowband, N_B, B);
+            int j;
-         fill |= fill<<B;
+            for (j=0;j<N;j++)
-         B <<= 1;
+               Y[j] = -Y[j];
-         N_B >>= 1;
+         }
-         time_divide++;
+         intensity_stereo(m, X, Y, bandE, i, N);
-         tf_change++;
      }
-      B0=B;
+      if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
-      N_B0 = N_B;
-      /* Reorganize the samples in time order instead of frequency order */
-      if (B0>1)
      {
         if (encode)
-            deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+            ec_enc_bit_logp(ec, inv, 2);
-         if (lowband)
+         else
-            deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
+            inv = ec_dec_bit_logp(ec, 2);
-      }
+      } else
+         inv = 0;
+      itheta = 0;
   }
+   qalloc = ec_tell_frac(ec) - tell;
+   *b -= qalloc;
-   /* If we need 1.5 more bit than we can produce, split the band in two. */
+   if (itheta == 0)
-   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
-   if (!stereo && LM != -1 && b > cache[cache[0]]+12 && N>2)
   {
-      N >>= 1;
+      imid = 32767;
-      Y = X+N;
+      iside = 0;
-      split = 1;
+      *fill &= (1<<B)-1;
-      LM -= 1;
+      delta = -16384;
-      if (B==1)
+   } else if (itheta == 16384)
-         fill = (fill&1)|(fill<<1);
+   {
-      B = (B+1)>>1;
+      imid = 0;
+      iside = 32767;
+      *fill &= ((1<<B)-1)<<B;
+      delta = 16384;
+   } else {
+      imid = bitexact_cos((opus_int16)itheta);
+      iside = bitexact_cos((opus_int16)(16384-itheta));
+      /* This is the mid vs side allocation that minimizes squared error
+         in that band. */
+      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
   }
-   if (split)
+   sctx->inv = inv;
-   {
+   sctx->imid = imid;
-      int qn;
+   sctx->iside = iside;
-      int itheta=0;
+   sctx->delta = delta;
-      int mbits, sbits, delta;
+   sctx->itheta = itheta;
-      int qalloc;
+   sctx->qalloc = qalloc;
-      int pulse_cap;
+}
-      int offset;
+static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
-      int orig_fill;
+      celt_norm *lowband_out)
-      opus_int32 tell;
+{
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int c;
+   int stereo;
+   celt_norm *x = X;
+   int encode;
+   ec_ctx *ec;
-      /* Decide on the resolution to give to the split parameter theta */
+   encode = ctx->encode;
-      pulse_cap = m->logN[i]+LM*(1<<BITRES);
+   ec = ctx->ec;
-      offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
-      qn = compute_qn(N, b, offset, pulse_cap, stereo);
+   stereo = Y != NULL;
-      if (stereo && i>=intensity)
+   c=0; do {
-         qn = 1;
+      int sign=0;
-      if (encode)
+      if (ctx->remaining_bits>=1<<BITRES)
-      {
-         /* theta is the atan() of the ratio between the (normalized)
-            side and mid. With just that parameter, we can re-scale both
-            mid and side because we know that 1) they have unit norm and
-            2) they are orthogonal. */
-         itheta = stereo_itheta(X, Y, stereo, N);
-      }
-      tell = ec_tell_frac(ec);
-      if (qn!=1)
      {
         if (encode)
-            itheta = (itheta*qn+8192)>>14;
-         /* Entropy coding of the angle. We use a uniform pdf for the
-            time split, a step for stereo, and a triangular one for the rest. */
-         if (stereo && N>2)
         {
-            int p0 = 3;
+            sign = x[0]<0;
-            int x = itheta;
+            ec_enc_bits(ec, sign, 1);
-            int x0 = qn/2;
-            int ft = p0*(x0+1) + x0;
-            /* Use a probability of p0 up to itheta=8192 and then use 1 after */
-            if (encode)
-            {
-               ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
-            } else {
-               int fs;
-               fs=ec_decode(ec,ft);
-               if (fs<(x0+1)*p0)
-                  x=fs/p0;
-               else
-                  x=x0+1+(fs-(x0+1)*p0);
-               ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
-               itheta = x;
-            }
-         } else if (B0>1 || stereo) {
-            /* Uniform pdf */
-            if (encode)
-               ec_enc_uint(ec, itheta, qn+1);
-            else
-               itheta = ec_dec_uint(ec, qn+1);
         } else {
-            int fs=1, ft;
+            sign = ec_dec_bits(ec, 1);
-            ft = ((qn>>1)+1)*((qn>>1)+1);
+         }
-            if (encode)
+         ctx->remaining_bits -= 1<<BITRES;
-            {
+         b-=1<<BITRES;
-               int fl;
+      }
+      if (resynth)
+         x[0] = sign ? -NORM_SCALING : NORM_SCALING;
+      x = Y;
+   } while (++c<1+stereo);
+   if (lowband_out)
+      lowband_out[0] = SHR16(X[0],4);
+   return 1;
+}
-               fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
+/* This function is responsible for encoding and decoding a mono partition.
-               fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
+   It can split the band in two and transmit the energy difference with
-                ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+   the two half-bands. It can be called recursively so bands can end up being
+   split in 8 parts. */
+static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM,
+      opus_val16 gain, int fill)
+{
+   const unsigned char *cache;
+   int q;
+   int curr_bits;
+   int imid=0, iside=0;
+   int N_B=N;
+   int B0=B;
+   opus_val16 mid=0, side=0;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   celt_norm *Y=NULL;
+   int encode;
+   const CELTMode *m;
+   int i;
+   int spread;
+   ec_ctx *ec;
-               ec_encode(ec, fl, fl+fs, ft);
+   encode = ctx->encode;
-            } else {
+   m = ctx->m;
-               /* Triangular pdf */
+   i = ctx->i;
-               int fl=0;
+   spread = ctx->spread;
-               int fm;
+   ec = ctx->ec;
-               fm = ec_decode(ec, ft);
-               if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
+   N_B /= B;
-               {
-                  itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
-                  fs = itheta + 1;
-                  fl = itheta*(itheta + 1)>>1;
-               }
-               else
-               {
-                  itheta = (2*(qn + 1)
-                   - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
-                  fs = qn + 1 - itheta;
-                  fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
-               }
-               ec_dec_update(ec, fl, fl+fs, ft);
+   /* If we need 1.5 more bits than we can produce, split the band in two. */
-            }
+   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
-         }
+   if (LM != -1 && b > cache[cache[0]]+12 && N>2)
-         itheta = (opus_int32)itheta*16384/qn;
+   {
-         if (encode && stereo)
+      int mbits, sbits, delta;
-         {
+      int itheta;
-            if (itheta==0)
+      int qalloc;
-               intensity_stereo(m, X, Y, bandE, i, N);
+      struct split_ctx sctx;
-            else
+      celt_norm *next_lowband2=NULL;
-               stereo_split(X, Y, N);
+      opus_int32 rebalance;
-         }
-         /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
-                  Let's do that at higher complexity */
-      } else if (stereo) {
-         if (encode)
-         {
-            inv = itheta > 8192;
-            if (inv)
-            {
-               int j;
-               for (j=0;j<N;j++)
-                  Y[j] = -Y[j];
-            }
-            intensity_stereo(m, X, Y, bandE, i, N);
-         }
-         if (b>2<<BITRES && *remaining_bits > 2<<BITRES)
-         {
-            if (encode)
-               ec_enc_bit_logp(ec, inv, 2);
-            else
-               inv = ec_dec_bit_logp(ec, 2);
-         } else
-            inv = 0;
-         itheta = 0;
-      }
-      qalloc = ec_tell_frac(ec) - tell;
-      b -= qalloc;
-      orig_fill = fill;
+      N >>= 1;
-      if (itheta == 0)
+      Y = X+N;
-      {
+      LM -= 1;
-         imid = 32767;
+      if (B==1)
-         iside = 0;
+         fill = (fill&1)|(fill<<1);
-         fill &= (1<<B)-1;
+      B = (B+1)>>1;
-         delta = -16384;
-      } else if (itheta == 16384)
-      {
-         imid = 0;
-         iside = 32767;
-         fill &= ((1<<B)-1)<<B;
-         delta = 16384;
-      } else {
-         imid = bitexact_cos(itheta);
-         iside = bitexact_cos(16384-itheta);
-         /* This is the mid vs side allocation that minimizes squared error
-            in that band. */
-         delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
-      }
+      compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
+            LM, 0, &fill);
+      imid = sctx.imid;
+      iside = sctx.iside;
+      delta = sctx.delta;
+      itheta = sctx.itheta;
+      qalloc = sctx.qalloc;
 #ifdef FIXED_POINT
      mid = imid;
      side = iside;
@@ -924,136 +930,59 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
      side = (1.f/32768)*iside;
 #endif
-      /* This is a special case for N=2 that only works for stereo and takes
+      /* Give more bits to low-energy MDCTs than they would otherwise deserve */
-         advantage of the fact that mid and side are orthogonal to encode
+      if (B0>1 && (itheta&0x3fff))
-         the side with just one bit. */
-      if (N==2 && stereo)
      {
-         int c;
+         if (itheta > 8192)
-         int sign=0;
+            /* Rough approximation for pre-echo masking */
-         celt_norm *x2, *y2;
+            delta -= delta>>(4-LM);
-         mbits = b;
-         sbits = 0;
-         /* Only need one bit for the side */
-         if (itheta != 0 && itheta != 16384)
-            sbits = 1<<BITRES;
-         mbits -= sbits;
-         c = itheta > 8192;
-         *remaining_bits -= qalloc+sbits;
-         x2 = c ? Y : X;
-         y2 = c ? X : Y;
-         if (sbits)
-         {
-            if (encode)
-            {
-               /* Here we only need to encode a sign for the side */
-               sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
-               ec_enc_bits(ec, sign, 1);
-            } else {
-               sign = ec_dec_bits(ec, 1);
-            }
-         }
-         sign = 1-2*sign;
-         /* We use orig_fill here because we want to fold the side, but if
-             itheta==16384, we'll have cleared the low bits of fill. */
-         cm = quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, intensity, tf_change, lowband, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch, orig_fill);
-         /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
-             and there's no need to worry about mixing with the other channel. */
-         y2[0] = -sign*x2[1];
-         y2[1] = sign*x2[0];
-         if (resynth)
-         {
-            celt_norm tmp;
-            X[0] = MULT16_16_Q15(mid, X[0]);
-            X[1] = MULT16_16_Q15(mid, X[1]);
-            Y[0] = MULT16_16_Q15(side, Y[0]);
-            Y[1] = MULT16_16_Q15(side, Y[1]);
-            tmp = X[0];
-            X[0] = SUB16(tmp,Y[0]);
-            Y[0] = ADD16(tmp,Y[0]);
-            tmp = X[1];
-            X[1] = SUB16(tmp,Y[1]);
-            Y[1] = ADD16(tmp,Y[1]);
-         }
-      } else {
-         /* "Normal" split code */
-         celt_norm *next_lowband2=NULL;
-         celt_norm *next_lowband_out1=NULL;
-         int next_level=0;
-         opus_int32 rebalance;
-         /* Give more bits to low-energy MDCTs than they would otherwise deserve */
-         if (B0>1 && !stereo && (itheta&0x3fff))
-         {
-            if (itheta > 8192)
-               /* Rough approximation for pre-echo masking */
-               delta -= delta>>(4-LM);
-            else
-               /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
-               delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
-         }
-         mbits = IMAX(0, IMIN(b, (b-delta)/2));
-         sbits = b-mbits;
-         *remaining_bits -= qalloc;
-         if (lowband && !stereo)
-            next_lowband2 = lowband+N; /* >32-bit split case */
-         /* Only stereo needs to pass on lowband_out. Otherwise, it's
-            handled at the end */
-         if (stereo)
-            next_lowband_out1 = lowband_out;
         else
-            next_level = level+1;
+            /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
+            delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
-         rebalance = *remaining_bits;
-         if (mbits >= sbits)
-         {
-            /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
-               mid for folding later */
-            cm = quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change,
-                  lowband, ec, remaining_bits, LM, next_lowband_out1,
-                  NULL, next_level, seed, stereo ? Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill);
-            rebalance = mbits - (rebalance-*remaining_bits);
-            if (rebalance > 3<<BITRES && itheta!=0)
-               sbits += rebalance - (3<<BITRES);
-            /* For a stereo split, the high bits of fill are always zero, so no
-               folding will be done to the side. */
-            cm |= quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change,
-                  next_lowband2, ec, remaining_bits, LM, NULL,
-                  NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<((B0>>1)&(stereo-1));
-         } else {
-            /* For a stereo split, the high bits of fill are always zero, so no
-               folding will be done to the side. */
-            cm = quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change,
-                  next_lowband2, ec, remaining_bits, LM, NULL,
-                  NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<((B0>>1)&(stereo-1));
-            rebalance = sbits - (rebalance-*remaining_bits);
-            if (rebalance > 3<<BITRES && itheta!=16384)
-               mbits += rebalance - (3<<BITRES);
-            /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
-               mid for folding later */
-            cm |= quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change,
-                  lowband, ec, remaining_bits, LM, next_lowband_out1,
-                  NULL, next_level, seed, stereo ? Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill);
-         }
      }
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+      if (lowband)
+         next_lowband2 = lowband+N; /* >32-bit split case */
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         cm = quant_partition(ctx, X, N, mbits, B,
+               lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, Y, N, sbits, B,
+               next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+      } else {
+         cm = quant_partition(ctx, Y, N, sbits, B,
+               next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, X, N, mbits, B,
+               lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+      }
   } else {
      /* This is the basic no-split case */
      q = bits2pulses(m, i, LM, b);
      curr_bits = pulses2bits(m, i, LM, q);
-      *remaining_bits -= curr_bits;
+      ctx->remaining_bits -= curr_bits;
      /* Ensures we can never bust the budget */
-      while (*remaining_bits < 0 && q > 0)
+      while (ctx->remaining_bits < 0 && q > 0)
      {
-         *remaining_bits += curr_bits;
+         ctx->remaining_bits += curr_bits;
         q--;
         curr_bits = pulses2bits(m, i, LM, q);
-         *remaining_bits -= curr_bits;
+         ctx->remaining_bits -= curr_bits;
      }
      if (q!=0)
@@ -1077,7 +1006,7 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
         if (resynth)
         {
            unsigned cm_mask;
-            /*B can be as large as 16, so this shift might overflow an int on a
+            /* B can be as large as 16, so this shift might overflow an int on a
               16-bit platform; use a long to get defined behavior.*/
            cm_mask = (unsigned)(1UL<<B)-1;
            fill &= cm_mask;
@@ -1091,8 +1020,8 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
                  /* Noise */
                  for (j=0;j<N;j++)
                  {
-                     *seed = celt_lcg_rand(*seed);
+                     ctx->seed = celt_lcg_rand(ctx->seed);
-                     X[j] = (celt_norm)((opus_int32)*seed>>20);
+                     X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
                  }
                  cm = cm_mask;
               } else {
@@ -1100,10 +1029,10 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
                  for (j=0;j<N;j++)
                  {
                     opus_val16 tmp;
-                     *seed = celt_lcg_rand(*seed);
+                     ctx->seed = celt_lcg_rand(ctx->seed);
                     /* About 48 dB below the "normal" folding level */
                     tmp = QCONST16(1.0f/256, 10);
-                     tmp = (*seed)&0x8000 ? tmp : -tmp;
+                     tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
                     X[j] = lowband[j]+tmp;
                  }
                  cm = fill;
@@ -1114,64 +1043,307 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
      }
   }
+   return cm;
+}
+/* This function is responsible for encoding and decoding a band for the mono case. */
+static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM, celt_norm *lowband_out,
+      opus_val16 gain, celt_norm *lowband_scratch, int fill)
+{
+   int N0=N;
+   int N_B=N;
+   int N_B0;
+   int B0=B;
+   int time_divide=0;
+   int recombine=0;
+   int longBlocks;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int k;
+   int encode;
+   int tf_change;
+   encode = ctx->encode;
+   tf_change = ctx->tf_change;
+   longBlocks = B0==1;
+   N_B /= B;
+   N_B0 = N_B;
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, NULL, b, lowband_out);
+   }
+   if (tf_change>0)
+      recombine = tf_change;
+   /* Band recombining to increase frequency resolution */
+   if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
+   {
+      int j;
+      for (j=0;j<N;j++)
+         lowband_scratch[j] = lowband[j];
+      lowband = lowband_scratch;
+   }
+   for (k=0;k<recombine;k++)
+   {
+      static const unsigned char bit_interleave_table[16]={
+            0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
+      };
+      if (encode)
+         haar1(X, N>>k, 1<<k);
+      if (lowband)
+         haar1(lowband, N>>k, 1<<k);
+      fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
+   }
+   B>>=recombine;
+   N_B<<=recombine;
+   /* Increasing the time resolution */
+   while ((N_B&1) == 0 && tf_change<0)
+   {
+      if (encode)
+         haar1(X, N_B, B);
+      if (lowband)
+         haar1(lowband, N_B, B);
+      fill |= fill<<B;
+      B <<= 1;
+      N_B >>= 1;
+      time_divide++;
+      tf_change++;
+   }
+   B0=B;
+   N_B0 = N_B;
+   /* Reorganize the samples in time order instead of frequency order */
+   if (B0>1)
+   {
+      if (encode)
+         deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+      if (lowband)
+         deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
+   }
+   cm = quant_partition(ctx, X, N, b, B, lowband,
+         LM, gain, fill);
   /* This code is used by the decoder and by the resynthesis-enabled encoder */
   if (resynth)
   {
-      if (stereo)
+      /* Undo the sample reorganization going from time order to frequency order */
+      if (B0>1)
+         interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+      /* Undo time-freq changes that we did earlier */
+      N_B = N_B0;
+      B = B0;
+      for (k=0;k<time_divide;k++)
      {
-         if (N!=2)
+         B >>= 1;
-            stereo_merge(X, Y, mid, N);
+         N_B <<= 1;
-         if (inv)
+         cm |= cm>>B;
-         {
+         haar1(X, N_B, B);
-            int j;
+      }
-            for (j=0;j<N;j++)
-               Y[j] = -Y[j];
+      for (k=0;k<recombine;k++)
-         }
-      } else if (level == 0)
      {
-         int k;
+         static const unsigned char bit_deinterleave_table[16]={
+               0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
+               0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
+         };
+         cm = bit_deinterleave_table[cm];
+         haar1(X, N0>>k, 1<<k);
+      }
+      B<<=recombine;
-         /* Undo the sample reorganization going from time order to frequency order */
+      /* Scale output for later folding */
-         if (B0>1)
+      if (lowband_out)
-            interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+      {
+         int j;
+         opus_val16 n;
+         n = celt_sqrt(SHL32(EXTEND32(N0),22));
+         for (j=0;j<N0;j++)
+            lowband_out[j] = MULT16_16_Q15(n,X[j]);
+      }
+      cm &= (1<<B)-1;
+   }
+   return cm;
+}
-         /* Undo time-freq changes that we did earlier */
-         N_B = N_B0;
-         B = B0;
-         for (k=0;k<time_divide;k++)
-         {
-            B >>= 1;
-            N_B <<= 1;
-            cm |= cm>>B;
-            haar1(X, N_B, B);
-         }
-         for (k=0;k<recombine;k++)
+/* This function is responsible for encoding and decoding a band for the stereo case. */
-         {
+static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
-            static const unsigned char bit_deinterleave_table[16]={
+      int N, int b, int B, celt_norm *lowband,
-              0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
+      int LM, celt_norm *lowband_out,
-              0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
+      celt_norm *lowband_scratch, int fill)
-            };
+{
-            cm = bit_deinterleave_table[cm];
+   int imid=0, iside=0;
-            haar1(X, N0>>k, 1<<k);
+   int inv = 0;
-         }
+   opus_val16 mid=0, side=0;
-         B<<=recombine;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int mbits, sbits, delta;
+   int itheta;
+   int qalloc;
+   struct split_ctx sctx;
+   int orig_fill;
+   int encode;
+   ec_ctx *ec;
+   encode = ctx->encode;
+   ec = ctx->ec;
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, Y, b, lowband_out);
+   }
+   orig_fill = fill;
+   compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
+         LM, 1, &fill);
+   inv = sctx.inv;
+   imid = sctx.imid;
+   iside = sctx.iside;
+   delta = sctx.delta;
+   itheta = sctx.itheta;
+   qalloc = sctx.qalloc;
+#ifdef FIXED_POINT
+   mid = imid;
+   side = iside;
+#else
+   mid = (1.f/32768)*imid;
+   side = (1.f/32768)*iside;
+#endif
-         /* Scale output for later folding */
+   /* This is a special case for N=2 that only works for stereo and takes
-         if (lowband_out)
+      advantage of the fact that mid and side are orthogonal to encode
+      the side with just one bit. */
+   if (N==2)
+   {
+      int c;
+      int sign=0;
+      celt_norm *x2, *y2;
+      mbits = b;
+      sbits = 0;
+      /* Only need one bit for the side. */
+      if (itheta != 0 && itheta != 16384)
+         sbits = 1<<BITRES;
+      mbits -= sbits;
+      c = itheta > 8192;
+      ctx->remaining_bits -= qalloc+sbits;
+      x2 = c ? Y : X;
+      y2 = c ? X : Y;
+      if (sbits)
+      {
+         if (encode)
         {
-            int j;
+            /* Here we only need to encode a sign for the side. */
-            opus_val16 n;
+            sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
-            n = celt_sqrt(SHL32(EXTEND32(N0),22));
+            ec_enc_bits(ec, sign, 1);
-            for (j=0;j<N0;j++)
+         } else {
-               lowband_out[j] = MULT16_16_Q15(n,X[j]);
+            sign = ec_dec_bits(ec, 1);
         }
-         cm &= (1<<B)-1;
+      }
+      sign = 1-2*sign;
+      /* We use orig_fill here because we want to fold the side, but if
+         itheta==16384, we'll have cleared the low bits of fill. */
+      cm = quant_band(ctx, x2, N, mbits, B, lowband,
+            LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
+      /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
+         and there's no need to worry about mixing with the other channel. */
+      y2[0] = -sign*x2[1];
+      y2[1] = sign*x2[0];
+      if (resynth)
+      {
+         celt_norm tmp;
+         X[0] = MULT16_16_Q15(mid, X[0]);
+         X[1] = MULT16_16_Q15(mid, X[1]);
+         Y[0] = MULT16_16_Q15(side, Y[0]);
+         Y[1] = MULT16_16_Q15(side, Y[1]);
+         tmp = X[0];
+         X[0] = SUB16(tmp,Y[0]);
+         Y[0] = ADD16(tmp,Y[0]);
+         tmp = X[1];
+         X[1] = SUB16(tmp,Y[1]);
+         Y[1] = ADD16(tmp,Y[1]);
+      }
+   } else {
+      /* "Normal" split code */
+      opus_int32 rebalance;
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm = quant_band(ctx, X, N, mbits, B,
+               lowband, LM, lowband_out,
+               Q15ONE, lowband_scratch, fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm |= quant_band(ctx, Y, N, sbits, B,
+               NULL, LM, NULL,
+               side, NULL, fill>>B);
+      } else {
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm = quant_band(ctx, Y, N, sbits, B,
+               NULL, LM, NULL,
+               side, NULL, fill>>B);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm |= quant_band(ctx, X, N, mbits, B,
+               lowband, LM, lowband_out,
+               Q15ONE, lowband_scratch, fill);
+      }
+   }
+   /* This code is used by the decoder and by the resynthesis-enabled encoder */
+   if (resynth)
+   {
+      if (N!=2)
+         stereo_merge(X, Y, mid, N);
+      if (inv)
+      {
+         int j;
+         for (j=0;j<N;j++)
+            Y[j] = -Y[j];
      }
   }
   return cm;
 }
 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
@@ -1182,27 +1354,41 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
   celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
   VARDECL(celt_norm, _norm);
-   VARDECL(celt_norm, lowband_scratch);
+   celt_norm *lowband_scratch;
   int B;
   int M;
   int lowband_offset;
   int update_lowband = 1;
   int C = Y_ != NULL ? 2 : 1;
+   int norm_offset;
 #ifdef RESYNTH
   int resynth = 1;
 #else
   int resynth = !encode;
 #endif
+   struct band_ctx ctx;
   SAVE_STACK;
   M = 1<<LM;
   B = shortBlocks ? M : 1;
-   ALLOC(_norm, C*M*eBands[m->nbEBands], celt_norm);
+   norm_offset = M*eBands[start];
-   ALLOC(lowband_scratch, M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]), celt_norm);
+   /* No need to allocate norm for the last band because we don't need an
+      output in that band. */
+   ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
   norm = _norm;
-   norm2 = norm + M*eBands[m->nbEBands];
+   norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
+   /* We can use the last band as scratch space because we don't need that
+      scratch space for the last band. */
+   lowband_scratch = X_+M*eBands[m->nbEBands-1];
   lowband_offset = 0;
+   ctx.bandE = bandE;
+   ctx.ec = ec;
+   ctx.encode = encode;
+   ctx.intensity = intensity;
+   ctx.m = m;
+   ctx.seed = *seed;
+   ctx.spread = spread;
   for (i=start;i<end;i++)
   {
      opus_int32 tell;
@@ -1214,6 +1400,10 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      int tf_change=0;
      unsigned x_cm;
      unsigned y_cm;
+      int last;
+      ctx.i = i;
+      last = (i==end-1);
      X = X_+M*eBands[i];
      if (Y_!=NULL)
@@ -1227,6 +1417,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      if (i != start)
         balance -= tell;
      remaining_bits = total_bits-tell-1;
+      ctx.remaining_bits = remaining_bits;
      if (i <= codedBands-1)
      {
         curr_balance = balance / IMIN(3, codedBands-i);
@@ -1239,26 +1430,30 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
            lowband_offset = i;
      tf_change = tf_res[i];
+      ctx.tf_change = tf_change;
      if (i>=m->effEBands)
      {
         X=norm;
         if (Y_!=NULL)
            Y = norm;
+         lowband_scratch = NULL;
      }
+      if (i==end-1)
+         lowband_scratch = NULL;
      /* Get a conservative estimate of the collapse_mask's for the bands we're
-          going to be folding from. */
+         going to be folding from. */
      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
      {
         int fold_start;
         int fold_end;
         int fold_i;
         /* This ensures we never repeat spectral content within one band */
-         effective_lowband = IMAX(M*eBands[start], M*eBands[lowband_offset]-N);
+         effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
         fold_start = lowband_offset;
-         while(M*eBands[--fold_start] > effective_lowband);
+         while(M*eBands[--fold_start] > effective_lowband+norm_offset);
         fold_end = lowband_offset-1;
-         while(M*eBands[++fold_end] < effective_lowband+N);
+         while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
         x_cm = y_cm = 0;
         fold_i = fold_start; do {
           x_cm |= collapse_masks[fold_i*C+0];
@@ -1266,7 +1461,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
         } while (++fold_i<fold_end);
      }
      /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
-          always) be non-zero.*/
+         always) be non-zero. */
      else
         x_cm = y_cm = (1<<B)-1;
@@ -1274,33 +1469,42 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      {
         int j;
-         /* Switch off dual stereo to do intensity */
+         /* Switch off dual stereo to do intensity. */
         dual_stereo = 0;
         if (resynth)
-            for (j=M*eBands[start];j<M*eBands[i];j++)
+            for (j=0;j<M*eBands[i]-norm_offset;j++)
               norm[j] = HALF32(norm[j]+norm2[j]);
      }
      if (dual_stereo)
      {
-         x_cm = quant_band(encode, m, i, X, NULL, N, b/2, spread, B, intensity, tf_change,
+         x_cm = quant_band(&ctx, X, N, b/2, B,
-               effective_lowband != -1 ? norm+effective_lowband : NULL, ec, &remaining_bits, LM,
+               effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
-               norm+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, x_cm);
+               last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
-         y_cm = quant_band(encode, m, i, Y, NULL, N, b/2, spread, B, intensity, tf_change,
+         y_cm = quant_band(&ctx, Y, N, b/2, B,
-               effective_lowband != -1 ? norm2+effective_lowband : NULL, ec, &remaining_bits, LM,
+               effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
-               norm2+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, y_cm);
+               last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
      } else {
-         x_cm = quant_band(encode, m, i, X, Y, N, b, spread, B, intensity, tf_change,
+         if (Y!=NULL)
-               effective_lowband != -1 ? norm+effective_lowband : NULL, ec, &remaining_bits, LM,
+         {
-               norm+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, x_cm|y_cm);
+            x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
+                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                        last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
+         } else {
+            x_cm = quant_band(&ctx, X, N, b, B,
+                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                        last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
+         }
         y_cm = x_cm;
      }
      collapse_masks[i*C+0] = (unsigned char)x_cm;
      collapse_masks[i*C+C-1] = (unsigned char)y_cm;
      balance += pulses[i] + tell;
-      /* Update the folding position only as long as we have 1 bit/sample depth */
+      /* Update the folding position only as long as we have 1 bit/sample depth. */
      update_lowband = b>(N<<BITRES);
   }
+   *seed = ctx.seed;
   RESTORE_STACK;
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.h b/lib/rbcodec/codecs/libopus/celt/bands.h
index 9ff8ffd7ba..96ba52a649 100644
--- a/lib/rbcodec/codecs/libopus/celt/bands.h
+++ b/lib/rbcodec/codecs/libopus/celt/bands.h
@@ -39,7 +39,7 @@
 /** Compute the amplitude (sqrt energy) in each of the bands
 * @param m Mode data
 * @param X Spectrum
- * @param bands Square root of the energy for each band (returned)
+ * @param bandE Square root of the energy for each band (returned)
 */
 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M);
@@ -49,16 +49,17 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
    equal to 1
 * @param m Mode data
 * @param X Spectrum (returned normalised)
- * @param bands Square root of the energy for each band
+ * @param bandE Square root of the energy for each band
 */
 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M);
 /** Denormalise each band of X to restore full amplitude
 * @param m Mode data
 * @param X Spectrum (returned de-normalised)
- * @param bands Square root of the energy for each band
+ * @param bandE Square root of the energy for each band
 */
-void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, celt_sig * OPUS_RESTRICT freq, const celt_ener *bandE, int end, int C, int M);
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M);
 #define SPREAD_NONE       (0)
 #define SPREAD_LIGHT      (1)
@@ -76,14 +77,30 @@ void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, floa
 void haar1(celt_norm *X, int N0, int stride);
 /** Quantisation/encoding of the residual spectrum
+ * @param encode flag that indicates whether we're encoding (1) or decoding (0)
 * @param m Mode data
+ * @param start First band to process
+ * @param end Last band to process + 1
 * @param X Residual (normalised)
+ * @param Y Residual (normalised) for second channel (or NULL for mono)
+ * @param collapse_masks Anti-collapse tracking mask
+ * @param bandE Square root of the energy for each band
+ * @param pulses Bit allocation (per band) for PVQ
+ * @param shortBlocks Zero for long blocks, non-zero for short blocks
+ * @param spread Amount of spreading to use
+ * @param dual_stereo Zero for MS stereo, non-zero for dual stereo
+ * @param intensity First band to use intensity stereo
+ * @param tf_res Time-frequency resolution change
 * @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
- * @param enc Entropy encoder
+ * @param balance Number of unallocated bits
+ * @param ec Entropy coder state
+ * @param LM log2() of the number of 2.5 ms subframes in the frame
+ * @param codedBands Last band to receive bits + 1
+ * @param seed Random generator seed
 */
 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
-      int time_domain, int fold, int dual_stereo, int intensity, int *tf_res,
+      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
      opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed);
 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
@@ -92,4 +109,6 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
 opus_uint32 celt_lcg_rand(opus_uint32 seed);
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev);
 #endif /* BANDS_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c
index 52a66d1b68..3e0ce6e6a5 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt.c
@@ -28,7 +28,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #define CELT_C
@@ -50,62 +50,12 @@
 #include "celt_lpc.h"
 #include "vq.h"
-#ifndef OPUS_VERSION
+#ifndef PACKAGE_VERSION
-#define OPUS_VERSION "unknown"
+#define PACKAGE_VERSION "unknown"
 #endif
-#ifdef CUSTOM_MODES
-#define OPUS_CUSTOM_NOSTATIC
-#else
-#define OPUS_CUSTOM_NOSTATIC static inline
-#endif
-static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
-/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
-static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
-static const unsigned char tapset_icdf[3]={2,1,0};
-#ifdef CUSTOM_MODES
-static const unsigned char toOpusTable[20] = {
-      0xE0, 0xE8, 0xF0, 0xF8,
-      0xC0, 0xC8, 0xD0, 0xD8,
-      0xA0, 0xA8, 0xB0, 0xB8,
-      0x00, 0x00, 0x00, 0x00,
-      0x80, 0x88, 0x90, 0x98,
-};
-static const unsigned char fromOpusTable[16] = {
-      0x80, 0x88, 0x90, 0x98,
-      0x40, 0x48, 0x50, 0x58,
-      0x20, 0x28, 0x30, 0x38,
-      0x00, 0x08, 0x10, 0x18
-};
-static inline int toOpus(unsigned char c)
-{
-   int ret=0;
-   if (c<0xA0)
-      ret = toOpusTable[c>>3];
-   if (ret == 0)
-      return -1;
-   else
-      return ret|(c&0x7);
-}
-static inline int fromOpus(unsigned char c)
+int resampling_factor(opus_int32 rate)
-{
-   if (c<0x80)
-      return -1;
-   else
-      return fromOpusTable[(c>>3)-16] | (c&0x7);
-}
-#endif /* CUSTOM_MODES */
-#define COMBFILTER_MAXPERIOD 1024
-#define COMBFILTER_MINPERIOD 15
-static int resampling_factor(opus_int32 rate)
 {
   int ret;
   switch (rate)
@@ -135,693 +85,101 @@ static int resampling_factor(opus_int32 rate)
   return ret;
 }
-/** Encoder state
+#ifndef OVERRIDE_COMB_FILTER_CONST
- @brief Encoder state
+static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
- */
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
-struct OpusCustomEncoder {
-   const OpusCustomMode *mode;     /**< Mode used by the encoder */
-   int overlap;
-   int channels;
-   int stream_channels;
-   int force_intra;
-   int clip;
-   int disable_pf;
-   int complexity;
-   int upsample;
-   int start, end;
-   opus_int32 bitrate;
-   int vbr;
-   int signalling;
-   int constrained_vbr;      /* If zero, VBR can do whatever it likes with the rate */
-   int loss_rate;
-   int lsb_depth;
-   /* Everything beyond this point gets cleared on a reset */
-#define ENCODER_RESET_START rng
-   opus_uint32 rng;
-   int spread_decision;
-   opus_val32 delayedIntra;
-   int tonal_average;
-   int lastCodedBands;
-   int hf_average;
-   int tapset_decision;
-   int prefilter_period;
-   opus_val16 prefilter_gain;
-   int prefilter_tapset;
-#ifdef RESYNTH
-   int prefilter_period_old;
-   opus_val16 prefilter_gain_old;
-   int prefilter_tapset_old;
-#endif
-   int consec_transient;
-   opus_val32 preemph_memE[2];
-   opus_val32 preemph_memD[2];
-   /* VBR-related parameters */
-   opus_int32 vbr_reservoir;
-   opus_int32 vbr_drift;
-   opus_int32 vbr_offset;
-   opus_int32 vbr_count;
-#ifdef RESYNTH
-   celt_sig syn_mem[2][2*MAX_PERIOD];
-#endif
-   celt_sig in_mem[1]; /* Size = channels*mode->overlap */
-   /* celt_sig prefilter_mem[],  Size = channels*COMBFILTER_PERIOD */
-   /* celt_sig overlap_mem[],  Size = channels*mode->overlap */
-   /* opus_val16 oldEBands[], Size = 2*channels*mode->nbEBands */
-};
-#if 0
-int celt_encoder_get_size(int channels)
-{
-   CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
-   return opus_custom_encoder_get_size(mode, channels);
-}
-OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels)
-{
-   int size = sizeof(struct CELTEncoder)
-         + (2*channels*mode->overlap-1)*sizeof(celt_sig)
-         + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig)
-         + 3*channels*mode->nbEBands*sizeof(opus_val16);
-   return size;
-}
-#ifdef CUSTOM_MODES
-CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error)
-{
-   int ret;
-   CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels));
-   /* init will handle the NULL case */
-   ret = opus_custom_encoder_init(st, mode, channels);
-   if (ret != OPUS_OK)
-   {
-      opus_custom_encoder_destroy(st);
-      st = NULL;
-   }
-   if (error)
-      *error = ret;
-   return st;
-}
-#endif /* CUSTOM_MODES */
-int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels)
-{
-   int ret;
-   ret = opus_custom_encoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
-   if (ret != OPUS_OK)
-      return ret;
-   st->upsample = resampling_factor(sampling_rate);
-   return OPUS_OK;
-}
-OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
-{
-   if (channels < 0 || channels > 2)
-      return OPUS_BAD_ARG;
-   if (st==NULL || mode==NULL)
-      return OPUS_ALLOC_FAIL;
-   OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
-   st->mode = mode;
-   st->overlap = mode->overlap;
-   st->stream_channels = st->channels = channels;
-   st->upsample = 1;
-   st->start = 0;
-   st->end = st->mode->effEBands;
-   st->signalling = 1;
-   st->constrained_vbr = 1;
-   st->clip = 1;
-   st->bitrate = OPUS_BITRATE_MAX;
-   st->vbr = 0;
-   st->force_intra  = 0;
-   st->complexity = 5;
-   st->lsb_depth=24;
-   opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
-   return OPUS_OK;
-}
-#ifdef CUSTOM_MODES
-void opus_custom_encoder_destroy(CELTEncoder *st)
-{
-   opus_free(st);
-}
-#endif /* CUSTOM_MODES */
-#endif
-static inline opus_val16 SIG2WORD16(celt_sig x)
-{
-#ifdef FIXED_POINT
-   x = PSHR32(x, SIG_SHIFT);
-   x = MAX32(x, -32768);
-   x = MIN32(x, 32767);
-   return EXTRACT16(x);
-#else
-   return (opus_val16)x;
-#endif
-}
-#if 0
-static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
-                              int overlap)
 {
+   opus_val32 x0, x1, x2, x3, x4;
   int i;
-   VARDECL(opus_val16, tmp);
+   x4 = x[-T-2];
-   opus_val32 mem0=0,mem1=0;
+   x3 = x[-T-1];
-   int is_transient = 0;
+   x2 = x[-T];
-   int block;
+   x1 = x[-T+1];
-   int N;
-   VARDECL(opus_val16, bins);
-   SAVE_STACK;
-   ALLOC(tmp, len, opus_val16);
-   block = overlap/2;
-   N=len/block;
-   ALLOC(bins, N, opus_val16);
-   if (C==1)
-   {
-      for (i=0;i<len;i++)
-         tmp[i] = SHR32(in[i],SIG_SHIFT);
-   } else {
-      for (i=0;i<len;i++)
-         tmp[i] = SHR32(ADD32(in[i],in[i+len]), SIG_SHIFT+1);
-   }
-   /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
-   for (i=0;i<len;i++)
-   {
-      opus_val32 x,y;
-      x = tmp[i];
-      y = ADD32(mem0, x);
-#ifdef FIXED_POINT
-      mem0 = mem1 + y - SHL32(x,1);
-      mem1 = x - SHR32(y,1);
-#else
-      mem0 = mem1 + y - 2*x;
-      mem1 = x - .5f*y;
-#endif
-      tmp[i] = EXTRACT16(SHR32(y,2));
-   }
-   /* First few samples are bad because we don't propagate the memory */
-   for (i=0;i<12;i++)
-      tmp[i] = 0;
   for (i=0;i<N;i++)
   {
-      int j;
+      x0=x[i-T+2];
-      opus_val16 max_abs=0;
+      y[i] = x[i]
-      for (j=0;j<block;j++)
+               + MULT16_32_Q15(g10,x2)
-         max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
+               + MULT16_32_Q15(g11,ADD32(x1,x3))
-      bins[i] = max_abs;
+               + MULT16_32_Q15(g12,ADD32(x0,x4));
-   }
+      x4=x3;
-   for (i=0;i<N;i++)
+      x3=x2;
-   {
+      x2=x1;
-      int j;
+      x1=x0;
-      int conseq=0;
-      opus_val16 t1, t2, t3;
-      t1 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
-      t2 = MULT16_16_Q15(QCONST16(.4f, 15), bins[i]);
-      t3 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
-      for (j=0;j<i;j++)
-      {
-         if (bins[j] < t1)
-            conseq++;
-         if (bins[j] < t2)
-            conseq++;
-         else
-            conseq = 0;
-      }
-      if (conseq>=3)
-         is_transient=1;
-      conseq = 0;
-      for (j=i+1;j<N;j++)
-      {
-         if (bins[j] < t3)
-            conseq++;
-         else
-            conseq = 0;
-      }
-      if (conseq>=7)
-         is_transient=1;
-   }
-   RESTORE_STACK;
-#ifdef FUZZING
-   is_transient = rand()&0x1;
-#endif
-   return is_transient;
-}
-/** Apply window and compute the MDCT for all sub-frames and
-    all channels in a frame */
-static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, celt_sig * OPUS_RESTRICT out, int C, int LM)
-{
-   if (C==1 && !shortBlocks)
-   {
-      const int overlap = OVERLAP(mode);
-      clt_mdct_forward(&mode->mdct, in, out, mode->window, overlap, mode->maxLM-LM, 1);
-   } else {
-      const int overlap = OVERLAP(mode);
-      int N = mode->shortMdctSize<<LM;
-      int B = 1;
-      int b, c;
-      if (shortBlocks)
-      {
-         N = mode->shortMdctSize;
-         B = shortBlocks;
-      }
-      c=0; do {
-         for (b=0;b<B;b++)
-         {
-            /* Interleaving the sub-frames while doing the MDCTs */
-            clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM, B);
-         }
-      } while (++c<C);
   }
-}
-#endif
-/** Compute the IMDCT and apply window for all sub-frames and
-    all channels in a frame */
-static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
-      celt_sig * OPUS_RESTRICT out_mem[],
-      celt_sig * OPUS_RESTRICT overlap_mem[], int C, int LM)
-{
-   int c;
-   const int N = mode->shortMdctSize<<LM;
-   const int overlap = OVERLAP(mode);
-   VARDECL(opus_val32, x);
-   SAVE_STACK;
-   ALLOC(x, N+overlap, opus_val32);
-   c=0; do {
-      int j;
-      int b;
-      int N2 = N;
-      int B = 1;
-      if (shortBlocks)
-      {
-         N2 = mode->shortMdctSize;
-         B = shortBlocks;
-      }
-      /* Prevents problems from the imdct doing the overlap-add */
-      OPUS_CLEAR(x, overlap);
-      for (b=0;b<B;b++)
-      {
-         /* IMDCT on the interleaved the sub-frames */
-         clt_mdct_backward(&mode->mdct, &X[b+c*N2*B], x+N2*b, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM, B);
-      }
-      /* overlap can be divided by 4 */
-      for (j=0;j<overlap;j+=4)
-      {
-         out_mem[c][j  ] = x[j  ] + overlap_mem[c][j  ];
-         out_mem[c][j+1] = x[j+1] + overlap_mem[c][j+1];
-         out_mem[c][j+2] = x[j+2] + overlap_mem[c][j+2];
-         out_mem[c][j+3] = x[j+3] + overlap_mem[c][j+3];
-      }
-      OPUS_COPY(out_mem[c]+overlap, x+overlap, N-overlap);
-      OPUS_COPY(overlap_mem[c]    , x+N      ,   overlap);
-   } while (++c<C);
-   RESTORE_STACK;
 }
-static void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, /* int downsample,*/ const opus_val16 *coef, celt_sig *mem)
-{
-   int c;
-/*   int count=0;*/
-   c=0; do {
-      int j;
-      celt_sig * OPUS_RESTRICT x;
-      opus_val16  * OPUS_RESTRICT y;
-      opus_val16 coef0 = coef[0];
-#ifdef CUSTOM_MODES
-      opus_val16 coef1 = coef[1];
-      opus_val16 coef3 = coef[3];
 #endif
-      celt_sig m = mem[c];
-      x =in[c];
-      y = pcm+c;
-      for (j=0;j<N;j++)
-      {
-         celt_sig tmp = *x + m;
-         m = MULT16_32_Q15(coef0, tmp);
-#ifdef CUSTOM_MODES
-         m -= MULT16_32_Q15(coef1, *x);
-         tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2);
-#endif
-         x++;
-         /* Technically the store could be moved outside of the if because
-            the stores we don't want will just be overwritten */
-         /* ROCKBOX: we don't downsample
-         if (count==0) */
-            *y = SCALEOUT(SIG2WORD16(tmp));
-         /* if (++count==downsample) */
-         {
-            y+=C;
-         /*   count=0; */
-         }
-      }
-      mem[c] = m;
-   } while (++c<C);
-}
-static void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
      const opus_val16 *window, int overlap)
 {
-   /* Multiply-adds are only needed if g0 or g1 are non-zero. In all other cases a simple
+   int i;
-    * copy of vector x to y is possible. */
+   /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
-   if (g0!=0 || g1!=0)
+   opus_val16 g00, g01, g02, g10, g11, g12;
-   {
+   opus_val32 x0, x1, x2, x3, x4;
-      int i;
+   static const opus_val16 gains[3][3] = {
-      opus_val16 g00, g01, g02, g10, g11, g12, idx0, idx1;
-      static const opus_val16 gains[3][3] = {
         {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
         {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
         {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
-      g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
-      g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
-      g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
-      g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
-      g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
-      g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
-      /* printf("g0 %d g1 %d\n", g0,g1); */
-      idx0 = -T0;
-      idx1 = -T1;
-      for (i=0;i<overlap;i++,idx0++,idx1++)
-      {
-         opus_val16 f0, f1;
-         f1 = MULT16_16_Q15(window[i],window[i]);
-         f0 = Q15ONE - f1;
-         y[i] = x[i]
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g02), x[idx0-2])
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g01), x[idx0-1])
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g00), x[idx0  ])
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g01), x[idx0+1])
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g02), x[idx0+2])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g12), x[idx1-2])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g11), x[idx1-1])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g10), x[idx1  ])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g11), x[idx1+1])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g12), x[idx1+2]);
-      }
-      /* No multiply-add required if g1=0 as all multiplicants are =0. */
-      if (g1!=0)
-      {
-         idx1 = overlap-T1;
-         for (i=overlap;i<N;i++,idx1++)
-         {
-            y[i] = x[i]
-                  + MULT16_32_Q15(g12, x[idx1-2])
-                  + MULT16_32_Q15(g11, x[idx1-1])
-                  + MULT16_32_Q15(g10, x[idx1  ])
-                  + MULT16_32_Q15(g11, x[idx1+1])
-                  + MULT16_32_Q15(g12, x[idx1+2]);
-         }
-      }
-      /* Only perform vector copy if source and destination are not same. */
-      else if (x != y)
-      {
-         /* Copy part of vector from x[overlap..N] to y[overlap..N] */
-         OPUS_COPY(y+overlap, x+overlap, N-overlap);
-      }
-   }
-   /* Only perform vector copy if source and destination are not same. */
-   else if (x != y)
-   {
-      /* Copy full vector from x[0..N] to y[0..N] */
-      OPUS_COPY(y, x, N);
-   }
-}
-static const signed char tf_select_table[4][8] = {
+   if (g0==0 && g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y, x, N);
+      return;
+   }
+   g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
+   g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
+   g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
+   g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
+   g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
+   g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
+   x1 = x[-T1+1];
+   x2 = x[-T1  ];
+   x3 = x[-T1-1];
+   x4 = x[-T1-2];
+   for (i=0;i<overlap;i++)
+   {
+      opus_val16 f;
+      x0=x[i-T1+2];
+      f = MULT16_16_Q15(window[i],window[i]);
+      y[i] = x[i]
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+               + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+   if (g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y+overlap, x+overlap, N-overlap);
+      return;
+   }
+   /* Compute the part with the constant filter. */
+   comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
+}
+const signed char tf_select_table[4][8] = {
      {0, -1, 0, -1,    0,-1, 0,-1},
      {0, -1, 0, -2,    1, 0, 1,-1},
      {0, -2, 0, -3,    2, 0, 1,-1},
      {0, -2, 0, -3,    3, 0, 1,-1},
 };
-#if 0
-static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, int width)
-{
-   int i, j;
-   static const opus_val16 sqrtM_1[4] = {Q15ONE, QCONST16(.70710678f,15), QCONST16(0.5f,15), QCONST16(0.35355339f,15)};
-   opus_val32 L1;
-   opus_val16 bias;
-   L1=0;
-   for (i=0;i<1<<LM;i++)
-   {
-      opus_val32 L2 = 0;
-      for (j=0;j<N>>LM;j++)
-         L2 = MAC16_16(L2, tmp[(j<<LM)+i], tmp[(j<<LM)+i]);
-      L1 += celt_sqrt(L2);
-   }
-   L1 = MULT16_32_Q15(sqrtM_1[LM], L1);
-   if (width==1)
-      bias = QCONST16(.12f,15)*LM;
-   else if (width==2)
-      bias = QCONST16(.05f,15)*LM;
-   else
-      bias = QCONST16(.02f,15)*LM;
-   L1 = MAC16_32_Q15(L1, bias, L1);
-   return L1;
-}
-static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
+void init_caps(const CELTMode *m,int *cap,int LM,int C)
-      int *tf_res, int nbCompressedBytes, celt_norm *X, int N0, int LM,
-      int *tf_sum)
-{
-   int i;
-   VARDECL(int, metric);
-   int cost0;
-   int cost1;
-   VARDECL(int, path0);
-   VARDECL(int, path1);
-   VARDECL(celt_norm, tmp);
-   int lambda;
-   int tf_select=0;
-   SAVE_STACK;
-   if (nbCompressedBytes<15*C)
-   {
-      *tf_sum = 0;
-      for (i=0;i<len;i++)
-         tf_res[i] = isTransient;
-      return 0;
-   }
-   if (nbCompressedBytes<40)
-      lambda = 12;
-   else if (nbCompressedBytes<60)
-      lambda = 6;
-   else if (nbCompressedBytes<100)
-      lambda = 4;
-   else
-      lambda = 3;
-   ALLOC(metric, len, int);
-   ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
-   ALLOC(path0, len, int);
-   ALLOC(path1, len, int);
-   *tf_sum = 0;
-   for (i=0;i<len;i++)
-   {
-      int j, k, N;
-      opus_val32 L1, best_L1;
-      int best_level=0;
-      N = (m->eBands[i+1]-m->eBands[i])<<LM;
-      for (j=0;j<N;j++)
-         tmp[j] = X[j+(m->eBands[i]<<LM)];
-      /* Just add the right channel if we're in stereo */
-      if (C==2)
-         for (j=0;j<N;j++)
-            tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));
-      L1 = l1_metric(tmp, N, isTransient ? LM : 0, N>>LM);
-      best_L1 = L1;
-      /*printf ("%f ", L1);*/
-      for (k=0;k<LM;k++)
-      {
-         int B;
-         if (isTransient)
-            B = (LM-k-1);
-         else
-            B = k+1;
-         if (isTransient)
-            haar1(tmp, N>>(LM-k), 1<<(LM-k));
-         else
-            haar1(tmp, N>>k, 1<<k);
-         L1 = l1_metric(tmp, N, B, N>>LM);
-         if (L1 < best_L1)
-         {
-            best_L1 = L1;
-            best_level = k+1;
-         }
-      }
-      /*printf ("%d ", isTransient ? LM-best_level : best_level);*/
-      if (isTransient)
-         metric[i] = best_level;
-      else
-         metric[i] = -best_level;
-      *tf_sum += metric[i];
-   }
-   /*printf("\n");*/
-   /* NOTE: Future optimized implementations could detect extreme transients and set
-      tf_select = 1 but so far we have not found a reliable way of making this useful */
-   tf_select = 0;
-   cost0 = 0;
-   cost1 = isTransient ? 0 : lambda;
-   /* Viterbi forward pass */
-   for (i=1;i<len;i++)
-   {
-      int curr0, curr1;
-      int from0, from1;
-      from0 = cost0;
-      from1 = cost1 + lambda;
-      if (from0 < from1)
-      {
-         curr0 = from0;
-         path0[i]= 0;
-      } else {
-         curr0 = from1;
-         path0[i]= 1;
-      }
-      from0 = cost0 + lambda;
-      from1 = cost1;
-      if (from0 < from1)
-      {
-         curr1 = from0;
-         path1[i]= 0;
-      } else {
-         curr1 = from1;
-         path1[i]= 1;
-      }
-      cost0 = curr0 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+0]);
-      cost1 = curr1 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+1]);
-   }
-   tf_res[len-1] = cost0 < cost1 ? 0 : 1;
-   /* Viterbi backward pass to check the decisions */
-   for (i=len-2;i>=0;i--)
-   {
-      if (tf_res[i+1] == 1)
-         tf_res[i] = path1[i+1];
-      else
-         tf_res[i] = path0[i+1];
-   }
-   RESTORE_STACK;
-#ifdef FUZZING
-   tf_select = rand()&0x1;
-   tf_res[0] = rand()&0x1;
-   for (i=1;i<len;i++)
-      tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0);
-#endif
-   return tf_select;
-}
-static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
-{
-   int curr, i;
-   int tf_select_rsv;
-   int tf_changed;
-   int logp;
-   opus_uint32 budget;
-   opus_uint32 tell;
-   budget = enc->storage*8;
-   tell = ec_tell(enc);
-   logp = isTransient ? 2 : 4;
-   /* Reserve space to code the tf_select decision. */
-   tf_select_rsv = LM>0 && tell+logp+1 <= budget;
-   budget -= tf_select_rsv;
-   curr = tf_changed = 0;
-   for (i=start;i<end;i++)
-   {
-      if (tell+logp<=budget)
-      {
-         ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp);
-         tell = ec_tell(enc);
-         curr = tf_res[i];
-         tf_changed |= curr;
-      }
-      else
-         tf_res[i] = curr;
-      logp = isTransient ? 4 : 5;
-   }
-   /* Only code tf_select if it would actually make a difference. */
-   if (tf_select_rsv &&
-         tf_select_table[LM][4*isTransient+0+tf_changed]!=
-         tf_select_table[LM][4*isTransient+2+tf_changed])
-      ec_enc_bit_logp(enc, tf_select, 1);
-   else
-      tf_select = 0;
-   for (i=start;i<end;i++)
-      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
-   /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/
-}
-#endif
-static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
-{
-   int i, curr, tf_select;
-   int tf_select_rsv;
-   int tf_changed;
-   int logp;
-   opus_uint32 budget;
-   opus_uint32 tell;
-   budget = dec->storage*8;
-   tell = ec_tell(dec);
-   logp = isTransient ? 2 : 4;
-   tf_select_rsv = LM>0 && tell+logp+1<=budget;
-   budget -= tf_select_rsv;
-   tf_changed = curr = 0;
-   for (i=start;i<end;i++)
-   {
-      if (tell+logp<=budget)
-      {
-         curr ^= ec_dec_bit_logp(dec, logp);
-         tell = ec_tell(dec);
-         tf_changed |= curr;
-      }
-      tf_res[i] = curr;
-      logp = isTransient ? 4 : 5;
-   }
-   tf_select = 0;
-   if (tf_select_rsv &&
-     tf_select_table[LM][4*isTransient+0+tf_changed] !=
-     tf_select_table[LM][4*isTransient+2+tf_changed])
-   {
-      tf_select = ec_dec_bit_logp(dec, 1);
-   }
-   for (i=start;i<end;i++)
-   {
-      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
-   }
-}
-static void init_caps(const CELTMode *m,int *cap,int LM,int C)
 {
   int i;
   for (i=0;i<m->nbEBands;i++)
@@ -832,2070 +190,6 @@ static void init_caps(const CELTMode *m,int *cap,int LM,int C)
   }
 }
-#if 0
-static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
-      const opus_val16 *bandLogE, int end, int LM, int C, int N0)
-{
-   int i;
-   opus_val32 diff=0;
-   int c;
-   int trim_index = 5;
-   if (C==2)
-   {
-      opus_val16 sum = 0; /* Q10 */
-      /* Compute inter-channel correlation for low frequencies */
-      for (i=0;i<8;i++)
-      {
-         int j;
-         opus_val32 partial = 0;
-         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
-            partial = MAC16_16(partial, X[j], X[N0+j]);
-         sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
-      }
-      sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
-      /*printf ("%f\n", sum);*/
-      if (sum > QCONST16(.995f,10))
-         trim_index-=4;
-      else if (sum > QCONST16(.92f,10))
-         trim_index-=3;
-      else if (sum > QCONST16(.85f,10))
-         trim_index-=2;
-      else if (sum > QCONST16(.8f,10))
-         trim_index-=1;
-   }
-   /* Estimate spectral tilt */
-   c=0; do {
-      for (i=0;i<end-1;i++)
-      {
-         diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-m->nbEBands);
-      }
-   } while (++c<C);
-   /* We divide by two here to avoid making the tilt larger for stereo as a
-      result of a bug in the loop above */
-   diff /= 2*C*(end-1);
-   /*printf("%f\n", diff);*/
-   if (diff > QCONST16(2.f, DB_SHIFT))
-      trim_index--;
-   if (diff > QCONST16(8.f, DB_SHIFT))
-      trim_index--;
-   if (diff < -QCONST16(4.f, DB_SHIFT))
-      trim_index++;
-   if (diff < -QCONST16(10.f, DB_SHIFT))
-      trim_index++;
-   if (trim_index<0)
-      trim_index = 0;
-   if (trim_index>10)
-      trim_index = 10;
-#ifdef FUZZING
-   trim_index = rand()%11;
-#endif
-   return trim_index;
-}
-static int stereo_analysis(const CELTMode *m, const celt_norm *X,
-      int LM, int N0)
-{
-   int i;
-   int thetas;
-   opus_val32 sumLR = EPSILON, sumMS = EPSILON;
-   /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */
-   for (i=0;i<13;i++)
-   {
-      int j;
-      for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
-      {
-         opus_val32 L, R, M, S;
-         /* We cast to 32-bit first because of the -32768 case */
-         L = EXTEND32(X[j]);
-         R = EXTEND32(X[N0+j]);
-         M = ADD32(L, R);
-         S = SUB32(L, R);
-         sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R)));
-         sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S)));
-      }
-   }
-   sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS);
-   thetas = 13;
-   /* We don't need thetas for lower bands with LM<=1 */
-   if (LM<=1)
-      thetas -= 8;
-   return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS)
-         > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
-}
-int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
-{
-   int i, c, N;
-   opus_int32 bits;
-   ec_enc _enc;
-   VARDECL(celt_sig, in);
-   VARDECL(celt_sig, freq);
-   VARDECL(celt_norm, X);
-   VARDECL(celt_ener, bandE);
-   VARDECL(opus_val16, bandLogE);
-   VARDECL(int, fine_quant);
-   VARDECL(opus_val16, error);
-   VARDECL(int, pulses);
-   VARDECL(int, cap);
-   VARDECL(int, offsets);
-   VARDECL(int, fine_priority);
-   VARDECL(int, tf_res);
-   VARDECL(unsigned char, collapse_masks);
-   celt_sig *prefilter_mem;
-   opus_val16 *oldBandE, *oldLogE, *oldLogE2;
-   int shortBlocks=0;
-   int isTransient=0;
-   const int CC = st->channels;
-   const int C = st->stream_channels;
-   int LM, M;
-   int tf_select;
-   int nbFilledBytes, nbAvailableBytes;
-   int effEnd;
-   int codedBands;
-   int tf_sum;
-   int alloc_trim;
-   int pitch_index=COMBFILTER_MINPERIOD;
-   opus_val16 gain1 = 0;
-   int intensity=0;
-   int dual_stereo=0;
-   int effectiveBytes;
-   opus_val16 pf_threshold;
-   int dynalloc_logp;
-   opus_int32 vbr_rate;
-   opus_int32 total_bits;
-   opus_int32 total_boost;
-   opus_int32 balance;
-   opus_int32 tell;
-   int prefilter_tapset=0;
-   int pf_on;
-   int anti_collapse_rsv;
-   int anti_collapse_on=0;
-   int silence=0;
-   ALLOC_STACK;
-   if (nbCompressedBytes<2 || pcm==NULL)
-     return OPUS_BAD_ARG;
-   frame_size *= st->upsample;
-   for (LM=0;LM<=st->mode->maxLM;LM++)
-      if (st->mode->shortMdctSize<<LM==frame_size)
-         break;
-   if (LM>st->mode->maxLM)
-      return OPUS_BAD_ARG;
-   M=1<<LM;
-   N = M*st->mode->shortMdctSize;
-   prefilter_mem = st->in_mem+CC*(st->overlap);
-   oldBandE = (opus_val16*)(st->in_mem+CC*(2*st->overlap+COMBFILTER_MAXPERIOD));
-   oldLogE = oldBandE + CC*st->mode->nbEBands;
-   oldLogE2 = oldLogE + CC*st->mode->nbEBands;
-   if (enc==NULL)
-   {
-      tell=1;
-      nbFilledBytes=0;
-   } else {
-      tell=ec_tell(enc);
-      nbFilledBytes=(tell+4)>>3;
-   }
-#ifdef CUSTOM_MODES
-   if (st->signalling && enc==NULL)
-   {
-      int tmp = (st->mode->effEBands-st->end)>>1;
-      st->end = IMAX(1, st->mode->effEBands-tmp);
-      compressed[0] = tmp<<5;
-      compressed[0] |= LM<<3;
-      compressed[0] |= (C==2)<<2;
-      /* Convert "standard mode" to Opus header */
-      if (st->mode->Fs==48000 && st->mode->shortMdctSize==120)
-      {
-         int c0 = toOpus(compressed[0]);
-         if (c0<0)
-            return OPUS_BAD_ARG;
-         compressed[0] = c0;
-      }
-      compressed++;
-      nbCompressedBytes--;
-   }
-#else
-   celt_assert(st->signalling==0);
-#endif
-   /* Can't produce more than 1275 output bytes */
-   nbCompressedBytes = IMIN(nbCompressedBytes,1275);
-   nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
-   if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX)
-   {
-      opus_int32 den=st->mode->Fs>>BITRES;
-      vbr_rate=(st->bitrate*frame_size+(den>>1))/den;
-#ifdef CUSTOM_MODES
-      if (st->signalling)
-         vbr_rate -= 8<<BITRES;
-#endif
-      effectiveBytes = vbr_rate>>(3+BITRES);
-   } else {
-      opus_int32 tmp;
-      vbr_rate = 0;
-      tmp = st->bitrate*frame_size;
-      if (tell>1)
-         tmp += tell;
-      if (st->bitrate!=OPUS_BITRATE_MAX)
-         nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
-               (tmp+4*st->mode->Fs)/(8*st->mode->Fs)-!!st->signalling));
-      effectiveBytes = nbCompressedBytes;
-   }
-   if (enc==NULL)
-   {
-      ec_enc_init(&_enc, compressed, nbCompressedBytes);
-      enc = &_enc;
-   }
-   if (vbr_rate>0)
-   {
-      /* Computes the max bit-rate allowed in VBR mode to avoid violating the
-          target rate and buffering.
-         We must do this up front so that bust-prevention logic triggers
-          correctly if we don't have enough bits. */
-      if (st->constrained_vbr)
-      {
-         opus_int32 vbr_bound;
-         opus_int32 max_allowed;
-         /* We could use any multiple of vbr_rate as bound (depending on the
-             delay).
-            This is clamped to ensure we use at least two bytes if the encoder
-             was entirely empty, but to allow 0 in hybrid mode. */
-         vbr_bound = vbr_rate;
-         max_allowed = IMIN(IMAX(tell==1?2:0,
-               (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)),
-               nbAvailableBytes);
-         if(max_allowed < nbAvailableBytes)
-         {
-            nbCompressedBytes = nbFilledBytes+max_allowed;
-            nbAvailableBytes = max_allowed;
-            ec_enc_shrink(enc, nbCompressedBytes);
-         }
-      }
-   }
-   total_bits = nbCompressedBytes*8;
-   effEnd = st->end;
-   if (effEnd > st->mode->effEBands)
-      effEnd = st->mode->effEBands;
-   ALLOC(in, CC*(N+st->overlap), celt_sig);
-   /* Find pitch period and gain */
-   {
-      VARDECL(celt_sig, _pre);
-      celt_sig *pre[2];
-      SAVE_STACK;
-      ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
-      pre[0] = _pre;
-      pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
-      silence = 1;
-      c=0; do {
-         int count = 0;
-         const opus_val16 * OPUS_RESTRICT pcmp = pcm+c;
-         celt_sig * OPUS_RESTRICT inp = in+c*(N+st->overlap)+st->overlap;
-         for (i=0;i<N;i++)
-         {
-            celt_sig x, tmp;
-            x = SCALEIN(*pcmp);
-#ifndef FIXED_POINT
-            if (!(x==x))
-               x = 0;
-            if (st->clip)
-               x = MAX32(-65536.f, MIN32(65536.f,x));
-#endif
-            if (++count==st->upsample)
-            {
-               count=0;
-               pcmp+=CC;
-            } else {
-               x = 0;
-            }
-            /* Apply pre-emphasis */
-            tmp = MULT16_16(st->mode->preemph[2], x);
-            *inp = tmp + st->preemph_memE[c];
-            st->preemph_memE[c] = MULT16_32_Q15(st->mode->preemph[1], *inp)
-                                   - MULT16_32_Q15(st->mode->preemph[0], tmp);
-            silence = silence && *inp == 0;
-            inp++;
-         }
-         OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
-         OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
-      } while (++c<CC);
-#ifdef FUZZING
-      if ((rand()&0x3F)==0)
-         silence = 1;
-#endif
-      if (tell==1)
-         ec_enc_bit_logp(enc, silence, 15);
-      else
-         silence=0;
-      if (silence)
-      {
-         /*In VBR mode there is no need to send more than the minimum. */
-         if (vbr_rate>0)
-         {
-            effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2);
-            total_bits=nbCompressedBytes*8;
-            nbAvailableBytes=2;
-            ec_enc_shrink(enc, nbCompressedBytes);
-         }
-         /* Pretend we've filled all the remaining bits with zeros
-            (that's what the initialiser did anyway) */
-         tell = nbCompressedBytes*8;
-         enc->nbits_total+=tell-ec_tell(enc);
-      }
-      if (nbAvailableBytes>12*C && st->start==0 && !silence && !st->disable_pf && st->complexity >= 5)
-      {
-         VARDECL(opus_val16, pitch_buf);
-         ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
-         pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
-         pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
-               COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
-         pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
-         gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
-               N, &pitch_index, st->prefilter_period, st->prefilter_gain);
-         if (pitch_index > COMBFILTER_MAXPERIOD-2)
-            pitch_index = COMBFILTER_MAXPERIOD-2;
-         gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
-         if (st->loss_rate>2)
-            gain1 = HALF32(gain1);
-         if (st->loss_rate>4)
-            gain1 = HALF32(gain1);
-         if (st->loss_rate>8)
-            gain1 = 0;
-         prefilter_tapset = st->tapset_decision;
-      } else {
-         gain1 = 0;
-      }
-      /* Gain threshold for enabling the prefilter/postfilter */
-      pf_threshold = QCONST16(.2f,15);
-      /* Adjusting the threshold based on rate and continuity */
-      if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
-         pf_threshold += QCONST16(.2f,15);
-      if (nbAvailableBytes<25)
-         pf_threshold += QCONST16(.1f,15);
-      if (nbAvailableBytes<35)
-         pf_threshold += QCONST16(.1f,15);
-      if (st->prefilter_gain > QCONST16(.4f,15))
-         pf_threshold -= QCONST16(.1f,15);
-      if (st->prefilter_gain > QCONST16(.55f,15))
-         pf_threshold -= QCONST16(.1f,15);
-      /* Hard threshold at 0.2 */
-      pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
-      if (gain1<pf_threshold)
-      {
-         if(st->start==0 && tell+16<=total_bits)
-            ec_enc_bit_logp(enc, 0, 1);
-         gain1 = 0;
-         pf_on = 0;
-      } else {
-         /*This block is not gated by a total bits check only because
-           of the nbAvailableBytes check above.*/
-         int qg;
-         int octave;
-         if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15))
-            gain1=st->prefilter_gain;
-#ifdef FIXED_POINT
-         qg = ((gain1+1536)>>10)/3-1;
-#else
-         qg = (int)floor(.5f+gain1*32/3)-1;
-#endif
-         qg = IMAX(0, IMIN(7, qg));
-         ec_enc_bit_logp(enc, 1, 1);
-         pitch_index += 1;
-         octave = EC_ILOG(pitch_index)-5;
-         ec_enc_uint(enc, octave, 6);
-         ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
-         pitch_index -= 1;
-         ec_enc_bits(enc, qg, 3);
-         if (ec_tell(enc)+2<=total_bits)
-            ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2);
-         else
-           prefilter_tapset = 0;
-         gain1 = QCONST16(0.09375f,15)*(qg+1);
-         pf_on = 1;
-      }
-      /*printf("%d %f\n", pitch_index, gain1);*/
-      c=0; do {
-         int offset = st->mode->shortMdctSize-st->mode->overlap;
-         st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
-         OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
-         if (offset)
-            comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
-                  st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
-                  st->prefilter_tapset, st->prefilter_tapset, NULL, 0);
-         comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
-               st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
-               st->prefilter_tapset, prefilter_tapset, st->mode->window, st->mode->overlap);
-         OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
-         if (N>COMBFILTER_MAXPERIOD)
-         {
-            OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
-         } else {
-            OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
-            OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
-         }
-      } while (++c<CC);
-      RESTORE_STACK;
-   }
-   isTransient = 0;
-   shortBlocks = 0;
-   if (LM>0 && ec_tell(enc)+3<=total_bits)
-   {
-      if (st->complexity > 1)
-      {
-         isTransient = transient_analysis(in, N+st->overlap, CC,
-                  st->overlap);
-         if (isTransient)
-            shortBlocks = M;
-      }
-      ec_enc_bit_logp(enc, isTransient, 3);
-   }
-   ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
-   ALLOC(bandE,st->mode->nbEBands*CC, celt_ener);
-   ALLOC(bandLogE,st->mode->nbEBands*CC, opus_val16);
-   /* Compute MDCTs */
-   compute_mdcts(st->mode, shortBlocks, in, freq, CC, LM);
-   if (CC==2&&C==1)
-   {
-      for (i=0;i<N;i++)
-         freq[i] = ADD32(HALF32(freq[i]), HALF32(freq[N+i]));
-   }
-   if (st->upsample != 1)
-   {
-      c=0; do
-      {
-         int bound = N/st->upsample;
-         for (i=0;i<bound;i++)
-            freq[c*N+i] *= st->upsample;
-         for (;i<N;i++)
-            freq[c*N+i] = 0;
-      } while (++c<C);
-   }
-   ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
-   compute_band_energies(st->mode, freq, bandE, effEnd, C, M);
-   amp2Log2(st->mode, effEnd, st->end, bandE, bandLogE, C);
-   /* Band normalisation */
-   normalise_bands(st->mode, freq, X, bandE, effEnd, C, M);
-   ALLOC(tf_res, st->mode->nbEBands, int);
-   tf_select = tf_analysis(st->mode, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum);
-   for (i=effEnd;i<st->end;i++)
-      tf_res[i] = tf_res[effEnd-1];
-   ALLOC(error, C*st->mode->nbEBands, opus_val16);
-   quant_coarse_energy(st->mode, st->start, st->end, effEnd, bandLogE,
-         oldBandE, total_bits, error, enc,
-         C, LM, nbAvailableBytes, st->force_intra,
-         &st->delayedIntra, st->complexity >= 4, st->loss_rate);
-   tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
-   st->spread_decision = SPREAD_NORMAL;
-   if (ec_tell(enc)+4<=total_bits)
-   {
-      if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
-      {
-         if (st->complexity == 0)
-            st->spread_decision = SPREAD_NONE;
-      } else {
-         st->spread_decision = spreading_decision(st->mode, X,
-               &st->tonal_average, st->spread_decision, &st->hf_average,
-               &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
-      }
-      ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
-   }
-   ALLOC(cap, st->mode->nbEBands, int);
-   ALLOC(offsets, st->mode->nbEBands, int);
-   init_caps(st->mode,cap,LM,C);
-   for (i=0;i<st->mode->nbEBands;i++)
-      offsets[i] = 0;
-   /* Dynamic allocation code */
-   /* Make sure that dynamic allocation can't make us bust the budget */
-   if (effectiveBytes > 50 && LM>=1)
-   {
-      int t1, t2;
-      if (LM <= 1)
-      {
-         t1 = 3;
-         t2 = 5;
-      } else {
-         t1 = 2;
-         t2 = 4;
-      }
-      for (i=st->start+1;i<st->end-1;i++)
-      {
-         opus_val32 d2;
-         d2 = 2*bandLogE[i]-bandLogE[i-1]-bandLogE[i+1];
-         if (C==2)
-            d2 = HALF32(d2 + 2*bandLogE[i+st->mode->nbEBands]-
-                  bandLogE[i-1+st->mode->nbEBands]-bandLogE[i+1+st->mode->nbEBands]);
-#ifdef FUZZING
-         if((rand()&0xF)==0)
-         {
-            offsets[i] += 1;
-            if((rand()&0x3)==0)
-               offsets[i] += 1+(rand()&0x3);
-         }
-#else
-         if (d2 > SHL16(t1,DB_SHIFT))
-            offsets[i] += 1;
-         if (d2 > SHL16(t2,DB_SHIFT))
-            offsets[i] += 1;
-#endif
-      }
-   }
-   dynalloc_logp = 6;
-   total_bits<<=BITRES;
-   total_boost = 0;
-   tell = ec_tell_frac(enc);
-   for (i=st->start;i<st->end;i++)
-   {
-      int width, quanta;
-      int dynalloc_loop_logp;
-      int boost;
-      int j;
-      width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
-      /* quanta is 6 bits, but no more than 1 bit/sample
-         and no less than 1/8 bit/sample */
-      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
-      dynalloc_loop_logp = dynalloc_logp;
-      boost = 0;
-      for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost
-            && boost < cap[i]; j++)
-      {
-         int flag;
-         flag = j<offsets[i];
-         ec_enc_bit_logp(enc, flag, dynalloc_loop_logp);
-         tell = ec_tell_frac(enc);
-         if (!flag)
-            break;
-         boost += quanta;
-         total_boost += quanta;
-         dynalloc_loop_logp = 1;
-      }
-      /* Making dynalloc more likely */
-      if (j)
-         dynalloc_logp = IMAX(2, dynalloc_logp-1);
-      offsets[i] = boost;
-   }
-   alloc_trim = 5;
-   if (tell+(6<<BITRES) <= total_bits - total_boost)
-   {
-      alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
-            st->end, LM, C, N);
-      ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
-      tell = ec_tell_frac(enc);
-   }
-   /* Variable bitrate */
-   if (vbr_rate>0)
-   {
-     opus_val16 alpha;
-     opus_int32 delta;
-     /* The target rate in 8th bits per frame */
-     opus_int32 target;
-     opus_int32 min_allowed;
-     int lm_diff = st->mode->maxLM - LM;
-     /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
-        The CELT allocator will just not be able to use more than that anyway. */
-     nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
-     target = vbr_rate + (st->vbr_offset>>lm_diff) - ((40*C+20)<<BITRES);
-     /* Shortblocks get a large boost in bitrate, but since they
-        are uncommon long blocks are not greatly affected */
-     if (shortBlocks || tf_sum < -2*(st->end-st->start))
-        target = 7*target/4;
-     else if (tf_sum < -(st->end-st->start))
-        target = 3*target/2;
-     else if (M > 1)
-        target-=(target+14)/28;
-     /* The current offset is removed from the target and the space used
-        so far is added*/
-     target=target+tell;
-     /* In VBR mode the frame size must not be reduced so much that it would
-         result in the encoder running out of bits.
-        The margin of 2 bytes ensures that none of the bust-prevention logic
-         in the decoder will have triggered so far. */
-     min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes;
-     nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
-     nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
-     nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes;
-     /* By how much did we "miss" the target on that frame */
-     delta = target - vbr_rate;
-     target=nbAvailableBytes<<(BITRES+3);
-     /*If the frame is silent we don't adjust our drift, otherwise
-       the encoder will shoot to very high rates after hitting a
-       span of silence, but we do allow the bitres to refill.
-       This means that we'll undershoot our target in CVBR/VBR modes
-       on files with lots of silence. */
-     if(silence)
-     {
-       nbAvailableBytes = 2;
-       target = 2*8<<BITRES;
-       delta = 0;
-     }
-     if (st->vbr_count < 970)
-     {
-        st->vbr_count++;
-        alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
-     } else
-        alpha = QCONST16(.001f,15);
-     /* How many bits have we used in excess of what we're allowed */
-     if (st->constrained_vbr)
-        st->vbr_reservoir += target - vbr_rate;
-     /*printf ("%d\n", st->vbr_reservoir);*/
-     /* Compute the offset we need to apply in order to reach the target */
-     st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
-     st->vbr_offset = -st->vbr_drift;
-     /*printf ("%d\n", st->vbr_drift);*/
-     if (st->constrained_vbr && st->vbr_reservoir < 0)
-     {
-        /* We're under the min value -- increase rate */
-        int adjust = (-st->vbr_reservoir)/(8<<BITRES);
-        /* Unless we're just coding silence */
-        nbAvailableBytes += silence?0:adjust;
-        st->vbr_reservoir = 0;
-        /*printf ("+%d\n", adjust);*/
-     }
-     nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
-     /* This moves the raw bits to take into account the new compressed size */
-     ec_enc_shrink(enc, nbCompressedBytes);
-   }
-   if (C==2)
-   {
-      int effectiveRate;
-      /* Always use MS for 2.5 ms frames until we can do a better analysis */
-      if (LM!=0)
-         dual_stereo = stereo_analysis(st->mode, X, LM, N);
-      /* Account for coarse energy */
-      effectiveRate = (8*effectiveBytes - 80)>>LM;
-      /* effectiveRate in kb/s */
-      effectiveRate = 2*effectiveRate/5;
-      if (effectiveRate<35)
-         intensity = 8;
-      else if (effectiveRate<50)
-         intensity = 12;
-      else if (effectiveRate<68)
-         intensity = 16;
-      else if (effectiveRate<84)
-         intensity = 18;
-      else if (effectiveRate<102)
-         intensity = 19;
-      else if (effectiveRate<130)
-         intensity = 20;
-      else
-         intensity = 100;
-      intensity = IMIN(st->end,IMAX(st->start, intensity));
-   }
-   /* Bit allocation */
-   ALLOC(fine_quant, st->mode->nbEBands, int);
-   ALLOC(pulses, st->mode->nbEBands, int);
-   ALLOC(fine_priority, st->mode->nbEBands, int);
-   /* bits =           packet size                    - where we are - safety*/
-   bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
-   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
-   bits -= anti_collapse_rsv;
-   codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap,
-         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
-         fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands);
-   st->lastCodedBands = codedBands;
-   quant_fine_energy(st->mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
-#ifdef MEASURE_NORM_MSE
-   float X0[3000];
-   float bandE0[60];
-   c=0; do
-      for (i=0;i<N;i++)
-         X0[i+c*N] = X[i+c*N];
-   while (++c<C);
-   for (i=0;i<C*st->mode->nbEBands;i++)
-      bandE0[i] = bandE[i];
-#endif
-   /* Residual quantisation */
-   ALLOC(collapse_masks, C*st->mode->nbEBands, unsigned char);
-   quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
-         bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res,
-         nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
-   if (anti_collapse_rsv > 0)
-   {
-      anti_collapse_on = st->consec_transient<2;
-#ifdef FUZZING
-      anti_collapse_on = rand()&0x1;
-#endif
-      ec_enc_bits(enc, anti_collapse_on, 1);
-   }
-   quant_energy_finalise(st->mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
-   if (silence)
-   {
-      for (i=0;i<C*st->mode->nbEBands;i++)
-         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
-   }
-#ifdef RESYNTH
-   /* Re-synthesis of the coded audio if required */
-   {
-      celt_sig *out_mem[2];
-      celt_sig *overlap_mem[2];
-      log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C);
-      if (silence)
-      {
-         for (i=0;i<C*st->mode->nbEBands;i++)
-            bandE[i] = 0;
-      }
-#ifdef MEASURE_NORM_MSE
-      measure_norm_mse(st->mode, X, X0, bandE, bandE0, M, N, C);
-#endif
-      if (anti_collapse_on)
-      {
-         anti_collapse(st->mode, X, collapse_masks, LM, C, N,
-               st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
-      }
-      /* Synthesis */
-      denormalise_bands(st->mode, X, freq, bandE, effEnd, C, M);
-      OPUS_MOVE(st->syn_mem[0], st->syn_mem[0]+N, MAX_PERIOD);
-      if (CC==2)
-         OPUS_MOVE(st->syn_mem[1], st->syn_mem[1]+N, MAX_PERIOD);
-      c=0; do
-         for (i=0;i<M*st->mode->eBands[st->start];i++)
-            freq[c*N+i] = 0;
-      while (++c<C);
-      c=0; do
-         for (i=M*st->mode->eBands[st->end];i<N;i++)
-            freq[c*N+i] = 0;
-      while (++c<C);
-      if (CC==2&&C==1)
-      {
-         for (i=0;i<N;i++)
-            freq[N+i] = freq[i];
-      }
-      out_mem[0] = st->syn_mem[0]+MAX_PERIOD;
-      if (CC==2)
-         out_mem[1] = st->syn_mem[1]+MAX_PERIOD;
-      overlap_mem[0] = prefilter_mem+CC*COMBFILTER_MAXPERIOD;
-      if (CC==2)
-         overlap_mem[1] = overlap_mem[0] + st->overlap;
-      compute_inv_mdcts(st->mode, shortBlocks, freq, out_mem, overlap_mem, CC, LM);
-      c=0; do {
-         st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
-         st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
-         comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, st->mode->shortMdctSize,
-               st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
-               st->mode->window, st->overlap);
-         if (LM!=0)
-            comb_filter(out_mem[c]+st->mode->shortMdctSize, out_mem[c]+st->mode->shortMdctSize, st->prefilter_period, pitch_index, N-st->mode->shortMdctSize,
-                  st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
-                  st->mode->window, st->mode->overlap);
-      } while (++c<CC);
-      deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, st->mode->preemph, st->preemph_memD);
-      st->prefilter_period_old = st->prefilter_period;
-      st->prefilter_gain_old = st->prefilter_gain;
-      st->prefilter_tapset_old = st->prefilter_tapset;
-   }
-#endif
-   st->prefilter_period = pitch_index;
-   st->prefilter_gain = gain1;
-   st->prefilter_tapset = prefilter_tapset;
-#ifdef RESYNTH
-   if (LM!=0)
-   {
-      st->prefilter_period_old = st->prefilter_period;
-      st->prefilter_gain_old = st->prefilter_gain;
-      st->prefilter_tapset_old = st->prefilter_tapset;
-   }
-#endif
-   if (CC==2&&C==1) {
-      for (i=0;i<st->mode->nbEBands;i++)
-         oldBandE[st->mode->nbEBands+i]=oldBandE[i];
-   }
-   if (!isTransient)
-   {
-      for (i=0;i<CC*st->mode->nbEBands;i++)
-         oldLogE2[i] = oldLogE[i];
-      for (i=0;i<CC*st->mode->nbEBands;i++)
-         oldLogE[i] = oldBandE[i];
-   } else {
-      for (i=0;i<CC*st->mode->nbEBands;i++)
-         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
-   }
-   /* In case start or end were to change */
-   c=0; do
-   {
-      for (i=0;i<st->start;i++)
-      {
-         oldBandE[c*st->mode->nbEBands+i]=0;
-         oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
-      }
-      for (i=st->end;i<st->mode->nbEBands;i++)
-      {
-         oldBandE[c*st->mode->nbEBands+i]=0;
-         oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
-      }
-   } while (++c<CC);
-   if (isTransient)
-      st->consec_transient++;
-   else
-      st->consec_transient=0;
-   st->rng = enc->rng;
-   /* If there's any room left (can only happen for very high rates),
-      it's already filled with zeros */
-   ec_enc_done(enc);
-#ifdef CUSTOM_MODES
-   if (st->signalling)
-      nbCompressedBytes++;
-#endif
-   RESTORE_STACK;
-   if (ec_get_error(enc))
-      return OPUS_INTERNAL_ERROR;
-   else
-      return nbCompressedBytes;
-}
-#ifdef CUSTOM_MODES
-#ifdef FIXED_POINT
-int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
-}
-#ifndef DISABLE_FLOAT_API
-int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   int j, ret, C, N;
-   VARDECL(opus_int16, in);
-   ALLOC_STACK;
-   if (pcm==NULL)
-      return OPUS_BAD_ARG;
-   C = st->channels;
-   N = frame_size;
-   ALLOC(in, C*N, opus_int16);
-   for (j=0;j<C*N;j++)
-     in[j] = FLOAT2INT16(pcm[j]);
-   ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
-#ifdef RESYNTH
-   for (j=0;j<C*N;j++)
-      ((float*)pcm)[j]=in[j]*(1.f/32768.f);
-#endif
-   RESTORE_STACK;
-   return ret;
-}
-#endif /* DISABLE_FLOAT_API */
-#else
-int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   int j, ret, C, N;
-   VARDECL(celt_sig, in);
-   ALLOC_STACK;
-   if (pcm==NULL)
-      return OPUS_BAD_ARG;
-   C=st->channels;
-   N=frame_size;
-   ALLOC(in, C*N, celt_sig);
-   for (j=0;j<C*N;j++) {
-     in[j] = SCALEOUT(pcm[j]);
-   }
-   ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
-#ifdef RESYNTH
-   for (j=0;j<C*N;j++)
-      ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]);
-#endif
-   RESTORE_STACK;
-   return ret;
-}
-int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
-}
-#endif
-#endif /* CUSTOM_MODES */
-int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
-{
-   va_list ap;
-   va_start(ap, request);
-   switch (request)
-   {
-      case OPUS_SET_COMPLEXITY_REQUEST:
-      {
-         int value = va_arg(ap, opus_int32);
-         if (value<0 || value>10)
-            goto bad_arg;
-         st->complexity = value;
-      }
-      break;
-      case CELT_SET_START_BAND_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<0 || value>=st->mode->nbEBands)
-            goto bad_arg;
-         st->start = value;
-      }
-      break;
-      case CELT_SET_END_BAND_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<1 || value>st->mode->nbEBands)
-            goto bad_arg;
-         st->end = value;
-      }
-      break;
-      case CELT_SET_PREDICTION_REQUEST:
-      {
-         int value = va_arg(ap, opus_int32);
-         if (value<0 || value>2)
-            goto bad_arg;
-         st->disable_pf = value<=1;
-         st->force_intra = value==0;
-      }
-      break;
-      case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
-      {
-         int value = va_arg(ap, opus_int32);
-         if (value<0 || value>100)
-            goto bad_arg;
-         st->loss_rate = value;
-      }
-      break;
-      case OPUS_SET_VBR_CONSTRAINT_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->constrained_vbr = value;
-      }
-      break;
-      case OPUS_SET_VBR_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->vbr = value;
-      }
-      break;
-      case OPUS_SET_BITRATE_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<=500 && value!=OPUS_BITRATE_MAX)
-            goto bad_arg;
-         value = IMIN(value, 260000*st->channels);
-         st->bitrate = value;
-      }
-      break;
-      case CELT_SET_CHANNELS_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<1 || value>2)
-            goto bad_arg;
-         st->stream_channels = value;
-      }
-      break;
-      case OPUS_SET_LSB_DEPTH_REQUEST:
-      {
-          opus_int32 value = va_arg(ap, opus_int32);
-          if (value<8 || value>24)
-             goto bad_arg;
-          st->lsb_depth=value;
-      }
-      break;
-      case OPUS_GET_LSB_DEPTH_REQUEST:
-      {
-          opus_int32 *value = va_arg(ap, opus_int32*);
-          *value=st->lsb_depth;
-      }
-      break;
-      case OPUS_RESET_STATE:
-      {
-         int i;
-         opus_val16 *oldBandE, *oldLogE, *oldLogE2;
-         oldBandE = (opus_val16*)(st->in_mem+st->channels*(2*st->overlap+COMBFILTER_MAXPERIOD));
-         oldLogE = oldBandE + st->channels*st->mode->nbEBands;
-         oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
-         OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
-               opus_custom_encoder_get_size(st->mode, st->channels)-
-               ((char*)&st->ENCODER_RESET_START - (char*)st));
-         for (i=0;i<st->channels*st->mode->nbEBands;i++)
-            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
-         st->vbr_offset = 0;
-         st->delayedIntra = 1;
-         st->spread_decision = SPREAD_NORMAL;
-         st->tonal_average = 256;
-         st->hf_average = 0;
-         st->tapset_decision = 0;
-      }
-      break;
-#ifdef CUSTOM_MODES
-      case CELT_SET_INPUT_CLIPPING_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->clip = value;
-      }
-      break;
-#endif
-      case CELT_SET_SIGNALLING_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->signalling = value;
-      }
-      break;
-      case CELT_GET_MODE_REQUEST:
-      {
-         const CELTMode ** value = va_arg(ap, const CELTMode**);
-         if (value==0)
-            goto bad_arg;
-         *value=st->mode;
-      }
-      break;
-      case OPUS_GET_FINAL_RANGE_REQUEST:
-      {
-         opus_uint32 * value = va_arg(ap, opus_uint32 *);
-         if (value==0)
-            goto bad_arg;
-         *value=st->rng;
-      }
-      break;
-      default:
-         goto bad_request;
-   }
-   va_end(ap);
-   return OPUS_OK;
-bad_arg:
-   va_end(ap);
-   return OPUS_BAD_ARG;
-bad_request:
-   va_end(ap);
-   return OPUS_UNIMPLEMENTED;
-}
-#endif
-/**********************************************************************/
-/*                                                                    */
-/*                             DECODER                                */
-/*                                                                    */
-/**********************************************************************/
-#define DECODE_BUFFER_SIZE 2048
-/** Decoder state
- @brief Decoder state
- */
-struct OpusCustomDecoder {
-   const OpusCustomMode *mode;
-   int overlap;
-   int channels;
-   int stream_channels;
-   int downsample;
-   int start, end;
-   int signalling;
-   /* Everything beyond this point gets cleared on a reset */
-#define DECODER_RESET_START rng
-   opus_uint32 rng;
-   int error;
-   int last_pitch_index;
-   int loss_count;
-   int postfilter_period;
-   int postfilter_period_old;
-   opus_val16 postfilter_gain;
-   opus_val16 postfilter_gain_old;
-   int postfilter_tapset;
-   int postfilter_tapset_old;
-   celt_sig preemph_memD[2];
-   celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
-   /* opus_val16 lpc[],  Size = channels*LPC_ORDER */
-   /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
-   /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
-   /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
-   /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
-};
-int celt_decoder_get_size(int channels)
-{
-   const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
-   return opus_custom_decoder_get_size(mode, channels);
-}
-OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels)
-{
-   int size = sizeof(struct CELTDecoder)
-            + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
-            + channels*LPC_ORDER*sizeof(opus_val16)
-            + 4*2*mode->nbEBands*sizeof(opus_val16);
-   return size;
-}
-#ifdef CUSTOM_MODES
-CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error)
-{
-   int ret;
-   CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels));
-   ret = opus_custom_decoder_init(st, mode, channels);
-   if (ret != OPUS_OK)
-   {
-      opus_custom_decoder_destroy(st);
-      st = NULL;
-   }
-   if (error)
-      *error = ret;
-   return st;
-}
-#endif /* CUSTOM_MODES */
-int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels)
-{
-   int ret;
-   ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
-   if (ret != OPUS_OK)
-      return ret;
-   st->downsample = resampling_factor(sampling_rate);
-   if (st->downsample==0)
-      return OPUS_BAD_ARG;
-   else
-      return OPUS_OK;
-}
-OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels)
-{
-   if (channels < 0 || channels > 2)
-      return OPUS_BAD_ARG;
-   if (st==NULL)
-      return OPUS_ALLOC_FAIL;
-   OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels));
-   st->mode = mode;
-   st->overlap = mode->overlap;
-   st->stream_channels = st->channels = channels;
-   st->downsample = 1;
-   st->start = 0;
-   st->end = st->mode->effEBands;
-   st->signalling = 1;
-   st->loss_count = 0;
-   opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
-   return OPUS_OK;
-}
-#ifdef CUSTOM_MODES
-void opus_custom_decoder_destroy(CELTDecoder *st)
-{
-   opus_free(st);
-}
-#endif /* CUSTOM_MODES */
-static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
-{
-   int c;
-   int pitch_index;
-   int overlap = st->mode->overlap;
-   opus_val16 fade = Q15ONE;
-   int i, len;
-   const int C = st->channels;
-   int offset;
-   celt_sig *out_mem[2];
-   celt_sig *decode_mem[2];
-   celt_sig *overlap_mem[2];
-   opus_val16 *lpc;
-   opus_val32 *out_syn[2];
-   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
-   SAVE_STACK;
-   c=0; do {
-      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap);
-      out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
-      overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE;
-   } while (++c<C);
-   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*C);
-   oldBandE = lpc+C*LPC_ORDER;
-   oldLogE = oldBandE + 2*st->mode->nbEBands;
-   oldLogE2 = oldLogE + 2*st->mode->nbEBands;
-   backgroundLogE = oldLogE2  + 2*st->mode->nbEBands;
-   out_syn[0] = out_mem[0]+MAX_PERIOD-N;
-   if (C==2)
-      out_syn[1] = out_mem[1]+MAX_PERIOD-N;
-   len = N+st->mode->overlap;
-   if (st->loss_count >= 5 || st->start!=0)
-   {
-      /* Noise-based PLC/CNG */
-      VARDECL(celt_sig, freq);
-      VARDECL(celt_norm, X);
-      VARDECL(celt_ener, bandE);
-      opus_uint32 seed;
-      int effEnd;
-      effEnd = st->end;
-      if (effEnd > st->mode->effEBands)
-         effEnd = st->mode->effEBands;
-      ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
-      ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
-      ALLOC(bandE, st->mode->nbEBands*C, celt_ener);
-      if (st->loss_count >= 5)
-         log2Amp(st->mode, st->start, st->end, bandE, backgroundLogE, C);
-      else {
-         /* Energy decay */
-         opus_val16 decay = st->loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
-         c=0; do
-         {
-            for (i=st->start;i<st->end;i++)
-               oldBandE[c*st->mode->nbEBands+i] -= decay;
-         } while (++c<C);
-         log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C);
-      }
-      seed = st->rng;
-      for (c=0;c<C;c++)
-      {
-         for (i=0;i<(st->mode->eBands[st->start]<<LM);i++)
-            X[c*N+i] = 0;
-         for (i=st->start;i<st->mode->effEBands;i++)
-         {
-            int j;
-            int boffs;
-            int blen;
-            boffs = N*c+(st->mode->eBands[i]<<LM);
-            blen = (st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
-            for (j=0;j<blen;j++)
-            {
-               seed = celt_lcg_rand(seed);
-               X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
-            }
-            renormalise_vector(X+boffs, blen, Q15ONE);
-         }
-         for (i=(st->mode->eBands[st->end]<<LM);i<N;i++)
-            X[c*N+i] = 0;
-      }
-      st->rng = seed;
-      denormalise_bands(st->mode, X, freq, bandE, st->mode->effEBands, C, 1<<LM);
-      c=0; do
-         for (i=0;i<st->mode->eBands[st->start]<<LM;i++)
-            freq[c*N+i] = 0;
-      while (++c<C);
-      c=0; do {
-         int bound = st->mode->eBands[effEnd]<<LM;
-         if (st->downsample!=1)
-            bound = IMIN(bound, N/st->downsample);
-         for (i=bound;i<N;i++)
-            freq[c*N+i] = 0;
-      } while (++c<C);
-      compute_inv_mdcts(st->mode, 0, freq, out_syn, overlap_mem, C, LM);
-   } else {
-      /* Pitch-based PLC */
-      if (st->loss_count == 0)
-      {
-         opus_val16 pitch_buf[DECODE_BUFFER_SIZE>>1];
-         /* Corresponds to a min pitch of 67 Hz. It's possible to save CPU in this
-         search by using only part of the decode buffer */
-         int poffset = 720;
-         pitch_downsample(decode_mem, pitch_buf, DECODE_BUFFER_SIZE, C);
-         /* Max pitch is 100 samples (480 Hz) */
-         pitch_search(pitch_buf+((poffset)>>1), pitch_buf, DECODE_BUFFER_SIZE-poffset,
-               poffset-100, &pitch_index);
-         pitch_index = poffset-pitch_index;
-         st->last_pitch_index = pitch_index;
-      } else {
-         pitch_index = st->last_pitch_index;
-         fade = QCONST16(.8f,15);
-      }
-      c=0; do {
-         VARDECL(opus_val32, e);
-         opus_val16 exc[MAX_PERIOD];
-         opus_val32 ac[LPC_ORDER+1];
-         opus_val16 decay = 1;
-         opus_val32 S1=0;
-         opus_val16 mem[LPC_ORDER]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-         ALLOC(e, MAX_PERIOD+2*st->mode->overlap, opus_val32);
-         offset = MAX_PERIOD-pitch_index;
-         for (i=0;i<MAX_PERIOD;i++)
-            exc[i] = ROUND16(out_mem[c][i], SIG_SHIFT);
-         if (st->loss_count == 0)
-         {
-            _celt_autocorr(exc, ac, st->mode->window, st->mode->overlap,
-                  LPC_ORDER, MAX_PERIOD);
-            /* Noise floor -40 dB */
-#ifdef FIXED_POINT
-            ac[0] += SHR32(ac[0],13);
-#else
-            ac[0] *= 1.0001f;
-#endif
-            /* Lag windowing */
-            for (i=1;i<=LPC_ORDER;i++)
-            {
-               /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
-#ifdef FIXED_POINT
-               ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
-#else
-               ac[i] -= ac[i]*(.008f*i)*(.008f*i);
-#endif
-            }
-            _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
-         }
-         for (i=0;i<LPC_ORDER;i++)
-            mem[i] = ROUND16(out_mem[c][MAX_PERIOD-1-i], SIG_SHIFT);
-         celt_fir(exc, lpc+c*LPC_ORDER, exc, MAX_PERIOD, LPC_ORDER, mem);
-         /*for (i=0;i<MAX_PERIOD;i++)printf("%d ", exc[i]); printf("\n");*/
-         /* Check if the waveform is decaying (and if so how fast) */
-         {
-            opus_val32 E1=1, E2=1;
-            int period;
-            if (pitch_index <= MAX_PERIOD/2)
-               period = pitch_index;
-            else
-               period = MAX_PERIOD/2;
-            for (i=0;i<period;i++)
-            {
-               E1 += SHR32(MULT16_16(exc[MAX_PERIOD-period+i],exc[MAX_PERIOD-period+i]),8);
-               E2 += SHR32(MULT16_16(exc[MAX_PERIOD-2*period+i],exc[MAX_PERIOD-2*period+i]),8);
-            }
-            if (E1 > E2)
-               E1 = E2;
-            decay = celt_sqrt(frac_div32(SHR32(E1,1),E2));
-         }
-         /* Copy excitation, taking decay into account */
-         for (i=0;i<len+st->mode->overlap;i++)
-         {
-            opus_val16 tmp;
-            if (offset+i >= MAX_PERIOD)
-            {
-               offset -= pitch_index;
-               decay = MULT16_16_Q15(decay, decay);
-            }
-            e[i] = SHL32(EXTEND32(MULT16_16_Q15(decay, exc[offset+i])), SIG_SHIFT);
-            tmp = ROUND16(out_mem[c][offset+i],SIG_SHIFT);
-            S1 += SHR32(MULT16_16(tmp,tmp),8);
-         }
-         for (i=0;i<LPC_ORDER;i++)
-            mem[i] = ROUND16(out_mem[c][MAX_PERIOD-1-i], SIG_SHIFT);
-         for (i=0;i<len+st->mode->overlap;i++)
-            e[i] = MULT16_32_Q15(fade, e[i]);
-         celt_iir(e, lpc+c*LPC_ORDER, e, len+st->mode->overlap, LPC_ORDER, mem);
-         {
-            opus_val32 S2=0;
-            for (i=0;i<len+overlap;i++)
-            {
-               opus_val16 tmp = ROUND16(e[i],SIG_SHIFT);
-               S2 += SHR32(MULT16_16(tmp,tmp),8);
-            }
-            /* This checks for an "explosion" in the synthesis */
-#ifdef FIXED_POINT
-            if (!(S1 > SHR32(S2,2)))
-#else
-               /* Float test is written this way to catch NaNs at the same time */
-               if (!(S1 > 0.2f*S2))
-#endif
-               {
-                  for (i=0;i<len+overlap;i++)
-                     e[i] = 0;
-               } else if (S1 < S2)
-               {
-                  opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
-                  for (i=0;i<len+overlap;i++)
-                     e[i] = MULT16_32_Q15(ratio, e[i]);
-               }
-         }
-         /* Apply post-filter to the MDCT overlap of the previous frame */
-         comb_filter(out_mem[c]+MAX_PERIOD, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap,
-               st->postfilter_gain, st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset,
-               NULL, 0);
-         for (i=0;i<MAX_PERIOD+st->mode->overlap-N;i++)
-            out_mem[c][i] = out_mem[c][N+i];
-         /* Apply TDAC to the concealed audio so that it blends with the
-         previous and next frames */
-         for (i=0;i<overlap/2;i++)
-         {
-            opus_val32 tmp;
-            tmp = MULT16_32_Q15(st->mode->window[i],           e[N+overlap-1-i]) +
-                  MULT16_32_Q15(st->mode->window[overlap-i-1], e[N+i          ]);
-            out_mem[c][MAX_PERIOD+i] = MULT16_32_Q15(st->mode->window[overlap-i-1], tmp);
-            out_mem[c][MAX_PERIOD+overlap-i-1] = MULT16_32_Q15(st->mode->window[i], tmp);
-         }
-         for (i=0;i<N;i++)
-            out_mem[c][MAX_PERIOD-N+i] = e[i];
-         /* Apply pre-filter to the MDCT overlap for the next frame (post-filter will be applied then) */
-         comb_filter(e, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap,
-               -st->postfilter_gain, -st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset,
-               NULL, 0);
-         for (i=0;i<overlap;i++)
-            out_mem[c][MAX_PERIOD+i] = e[i];
-      } while (++c<C);
-   }
-   deemphasis(out_syn, pcm, N, C, /*st->downsample,*/ st->mode->preemph, st->preemph_memD);
-   st->loss_count++;
-   RESTORE_STACK;
-}
-#define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */
-static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */
-static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */
-int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
-{
-   int c, i, N;
-   int spread_decision;
-   opus_int32 bits;
-   ec_dec _dec;
-   VARDECL(celt_sig, freq);
-   VARDECL(celt_norm, X);
-   VARDECL(celt_ener, bandE);
-   VARDECL(int, fine_quant);
-   VARDECL(int, pulses);
-   VARDECL(int, cap);
-   VARDECL(int, offsets);
-   VARDECL(int, fine_priority);
-   VARDECL(int, tf_res);
-   VARDECL(unsigned char, collapse_masks);
-   celt_sig *out_mem[2];
-   celt_sig *decode_mem[2];
-   celt_sig *overlap_mem[2];
-   celt_sig *out_syn[2];
-   opus_val16 *lpc;
-   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
-   int shortBlocks;
-   int isTransient;
-   int intra_ener;
-   const int CC = st->channels;
-   int LM, M;
-   int effEnd;
-   int codedBands;
-   int alloc_trim;
-   int postfilter_pitch;
-   opus_val16 postfilter_gain;
-   int intensity=0;
-   int dual_stereo=0;
-   opus_int32 total_bits;
-   opus_int32 balance;
-   opus_int32 tell;
-   int dynalloc_logp;
-   int postfilter_tapset;
-   int anti_collapse_rsv;
-   int anti_collapse_on=0;
-   int silence;
-   int C = st->stream_channels;
-   ALLOC_STACK;
-   frame_size *= st->downsample;
-   c=0; do {
-      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap);
-      out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
-      overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE;
-   } while (++c<CC);
-   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*CC);
-   oldBandE = lpc+CC*LPC_ORDER;
-   oldLogE = oldBandE + 2*st->mode->nbEBands;
-   oldLogE2 = oldLogE + 2*st->mode->nbEBands;
-   backgroundLogE = oldLogE2  + 2*st->mode->nbEBands;
-#ifdef CUSTOM_MODES
-   if (st->signalling && data!=NULL)
-   {
-      int data0=data[0];
-      /* Convert "standard mode" to Opus header */
-      if (st->mode->Fs==48000 && st->mode->shortMdctSize==120)
-      {
-         data0 = fromOpus(data0);
-         if (data0<0)
-            return OPUS_INVALID_PACKET;
-      }
-      st->end = IMAX(1, st->mode->effEBands-2*(data0>>5));
-      LM = (data0>>3)&0x3;
-      C = 1 + ((data0>>2)&0x1);
-      data++;
-      len--;
-      if (LM>st->mode->maxLM)
-         return OPUS_INVALID_PACKET;
-      if (frame_size < st->mode->shortMdctSize<<LM)
-         return OPUS_BUFFER_TOO_SMALL;
-      else
-         frame_size = st->mode->shortMdctSize<<LM;
-   } else {
-#else
-   {
-#endif
-      for (LM=0;LM<=st->mode->maxLM;LM++)
-         if (st->mode->shortMdctSize<<LM==frame_size)
-            break;
-      if (LM>st->mode->maxLM)
-         return OPUS_BAD_ARG;
-   }
-   M=1<<LM;
-   if (len<0 || len>1275 || pcm==NULL)
-      return OPUS_BAD_ARG;
-   N = M*st->mode->shortMdctSize;
-   effEnd = st->end;
-   if (effEnd > st->mode->effEBands)
-      effEnd = st->mode->effEBands;
-   /**< Interleaved signal MDCTs */
-   if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N)
-      freq = s_freq;
-   else
-      ALLOC(freq, IMAX(CC,C)*N, celt_sig);
-   /**< Interleaved normalised MDCTs */
-   if (FREQ_X_BUF_SIZE >= C*N)
-      X = s_X;
-   else
-      ALLOC(X, C*N, celt_norm);
-   ALLOC(bandE, st->mode->nbEBands*C, celt_ener);
-   c=0; do
-      for (i=0;i<M*st->mode->eBands[st->start];i++)
-         X[c*N+i] = 0;
-   while (++c<C);
-   c=0; do
-      for (i=M*st->mode->eBands[effEnd];i<N;i++)
-         X[c*N+i] = 0;
-   while (++c<C);
-   if (data == NULL || len<=1)
-   {
-      celt_decode_lost(st, pcm, N, LM);
-      RESTORE_STACK;
-      return frame_size/st->downsample;
-   }
-   if (dec == NULL)
-   {
-      ec_dec_init(&_dec,(unsigned char*)data,len);
-      dec = &_dec;
-   }
-   if (C==1)
-   {
-      for (i=0;i<st->mode->nbEBands;i++)
-         oldBandE[i]=MAX16(oldBandE[i],oldBandE[st->mode->nbEBands+i]);
-   }
-   total_bits = len*8;
-   tell = ec_tell(dec);
-   if (tell >= total_bits)
-      silence = 1;
-   else if (tell==1)
-      silence = ec_dec_bit_logp(dec, 15);
-   else
-      silence = 0;
-   if (silence)
-   {
-      /* Pretend we've read all the remaining bits */
-      tell = len*8;
-      dec->nbits_total+=tell-ec_tell(dec);
-   }
-   postfilter_gain = 0;
-   postfilter_pitch = 0;
-   postfilter_tapset = 0;
-   if (st->start==0 && tell+16 <= total_bits)
-   {
-      if(ec_dec_bit_logp(dec, 1))
-      {
-         int qg, octave;
-         octave = ec_dec_uint(dec, 6);
-         postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1;
-         qg = ec_dec_bits(dec, 3);
-         if (ec_tell(dec)+2<=total_bits)
-            postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2);
-         postfilter_gain = QCONST16(.09375f,15)*(qg+1);
-      }
-      tell = ec_tell(dec);
-   }
-   if (LM > 0 && tell+3 <= total_bits)
-   {
-      isTransient = ec_dec_bit_logp(dec, 3);
-      tell = ec_tell(dec);
-   }
-   else
-      isTransient = 0;
-   if (isTransient)
-      shortBlocks = M;
-   else
-      shortBlocks = 0;
-   /* Decode the global flags (first symbols in the stream) */
-   intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
-   /* Get band energies */
-   unquant_coarse_energy(st->mode, st->start, st->end, oldBandE,
-         intra_ener, dec, C, LM);
-   ALLOC(tf_res, st->mode->nbEBands, int);
-   tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
-   tell = ec_tell(dec);
-   spread_decision = SPREAD_NORMAL;
-   if (tell+4 <= total_bits)
-      spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
-   ALLOC(pulses, st->mode->nbEBands, int);
-   ALLOC(cap, st->mode->nbEBands, int);
-   ALLOC(offsets, st->mode->nbEBands, int);
-   ALLOC(fine_priority, st->mode->nbEBands, int);
-   init_caps(st->mode,cap,LM,C);
-   dynalloc_logp = 6;
-   total_bits<<=BITRES;
-   tell = ec_tell_frac(dec);
-   for (i=st->start;i<st->end;i++)
-   {
-      int width, quanta;
-      int dynalloc_loop_logp;
-      int boost;
-      width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
-      /* quanta is 6 bits, but no more than 1 bit/sample
-         and no less than 1/8 bit/sample */
-      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
-      dynalloc_loop_logp = dynalloc_logp;
-      boost = 0;
-      while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i])
-      {
-         int flag;
-         flag = ec_dec_bit_logp(dec, dynalloc_loop_logp);
-         tell = ec_tell_frac(dec);
-         if (!flag)
-            break;
-         boost += quanta;
-         total_bits -= quanta;
-         dynalloc_loop_logp = 1;
-      }
-      offsets[i] = boost;
-      /* Making dynalloc more likely */
-      if (boost>0)
-         dynalloc_logp = IMAX(2, dynalloc_logp-1);
-   }
-   ALLOC(fine_quant, st->mode->nbEBands, int);
-   alloc_trim = tell+(6<<BITRES) <= total_bits ?
-         ec_dec_icdf(dec, trim_icdf, 7) : 5;
-   bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
-   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
-   bits -= anti_collapse_rsv;
-   codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap,
-         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
-         fine_quant, fine_priority, C, LM, dec, 0, 0);
-   unquant_fine_energy(st->mode, st->start, st->end, oldBandE, fine_quant, dec, C);
-   /* Decode fixed codebook */
-   ALLOC(collapse_masks, C*st->mode->nbEBands, unsigned char);
-   quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
-         NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
-         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
-   if (anti_collapse_rsv > 0)
-   {
-      anti_collapse_on = ec_dec_bits(dec, 1);
-   }
-   unquant_energy_finalise(st->mode, st->start, st->end, oldBandE,
-         fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
-   if (anti_collapse_on)
-      anti_collapse(st->mode, X, collapse_masks, LM, C, N,
-            st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
-   log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C);
-   if (silence)
-   {
-      for (i=0;i<C*st->mode->nbEBands;i++)
-      {
-         bandE[i] = 0;
-         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
-      }
-   }
-   /* Synthesis */
-   denormalise_bands(st->mode, X, freq, bandE, effEnd, C, M);
-   OPUS_MOVE(decode_mem[0], decode_mem[0]+N, DECODE_BUFFER_SIZE-N);
-   if (CC==2)
-      OPUS_MOVE(decode_mem[1], decode_mem[1]+N, DECODE_BUFFER_SIZE-N);
-   c=0; do
-      for (i=0;i<M*st->mode->eBands[st->start];i++)
-         freq[c*N+i] = 0;
-   while (++c<C);
-   c=0; do {
-      int bound = M*st->mode->eBands[effEnd];
-      if (st->downsample!=1)
-         bound = IMIN(bound, N/st->downsample);
-      for (i=bound;i<N;i++)
-         freq[c*N+i] = 0;
-   } while (++c<C);
-   out_syn[0] = out_mem[0]+MAX_PERIOD-N;
-   if (CC==2)
-      out_syn[1] = out_mem[1]+MAX_PERIOD-N;
-   if (CC==2&&C==1)
-   {
-      for (i=0;i<N;i++)
-         freq[N+i] = freq[i];
-   }
-   if (CC==1&&C==2)
-   {
-      for (i=0;i<N;i++)
-         freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
-   }
-   /* Compute inverse MDCTs */
-   compute_inv_mdcts(st->mode, shortBlocks, freq, out_syn, overlap_mem, CC, LM);
-   c=0; do {
-      st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
-      st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
-      comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, st->mode->shortMdctSize,
-            st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
-            st->mode->window, st->overlap);
-      if (LM!=0)
-         comb_filter(out_syn[c]+st->mode->shortMdctSize, out_syn[c]+st->mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-st->mode->shortMdctSize,
-               st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
-               st->mode->window, st->mode->overlap);
-   } while (++c<CC);
-   st->postfilter_period_old = st->postfilter_period;
-   st->postfilter_gain_old = st->postfilter_gain;
-   st->postfilter_tapset_old = st->postfilter_tapset;
-   st->postfilter_period = postfilter_pitch;
-   st->postfilter_gain = postfilter_gain;
-   st->postfilter_tapset = postfilter_tapset;
-   if (LM!=0)
-   {
-      st->postfilter_period_old = st->postfilter_period;
-      st->postfilter_gain_old = st->postfilter_gain;
-      st->postfilter_tapset_old = st->postfilter_tapset;
-   }
-   if (C==1) {
-      for (i=0;i<st->mode->nbEBands;i++)
-         oldBandE[st->mode->nbEBands+i]=oldBandE[i];
-   }
-   /* In case start or end were to change */
-   if (!isTransient)
-   {
-      for (i=0;i<2*st->mode->nbEBands;i++)
-         oldLogE2[i] = oldLogE[i];
-      for (i=0;i<2*st->mode->nbEBands;i++)
-         oldLogE[i] = oldBandE[i];
-      for (i=0;i<2*st->mode->nbEBands;i++)
-         backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
-   } else {
-      for (i=0;i<2*st->mode->nbEBands;i++)
-         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
-   }
-   c=0; do
-   {
-      for (i=0;i<st->start;i++)
-      {
-         oldBandE[c*st->mode->nbEBands+i]=0;
-         oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
-      }
-      for (i=st->end;i<st->mode->nbEBands;i++)
-      {
-         oldBandE[c*st->mode->nbEBands+i]=0;
-         oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
-      }
-   } while (++c<2);
-   st->rng = dec->rng;
-   deemphasis(out_syn, pcm, N, CC, /*st->downsample,*/ st->mode->preemph, st->preemph_memD);
-   st->loss_count = 0;
-   RESTORE_STACK;
-   if (ec_tell(dec) > 8*len)
-      return OPUS_INTERNAL_ERROR;
-   if(ec_get_error(dec))
-      st->error = 1;
-   return frame_size/st->downsample;
-}
-#ifdef CUSTOM_MODES
-#ifdef FIXED_POINT
-int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
-{
-   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
-}
-#ifndef DISABLE_FLOAT_API
-int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
-{
-   int j, ret, C, N;
-   VARDECL(opus_int16, out);
-   ALLOC_STACK;
-   if (pcm==NULL)
-      return OPUS_BAD_ARG;
-   C = st->channels;
-   N = frame_size;
-   ALLOC(out, C*N, opus_int16);
-   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
-   if (ret>0)
-      for (j=0;j<C*ret;j++)
-         pcm[j]=out[j]*(1.f/32768.f);
-   RESTORE_STACK;
-   return ret;
-}
-#endif /* DISABLE_FLOAT_API */
-#else
-int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
-{
-   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
-}
-int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
-{
-   int j, ret, C, N;
-   VARDECL(celt_sig, out);
-   ALLOC_STACK;
-   if (pcm==NULL)
-      return OPUS_BAD_ARG;
-   C = st->channels;
-   N = frame_size;
-   ALLOC(out, C*N, celt_sig);
-   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
-   if (ret>0)
-      for (j=0;j<C*ret;j++)
-         pcm[j] = FLOAT2INT16 (out[j]);
-   RESTORE_STACK;
-   return ret;
-}
-#endif
-#endif /* CUSTOM_MODES */
-int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
-{
-   va_list ap;
-   va_start(ap, request);
-   switch (request)
-   {
-      case CELT_SET_START_BAND_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<0 || value>=st->mode->nbEBands)
-            goto bad_arg;
-         st->start = value;
-      }
-      break;
-      case CELT_SET_END_BAND_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<1 || value>st->mode->nbEBands)
-            goto bad_arg;
-         st->end = value;
-      }
-      break;
-      case CELT_SET_CHANNELS_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<1 || value>2)
-            goto bad_arg;
-         st->stream_channels = value;
-      }
-      break;
-      case CELT_GET_AND_CLEAR_ERROR_REQUEST:
-      {
-         opus_int32 *value = va_arg(ap, opus_int32*);
-         if (value==NULL)
-            goto bad_arg;
-         *value=st->error;
-         st->error = 0;
-      }
-      break;
-      case OPUS_GET_LOOKAHEAD_REQUEST:
-      {
-         opus_int32 *value = va_arg(ap, opus_int32*);
-         if (value==NULL)
-            goto bad_arg;
-         *value = st->overlap/st->downsample;
-      }
-      break;
-      case OPUS_RESET_STATE:
-      {
-         int i;
-         opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
-         lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
-         oldBandE = lpc+st->channels*LPC_ORDER;
-         oldLogE = oldBandE + 2*st->mode->nbEBands;
-         oldLogE2 = oldLogE + 2*st->mode->nbEBands;
-         OPUS_CLEAR((char*)&st->DECODER_RESET_START,
-               opus_custom_decoder_get_size(st->mode, st->channels)-
-               ((char*)&st->DECODER_RESET_START - (char*)st));
-         for (i=0;i<2*st->mode->nbEBands;i++)
-            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
-      }
-      break;
-      case OPUS_GET_PITCH_REQUEST:
-      {
-         opus_int32 *value = va_arg(ap, opus_int32*);
-         if (value==NULL)
-            goto bad_arg;
-         *value = st->postfilter_period;
-      }
-      break;
-#ifdef OPUS_BUILD
-      case CELT_GET_MODE_REQUEST:
-      {
-         const CELTMode ** value = va_arg(ap, const CELTMode**);
-         if (value==0)
-            goto bad_arg;
-         *value=st->mode;
-      }
-      break;
-      case CELT_SET_SIGNALLING_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->signalling = value;
-      }
-      break;
-      case OPUS_GET_FINAL_RANGE_REQUEST:
-      {
-         opus_uint32 * value = va_arg(ap, opus_uint32 *);
-         if (value==0)
-            goto bad_arg;
-         *value=st->rng;
-      }
-      break;
-#endif
-      default:
-         goto bad_request;
-   }
-   va_end(ap);
-   return OPUS_OK;
-bad_arg:
-   va_end(ap);
-   return OPUS_BAD_ARG;
-bad_request:
-      va_end(ap);
-  return OPUS_UNIMPLEMENTED;
-}
 const char *opus_strerror(int error)
@@ -2918,7 +212,7 @@ const char *opus_strerror(int error)
 const char *opus_get_version_string(void)
 {
-    return "libopus " OPUS_VERSION
+    return "libopus " PACKAGE_VERSION
 #ifdef FIXED_POINT
          "-fixed"
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.h b/lib/rbcodec/codecs/libopus/celt/celt.h
index 218cd883df..0911c72f72 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.h
+++ b/lib/rbcodec/codecs/libopus/celt/celt.h
@@ -50,7 +50,19 @@ extern "C" {
 #define CELTDecoder OpusCustomDecoder
 #define CELTMode OpusCustomMode
-#define _celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+/* Signal-analysis record handed to the codec through CELT_SET_ANALYSIS()
+   as a pointer to const AnalysisInfo. The code that fills it in is not part
+   of this header, so the per-field notes below are inferred from the names
+   only -- TODO confirm against the analysis code. */
+typedef struct {
+   int valid;                  /* presumably non-zero when the other fields hold usable data */
+   opus_val16 tonality;        /* presumably a tonality estimate -- range not visible here */
+   opus_val16 tonality_slope;  /* presumably how tonality varies across the spectrum */
+   opus_val16 noisiness;       /* presumably a noisiness estimate */
+   opus_val16 activity;        /* presumably a signal-activity estimate */
+   opus_val16 music_prob;      /* presumably a music-vs-speech probability */
+   int        bandwidth;       /* presumably the detected audio bandwidth -- units unknown */
+}AnalysisInfo;
+/* Compile-time argument checks for the ctl macros below. Subtracting the
+   cast pointer from ptr is diagnosed by the compiler unless ptr already has
+   a compatible pointer type; the difference is always zero, so each macro
+   evaluates to ptr itself. */
+#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
 /* Encoder/decoder Requests */
@@ -81,12 +93,27 @@ extern "C" {
 #define CELT_GET_MODE_REQUEST    10015
 /** Get the CELTMode used by an encoder or decoder */
-#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, _celt_check_mode_ptr_ptr(x)
+#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x)
 #define CELT_SET_SIGNALLING_REQUEST    10016
 #define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x)
+/* Additional ctl requests. Every FOO(x) macro expands to the numeric
+   request ID followed by its argument wrapped in a type-check macro, ready
+   to be passed as the variadic arguments of a *_ctl() call. The handlers for
+   these requests are not part of this header. */
+/* Argument checked with __opus_check_int. */
+#define CELT_SET_TONALITY_REQUEST    10018
+#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x)
+/* Argument checked with __opus_check_int. */
+#define CELT_SET_TONALITY_SLOPE_REQUEST    10020
+#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x)
+/* Argument must be a pointer to (const) AnalysisInfo. */
+#define CELT_SET_ANALYSIS_REQUEST    10022
+#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x)
+/* Argument checked with __opus_check_int. */
+#define OPUS_SET_LFE_REQUEST    10024
+#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
+/* Argument checked with __opus_check_val16_ptr (defined elsewhere). */
+#define OPUS_SET_ENERGY_SAVE_REQUEST    10026
+#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x)
+/* Argument checked with __opus_check_val16_ptr (defined elsewhere). */
+#define OPUS_SET_ENERGY_MASK_REQUEST    10028
+#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
 /* Encoder stuff */
@@ -110,6 +137,75 @@ int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned cha
 #define celt_encoder_ctl opus_custom_encoder_ctl
 #define celt_decoder_ctl opus_custom_decoder_ctl
+/* Linkage prefix for the opus_custom_* functions that are defined with it:
+   empty when CUSTOM_MODES is defined (the functions keep external linkage),
+   otherwise "static inline" so they stay private to the translation unit
+   that defines them. */
+#ifdef CUSTOM_MODES
+#define OPUS_CUSTOM_NOSTATIC
+#else
+#define OPUS_CUSTOM_NOSTATIC static inline
+#endif
+/* Inverse-CDF tables for entropy-coded side information. The decoder reads
+   them with ec_dec_icdf(), passing 7 (allocation trim), 5 (spread decision)
+   and 2 (post-filter tapset) as the last argument. */
+/* Allocation trim: 11 symbols. */
+static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
+/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
+static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
+/* Post-filter tapset: 3 symbols. */
+static const unsigned char tapset_icdf[3]={2,1,0};
+#ifdef CUSTOM_MODES
+/* Lookup used by toOpus(): indexed by the top five bits (c>>3) of a byte
+   below 0xA0. A zero entry marks a value with no Opus equivalent, which
+   toOpus() reports as -1. */
+static const unsigned char toOpusTable[20] = {
+      0xE0, 0xE8, 0xF0, 0xF8,
+      0xC0, 0xC8, 0xD0, 0xD8,
+      0xA0, 0xA8, 0xB0, 0xB8,
+      0x00, 0x00, 0x00, 0x00,
+      0x80, 0x88, 0x90, 0x98,
+};
+/* Lookup used by fromOpus(): indexed by (c>>3)-16 for bytes >= 0x80, so it
+   covers the sixteen possible values of the top five bits in that range. */
+static const unsigned char fromOpusTable[16] = {
+      0x80, 0x88, 0x90, 0x98,
+      0x40, 0x48, 0x50, 0x58,
+      0x20, 0x28, 0x30, 0x38,
+      0x00, 0x08, 0x10, 0x18
+};
+/* Translates a header byte into its Opus form: the top five bits go through
+   toOpusTable and the low three bits are carried over unchanged.
+   Returns -1 when the byte is 0xA0 or above, or when the table holds no
+   mapping (a zero entry) for it. */
+static inline int toOpus(unsigned char c)
+{
+   int mapped;
+   if (c >= 0xA0)
+      return -1;
+   mapped = toOpusTable[c>>3];
+   if (mapped == 0)
+      return -1;
+   return mapped | (c&0x7);
+}
+/* Inverse of the table mapping done by toOpus(): bytes of 0x80 and above
+   have their top five bits translated through fromOpusTable while the low
+   three bits are kept. Returns -1 for any byte below 0x80. */
+static inline int fromOpus(unsigned char c)
+{
+   if (c >= 0x80)
+      return fromOpusTable[(c>>3)-16] | (c&0x7);
+   return -1;
+}
+#endif /* CUSTOM_MODES */
+/* Presumably the bounds on the period arguments (T0/T1) accepted by
+   comb_filter() -- TODO confirm against its implementation. */
+#define COMBFILTER_MAXPERIOD 1024
+#define COMBFILTER_MINPERIOD 15
+/* Time-frequency resolution table; the decoder indexes it as
+   [LM][4*isTransient + 2*tf_select + per-band flag]. */
+extern const signed char tf_select_table[4][8];
+/* Maps a sampling rate to the decoder's downsampling factor. The decoder
+   treats a return value of 0 as an unsupported rate. */
+int resampling_factor(opus_int32 rate);
+/* Comb filter from x into y over N samples, with period/gain/tapset given
+   as a (0, 1) pair and a window of length overlap -- presumably used to
+   cross-fade between the two parameter sets; TODO confirm. */
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap);
+/* Fills cap[] (one entry per band) for the given LM and channel count C. */
+void init_caps(const CELTMode *m,int *cap,int LM,int C);
+/* These two are only exported when RESYNTH is defined; otherwise the
+   decoder source defines them as static. */
+#ifdef RESYNTH
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
+#endif
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
new file mode 100644
index 0000000000..929d1d441b
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
@@ -0,0 +1,1207 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#define CELT_DECODER_C
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+/**********************************************************************/
+/*                                                                    */
+/*                             DECODER                                */
+/*                                                                    */
+/**********************************************************************/
+/* Number of samples of decoded history kept per channel in _decode_mem,
+   not counting the overlap samples stored after them. */
+#define DECODE_BUFFER_SIZE 2048
+/** Decoder state
+ @brief Decoder state. The structure is followed in the same allocation by
+ several variable-length arrays (listed at the end), so its real size must
+ be obtained from opus_custom_decoder_get_size().
+ */
+struct OpusCustomDecoder {
+   const OpusCustomMode *mode; /* mode the decoder was initialised with */
+   int overlap;                /* copy of mode->overlap taken at init */
+   int channels;               /* channel count used to lay out the buffers */
+   int stream_channels;        /* channel count used when decoding the bitstream */
+   int downsample;             /* factor from resampling_factor(); 1 after a plain init */
+   int start, end;             /* band range handed to the band decoding routines */
+   int signalling;             /* CUSTOM_MODES only: parse the first packet byte as a header */
+   int arch;                   /* value returned by opus_select_arch() at init */
+   /* Everything beyond this point gets cleared on a reset */
+#define DECODER_RESET_START rng
+   opus_uint32 rng;            /* range coder state; also seeds the noise-based PLC */
+   int error;
+   int last_pitch_index;       /* pitch lag found by the PLC on the first lost frame */
+   int loss_count;             /* incremented every time a frame is concealed */
+   int postfilter_period;
+   int postfilter_period_old;
+   opus_val16 postfilter_gain;
+   opus_val16 postfilter_gain_old;
+   int postfilter_tapset;
+   int postfilter_tapset_old;
+   celt_sig preemph_memD[2];   /* de-emphasis filter memory, one entry per channel */
+   celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
+   /* opus_val16 lpc[],  Size = channels*LPC_ORDER */
+   /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
+   /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
+};
+/* Size in bytes of a decoder state for the given channel count, using the
+   mode obtained for 48000 Hz with 960-sample frames. */
+int celt_decoder_get_size(int channels)
+{
+   return opus_custom_decoder_get_size(
+         opus_custom_mode_create(48000, 960, NULL), channels);
+}
+/* Size in bytes of a decoder state for the given mode and channel count:
+   the fixed structure plus the arrays stored after it (decode memory, LPC
+   coefficients and the four band-energy histories). */
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels)
+{
+   /* The structure already contains one celt_sig of _decode_mem. */
+   const int extra_sig = channels*(DECODE_BUFFER_SIZE+mode->overlap)-1;
+   /* lpc[] followed by oldEBands, oldLogE, oldLogE2 and backgroundLogE. */
+   const int val16_count = channels*LPC_ORDER + 4*2*mode->nbEBands;
+   return (int)(sizeof(struct CELTDecoder)
+              + extra_sig*sizeof(celt_sig)
+              + val16_count*sizeof(opus_val16));
+}
+#ifdef CUSTOM_MODES
+/* Allocates and initialises a decoder for the given mode and channel count.
+   Returns the new decoder, or NULL when initialisation fails (the memory is
+   released in that case). When error is non-NULL it receives the status
+   returned by opus_custom_decoder_init(). */
+CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error)
+{
+   CELTDecoder *dec = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels));
+   const int status = opus_custom_decoder_init(dec, mode, channels);
+   if (status != OPUS_OK)
+   {
+      opus_custom_decoder_destroy(dec);
+      dec = NULL;
+   }
+   if (error != NULL)
+      *error = status;
+   return dec;
+}
+#endif /* CUSTOM_MODES */
+/* Initialises st with the mode obtained for 48000 Hz / 960-sample frames and
+   sets the downsampling factor for the requested output rate.
+   Returns the status of opus_custom_decoder_init() if that fails,
+   OPUS_BAD_ARG when resampling_factor() yields 0 for sampling_rate, and
+   OPUS_OK otherwise. */
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels)
+{
+   const int status = opus_custom_decoder_init(st,
+         opus_custom_mode_create(48000, 960, NULL), channels);
+   if (status != OPUS_OK)
+      return status;
+   st->downsample = resampling_factor(sampling_rate);
+   return st->downsample==0 ? OPUS_BAD_ARG : OPUS_OK;
+}
+/* Initialises a caller-provided decoder state of at least
+   opus_custom_decoder_get_size(mode, channels) bytes.
+   The whole state is zeroed, the configuration fields are filled in from
+   mode, and an OPUS_RESET_STATE request is issued.
+   Returns OPUS_BAD_ARG when channels is not 1 or 2 or when mode is NULL,
+   OPUS_ALLOC_FAIL when st is NULL, and OPUS_OK on success. */
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels)
+{
+   /* Zero channels must be rejected: the size computation would undercount
+      the state while the per-channel do/while loops in the decoder still
+      execute once, touching memory outside the allocation. */
+   if (channels < 1 || channels > 2)
+      return OPUS_BAD_ARG;
+   if (st==NULL)
+      return OPUS_ALLOC_FAIL;
+   /* mode is dereferenced right below, so refuse a missing one. */
+   if (mode==NULL)
+      return OPUS_BAD_ARG;
+   OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels));
+   st->mode = mode;
+   st->overlap = mode->overlap;
+   st->stream_channels = st->channels = channels;
+   st->downsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+   st->arch = opus_select_arch();
+   st->loss_count = 0;
+   opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
+   return OPUS_OK;
+}
+#ifdef CUSTOM_MODES
+/* Releases a decoder state with opus_free(); the counterpart of
+   opus_custom_decoder_create(). */
+void opus_custom_decoder_destroy(CELTDecoder *st)
+{
+   opus_free(st);
+}
+#endif /* CUSTOM_MODES */
+/* Converts one celt_sig sample to opus_val16. In fixed-point builds the
+   value is shifted down by SIG_SHIFT with rounding and saturated to the
+   16-bit range; in float builds it is a plain cast. */
+static inline opus_val16 SIG2WORD16(celt_sig x)
+{
+#ifdef FIXED_POINT
+   celt_sig rounded = PSHR32(x, SIG_SHIFT);
+   if (rounded < -32768)
+      rounded = -32768;
+   else if (rounded > 32767)
+      rounded = 32767;
+   return EXTRACT16(rounded);
+#else
+   return (opus_val16)x;
+#endif
+}
+/* De-emphasis filter and output conversion.
+   For each of the C channels, runs a recursive filter over the N samples of
+   in[c] (state kept in mem[c], coefficients in coef[]) and writes the result
+   to pcm with the channels interleaved (sample j of channel c goes to
+   pcm[j*C+c]). When downsample is greater than 1, or in CUSTOM_MODES builds
+   when coef[1] is non-zero, the filtered signal is first stored in scratch
+   (at least N entries) and only every downsample-th sample is emitted, so
+   N/downsample samples are produced per channel.
+   The function has external linkage only when RESYNTH is defined. */
+#ifndef RESYNTH
+static
+#endif
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
+{
+   int c;
+   int Nd;
+   int apply_downsampling=0;
+   opus_val16 coef0;
+   coef0 = coef[0];
+   /* Number of output samples per channel after decimation */
+   Nd = N/downsample;
+   c=0; do {
+      int j;
+      celt_sig * OPUS_RESTRICT x;
+      opus_val16  * OPUS_RESTRICT y;
+      /* Filter state carried over from the previous call */
+      celt_sig m = mem[c];
+      x =in[c];
+      /* Channel c starts at pcm+c and advances by C per sample */
+      y = pcm+c;
+#ifdef CUSTOM_MODES
+      if (coef[1] != 0)
+      {
+         /* Custom-mode filter using the extra coefficients coef[1] and
+            coef[3]; the output always goes through scratch */
+         opus_val16 coef1 = coef[1];
+         opus_val16 coef3 = coef[3];
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m;
+            m = MULT16_32_Q15(coef0, tmp)
+                          - MULT16_32_Q15(coef1, x[j]);
+            tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else
+#endif
+      if (downsample>1)
+      {
+         /* Standard filter with decimation: filter at the full rate into
+            scratch, the decimated samples are written out below */
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m;
+            m = MULT16_32_Q15(coef0, tmp);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else {
+         /* Shortcut for the standard (non-custom modes) case without
+            decimation: filter and write straight to the output */
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m + VERY_SMALL;
+            m = MULT16_32_Q15(coef0, tmp);
+            y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+         }
+      }
+      mem[c] = m;
+      if (apply_downsampling)
+      {
+         /* Perform down-sampling */
+         for (j=0;j<Nd;j++)
+            y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+      }
+   } while (++c<C);
+}
+/** Runs the inverse MDCT (windowing included) over every sub-frame of every
+    channel of a frame.
+    With shortBlocks non-zero the frame is split into shortBlocks transforms
+    of mode->shortMdctSize coefficients each; otherwise a single transform of
+    mode->shortMdctSize<<LM coefficients is used. X holds the coefficients
+    for all channels and out_mem[c] receives the output of channel c. */
+#ifndef RESYNTH
+static
+#endif
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
+{
+   const int overlap = OVERLAP(mode);
+   const int nbBlocks = shortBlocks ? shortBlocks : 1;
+   const int blockSize = shortBlocks ? mode->shortMdctSize
+                                     : (mode->shortMdctSize<<LM);
+   const int shift = shortBlocks ? mode->maxLM : (mode->maxLM-LM);
+   int ch = 0;
+   do {
+      int blk;
+      /* IMDCT on the interleaved sub-frames, overlap-add is performed by the IMDCT */
+      for (blk=0;blk<nbBlocks;blk++)
+      {
+         clt_mdct_backward(&mode->mdct, &X[blk+ch*blockSize*nbBlocks],
+               out_mem[ch]+blockSize*blk, mode->window, overlap, shift,
+               nbBlocks);
+      }
+   } while (++ch<C);
+}
+/* Decodes the per-band time-frequency resolution choices for bands
+   start..end-1 into tf_res[].
+   One flag per band is read while the bit budget allows; each flag toggles
+   the running value, so a decoded 1 means "differs from the previous band".
+   An optional tf_select bit is then read, and every band's flag is mapped
+   to its final value through tf_select_table. The order of the calls on dec
+   must not change, since it defines the bitstream layout. */
+static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
+{
+   int i, curr, tf_select;
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   opus_uint32 budget;
+   opus_uint32 tell;
+   /* Total number of bits in the packet and number consumed so far */
+   budget = dec->storage*8;
+   tell = ec_tell(dec);
+   /* Log2 of the inverse probability used for the first band's flag */
+   logp = isTransient ? 2 : 4;
+   /* Keep one bit in reserve for tf_select when it may be coded */
+   tf_select_rsv = LM>0 && tell+logp+1<=budget;
+   budget -= tf_select_rsv;
+   tf_changed = curr = 0;
+   for (i=start;i<end;i++)
+   {
+      if (tell+logp<=budget)
+      {
+         curr ^= ec_dec_bit_logp(dec, logp);
+         tell = ec_tell(dec);
+         tf_changed |= curr;
+      }
+      tf_res[i] = curr;
+      /* Bands after the first one use a different probability */
+      logp = isTransient ? 4 : 5;
+   }
+   tf_select = 0;
+   /* tf_select is only present when it would change the outcome */
+   if (tf_select_rsv &&
+     tf_select_table[LM][4*isTransient+0+tf_changed] !=
+     tf_select_table[LM][4*isTransient+2+tf_changed])
+   {
+      tf_select = ec_dec_bit_logp(dec, 1);
+   }
+   /* Replace each flag by the resolution change it stands for */
+   for (i=start;i<end;i++)
+   {
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
+   }
+}
+/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
+   CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
+   current value corresponds to a pitch of 66.67 Hz at a 48 kHz sampling
+   rate (48000/720). */
+#define PLC_PITCH_LAG_MAX (720)
+/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
+   pitch of 480 Hz at a 48 kHz sampling rate (48000/100). */
+#define PLC_PITCH_LAG_MIN (100)
+/* Packet-loss concealment: synthesises one frame of N samples per channel
+   (N = shortMdctSize<<LM, as computed by the caller) and writes it to pcm.
+   Two strategies are used:
+   - noise-based PLC/CNG, once five or more frames have been concealed or
+     whenever st->start is non-zero: pseudo-random normalised bands are
+     shaped with stored band energies and passed through the inverse MDCT;
+   - pitch-based PLC otherwise: the recent output is turned into an LPC
+     excitation, repeated with the detected pitch period and a decaying
+     gain, then filtered back into the signal domain.
+   Either way the result goes through deemphasis() and st->loss_count is
+   incremented. */
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
+{
+   int c;
+   int i;
+   const int C = st->channels;
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   int start;
+   int downsample;
+   int loss_count;
+   int noise_based;
+   const opus_int16 *eBands;
+   VARDECL(celt_sig, scratch);
+   SAVE_STACK;
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   /* Per-channel history buffers; the new frame occupies the last N samples
+      of the DECODE_BUFFER_SIZE history */
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+   } while (++c<C);
+   /* The variable-length arrays are stored right after the decode memory */
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
+   oldBandE = lpc+C*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+   loss_count = st->loss_count;
+   start = st->start;
+   downsample = st->downsample;
+   /* Select the concealment strategy (see the function comment) */
+   noise_based = loss_count >= 5 || start != 0;
+   /* The noise path also uses scratch as the C*N MDCT coefficient buffer */
+   ALLOC(scratch, noise_based?N*C:N, celt_sig);
+   if (noise_based)
+   {
+      /* Noise-based PLC/CNG */
+      celt_sig *freq;
+      VARDECL(celt_norm, X);
+      opus_uint32 seed;
+      opus_val16 *plcLogE;
+      int end;
+      int effEnd;
+      end = st->end;
+      effEnd = IMAX(start, IMIN(end, mode->effEBands));
+      /* Share the interleaved signal MDCT coefficient buffer with the
+         deemphasis scratch buffer. */
+      freq = scratch;
+      ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+      if (loss_count >= 5)
+         plcLogE = backgroundLogE;
+      else {
+         /* Energy decay */
+         opus_val16 decay = loss_count==0 ?
+               QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
+         c=0; do
+         {
+            for (i=start;i<end;i++)
+               oldBandE[c*nbEBands+i] -= decay;
+         } while (++c<C);
+         plcLogE = oldBandE;
+      }
+      /* Fill every band with pseudo-random values and normalise it */
+      seed = st->rng;
+      for (c=0;c<C;c++)
+      {
+         for (i=start;i<effEnd;i++)
+         {
+            int j;
+            int boffs;
+            int blen;
+            boffs = N*c+(eBands[i]<<LM);
+            blen = (eBands[i+1]-eBands[i])<<LM;
+            for (j=0;j<blen;j++)
+            {
+               seed = celt_lcg_rand(seed);
+               X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
+            }
+            renormalise_vector(X+boffs, blen, Q15ONE);
+         }
+      }
+      st->rng = seed;
+      denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
+      /* Zero the coefficients above the last coded band (and above the
+         band limit implied by the downsampling factor) */
+      c=0; do {
+         int bound = eBands[effEnd]<<LM;
+         if (downsample!=1)
+            bound = IMIN(bound, N/downsample);
+         for (i=bound;i<N;i++)
+            freq[c*N+i] = 0;
+      } while (++c<C);
+      /* Shift the history by one frame to make room for the new output */
+      c=0; do {
+         OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
+               DECODE_BUFFER_SIZE-N+(overlap>>1));
+      } while (++c<C);
+      compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
+   } else {
+      /* Pitch-based PLC */
+      const opus_val16 *window;
+      opus_val16 fade = Q15ONE;
+      int pitch_index;
+      VARDECL(opus_val32, etmp);
+      VARDECL(opus_val16, exc);
+      if (loss_count == 0)
+      {
+         /* First lost frame: search for the pitch lag and remember it */
+         VARDECL( opus_val16, lp_pitch_buf );
+         ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
+         pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C);
+         pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
+               DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
+               PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index);
+         pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
+         st->last_pitch_index = pitch_index;
+      } else {
+         /* Later losses reuse the stored lag and fade the output */
+         pitch_index = st->last_pitch_index;
+         fade = QCONST16(.8f,15);
+      }
+      ALLOC(etmp, overlap, opus_val32);
+      ALLOC(exc, MAX_PERIOD, opus_val16);
+      window = mode->window;
+      c=0; do {
+         opus_val16 decay;
+         opus_val16 attenuation;
+         opus_val32 S1=0;
+         celt_sig *buf;
+         int extrapolation_offset;
+         int extrapolation_len;
+         int exc_length;
+         int j;
+         buf = decode_mem[c];
+         /* Copy the last MAX_PERIOD samples of history at 16-bit precision */
+         for (i=0;i<MAX_PERIOD;i++) {
+            exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT);
+         }
+         if (loss_count == 0)
+         {
+            opus_val32 ac[LPC_ORDER+1];
+            /* Compute LPC coefficients for the last MAX_PERIOD samples before
+               the first loss so we can work in the excitation-filter domain. */
+            _celt_autocorr(exc, ac, window, overlap, LPC_ORDER, MAX_PERIOD);
+            /* Add a noise floor of -40 dB. */
+#ifdef FIXED_POINT
+            ac[0] += SHR32(ac[0],13);
+#else
+            ac[0] *= 1.0001f;
+#endif
+            /* Use lag windowing to stabilize the Levinson-Durbin recursion. */
+            for (i=1;i<=LPC_ORDER;i++)
+            {
+               /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef FIXED_POINT
+               ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+               ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
+#endif
+            }
+            _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
+         }
+         /* We want the excitation for 2 pitch periods in order to look for a
+            decaying signal, but we can't get more than MAX_PERIOD. */
+         exc_length = IMIN(2*pitch_index, MAX_PERIOD);
+         /* Initialize the LPC history with the samples just before the start
+            of the region for which we're computing the excitation. */
+         {
+            opus_val16 lpc_mem[LPC_ORDER];
+            for (i=0;i<LPC_ORDER;i++)
+            {
+               lpc_mem[i] =
+                     ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
+            }
+            /* Compute the excitation for exc_length samples before the loss. */
+            celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
+         }
+         /* Check if the waveform is decaying, and if so how fast.
+            We do this to avoid adding energy when concealing in a segment
+            with decaying energy. */
+         {
+            opus_val32 E1=1, E2=1;
+            int decay_length;
+#ifdef FIXED_POINT
+            int shift = IMAX(0,2*celt_zlog2(celt_maxabs16(&exc[MAX_PERIOD-exc_length], exc_length))-20);
+#endif
+            decay_length = exc_length>>1;
+            /* E1: energy of the most recent half, E2: energy of the half
+               before it */
+            for (i=0;i<decay_length;i++)
+            {
+               opus_val16 e;
+               e = exc[MAX_PERIOD-decay_length+i];
+               E1 += SHR32(MULT16_16(e, e), shift);
+               e = exc[MAX_PERIOD-2*decay_length+i];
+               E2 += SHR32(MULT16_16(e, e), shift);
+            }
+            /* Capping E1 at E2 keeps the ratio passed to frac_div32 at or
+               below one half */
+            E1 = MIN32(E1, E2);
+            decay = celt_sqrt(frac_div32(SHR32(E1, 1), E2));
+         }
+         /* Move the decoder memory one frame to the left to give us room to
+            add the data for the new frame. We ignore the overlap that extends
+            past the end of the buffer, because we aren't going to use it. */
+         OPUS_MOVE(buf, buf+N, DECODE_BUFFER_SIZE-N);
+         /* Extrapolate from the end of the excitation with a period of
+            "pitch_index", scaling down each period by an additional factor of
+            "decay". */
+         extrapolation_offset = MAX_PERIOD-pitch_index;
+         /* We need to extrapolate enough samples to cover a complete MDCT
+            window (including overlap/2 samples on both sides). */
+         extrapolation_len = N+overlap;
+         /* We also apply fading if this is not the first loss. */
+         attenuation = MULT16_16_Q15(fade, decay);
+         for (i=j=0;i<extrapolation_len;i++,j++)
+         {
+            opus_val16 tmp;
+            /* Wrap around at the end of each pitch period */
+            if (j >= pitch_index) {
+               j -= pitch_index;
+               attenuation = MULT16_16_Q15(attenuation, decay);
+            }
+            buf[DECODE_BUFFER_SIZE-N+i] =
+                  SHL32(EXTEND32(MULT16_16_Q15(attenuation,
+                        exc[extrapolation_offset+j])), SIG_SHIFT);
+            /* Compute the energy of the previously decoded signal whose
+               excitation we're copying. */
+            tmp = ROUND16(
+                  buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
+                  SIG_SHIFT);
+            S1 += SHR32(MULT16_16(tmp, tmp), 8);
+         }
+         {
+            opus_val16 lpc_mem[LPC_ORDER];
+            /* Copy the last decoded samples (prior to the overlap region) to
+               synthesis filter memory so we can have a continuous signal. */
+            for (i=0;i<LPC_ORDER;i++)
+               lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
+            /* Apply the synthesis filter to convert the excitation back into
+               the signal domain. */
+            celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
+                  buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
+                  lpc_mem);
+         }
+         /* Check if the synthesis energy is higher than expected, which can
+            happen when the signal changes during our window. If so,
+            attenuate. */
+         {
+            opus_val32 S2=0;
+            for (i=0;i<extrapolation_len;i++)
+            {
+               opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
+               S2 += SHR32(MULT16_16(tmp, tmp), 8);
+            }
+            /* This checks for an "explosion" in the synthesis. */
+#ifdef FIXED_POINT
+            if (!(S1 > SHR32(S2,2)))
+#else
+            /* The float test is written this way to catch NaNs in the output
+               of the IIR filter at the same time. */
+            if (!(S1 > 0.2f*S2))
+#endif
+            {
+               /* Synthesis blew up: output silence instead */
+               for (i=0;i<extrapolation_len;i++)
+                  buf[DECODE_BUFFER_SIZE-N+i] = 0;
+            } else if (S1 < S2)
+            {
+               /* Scale down to the reference energy, ramping the gain in
+                  over the first overlap samples with the window */
+               opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
+               for (i=0;i<overlap;i++)
+               {
+                  opus_val16 tmp_g = Q15ONE
+                        - MULT16_16_Q15(window[i], Q15ONE-ratio);
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+               for (i=overlap;i<extrapolation_len;i++)
+               {
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(ratio, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+            }
+         }
+         /* Apply the pre-filter to the MDCT overlap for the next frame because
+            the post-filter will be re-applied in the decoder after the MDCT
+            overlap. */
+         comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
+              st->postfilter_period, st->postfilter_period, overlap,
+              -st->postfilter_gain, -st->postfilter_gain,
+              st->postfilter_tapset, st->postfilter_tapset, NULL, 0);
+         /* Simulate TDAC on the concealed audio so that it blends with the
+            MDCT of the next frame. */
+         for (i=0;i<overlap/2;i++)
+         {
+            buf[DECODE_BUFFER_SIZE+i] =
+               MULT16_32_Q15(window[i], etmp[overlap-1-i])
+               + MULT16_32_Q15(window[overlap-i-1], etmp[i]);
+         }
+      } while (++c<C);
+   }
+   /* Convert the synthesised frame to output samples */
+   deemphasis(out_syn, pcm, N, C, downsample,
+         mode->preemph, st->preemph_memD, scratch);
+   st->loss_count = loss_count+1;
+   RESTORE_STACK;
+}
+/* Statically allocated work buffers that celt_decode_with_ec() uses instead
+   of stack allocations whenever the frame fits into FREQ_X_BUF_SIZE entries.
+   NOTE(review): as file-scope statics they are shared by every decoder
+   instance, so decoding from several threads at once would presumably
+   clash -- confirm this is acceptable for the target. */
+#define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */
+static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 bytes, assuming a 4-byte celt_sig */
+static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 bytes, assuming a 2-byte celt_norm */
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
+{
+   int c, i, N;
+   int spread_decision;
+   opus_int32 bits;
+   ec_dec _dec;
+   VARDECL(celt_sig, freq);
+   VARDECL(celt_norm, X);
+   VARDECL(int, fine_quant);
+   VARDECL(int, pulses);
+   VARDECL(int, cap);
+   VARDECL(int, offsets);
+   VARDECL(int, fine_priority);
+   VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
+   celt_sig *out_mem[2];
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+   int shortBlocks;
+   int isTransient;
+   int intra_ener;
+   const int CC = st->channels;
+   int LM, M;
+   int effEnd;
+   int codedBands;
+   int alloc_trim;
+   int postfilter_pitch;
+   opus_val16 postfilter_gain;
+   int intensity=0;
+   int dual_stereo=0;
+   opus_int32 total_bits;
+   opus_int32 balance;
+   opus_int32 tell;
+   int dynalloc_logp;
+   int postfilter_tapset;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence;
+   int C = st->stream_channels;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   const opus_int16 *eBands;
+   ALLOC_STACK;
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   frame_size *= st->downsample;
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+      out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
+   } while (++c<CC);
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
+   oldBandE = lpc+CC*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+#ifdef CUSTOM_MODES
+   if (st->signalling && data!=NULL)
+   {
+      int data0=data[0];
+      /* Convert "standard mode" to Opus header */
+      if (mode->Fs==48000 && mode->shortMdctSize==120)
+      {
+         data0 = fromOpus(data0);
+         if (data0<0)
+            return OPUS_INVALID_PACKET;
+      }
+      st->end = IMAX(1, mode->effEBands-2*(data0>>5));
+      LM = (data0>>3)&0x3;
+      C = 1 + ((data0>>2)&0x1);
+      data++;
+      len--;
+      if (LM>mode->maxLM)
+         return OPUS_INVALID_PACKET;
+      if (frame_size < mode->shortMdctSize<<LM)
+         return OPUS_BUFFER_TOO_SMALL;
+      else
+         frame_size = mode->shortMdctSize<<LM;
+   } else {
+#else
+   {
+#endif
+      for (LM=0;LM<=mode->maxLM;LM++)
+         if (mode->shortMdctSize<<LM==frame_size)
+            break;
+      if (LM>mode->maxLM)
+         return OPUS_BAD_ARG;
+   }
+   M=1<<LM;
+   if (len<0 || len>1275 || pcm==NULL)
+      return OPUS_BAD_ARG;
+   N = M*mode->shortMdctSize;
+   effEnd = st->end;
+   if (effEnd > mode->effEBands)
+      effEnd = mode->effEBands;
+   if (data == NULL || len<=1)
+   {
+      celt_decode_lost(st, pcm, N, LM);
+      RESTORE_STACK;
+      return frame_size/st->downsample;
+   }
+   if (dec == NULL)
+   {
+      ec_dec_init(&_dec,(unsigned char*)data,len);
+      dec = &_dec;
+   }
+   if (C==1)
+   {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[i]=MAX16(oldBandE[i],oldBandE[nbEBands+i]);
+   }
+   total_bits = len*8;
+   tell = ec_tell(dec);
+   if (tell >= total_bits)
+      silence = 1;
+   else if (tell==1)
+      silence = ec_dec_bit_logp(dec, 15);
+   else
+      silence = 0;
+   if (silence)
+   {
+      /* Pretend we've read all the remaining bits */
+      tell = len*8;
+      dec->nbits_total+=tell-ec_tell(dec);
+   }
+   postfilter_gain = 0;
+   postfilter_pitch = 0;
+   postfilter_tapset = 0;
+   if (st->start==0 && tell+16 <= total_bits)
+   {
+      if(ec_dec_bit_logp(dec, 1))
+      {
+         int qg, octave;
+         octave = ec_dec_uint(dec, 6);
+         postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1;
+         qg = ec_dec_bits(dec, 3);
+         if (ec_tell(dec)+2<=total_bits)
+            postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2);
+         postfilter_gain = QCONST16(.09375f,15)*(qg+1);
+      }
+      tell = ec_tell(dec);
+   }
+   if (LM > 0 && tell+3 <= total_bits)
+   {
+      isTransient = ec_dec_bit_logp(dec, 3);
+      tell = ec_tell(dec);
+   }
+   else
+      isTransient = 0;
+   if (isTransient)
+      shortBlocks = M;
+   else
+      shortBlocks = 0;
+   /* Decode the global flags (first symbols in the stream) */
+   intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
+   /* Get band energies */
+   unquant_coarse_energy(mode, st->start, st->end, oldBandE,
+         intra_ener, dec, C, LM);
+   ALLOC(tf_res, nbEBands, int);
+   tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
+   tell = ec_tell(dec);
+   spread_decision = SPREAD_NORMAL;
+   if (tell+4 <= total_bits)
+      spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
+   ALLOC(cap, nbEBands, int);
+   init_caps(mode,cap,LM,C);
+   ALLOC(offsets, nbEBands, int);
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   tell = ec_tell_frac(dec);
+   for (i=st->start;i<st->end;i++)
+   {
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
+      width = C*(eBands[i+1]-eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i])
+      {
+         int flag;
+         flag = ec_dec_bit_logp(dec, dynalloc_loop_logp);
+         tell = ec_tell_frac(dec);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_bits -= quanta;
+         dynalloc_loop_logp = 1;
+      }
+      offsets[i] = boost;
+      /* Making dynalloc more likely */
+      if (boost>0)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
+   }
+   ALLOC(fine_quant, nbEBands, int);
+   alloc_trim = tell+(6<<BITRES) <= total_bits ?
+         ec_dec_icdf(dec, trim_icdf, 7) : 5;
+   bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+   ALLOC(pulses, nbEBands, int);
+   ALLOC(fine_priority, nbEBands, int);
+   codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
+   unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C);
+   /* Decode fixed codebook */
+   ALLOC(collapse_masks, C*nbEBands, unsigned char);
+   /**< Interleaved normalised MDCTs */
+   if (FREQ_X_BUF_SIZE >= C*N)
+      X = s_X;
+   else
+      ALLOC(X, C*N, celt_norm);
+   quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+         NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
+         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = ec_dec_bits(dec, 1);
+   }
+   unquant_energy_finalise(mode, st->start, st->end, oldBandE,
+         fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
+   if (anti_collapse_on)
+      anti_collapse(mode, X, collapse_masks, LM, C, N,
+            st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+   /**< Interleaved signal MDCTs */
+   if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N)
+      freq = s_freq;
+   else
+      ALLOC(freq, IMAX(CC,C)*N, celt_sig);
+   if (silence)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+      for (i=0;i<C*N;i++)
+         freq[i] = 0;
+   } else {
+      /* Synthesis */
+      denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
+   }
+   c=0; do {
+      OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+   } while (++c<CC);
+   c=0; do {
+      int bound = M*eBands[effEnd];
+      if (st->downsample!=1)
+         bound = IMIN(bound, N/st->downsample);
+      for (i=bound;i<N;i++)
+         freq[c*N+i] = 0;
+   } while (++c<C);
+   c=0; do {
+      out_syn[c] = out_mem[c]+MAX_PERIOD-N;
+   } while (++c<CC);
+   if (CC==2&&C==1)
+   {
+      for (i=0;i<N;i++)
+         freq[N+i] = freq[i];
+   }
+   if (CC==1&&C==2)
+   {
+      for (i=0;i<N;i++)
+         freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
+   }
+   /* Compute inverse MDCTs */
+   compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
+   c=0; do {
+      st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
+      st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
+      comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
+            st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
+            mode->window, overlap);
+      if (LM!=0)
+         comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
+               st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
+               mode->window, overlap);
+   } while (++c<CC);
+   st->postfilter_period_old = st->postfilter_period;
+   st->postfilter_gain_old = st->postfilter_gain;
+   st->postfilter_tapset_old = st->postfilter_tapset;
+   st->postfilter_period = postfilter_pitch;
+   st->postfilter_gain = postfilter_gain;
+   st->postfilter_tapset = postfilter_tapset;
+   if (LM!=0)
+   {
+      st->postfilter_period_old = st->postfilter_period;
+      st->postfilter_gain_old = st->postfilter_gain;
+      st->postfilter_tapset_old = st->postfilter_tapset;
+   }
+   if (C==1) {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[nbEBands+i]=oldBandE[i];
+   }
+   /* In case start or end were to change */
+   if (!isTransient)
+   {
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE2[i] = oldLogE[i];
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE[i] = oldBandE[i];
+      for (i=0;i<2*nbEBands;i++)
+         backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
+   } else {
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   c=0; do
+   {
+      for (i=0;i<st->start;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      for (i=st->end;i<nbEBands;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+   } while (++c<2);
+   st->rng = dec->rng;
+   /* We reuse freq[] as scratch space for the de-emphasis */
+   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
+   st->loss_count = 0;
+   RESTORE_STACK;
+   if (ec_tell(dec) > 8*len)
+      return OPUS_INTERNAL_ERROR;
+   if(ec_get_error(dec))
+      st->error = 1;
+   return frame_size/st->downsample;
+}
+#ifdef CUSTOM_MODES
+#ifdef FIXED_POINT
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{ /* FIXED_POINT variant: decodes straight into the caller's opus_int16 buffer, with no conversion step. */
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); /* NULL is passed as the final argument */
+}
+#ifndef DISABLE_FLOAT_API
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{ /* FIXED_POINT variant: decodes into a temporary opus_int16 buffer, then scales every sample by 1/32768 into pcm. */
+   int j, ret, C, N;
+   VARDECL(opus_int16, out);
+   ALLOC_STACK;
+   if (pcm==NULL)
+      return OPUS_BAD_ARG; /* NOTE(review): leaves without RESTORE_STACK; nothing has been ALLOC'd yet at this point */
+   C = st->channels;
+   N = frame_size;
+   ALLOC(out, C*N, opus_int16); /* scratch: C channels of N samples */
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   if (ret>0) /* a positive return is used as the per-channel sample count; any other value is handed back untouched */
+      for (j=0;j<C*ret;j++)
+         pcm[j]=out[j]*(1.f/32768.f);
+   RESTORE_STACK;
+   return ret;
+}
+#endif /* DISABLE_FLOAT_API */
+#else
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{ /* Non-FIXED_POINT variant: decodes straight into the caller's float buffer, with no conversion step. */
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); /* NULL is passed as the final argument */
+}
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{ /* Non-FIXED_POINT variant: decodes into a temporary celt_sig buffer, then converts every sample with FLOAT2INT16 into pcm. */
+   int j, ret, C, N;
+   VARDECL(celt_sig, out);
+   ALLOC_STACK;
+   if (pcm==NULL)
+      return OPUS_BAD_ARG; /* NOTE(review): leaves without RESTORE_STACK; nothing has been ALLOC'd yet at this point */
+   C = st->channels;
+   N = frame_size;
+   ALLOC(out, C*N, celt_sig); /* scratch: C channels of N samples */
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   if (ret>0) /* a positive return is used as the per-channel sample count; any other value is handed back untouched */
+      for (j=0;j<C*ret;j++)
+         pcm[j] = FLOAT2INT16 (out[j]);
+   RESTORE_STACK;
+   return ret;
+}
+#endif
+#endif /* CUSTOM_MODES */
+int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
+{ /* Variadic control entry point: dispatches on request and returns OPUS_OK, OPUS_BAD_ARG or OPUS_UNIMPLEMENTED. */
+   va_list ap;
+   va_start(ap, request);
+   switch (request)
+   {
+      case CELT_SET_START_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<0 || value>=st->mode->nbEBands) /* accepted range: 0 .. nbEBands-1 */
+            goto bad_arg;
+         st->start = value;
+      }
+      break;
+      case CELT_SET_END_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>st->mode->nbEBands) /* accepted range: 1 .. nbEBands */
+            goto bad_arg;
+         st->end = value;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>2) /* only 1 or 2 is accepted */
+            goto bad_arg;
+         st->stream_channels = value; /* updates stream_channels only; st->channels is not touched here */
+      }
+      break;
+      case CELT_GET_AND_CLEAR_ERROR_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value=st->error;
+         st->error = 0; /* the flag is reset once it has been reported */
+      }
+      break;
+      case OPUS_GET_LOOKAHEAD_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->overlap/st->downsample; /* overlap divided by the downsampling factor */
+      }
+      break;
+      case OPUS_RESET_STATE:
+      {
+         int i;
+         opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
+         lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels); /* the arrays below are located by offset inside the block that starts at _decode_mem */
+         oldBandE = lpc+st->channels*LPC_ORDER; /* lpc and oldBandE are only stepping stones to the two log-energy arrays */
+         oldLogE = oldBandE + 2*st->mode->nbEBands;
+         oldLogE2 = oldLogE + 2*st->mode->nbEBands;
+         OPUS_CLEAR((char*)&st->DECODER_RESET_START, /* clear from DECODER_RESET_START through the end of the decoder state */
+               opus_custom_decoder_get_size(st->mode, st->channels)-
+               ((char*)&st->DECODER_RESET_START - (char*)st)); /* byte count = total state size minus the offset of DECODER_RESET_START */
+         for (i=0;i<2*st->mode->nbEBands;i++)
+            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); /* after clearing, both log-energy histories are set to -28 (in DB_SHIFT format) */
+      }
+      break;
+      case OPUS_GET_PITCH_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->postfilter_period; /* reports the stored post-filter period */
+      }
+      break;
+      case CELT_GET_MODE_REQUEST:
+      {
+         const CELTMode ** value = va_arg(ap, const CELTMode**);
+         if (value==0)
+            goto bad_arg;
+         *value=st->mode;
+      }
+      break;
+      case CELT_SET_SIGNALLING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->signalling = value; /* NOTE(review): stored without a range check, unlike the other setters -- confirm this is intended */
+      }
+      break;
+      case OPUS_GET_FINAL_RANGE_REQUEST:
+      {
+         opus_uint32 * value = va_arg(ap, opus_uint32 *);
+         if (value==0)
+            goto bad_arg;
+         *value=st->rng;
+      }
+      break;
+      default:
+         goto bad_request; /* any request not listed above is reported as unimplemented */
+   }
+   va_end(ap);
+   return OPUS_OK;
+bad_arg: /* every exit path, including the error labels, calls va_end before returning */
+   va_end(ap);
+   return OPUS_BAD_ARG;
+bad_request:
+      va_end(ap);
+  return OPUS_UNIMPLEMENTED;
+}
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
index 66aed1de09..7ffe90a357 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
@@ -26,12 +26,13 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "celt_lpc.h"
 #include "stack_alloc.h"
 #include "mathops.h"
+#include "pitch.h"
 void _celt_lpc(
      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
@@ -87,42 +88,71 @@ int          p
 #endif
 }
-void celt_fir(const opus_val16 *x,
+void celt_fir(const opus_val16 *_x, /* FIR filter: _y[i] = _x[i] + (sum over j of num[j] * input from j+1 samples earlier), rounded down by SIG_SHIFT; mem carries the input history between calls */
         const opus_val16 *num,
-         opus_val16 *y,
+         opus_val16 *_y,
         int N,
         int ord,
         opus_val16 *mem)
 {
   int i,j;
+   VARDECL(opus_val16, rnum);
+   VARDECL(opus_val16, x);
+   SAVE_STACK;
+   ALLOC(rnum, ord, opus_val16); /* num[] in reversed order */
+   ALLOC(x, N+ord, opus_val16); /* ord history samples followed by the N new input samples */
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   for(i=0;i<ord;i++)
+      x[i] = mem[ord-i-1]; /* mem[] is read back-to-front into the start of x[] */
+   for (i=0;i<N;i++)
+      x[i+ord]=_x[i];
+   for(i=0;i<ord;i++)
+      mem[i] = _x[N-i-1]; /* keep the last ord inputs for the next call; NOTE(review): reads _x[N-ord .. N-1], so this assumes N >= ord -- confirm with callers */
+#ifdef SMALL_FOOTPRINT
   for (i=0;i<N;i++)
   {
-      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); /* start from the input sample itself, scaled up by SIG_SHIFT */
      for (j=0;j<ord;j++)
      {
-         sum += MULT16_16(num[j],mem[j]);
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
-      }
-      for (j=ord-1;j>=1;j--)
-      {
-         mem[j]=mem[j-1];
      }
-      mem[0] = x[i];
+      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); /* scale back down by SIG_SHIFT and saturate to 16 bits */
-      y[i] = ROUND16(sum, SIG_SHIFT);
   }
+#else
+   for (i=0;i<N-3;i+=4) /* four output samples per pass */
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(rnum, x+i, sum, ord); /* presumably adds rnum[j]*x[i+k+j] into sum[k] for k=0..3 -- confirm against pitch.h */
+      _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
+      _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
+      _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
+      _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
+   }
+   for (;i<N;i++) /* leftover samples when N is not a multiple of 4 */
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
+      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
+   }
+#endif
+   RESTORE_STACK;
 }
-void celt_iir(const opus_val32 *x,
+void celt_iir(const opus_val32 *_x, /* IIR filter: _y[i] = _x[i] - (sum over j of den[j] * rounded output from j+1 samples earlier); mem carries the 16-bit output history between calls */
         const opus_val16 *den,
-         opus_val32 *y,
+         opus_val32 *_y,
         int N,
         int ord,
         opus_val16 *mem)
 {
+#ifdef SMALL_FOOTPRINT
   int i,j;
   for (i=0;i<N;i++)
   {
-      opus_val32 sum = x[i];
+      opus_val32 sum = _x[i];
      for (j=0;j<ord;j++)
      {
         sum -= MULT16_16(den[j],mem[j]);
@@ -132,11 +162,65 @@ void celt_iir(const opus_val32 *x,
         mem[j]=mem[j-1];
      }
      mem[0] = ROUND16(sum,SIG_SHIFT);
-      y[i] = sum;
+      _y[i] = sum;
   }
+#else
+   int i,j;
+   VARDECL(opus_val16, rden);
+   VARDECL(opus_val16, y);
+   SAVE_STACK;
+   celt_assert((ord&3)==0);
+   ALLOC(rden, ord, opus_val16); /* den[] in reversed order */
+   ALLOC(y, N+ord, opus_val16); /* NEGATED rounded outputs: ord history entries first, then one slot per new sample */
+   for(i=0;i<ord;i++)
+      rden[i] = den[ord-i-1];
+   for(i=0;i<ord;i++)
+      y[i] = -mem[ord-i-1]; /* history is stored negated so that accumulating products subtracts the feedback term */
+   for(;i<N+ord;i++)
+      y[i]=0; /* outputs not computed yet contribute nothing to the kernel below */
+   for (i=0;i<N-3;i+=4)
+   {
+      /* Unroll by 4 as if it were an FIR filter */
+      opus_val32 sum[4];
+      sum[0]=_x[i];
+      sum[1]=_x[i+1];
+      sum[2]=_x[i+2];
+      sum[3]=_x[i+3];
+      xcorr_kernel(rden, y+i, sum, ord); /* presumably adds rden[j]*y[i+k+j] into sum[k] for k=0..3 -- confirm against pitch.h */
+      /* Patch up the result to compensate for the fact that this is an IIR */
+      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
+      _y[i  ] = sum[0];
+      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
+      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
+      _y[i+1] = sum[1];
+      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
+      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
+      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
+      _y[i+2] = sum[2];
+      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
+      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
+      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
+      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
+      _y[i+3] = sum[3];
+   }
+   for (;i<N;i++) /* leftover samples when N is not a multiple of 4 */
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum,rden[j],y[i+j]); /* y[] is negated, so accumulate just like the unrolled loop (was: sum -= ..., which flipped the feedback sign) */
+      y[i+ord] = -ROUND16(sum,SIG_SHIFT); /* keep the negated-history convention (was stored without the negation) */
+      _y[i] = sum;
+   }
+   for(i=0;i<ord;i++)
+      mem[i] = -y[N+ord-i-1]; /* rounded 16-bit outputs, newest first, matching mem[0] = ROUND16(sum,SIG_SHIFT) in the SMALL_FOOTPRINT path; also stays in bounds when N < ord */
+   RESTORE_STACK;
+#endif
 }
-void _celt_autocorr(
+int _celt_autocorr( /* fills ac[0..lag] with the autocorrelation of x (optionally windowed at both ends) and returns the variable shift; shift stays 0 unless FIXED_POINT is defined */
                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
                   const opus_val16       *window,
@@ -146,43 +230,79 @@ void _celt_autocorr(
                  )
 {
   opus_val32 d;
-   int i;
+   int i, k;
+   int fastN=n-lag; /* number of products handed to celt_pitch_xcorr() for every lag */
+   int shift;
+   const opus_val16 *xptr; /* points at x itself or at the working copy xx, whichever is current */
   VARDECL(opus_val16, xx);
   SAVE_STACK;
   ALLOC(xx, n, opus_val16);
   celt_assert(n>0);
   celt_assert(overlap>=0);
-   for (i=0;i<n;i++)
+   if (overlap == 0) /* no windowing requested: read x in place and skip the copy */
-      xx[i] = x[i];
-   for (i=0;i<overlap;i++)
   {
-      xx[i] = MULT16_16_Q15(x[i],window[i]);
+      xptr = x;
-      xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+   } else {
+      for (i=0;i<n;i++)
+         xx[i] = x[i];
+      for (i=0;i<overlap;i++) /* apply window[] to the first and the last overlap samples */
+      {
+         xx[i] = MULT16_16_Q15(x[i],window[i]);
+         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+      }
+      xptr = xx;
   }
+   shift=0;
 #ifdef FIXED_POINT
   {
-      opus_val32 ac0=0;
+      opus_val32 ac0;
-      int shift;
+      ac0 = 1+(n<<7); /* energy estimate starts from a bias of 1 + 128*n */
-      for(i=0;i<n;i++)
+      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9); /* odd n: take sample 0 alone so the loop below can step in pairs */
-         ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);
+      for(i=(n&1);i<n;i+=2)
-      ac0 += 1+n;
+      {
+         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
+         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
+      }
      shift = celt_ilog2(ac0)-30+10;
-      shift = (shift+1)/2;
+      shift = (shift)/2;
-      for(i=0;i<n;i++)
+      if (shift>0) /* only rescale (into xx) when the energy estimate calls for it */
-         xx[i] = VSHR32(xx[i], shift);
+      {
+         for(i=0;i<n;i++)
+            xx[i] = PSHR32(xptr[i], shift);
+         xptr = xx;
+      } else
+         shift = 0; /* negative values are clamped: the input is never scaled up */
   }
 #endif
-   while (lag>=0)
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1); /* presumably fills ac[0..lag] with the first fastN products of each lag -- confirm against pitch.h */
+   for (k=0;k<=lag;k++) /* then add the remaining products (i = k+fastN .. n-1) for every lag k */
   {
-      for (i = lag, d = 0; i < n; i++)
+      for (i = k+fastN, d = 0; i < n; i++)
-         d += xx[i] * xx[i-lag];
+         d = MAC16_16(d, xptr[i], xptr[i-k]);
-      ac[lag] = d;
+      ac[k] += d;
-      /*printf ("%f ", ac[lag]);*/
-      lag--;
   }
-   /*printf ("\n");*/
+#ifdef FIXED_POINT
-   ac[0] += 10;
+   shift = 2*shift; /* ac[] holds products of two scaled samples, hence twice the input shift */
+   if (shift<=0)
+      ac[0] += SHL32((opus_int32)1, -shift);
+   if (ac[0] < 268435456) /* below 2^28: shift every ac[] entry left by shift2 and lower the reported shift to match */
+   {
+      int shift2 = 29 - EC_ILOG(ac[0]);
+      for (i=0;i<=lag;i++)
+         ac[i] = SHL32(ac[i], shift2);
+      shift -= shift2;
+   } else if (ac[0] >= 536870912) /* 2^29 or more: shift every ac[] entry right by 1, or by 2 once ac[0] reaches 2^30 */
+   {
+      int shift2=1;
+      if (ac[0] >= 1073741824)
+         shift2++;
+      for (i=0;i<=lag;i++)
+         ac[i] = SHR32(ac[i], shift2);
+      shift += shift2;
+   }
+#endif
   RESTORE_STACK;
+   return shift;
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.h b/lib/rbcodec/codecs/libopus/celt/celt_lpc.h
index 2baa77edf8..19279a0ed6 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.h
+++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.h
@@ -48,6 +48,6 @@ void celt_iir(const opus_val32 *x,
         int ord,
         opus_val16 *mem);
-void _celt_autocorr(const opus_val16 *x, opus_val32 *ac, const opus_val16 *window, int overlap, int lag, int n);
+int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, const opus_val16 *window, int overlap, int lag, int n);
 #endif /* PLC_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/cf/fixed_cf.h b/lib/rbcodec/codecs/libopus/celt/cf/fixed_cf.h
new file mode 100644
index 0000000000..c442a55663
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/cf/fixed_cf.h
@@ -0,0 +1,56 @@
+/* Copyright (C) 2013 Nils Wallménius */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef FIXED_CF_H
+#define FIXED_CF_H
+#undef MULT16_32_Q15 /* drop any earlier definition; the inline-asm version below takes its place */
+static inline int32_t MULT16_32_Q15_cf(int32_t a, int32_t b)
+{ /* Multiply-accumulates (a<<16) and b into %acc0, then reads the accumulator out (clearing it) as the result. */
+  int32_t r;
+  asm volatile ("mac.l %[a], %[b], %%acc0;" /* NOTE(review): accumulates on top of %acc0, so this presumably relies on %acc0 being zero on entry and on the MAC unit's configured mode -- confirm */
+                "movclr.l %%acc0, %[r];" /* copy the accumulator to r and clear it */
+                : [r] "=r" (r)
+                : [a] "r" (a<<16), [b] "r" (b) /* a is moved into the upper 16 bits; NOTE(review): a<<16 on a negative int32_t is undefined in ISO C -- presumably acceptable for the GCC this asm targets, confirm */
+                : "cc");
+  return r;
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_cf(a, b))
+#undef MULT32_32_Q31 /* drop any earlier definition; the inline-asm version below takes its place */
+static inline int32_t MULT32_32_Q31_cf(int32_t a, int32_t b)
+{ /* Multiply-accumulates a and b into %acc0, then reads the accumulator out (clearing it) as the result. */
+  int32_t r;
+  asm volatile ("mac.l %[a], %[b], %%acc0;" /* NOTE(review): accumulates on top of %acc0, so this presumably relies on %acc0 being zero on entry and on the MAC unit's configured mode -- confirm */
+                "movclr.l %%acc0, %[r];" /* copy the accumulator to r and clear it */
+                : [r] "=r" (r)
+                : [a] "r" (a), [b] "r" (b)
+                : "cc");
+  return r;
+}
+#define MULT32_32_Q31(a, b) (MULT32_32_Q31_cf(a, b))
+#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/cf/kiss_fft_cf.h b/lib/rbcodec/codecs/libopus/celt/cf/kiss_fft_cf.h
new file mode 100644
index 0000000000..fe0fe2f3ca
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/cf/kiss_fft_cf.h
@@ -0,0 +1,57 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+#ifndef KISS_FFT_CF_H
+#define KISS_FFT_CF_H
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+#ifdef FIXED_POINT
+#undef C_MULC /* drop any earlier definition; the ColdFire inline-asm version below takes its place */
+#define C_MULC(m,a,b) /* m = a * conj(b); assumes a holds two 32-bit fields r,i and b two 16-bit fields r,i packed in one 32-bit word -- TODO confirm against the kiss_fft types */ \
+    { /* NOTE(review): plain braces rather than do{...}while(0), so `if (c) C_MULC(..); else` would not parse */ \
+      asm volatile("move.l (%[bp]), %%d2;" /* d2 = the 32-bit word at b */ \
+                   "clr.l %%d3;" \
+                   "move.w %%d2, %%d3;" /* d3 = low 16 bits of that word ... */ \
+                   "swap %%d3;" /* ... moved into the upper half of d3 */ \
+                   "clr.w %%d2;" /* d2 keeps only its upper 16 bits */ \
+                   "movem.l (%[ap]), %%d0-%%d1;" /* d0, d1 = the two 32-bit words at a */ \
+                   "mac.l %%d0, %%d2, %%acc0;" /* acc0 += d0*d2 */ \
+                   "mac.l %%d1, %%d3, %%acc0;" /* acc0 += d1*d3 */ \
+                   "mac.l %%d1, %%d2, %%acc1;" /* acc1 += d1*d2 */ \
+                   "msac.l %%d0, %%d3, %%acc1;" /* acc1 -= d0*d3 */ \
+                   "movclr.l %%acc0, %[mr];" /* (m).r = acc0, accumulator cleared */ \
+                   "movclr.l %%acc1, %[mi];" /* (m).i = acc1, accumulator cleared */ \
+                   : [mr] "=r" ((m).r), [mi] "=r" ((m).i) \
+                   : [ap] "a" (&(a)), [bp] "a" (&(b)) /* NOTE(review): a and b are read through these addresses, yet no "m" operand or "memory" clobber tells the compiler so -- confirm this is safe */ \
+                   : "d0", "d1", "d2", "d3", "cc"); \
+    }
+#endif /* FIXED_POINT */
+#endif /* KISS_FFT_CF_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/cpu_support.h b/lib/rbcodec/codecs/libopus/celt/cpu_support.h
new file mode 100644
index 0000000000..41481feb9c
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/cpu_support.h
@@ -0,0 +1,51 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+#if defined(OPUS_HAVE_RTCD) && defined(ARMv4_ASM)
+#include "arm/armcpu.h"
+/* We currently support 4 ARM variants:
+ * arch[0] -> ARMv4
+ * arch[1] -> ARMv5E
+ * arch[2] -> ARMv6
+ * arch[3] -> NEON
+ */
+#define OPUS_ARCHMASK 3
+#else
+#define OPUS_ARCHMASK 0
+static inline int opus_select_arch(void)
+{ /* Fallback used when the OPUS_HAVE_RTCD && ARMv4_ASM branch is not compiled in: always returns 0, matching OPUS_ARCHMASK 0. */
+  return 0;
+}
+#endif
+#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.c b/lib/rbcodec/codecs/libopus/celt/cwrs.c
index b8ade96fce..eb8fa1c807 100644
--- a/lib/rbcodec/codecs/libopus/celt/cwrs.c
+++ b/lib/rbcodec/codecs/libopus/celt/cwrs.c
@@ -28,14 +28,13 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "os_support.h"
 #include "cwrs.h"
 #include "mathops.h"
 #include "arch.h"
-#include "rate.h"
 #ifdef CUSTOM_MODES
@@ -72,64 +71,6 @@ int log2_frac(opus_uint32 val, int frac)
 }
 #endif
-#ifndef SMALL_FOOTPRINT
-#define MASK32 (0xFFFFFFFF)
-/*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/
-static const opus_uint32 INV_TABLE[53]={
-  0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7,
-  0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF,
-  0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7,
-  0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF,
-  0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97,
-  0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF,
-  0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587,
-  0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF,
-  0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977,
-  0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF,
-  0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67,
-  0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F,
-  0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57,
-  0xD8FD8FD9,
-};
-/*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact.
-  _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result
-   fits in 32 bits, but currently the table for multiplicative inverses is only
-   valid for _d<=52.*/
-static inline opus_uint32 imusdiv32odd(opus_uint32 _a,opus_uint32 _b,
- opus_uint32 _c,int _d){
-  celt_assert(_d<=52);
-  return (_a*_b-_c)*INV_TABLE[_d]&MASK32;
-}
-/*Computes (_a*_b-_c)/_d when the quotient is known to be exact.
-  _d does not actually have to be even, but imusdiv32odd will be faster when
-   it's odd, so you should use that instead.
-  _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the
-   table for multiplicative inverses is only valid for _d<=54).
-  _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in
-   32 bits.*/
-static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b,
- opus_uint32 _c,int _d){
-  opus_uint32 inv;
-  int           mask;
-  int           shift;
-  int           one;
-  celt_assert(_d>0);
-  celt_assert(_d<=54);
-  shift=EC_ILOG(_d^(_d-1));
-  inv=INV_TABLE[(_d-1)>>shift];
-  shift--;
-  one=1<<shift;
-  mask=one-1;
-  return (_a*(_b>>shift)-(_c>>shift)+
-   ((_a*(_b&mask)+one-(_c&mask))>>shift)-1)*inv&MASK32;
-}
-#endif /* SMALL_FOOTPRINT */
 /*Although derived separately, the pulse vector coding scheme is equivalent to
   a Pyramid Vector Quantizer \cite{Fis86}.
  Some additional notes about an early version appear at
@@ -249,46 +190,346 @@ static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b,
    year=1986
  }*/
-#ifndef SMALL_FOOTPRINT
+#if !defined(SMALL_FOOTPRINT)
-/*Compute U(2,_k).
-  Note that this may be called with _k=32768 (maxK[2]+1).*/
+/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?0:1*/
-static inline unsigned ucwrs2(unsigned _k){
+# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)])
-  celt_assert(_k>0);
+/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N
-  return _k+(_k-1);
+   with K pulses allocated to it.*/
-}
+# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1))
+/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)).
+  Thus, the number of entries in row I is the larger of the maximum number of
+   pulses we will ever allocate for a given N=I (K=128, or however many fit in
+   32 bits, whichever is smaller), plus one, and the maximum N for which
+   K=I-1 pulses fit in 32 bits.
+  The largest band size in an Opus Custom mode is 208.
+  Otherwise, we can limit things to the set of N which can be achieved by
+   splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48,
+   44, 36, 32, 24, 22, 18, 16, 8, 4, 2.*/
+#if defined(CUSTOM_MODES)
+static const opus_uint32 CELT_PVQ_U_DATA[1488]={
+#else
+static const opus_uint32 CELT_PVQ_U_DATA[1272] ICONST_ATTR ={
+#endif
+  /*N=0, K=0...176:*/
+  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0,
+#endif
+  /*N=1, K=1...176:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1,
+#endif
+  /*N=2, K=2...176:*/
+  3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
+  43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+  81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113,
+  115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143,
+  145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173,
+  175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203,
+  205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233,
+  235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263,
+  265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293,
+  295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323,
+  325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381,
+  383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411,
+  413, 415,
+#endif
+  /*N=3, K=3...176:*/
+  13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613,
+  685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861,
+  1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785,
+  3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385,
+  6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661,
+  9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961,
+  13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745,
+  17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013,
+  21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765,
+  26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001,
+  31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721,
+  37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925,
+  43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613,
+  50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785,
+  57461, 58141, 58825, 59513, 60205, 60901, 61601,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565,
+  70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013,
+  78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113,
+#endif
+  /*N=4, K=4...176:*/
+  63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017,
+  7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775,
+  30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153,
+  82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193,
+  161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575,
+  267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217,
+  410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951,
+  597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609,
+  833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023,
+  1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407,
+  1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759,
+  1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175,
+  2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751,
+  2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583,
+  3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767,
+  3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399,
+  4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575,
+  5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391,
+  6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943,
+  7085049, 7207551,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783,
+  8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967,
+  9511425, 9660423, 9810969, 9963071, 10116737, 10271975, 10428793, 10587199,
+  10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177,
+  11912575,
+#endif
+  /*N=5, K=5...176:*/
+  321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041,
+  50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401,
+  330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241,
+  1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241,
+  2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801,
+  4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849,
+  8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849,
+  13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809,
+  20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881,
+  29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641,
+  40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081,
+  55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609,
+  73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049,
+  95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641,
+  122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041,
+  155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321,
+  193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969,
+  238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889,
+  290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401,
+  351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241,
+  420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561,
+  500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929,
+  590359041, 604167209, 618216201, 632508801,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241,
+  755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161,
+  878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329,
+  1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041,
+  1143412929, 1166053121, 1189027881, 1212340489, 1235994241,
+#endif
+  /*N=6, K=6...96:*/
+  1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047,
+  335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409,
+  2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793,
+  11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455,
+  29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189,
+  64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651,
+  128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185,
+  235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647,
+  402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229,
+  655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283,
+  1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135,
+  1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187,
+  2011371957, 2120032959,
+#if defined(CUSTOM_MODES)
+  /*...109:*/
+  2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U,
+  3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U,
+  4012305913U,
+#endif
+  /*N=7, K=7...54*/
+  8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777,
+  1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233,
+  19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013,
+  88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805,
+  292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433,
+  793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821,
+  1667010073, 1870535785, 2094367717,
+#if defined(CUSTOM_MODES)
+  /*...60:*/
+  2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U,
+#endif
+  /*N=8, K=8...37*/
+  48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767,
+  9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017,
+  104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351,
+  638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615,
+  2229491905U,
+#if defined(CUSTOM_MODES)
+  /*...40:*/
+  2691463695U, 3233240945U, 3866006015U,
+#endif
+  /*N=9, K=9...28:*/
+  265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777,
+  39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145,
+  628496897, 872893441, 1196924561, 1621925137, 2173806145U,
+#if defined(CUSTOM_MODES)
+  /*...29:*/
+  2883810113U,
+#endif
+  /*N=10, K=10...24:*/
+  1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073,
+  254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U,
+  3375210671U,
+  /*N=11, K=11...19:*/
+  8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585,
+  948062325, 1616336765,
+#if defined(CUSTOM_MODES)
+  /*...20:*/
+  2684641785U,
+#endif
+  /*N=12, K=12...18:*/
+  45046719, 103274625, 224298231, 464387817, 921406335, 1759885185,
+  3248227095U,
+  /*N=13, K=13...16:*/
+  251595969, 579168825, 1267854873, 2653649025U,
+  /*N=14, K=14:*/
+  1409933619
+};
-/*Compute V(2,_k).*/
+#if defined(CUSTOM_MODES)
-static inline opus_uint32 ncwrs2(int _k){
+const opus_uint32 *const CELT_PVQ_U_ROW[15]={
-  celt_assert(_k>0);
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415,
-  return 4*(opus_uint32)_k;
+  CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030,
+  CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389,
+  CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455,
+  CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
+};
+#else
+const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
+  CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
+  CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
+  CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240,
+  CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257
+};
+#endif
+#if defined(CUSTOM_MODES)
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
+  int k;
+  /*_maxk==0 => there's nothing to do.*/
+  celt_assert(_maxk>0);
+  _bits[0]=0;
+  for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac);
 }
+#endif
-/*Compute U(3,_k).
+static opus_uint32 icwrs(int _n,const int *_y){
-  Note that this may be called with _k=32768 (maxK[3]+1).*/
+  opus_uint32 i;
-static inline opus_uint32 ucwrs3(unsigned _k){
+  int         j;
-  celt_assert(_k>0);
+  int         k;
-  return (2*(opus_uint32)_k-2)*_k+1;
+  celt_assert(_n>=2);
+  j=_n-1;
+  i=_y[j]<0;
+  k=abs(_y[j]);
+  do{
+    j--;
+    i+=CELT_PVQ_U(_n-j,k);
+    k+=abs(_y[j]);
+    if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1);
+  }
+  while(j>0);
+  return i;
 }
-/*Compute V(3,_k).*/
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
-static inline opus_uint32 ncwrs3(int _k){
  celt_assert(_k>0);
-  return 2*(2*(unsigned)_k*(opus_uint32)_k+1);
+  ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
 }
-/*Compute U(4,_k).*/
+static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
-static inline opus_uint32 ucwrs4(int _k){
+  opus_uint32 p;
+  int         s;
+  int         k0;
  celt_assert(_k>0);
-  return imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1);
+  celt_assert(_n>1);
+  while(_n>2){
+    opus_uint32 q;
+    /*Lots of pulses case:*/
+    if(_k>=_n){
+      const opus_uint32 *row;
+      row=CELT_PVQ_U_ROW[_n];
+      /*Are the pulses in this dimension negative?*/
+      p=row[_k+1];
+      s=-(_i>=p);
+      _i-=p&s;
+      /*Count how many pulses were placed in this dimension.*/
+      k0=_k;
+      q=row[_n];
+      if(q>_i){
+        celt_assert(p>q);
+        _k=_n;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+      }
+      else for(p=row[_k];p>_i;p=row[_k])_k--;
+      _i-=p;
+      *_y++=(k0-_k+s)^s;
+    }
+    /*Lots of dimensions case:*/
+    else{
+      /*Are there any pulses in this dimension at all?*/
+      p=CELT_PVQ_U_ROW[_k][_n];
+      q=CELT_PVQ_U_ROW[_k+1][_n];
+      if(p<=_i&&_i<q){
+        _i-=p;
+        *_y++=0;
+      }
+      else{
+        /*Are the pulses in this dimension negative?*/
+        s=-(_i>=q);
+        _i-=q&s;
+        /*Count how many pulses were placed in this dimension.*/
+        k0=_k;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+        _i-=p;
+        *_y++=(k0-_k+s)^s;
+      }
+    }
+    _n--;
+  }
+  /*_n==2*/
+  p=2*_k+1;
+  s=-(_i>=p);
+  _i-=p&s;
+  k0=_k;
+  _k=(_i+1)>>1;
+  if(_k)_i-=2*_k-1;
+  *_y++=(k0-_k+s)^s;
+  /*_n==1*/
+  s=-(int)_i;
+  *_y=(_k+s)^s;
 }
-/*Compute V(4,_k).*/
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
-static inline opus_uint32 ncwrs4(int _k){
+  cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
-  celt_assert(_k>0);
-  return ((_k*(opus_uint32)_k+2)*_k)/3<<3;
 }
-#endif /* SMALL_FOOTPRINT */
+#else /* SMALL_FOOTPRINT */
 /*Computes the next row/column of any recurrence that obeys the relation
   u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
@@ -333,125 +574,18 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
  celt_assert(len>=3);
  _u[0]=0;
  _u[1]=um2=1;
-#ifndef SMALL_FOOTPRINT
+  /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
-  /*_k>52 doesn't work in the false branch due to the limits of INV_TABLE,
+  /*If _n==1, _u[i] should be 1 for i>1.*/
-    but _k isn't tested here because k<=52 for n=7*/
+  celt_assert(_n>=2);
-  if(_n<=6)
+  /*If _k==0, the following do-while loop will overflow the buffer.*/
-#endif
+  celt_assert(_k>0);
- {
+  k=2;
-    /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
+  do _u[k]=(k<<1)-1;
-    /*If _n==1, _u[i] should be 1 for i>1.*/
+  while(++k<len);
-    celt_assert(_n>=2);
+  for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
-    /*If _k==0, the following do-while loop will overflow the buffer.*/
-    celt_assert(_k>0);
-    k=2;
-    do _u[k]=(k<<1)-1;
-    while(++k<len);
-    for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
-  }
-#ifndef SMALL_FOOTPRINT
-  else{
-    opus_uint32 um1;
-    opus_uint32 n2m1;
-    _u[2]=n2m1=um1=(_n<<1)-1;
-    for(k=3;k<len;k++){
-      /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/
-      _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
-      if(++k>=len)break;
-      _u[k]=um1=imusdiv32odd(n2m1,um2,um1,(k-1)>>1)+um1;
-    }
-  }
-#endif /* SMALL_FOOTPRINT */
  return _u[_k]+_u[_k+1];
 }
-#ifndef SMALL_FOOTPRINT
-/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
-   set of size 1 with associated sign bits.
-  _y: Returns the vector of pulses.*/
-static inline void cwrsi1(int _k,opus_uint32 _i,int *_y){
-  int s;
-  s=-(int)_i;
-  _y[0]=(_k+s)^s;
-}
-/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
-   set of size 2 with associated sign bits.
-  _y: Returns the vector of pulses.*/
-static inline void cwrsi2(int _k,opus_uint32 _i,int *_y){
-  opus_uint32 p;
-  int           s;
-  int           yj;
-  p=ucwrs2(_k+1U);
-  s=-(_i>=p);
-  _i-=p&s;
-  yj=_k;
-  _k=(_i+1)>>1;
-  p=_k?ucwrs2(_k):0;
-  _i-=p;
-  yj-=_k;
-  _y[0]=(yj+s)^s;
-  cwrsi1(_k,_i,_y+1);
-}
-/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
-   set of size 3 with associated sign bits.
-  _y: Returns the vector of pulses.*/
-static void cwrsi3(int _k,opus_uint32 _i,int *_y){
-  opus_uint32 p;
-  int           s;
-  int           yj;
-  p=ucwrs3(_k+1U);
-  s=-(_i>=p);
-  _i-=p&s;
-  yj=_k;
-  /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
-     _i<2147418113=U(3,32768)).*/
-  _k=_i>0?(isqrt32(2*_i-1)+1)>>1:0;
-  p=_k?ucwrs3(_k):0;
-  _i-=p;
-  yj-=_k;
-  _y[0]=(yj+s)^s;
-  cwrsi2(_k,_i,_y+1);
-}
-/*Returns the _i'th combination of _k elements (at most 1172) chosen from a set
-   of size 4 with associated sign bits.
-  _y: Returns the vector of pulses.*/
-static void cwrsi4(int _k,opus_uint32 _i,int *_y){
-  opus_uint32 p;
-  int           s;
-  int           yj;
-  int           kl;
-  int           kr;
-  p=ucwrs4(_k+1);
-  s=-(_i>=p);
-  _i-=p&s;
-  yj=_k;
-  /*We could solve a cubic for k here, but the form of the direct solution does
-     not lend itself well to exact integer arithmetic.
-    Instead we do a binary search on U(4,K).*/
-  kl=0;
-  kr=_k;
-  for(;;){
-    _k=(kl+kr)>>1;
-    p=_k?ucwrs4(_k):0;
-    if(p<_i){
-      if(_k>=kr)break;
-      kl=_k+1;
-    }
-    else if(p>_i)kr=_k-1;
-    else break;
-  }
-  _i-=p;
-  yj-=_k;
-  _y[0]=(yj+s)^s;
-  cwrsi3(_k,_i,_y+1);
-}
-#endif /* SMALL_FOOTPRINT */
 /*Returns the _i'th combination of _k elements chosen from a set of size _n
   with associated sign bits.
  _y: Returns the vector of pulses.
@@ -488,55 +622,6 @@ static inline opus_uint32 icwrs1(const int *_y,int *_k){
  return _y[0]<0;
 }
-#ifndef SMALL_FOOTPRINT
-/*Returns the index of the given combination of K elements chosen from a set
-   of size 2 with associated sign bits.
-  _y: The vector of pulses, whose sum of absolute values is K.
-  _k: Returns K.*/
-static inline opus_uint32 icwrs2(const int *_y,int *_k){
-  opus_uint32 i;
-  int           k;
-  i=icwrs1(_y+1,&k);
-  i+=k?ucwrs2(k):0;
-  k+=abs(_y[0]);
-  if(_y[0]<0)i+=ucwrs2(k+1U);
-  *_k=k;
-  return i;
-}
-/*Returns the index of the given combination of K elements chosen from a set
-   of size 3 with associated sign bits.
-  _y: The vector of pulses, whose sum of absolute values is K.
-  _k: Returns K.*/
-static inline opus_uint32 icwrs3(const int *_y,int *_k){
-  opus_uint32 i;
-  int           k;
-  i=icwrs2(_y+1,&k);
-  i+=k?ucwrs3(k):0;
-  k+=abs(_y[0]);
-  if(_y[0]<0)i+=ucwrs3(k+1U);
-  *_k=k;
-  return i;
-}
-/*Returns the index of the given combination of K elements chosen from a set
-   of size 4 with associated sign bits.
-  _y: The vector of pulses, whose sum of absolute values is K.
-  _k: Returns K.*/
-static inline opus_uint32 icwrs4(const int *_y,int *_k){
-  opus_uint32 i;
-  int           k;
-  i=icwrs3(_y+1,&k);
-  i+=k?ucwrs4(k):0;
-  k+=abs(_y[0]);
-  if(_y[0]<0)i+=ucwrs4(k+1);
-  *_k=k;
-  return i;
-}
-#endif /* SMALL_FOOTPRINT */
 /*Returns the index of the given combination of K elements chosen from a set
   of size _n with associated sign bits.
  _y:  The vector of pulses, whose sum of absolute values must be _k.
@@ -544,8 +629,8 @@ static inline opus_uint32 icwrs4(const int *_y,int *_k){
 static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
 opus_uint32 *_u){
  opus_uint32 i;
-  int           j;
+  int         j;
-  int           k;
+  int         k;
  /*We can't unroll the first two iterations of the loop unless _n>=2.*/
  celt_assert(_n>=2);
  _u[0]=0;
@@ -590,58 +675,23 @@ void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
 void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
  opus_uint32 i;
+  VARDECL(opus_uint32,u);
+  opus_uint32 nc;
+  SAVE_STACK;
  celt_assert(_k>0);
-#ifndef SMALL_FOOTPRINT
+  ALLOC(u,_k+2U,opus_uint32);
-  switch(_n){
+  i=icwrs(_n,_k,&nc,_y,u);
-    case 2:{
+  ec_enc_uint(_enc,i,nc);
-      i=icwrs2(_y,&_k);
+  RESTORE_STACK;
-      ec_enc_uint(_enc,i,ncwrs2(_k));
-    }break;
-    case 3:{
-      i=icwrs3(_y,&_k);
-      ec_enc_uint(_enc,i,ncwrs3(_k));
-    }break;
-    case 4:{
-      i=icwrs4(_y,&_k);
-      ec_enc_uint(_enc,i,ncwrs4(_k));
-    }break;
-     default:
-    {
-#endif
-      VARDECL(opus_uint32,u);
-      opus_uint32 nc;
-      SAVE_STACK;
-      ALLOC(u,_k+2U,opus_uint32);
-      i=icwrs(_n,_k,&nc,_y,u);
-      ec_enc_uint(_enc,i,nc);
-      RESTORE_STACK;
-#ifndef SMALL_FOOTPRINT
-    }
-    break;
-  }
-#endif
 }
-void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
-{
+  VARDECL(opus_uint32,u);
+  SAVE_STACK;
  celt_assert(_k>0);
-#ifndef SMALL_FOOTPRINT
+  ALLOC(u,_k+2U,opus_uint32);
-   switch(_n){
+  cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
-    case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
+  RESTORE_STACK;
-    case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break;
-    case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break;
-    default:
-    {
-#endif
-/*      VARDECL(opus_uint32,u);
-      SAVE_STACK;
-      ALLOC(u,_k+2U,opus_uint32); */
-      opus_uint32 u[MAX_PULSES+2];
-      cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
-/*      RESTORE_STACK; */
-#ifndef SMALL_FOOTPRINT
-    }
-    break;
-  }
-#endif
 }
+#endif /* SMALL_FOOTPRINT */
diff --git a/lib/rbcodec/codecs/libopus/celt/ecintrin.h b/lib/rbcodec/codecs/libopus/celt/ecintrin.h
index 3dffa5f95c..be57dd40de 100644
--- a/lib/rbcodec/codecs/libopus/celt/ecintrin.h
+++ b/lib/rbcodec/codecs/libopus/celt/ecintrin.h
@@ -48,7 +48,7 @@
 /*Count leading zeros.
  This macro should only be used for implementing ec_ilog(), if it is defined.
  All other code should use EC_ILOG() instead.*/
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
 # include <intrin.h>
 /*In _DEBUG mode this is not an intrinsic by default.*/
 # pragma intrinsic(_BitScanReverse)
diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.c b/lib/rbcodec/codecs/libopus/celt/entcode.c
index 80e64fefaa..fa5d7c7c2c 100644
--- a/lib/rbcodec/codecs/libopus/celt/entcode.c
+++ b/lib/rbcodec/codecs/libopus/celt/entcode.c
@@ -26,13 +26,18 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "entcode.h"
 #include "arch.h"
 #if !defined(EC_CLZ)
+/*This is a fallback for systems where we don't know how to access
+   a BSR or CLZ instruction (see ecintrin.h).
+  If you are optimizing Opus on a new platform and it has a native CLZ or
+   BZR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be
+   an easy performance win.*/
 int ec_ilog(opus_uint32 _v){
  /*On a Pentium M, this branchless version tested as the fastest on
     1,000,000,000 random 32-bit integers, edging out a similar version with
diff --git a/lib/rbcodec/codecs/libopus/celt/entdec.c b/lib/rbcodec/codecs/libopus/celt/entdec.c
index ff8442d534..3c264685c2 100644
--- a/lib/rbcodec/codecs/libopus/celt/entdec.c
+++ b/lib/rbcodec/codecs/libopus/celt/entdec.c
@@ -26,7 +26,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include <stddef.h>
@@ -85,7 +85,7 @@
   number=3,
   pages="256--294",
   month=Jul,
-   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
+   URL="http://www.stanford.edu/class/ee398a/handouts/papers/Moffat98ArithmCoding.pdf"
  }*/
 static int ec_read_byte(ec_dec *_this){
diff --git a/lib/rbcodec/codecs/libopus/celt/entenc.c b/lib/rbcodec/codecs/libopus/celt/entenc.c
index 0ec6e91fd7..a7e34ecef9 100644
--- a/lib/rbcodec/codecs/libopus/celt/entenc.c
+++ b/lib/rbcodec/codecs/libopus/celt/entenc.c
@@ -26,7 +26,7 @@
 */
 #if defined(HAVE_CONFIG_H)
-# include "opus_config.h"
+# include "config.h"
 #endif
 #include "os_support.h"
 #include "arch.h"
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
index 28a1598d3e..0e77976e83 100644
--- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
@@ -42,64 +42,12 @@
 /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
 #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16((a),((b)&0x0000ffff)),16))
-#if defined(CPU_COLDFIRE)
-static inline int32_t MULT16_32_Q15(int32_t a, int32_t b)
-{
-  int32_t r;
-  asm volatile ("mac.l %[a], %[b], %%acc0;"
-                "movclr.l %%acc0, %[r];"
-                : [r] "=r" (r)
-                : [a] "r" (a<<16), [b] "r" (b)
-                : "cc");
-  return r;
-}
-#elif defined(CPU_ARM)
-static inline int32_t MULT16_32_Q15(int32_t a, int32_t b)
-{
-  int32_t lo, hi;
-  asm volatile("smull %[lo], %[hi], %[b], %[a] \n\t"
-               "mov %[lo], %[lo], lsr #15 \n\t"
-               "orr %[hi], %[lo], %[hi], lsl #17 \n\t"
-               : [lo] "=&r" (lo), [hi] "=&r" (hi)
-               : [a] "r" (a), [b] "r" (b) );
-  return(hi);
-}
-#else
 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
-#endif
-#if defined(CPU_COLDFIRE)
-static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
-{
-  int32_t r;
-  asm volatile ("mac.l %[a], %[b], %%acc0;"
-                "movclr.l %%acc0, %[r];"
-                : [r] "=r" (r)
-                : [a] "r" (a), [b] "r" (b)
-                : "cc");
-  return r;
-}
-#elif defined(CPU_ARM)
-static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
-{
-  int32_t lo, hi;
-  asm volatile("smull %[lo], %[hi], %[a], %[b] \n\t"
-               "mov %[lo], %[lo], lsr #31 \n\t"
-               "orr %[hi], %[lo], %[hi], lsl #1 \n\t"
-               : [lo] "=&r" (lo), [hi] "=&r" (hi)
-               : [a] "r" (a), [b] "r" (b) );
-  return(hi);
-}
-#else
 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
-//#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
-#define MULT32_32_Q31(a,b) (opus_val32)((((int64_t)(a)) * ((int64_t)(b)))>>31)
-#endif
 /** Compile-time conversion of float constant to 16-bit value */
 #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
@@ -136,6 +84,8 @@ static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
 #define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
 #define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
+#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))
 /** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */
 #define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
 /** Divide by two */
@@ -160,7 +110,9 @@ static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
 /** 16x16 multiply-add where the result fits in 32 bits */
 #define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
-/** 16x32 multiply-add, followed by a 15-bit shift right. Results fits in 32 bits */
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
 #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
index 01049d5344..e2b8f3b3da 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
@@ -31,7 +31,7 @@
 #ifndef SKIP_CONFIG_H
 #  ifdef HAVE_CONFIG_H
-#    include "opus_config.h"
+#    include "config.h"
 #  endif
 #endif
@@ -40,7 +40,6 @@
 #include "os_support.h"
 #include "mathops.h"
 #include "stack_alloc.h"
-#include "os_support.h"
 /* The guts header contains all the multiplication and addition macros that are defined for
   complex numbers.  It also delares the kf_ internal functions.
@@ -145,8 +144,6 @@ static void kf_bfly4(
         C_ADDTO(*Fout, scratch[1]);
         C_ADD( scratch[3] , scratch[0] , scratch[2] );
         C_SUB( scratch[4] , scratch[0] , scratch[2] );
-         Fout[m2].r = PSHR32(Fout[m2].r, 2);
-         Fout[m2].i = PSHR32(Fout[m2].i, 2);
         C_SUB( Fout[m2], *Fout, scratch[3] );
         tw1 += fstride;
         tw2 += fstride*2;
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
index c6bb4bfd45..66cf1f2126 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
@@ -128,7 +128,14 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
    f[k].r and f[k].i
 * */
 void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
-void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) ICODE_ATTR;
+#if defined(CPU_COLDFIRE)
+#define IFFT_ICODE ICODE_ATTR
+#else
+#define IFFT_ICODE
+#endif
+void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) IFFT_ICODE;
 void opus_fft_free(const kiss_fft_state *cfg);
diff --git a/lib/rbcodec/codecs/libopus/celt/laplace.c b/lib/rbcodec/codecs/libopus/celt/laplace.c
index 6fa4009d57..a7bca874b6 100644
--- a/lib/rbcodec/codecs/libopus/celt/laplace.c
+++ b/lib/rbcodec/codecs/libopus/celt/laplace.c
@@ -27,7 +27,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "laplace.h"
diff --git a/lib/rbcodec/codecs/libopus/celt/mathops.c b/lib/rbcodec/codecs/libopus/celt/mathops.c
index 1af6672592..21fd942960 100644
--- a/lib/rbcodec/codecs/libopus/celt/mathops.c
+++ b/lib/rbcodec/codecs/libopus/celt/mathops.c
@@ -32,7 +32,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "mathops.h"
@@ -123,6 +123,8 @@ opus_val32 celt_sqrt(opus_val32 x)
   static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
   if (x==0)
      return 0;
+   else if (x>=1073741824)
+      return 32767;
   k = (celt_ilog2(x)>>1)-7;
   x = VSHR32(x, 2*k);
   n = x-32768;
diff --git a/lib/rbcodec/codecs/libopus/celt/mathops.h b/lib/rbcodec/codecs/libopus/celt/mathops.h
index 4e97795606..44fa97c697 100644
--- a/lib/rbcodec/codecs/libopus/celt/mathops.h
+++ b/lib/rbcodec/codecs/libopus/celt/mathops.h
@@ -43,6 +43,41 @@
 unsigned isqrt32(opus_uint32 _val);
+#ifndef OVERRIDE_CELT_MAXABS16
+static inline opus_val32 celt_maxabs16(const opus_val16 *x, int len)
+{
+   int i;
+   opus_val16 maxval = 0;
+   opus_val16 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX16(maxval, x[i]);
+      minval = MIN16(minval, x[i]);
+   }
+   return MAX32(EXTEND32(maxval),-EXTEND32(minval));
+}
+#endif
+#ifndef OVERRIDE_CELT_MAXABS32
+#ifdef FIXED_POINT
+static inline opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+{
+   int i;
+   opus_val32 maxval = 0;
+   opus_val32 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX32(maxval, x[i]);
+      minval = MIN32(minval, x[i]);
+   }
+   return MAX32(maxval, -minval);
+}
+#else
+#define celt_maxabs32(x,len) celt_maxabs16(x,len)
+#endif
+#endif
 #ifndef FIXED_POINT
 #define PI 3.141592653f
@@ -117,27 +152,6 @@ static inline opus_int16 celt_ilog2(opus_int32 x)
 }
 #endif
-#ifndef OVERRIDE_CELT_MAXABS16
-static inline opus_val16 celt_maxabs16(opus_val16 *x, int len)
-{
-   int i;
-   opus_val16 maxval = 0;
-   for (i=0;i<len;i++)
-      maxval = MAX16(maxval, ABS16(x[i]));
-   return maxval;
-}
-#endif
-#ifndef OVERRIDE_CELT_MAXABS32
-static inline opus_val32 celt_maxabs32(opus_val32 *x, int len)
-{
-   int i;
-   opus_val32 maxval = 0;
-   for (i=0;i<len;i++)
-      maxval = MAX32(maxval, ABS32(x[i]));
-   return maxval;
-}
-#endif
 /** Integer log in base2. Defined for zero, but not for negative numbers */
 static inline opus_int16 celt_zlog2(opus_val32 x)
@@ -176,6 +190,13 @@ static inline opus_val16 celt_log2(opus_val32 x)
 #define D1 22804
 #define D2 14819
 #define D3 10204
+static inline opus_val32 celt_exp2_frac(opus_val16 x)
+{
+   opus_val16 frac;
+   frac = SHL16(x, 4);
+   return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
+}
 /** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
 static inline opus_val32 celt_exp2(opus_val16 x)
 {
@@ -186,8 +207,7 @@ static inline opus_val32 celt_exp2(opus_val16 x)
      return 0x7f000000;
   else if (integer < -15)
      return 0;
-   frac = SHL16(x-SHL16(integer,10),4);
+   frac = celt_exp2_frac(x-SHL16(integer,10));
-   frac = ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
   return VSHR32(EXTEND32(frac), -integer-2);
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c
index 0df77fd5ec..72ea180568 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.c
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.c
@@ -41,7 +41,7 @@
 #ifndef SKIP_CONFIG_H
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #endif
@@ -110,12 +110,14 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
   int N, N2, N4;
   kiss_twiddle_scalar sine;
   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_scalar, f2);
   SAVE_STACK;
   N = l->n;
   N >>= shift;
   N2 = N>>1;
   N4 = N>>2;
   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N2, kiss_fft_scalar);
   /* sin(x) ~= x here */
 #ifdef FIXED_POINT
   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
@@ -132,7 +134,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
      kiss_fft_scalar * OPUS_RESTRICT yp = f;
      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
-      for(i=0;i<(overlap>>2);i++)
+      for(i=0;i<((overlap+3)>>2);i++)
      {
         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
@@ -144,7 +146,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
      }
      wp1 = window;
      wp2 = window+overlap-1;
-      for(;i<N4-(overlap>>2);i++)
+      for(;i<N4-((overlap+3)>>2);i++)
      {
         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
         *yp++ = *xp2;
@@ -181,12 +183,12 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
   }
   /* N/4 complex FFT, down-scales by 4/N */
-   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in);
+   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
   /* Post-rotate */
   {
      /* Temp pointers to make it really clear to the compiler what we're doing */
-      const kiss_fft_scalar * OPUS_RESTRICT fp = in;
+      const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
      const kiss_twiddle_scalar *t = &l->trig[0];
@@ -208,35 +210,20 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
 }
 #endif
-#define S_F_BUF_SIZE (1920>>1) /* N = 1920 for static modes */
-static kiss_fft_scalar s_f2[S_F_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR;
 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
 {
   int i;
   int N, N2, N4;
-   int tstride = 1<<shift;
   kiss_twiddle_scalar sine;
-   VARDECL(kiss_fft_scalar, f);
+/*   VARDECL(kiss_fft_scalar, f2);
-   VARDECL(kiss_fft_scalar, f2);
+   SAVE_STACK; */
-   SAVE_STACK;
   N = l->n;
   N >>= shift;
   N2 = N>>1;
   N4 = N>>2;
-   kiss_fft_scalar s_f[S_F_BUF_SIZE];
+/*   ALLOC(f2, N2, kiss_fft_scalar); */
+   kiss_fft_scalar f2[N2]; /* worst case 3840b */
-   if (S_F_BUF_SIZE >= N2)
-   {
-      f  = s_f;
-      f2 = s_f2;
-   }
-   else
-   {
-      ALLOC(f , N2, kiss_fft_scalar);
-      ALLOC(f2, N2, kiss_fft_scalar);
-   }
   /* sin(x) ~= x here */
 #ifdef FIXED_POINT
   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
@@ -250,102 +237,78 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
-      const kiss_twiddle_scalar *t0 = &l->trig[0];
+      const kiss_twiddle_scalar *t = &l->trig[0];
-      const kiss_twiddle_scalar *t1 = &l->trig[N4<<shift];
      for(i=0;i<N4;i++)
      {
         kiss_fft_scalar yr, yi;
-         yr = -S_MUL(*xp2, *t0) + S_MUL(*xp1, *t1);
+         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
-         yi = -S_MUL(*xp2, *t1) - S_MUL(*xp1, *t0);
+         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
         /* works because the cos is nearly one */
         *yp++ = yr - S_MUL(yi,sine);
         *yp++ = yi + S_MUL(yr,sine);
         xp1+=2*stride;
         xp2-=2*stride;
-         t0 += tstride;
-         t1 -= tstride;
      }
   }
   /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
-   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f);
+   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
-   /* Post-rotate */
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
   {
-      kiss_fft_scalar * OPUS_RESTRICT fp = f;
+      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
-      const kiss_twiddle_scalar *t0 = &l->trig[0];
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
-      const kiss_twiddle_scalar *t1 = &l->trig[N4<<shift];
+      const kiss_twiddle_scalar *t = &l->trig[0];
-      for(i=0;i<N4;i++)
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
      {
         kiss_fft_scalar re, im, yr, yi;
-         re = fp[0];
+         kiss_twiddle_scalar t0, t1;
-         im = fp[1];
+         re = yp0[0];
+         im = yp0[1];
+         t0 = t[i<<shift];
+         t1 = t[(N4-i)<<shift];
         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = S_MUL(re, *t0) - S_MUL(im, *t1);
+         yr = S_MUL(re,t0) - S_MUL(im,t1);
-         yi = S_MUL(im, *t0) + S_MUL(re, *t1);
+         yi = S_MUL(im,t0) + S_MUL(re,t1);
+         re = yp1[0];
+         im = yp1[1];
         /* works because the cos is nearly one */
-         *fp++ = yr - S_MUL(yi,sine);
+         yp0[0] = -(yr - S_MUL(yi,sine));
-         *fp++ = yi + S_MUL(yr,sine);
+         yp1[1] = yi + S_MUL(yr,sine);
-         t0 += tstride;
-         t1 -= tstride;
-      }
-   }
-   /* De-shuffle the components for the middle of the window only */
-   {
-      const kiss_fft_scalar * OPUS_RESTRICT fp1 = f;
-      const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1;
-      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
-      for(i = 0; i < N4; i++)
-      {
-         *yp++ =-*fp1;
-         *yp++ = *fp2;
-         fp1 += 2;
-         fp2 -= 2;
-      }
-   }
-   out -= (N2-overlap)>>1;
-   /* Mirror on both sides for TDAC */
-   {
-      kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1;
-      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1;
-      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2;
-      const opus_val16 * OPUS_RESTRICT wp1 = window;
-      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
-      i = N4-overlap/2;
+         t0 = t[(N4-i-1)<<shift];
-      xp1 -= N4-overlap/2;
+         t1 = t[(i+1)<<shift];
-      fp1 -= N4-overlap/2;
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-      OPUS_COPY(xp1+1, fp1+1, N4-overlap/2);
+         yr = S_MUL(re,t0) - S_MUL(im,t1);
-      for(; i < N4; i++)
+         yi = S_MUL(im,t0) + S_MUL(re,t1);
-      {
+         /* works because the cos is nearly one */
-         kiss_fft_scalar x1;
+         yp1[0] = -(yr - S_MUL(yi,sine));
-         x1 = *fp1--;
+         yp0[1] = yi + S_MUL(yr,sine);
-         *yp1++ +=-MULT16_32_Q15(*wp1, x1);
+         yp0 += 2;
-         *xp1-- += MULT16_32_Q15(*wp2, x1);
+         yp1 -= 2;
-         wp1++;
-         wp2--;
      }
   }
+   /* Mirror on both sides for TDAC */
   {
-      kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4;
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
-      kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
-      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2);
      const opus_val16 * OPUS_RESTRICT wp1 = window;
      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
-      i = N4-overlap/2;
+      for(i = 0; i < overlap/2; i++)
-      OPUS_COPY(xp2, fp2, N4-overlap/2);
-      xp2 += N4-overlap/2;
-      fp2 += N4-overlap/2;
-      for(; i < N4; i++)
      {
-         kiss_fft_scalar x2;
+         kiss_fft_scalar x1, x2;
-         x2 = *fp2++;
+         x1 = *xp1;
-         *yp2--  = MULT16_32_Q15(*wp1, x2);
+         x2 = *yp1;
-         *xp2++  = MULT16_32_Q15(*wp2, x2);
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
         wp1++;
         wp2--;
      }
   }
-   RESTORE_STACK;
+/*   RESTORE_STACK; */
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.h b/lib/rbcodec/codecs/libopus/celt/mdct.h
index 933aafcda1..d72182138a 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.h
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.h
@@ -61,11 +61,6 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
      kiss_fft_scalar * OPUS_RESTRICT out,
      const opus_val16 *window, int overlap, int shift, int stride);
-#if defined(CPU_COLDFIRE)
-#define MDCT_ICODE ICODE_ATTR
-#else
-#define MDCT_ICODE
-#endif
 /** Compute a backward MDCT (no scaling) and performs weighted overlap-add
    (scales implicitly by 1/2) */
 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
diff --git a/lib/rbcodec/codecs/libopus/celt/modes.c b/lib/rbcodec/codecs/libopus/celt/modes.c
index d44cb3b9de..42e68e1cb7 100644
--- a/lib/rbcodec/codecs/libopus/celt/modes.c
+++ b/lib/rbcodec/codecs/libopus/celt/modes.c
@@ -28,7 +28,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "celt.h"
@@ -345,6 +345,14 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
   mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands);
   if (mode->eBands==NULL)
      goto failure;
+#if !defined(SMALL_FOOTPRINT)
+   /* Make sure we don't allocate a band larger than our PVQ table.
+      208 should be enough, but let's be paranoid. */
+   if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])<<LM >
+    208) {
+       goto failure;
+   }
+#endif
   mode->effEBands = mode->nbEBands;
   while (mode->eBands[mode->effEBands] > mode->shortMdctSize)
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c
index 1b7efd945d..0d8be13025 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.c
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.c
@@ -32,7 +32,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "pitch.h"
@@ -77,7 +77,7 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
 #ifndef FIXED_POINT
         /* Considering the range of xcorr16, this should avoid both underflows
            and overflows (inf) when squaring xcorr16 */
-         xcorr16 *= 1e-12;
+         xcorr16 *= 1e-12f;
 #endif
         num = MULT16_16_Q15(xcorr16,xcorr16);
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
@@ -102,13 +102,57 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
   }
 }
+static void celt_fir5(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         opus_val16 *mem)
+{
+   int i;
+   opus_val16 num0, num1, num2, num3, num4;
+   opus_val32 mem0, mem1, mem2, mem3, mem4;
+   num0=num[0];
+   num1=num[1];
+   num2=num[2];
+   num3=num[3];
+   num4=num[4];
+   mem0=mem[0];
+   mem1=mem[1];
+   mem2=mem[2];
+   mem3=mem[3];
+   mem4=mem[4];
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      sum = MAC16_16(sum,num0,mem0);
+      sum = MAC16_16(sum,num1,mem1);
+      sum = MAC16_16(sum,num2,mem2);
+      sum = MAC16_16(sum,num3,mem3);
+      sum = MAC16_16(sum,num4,mem4);
+      mem4 = mem3;
+      mem3 = mem2;
+      mem2 = mem1;
+      mem1 = mem0;
+      mem0 = x[i];
+      y[i] = ROUND16(sum, SIG_SHIFT);
+   }
+   mem[0]=mem0;
+   mem[1]=mem1;
+   mem[2]=mem2;
+   mem[3]=mem3;
+   mem[4]=mem4;
+}
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
      int len, int C)
 {
   int i;
   opus_val32 ac[5];
   opus_val16 tmp=Q15ONE;
-   opus_val16 lpc[4], mem[4]={0,0,0,0};
+   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
+   opus_val16 lpc2[5];
+   opus_val16 c1 = QCONST16(.8f,15);
 #ifdef FIXED_POINT
   int shift;
   opus_val32 maxabs = celt_maxabs32(x[0], len);
@@ -161,14 +205,89 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
   }
-   celt_fir(x_lp, lpc, x_lp, len>>1, 4, mem);
+   /* Add a zero */
+   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
+   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
+   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
+   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
+   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
+   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
+}
-   mem[0]=0;
+#if 0 /* This is a simple version of the pitch correlation that should work
-   lpc[0]=QCONST16(.8f,12);
+         well on DSPs like Blackfin and TI C5x/C6x */
-   celt_fir(x_lp, lpc, x_lp, len>>1, 1, mem);
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i, j;
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, x[j],y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
 }
+#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i,j;
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(_x, _y+i, sum, len);
+      xcorr[i]=sum[0];
+      xcorr[i+1]=sum[1];
+      xcorr[i+2]=sum[2];
+      xcorr[i+3]=sum[3];
+#ifdef FIXED_POINT
+      sum[0] = MAX32(sum[0], sum[1]);
+      sum[2] = MAX32(sum[2], sum[3]);
+      sum[0] = MAX32(sum[0], sum[2]);
+      maxcorr = MAX32(maxcorr, sum[0]);
+#endif
+   }
+   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   for (;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, _x[j],_y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
+}
+#endif
 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
                  int len, int max_pitch, int *pitch)
 {
@@ -179,8 +298,8 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
   VARDECL(opus_val16, y_lp4);
   VARDECL(opus_val32, xcorr);
 #ifdef FIXED_POINT
-   opus_val32 maxcorr=1;
+   opus_val32 maxcorr;
-   opus_val16 xmax, ymax;
+   opus_val32 xmax, ymax;
   int shift=0;
 #endif
   int offset;
@@ -204,7 +323,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
 #ifdef FIXED_POINT
   xmax = celt_maxabs16(x_lp4, len>>2);
   ymax = celt_maxabs16(y_lp4, lag>>2);
-   shift = celt_ilog2(MAX16(1, MAX16(xmax, ymax)))-11;
+   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
   if (shift>0)
   {
      for (j=0;j<len>>2;j++)
@@ -220,16 +339,11 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
   /* Coarse search with 4x decimation */
-   for (i=0;i<max_pitch>>2;i++)
-   {
-      opus_val32 sum = 0;
-      for (j=0;j<len>>2;j++)
-         sum = MAC16_16(sum, x_lp4[j],y_lp4[i+j]);
-      xcorr[i] = MAX32(-1, sum);
 #ifdef FIXED_POINT
-      maxcorr = MAX32(maxcorr, sum);
+   maxcorr =
 #endif
-   }
+   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
 #ifdef FIXED_POINT
                   , 0, maxcorr
@@ -288,11 +402,13 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
   int k, i, T, T0;
   opus_val16 g, g0;
   opus_val16 pg;
-   opus_val32 xy,xx,yy;
+   opus_val32 xy,xx,yy,xy2;
   opus_val32 xcorr[3];
   opus_val32 best_xy, best_yy;
   int offset;
   int minperiod0;
+   VARDECL(opus_val32, yy_lookup);
+   SAVE_STACK;
   minperiod0 = minperiod;
   maxperiod /= 2;
@@ -305,13 +421,16 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      *T0_=maxperiod-1;
   T = T0 = *T0_;
-   xx=xy=yy=0;
+   ALLOC(yy_lookup, maxperiod+1, opus_val32);
-   for (i=0;i<N;i++)
+   dual_inner_prod(x, x, x-T0, N, &xx, &xy);
+   yy_lookup[0] = xx;
+   yy=xx;
+   for (i=1;i<=maxperiod;i++)
   {
-      xy = MAC16_16(xy, x[i], x[i-T0]);
+      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
-      xx = MAC16_16(xx, x[i], x[i]);
+      yy_lookup[i] = MAX32(0, yy);
-      yy = MAC16_16(yy, x[i-T0],x[i-T0]);
   }
+   yy = yy_lookup[T0];
   best_xy = xy;
   best_yy = yy;
 #ifdef FIXED_POINT
@@ -332,6 +451,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      int T1, T1b;
      opus_val16 g1;
      opus_val16 cont=0;
+      opus_val16 thresh;
      T1 = (2*T0+k)/(2*k);
      if (T1 < minperiod)
         break;
@@ -346,15 +466,9 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      {
         T1b = (2*second_check[k]*T0+k)/(2*k);
      }
-      xy=yy=0;
+      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
-      for (i=0;i<N;i++)
+      xy += xy2;
-      {
+      yy = yy_lookup[T1] + yy_lookup[T1b];
-         xy = MAC16_16(xy, x[i], x[i-T1]);
-         yy = MAC16_16(yy, x[i-T1], x[i-T1]);
-         xy = MAC16_16(xy, x[i], x[i-T1b]);
-         yy = MAC16_16(yy, x[i-T1b], x[i-T1b]);
-      }
 #ifdef FIXED_POINT
      {
         opus_val32 x2y2;
@@ -373,7 +487,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
         cont = HALF32(prev_gain);
      else
         cont = 0;
-      if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation */
+      if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
+      else if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
+      if (g1 > thresh)
      {
         best_xy = xy;
         best_yy = yy;
@@ -407,6 +528,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
   if (*T0_<minperiod0)
      *T0_=minperiod0;
+   RESTORE_STACK;
   return pg;
 }
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.h b/lib/rbcodec/codecs/libopus/celt/pitch.h
index 2757071a6f..caffd24bc4 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.h
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.h
@@ -36,6 +36,10 @@
 #include "modes.h"
+#if defined(__SSE__) && !defined(FIXED_POINT)
+#include "x86/pitch_sse.h"
+#endif
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
      int len, int C);
@@ -45,4 +49,97 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      int N, int *T0, int prev_period, opus_val16 prev_gain);
+/* OPT: This is the kernel you really want to optimize. It gets used a lot
+   by the prefilter and by the PLC. */
+#ifndef OVERRIDE_XCORR_KERNEL
+static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   opus_val16 y_0, y_1, y_2, y_3;
+   y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+   y_0=*y++;
+   y_1=*y++;
+   y_2=*y++;
+   for (j=0;j<len-3;j+=4)
+   {
+      opus_val16 tmp;
+      tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+      tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+      tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+      tmp=*x++;
+      y_2=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_3);
+      sum[1] = MAC16_16(sum[1],tmp,y_0);
+      sum[2] = MAC16_16(sum[2],tmp,y_1);
+      sum[3] = MAC16_16(sum[3],tmp,y_2);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+   }
+   if (j<len)
+   {
+      opus_val16 tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+   }
+}
+#endif /* OVERRIDE_XCORR_KERNEL */
+#ifndef OVERRIDE_DUAL_INNER_PROD
+static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   opus_val32 xy01=0;
+   opus_val32 xy02=0;
+   for (i=0;i<N;i++)
+   {
+      xy01 = MAC16_16(xy01, x[i], y01[i]);
+      xy02 = MAC16_16(xy02, x[i], y02[i]);
+   }
+   *xy1 = xy01;
+   *xy2 = xy02;
+}
+#endif
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/quant_bands.c b/lib/rbcodec/codecs/libopus/celt/quant_bands.c
index 5ad5311f84..79685e17cb 100644
--- a/lib/rbcodec/codecs/libopus/celt/quant_bands.c
+++ b/lib/rbcodec/codecs/libopus/celt/quant_bands.c
@@ -27,7 +27,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "quant_bands.h"
@@ -40,8 +40,8 @@
 #include "rate.h"
 #ifdef FIXED_POINT
-/* Mean energy in each band quantized in Q6 */
+/* Mean energy in each band quantized in Q4 */
-static const signed char eMeans[25] = {
+const signed char eMeans[25] = {
      103,100, 92, 85, 81,
       77, 72, 70, 78, 75,
       73, 71, 78, 74, 69,
@@ -49,8 +49,8 @@ static const signed char eMeans[25] = {
       60, 60, 60, 60, 60
 };
 #else
-/* Mean energy in each band quantized in Q6 and converted back to float */
+/* Mean energy in each band quantized in Q4 and converted back to float */
-static const opus_val16 eMeans[25] = {
+const opus_val16 eMeans[25] = {
      6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f,
      4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f,
      4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f,
@@ -157,7 +157,7 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
      const opus_val16 *eBands, opus_val16 *oldEBands,
      opus_int32 budget, opus_int32 tell,
      const unsigned char *prob_model, opus_val16 *error, ec_enc *enc,
-      int C, int LM, int intra, opus_val16 max_decay)
+      int C, int LM, int intra, opus_val16 max_decay, int lfe)
 {
   int i, c;
   int badness = 0;
@@ -222,6 +222,8 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
            if (bits_left < 16)
               qi = IMAX(-1, qi);
         }
+         if (lfe && i>=2)
+            qi = IMIN(qi, 0);
         if (budget-tell >= 15)
         {
            int pi;
@@ -253,13 +255,13 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
      } while (++c < C);
   }
-   return badness;
+   return lfe ? 0 : badness;
 }
 void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
      opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes,
-      int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate)
+      int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe)
 {
   int intra;
   opus_val16 max_decay;
@@ -280,15 +282,17 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
   if (tell+3 > budget)
      two_pass = intra = 0;
-   /* Encode the global flags using a simple probability model
+   max_decay = QCONST16(16.f,DB_SHIFT);
-      (first symbols in the stream) */
+   if (end-start>10)
+   {
 #ifdef FIXED_POINT
-      max_decay = MIN32(QCONST16(16.f,DB_SHIFT), SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));
+      max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));
 #else
-   max_decay = MIN32(16.f, .125f*nbAvailableBytes);
+      max_decay = MIN32(max_decay, .125f*nbAvailableBytes);
 #endif
+   }
+   if (lfe)
+      max_decay=3;
   enc_start_state = *enc;
   ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
@@ -298,7 +302,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
   if (two_pass || intra)
   {
      badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget,
-            tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay);
+            tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe);
   }
   if (!intra)
@@ -325,7 +329,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
      *enc = enc_start_state;
      badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget,
-            tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay);
+            tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe);
      if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra)))
      {
@@ -532,25 +536,6 @@ void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *
   }
 }
-void log2Amp(const CELTMode *m, int start, int end,
-      celt_ener *eBands, const opus_val16 *oldEBands, int C)
-{
-   int c, i;
-   c=0;
-   do {
-      for (i=0;i<start;i++)
-         eBands[i+c*m->nbEBands] = 0;
-      for (;i<end;i++)
-      {
-         opus_val16 lg = ADD16(oldEBands[i+c*m->nbEBands],
-                         SHL16((opus_val16)eMeans[i],6));
-         eBands[i+c*m->nbEBands] = PSHR32(celt_exp2(lg),4);
-      }
-      for (;i<m->nbEBands;i++)
-         eBands[i+c*m->nbEBands] = 0;
-   } while (++c < C);
-}
 void amp2Log2(const CELTMode *m, int effEnd, int end,
      celt_ener *bandE, opus_val16 *bandLogE, int C)
 {
diff --git a/lib/rbcodec/codecs/libopus/celt/quant_bands.h b/lib/rbcodec/codecs/libopus/celt/quant_bands.h
index bec2855cf0..0490bca4b4 100644
--- a/lib/rbcodec/codecs/libopus/celt/quant_bands.h
+++ b/lib/rbcodec/codecs/libopus/celt/quant_bands.h
@@ -35,6 +35,12 @@
 #include "entdec.h"
 #include "mathops.h"
+#ifdef FIXED_POINT
+extern const signed char eMeans[25];
+#else
+extern const opus_val16 eMeans[25];
+#endif
 void amp2Log2(const CELTMode *m, int effEnd, int end,
      celt_ener *bandE, opus_val16 *bandLogE, int C);
@@ -45,7 +51,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
      opus_val16 *error, ec_enc *enc, int C, int LM,
      int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra,
-      int two_pass, int loss_rate);
+      int two_pass, int loss_rate, int lfe);
 void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C);
diff --git a/lib/rbcodec/codecs/libopus/celt/rate.c b/lib/rbcodec/codecs/libopus/celt/rate.c
index 3b056d8dc7..e474cf5004 100644
--- a/lib/rbcodec/codecs/libopus/celt/rate.c
+++ b/lib/rbcodec/codecs/libopus/celt/rate.c
@@ -27,7 +27,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include <math.h>
@@ -248,7 +248,7 @@ void compute_pulse_cache(CELTMode *m, int LM)
 static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
      const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance,
      int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits,
-      int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev)
+      int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
 {
   opus_int32 psum;
   int lo, hi;
@@ -353,7 +353,7 @@ static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int
 #ifdef FUZZING
            if ((rand()&0x1) == 0)
 #else
-            if (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4)
+            if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
 #endif
            {
               ec_enc_bit_logp(ec, 1, 1);
@@ -524,7 +524,7 @@ static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int
 }
 int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
-      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev)
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
 {
   int lo, hi, len, j;
   int codedBands;
@@ -631,7 +631,7 @@ int compute_allocation(const CELTMode *m, int start, int end, const int *offsets
   }
   codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap,
         total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv,
-         pulses, ebits, fine_priority, C, LM, ec, encode, prev);
+         pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth);
   RESTORE_STACK;
   return codedBands;
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/rate.h b/lib/rbcodec/codecs/libopus/celt/rate.h
index e0d5022326..263fde9820 100644
--- a/lib/rbcodec/codecs/libopus/celt/rate.h
+++ b/lib/rbcodec/codecs/libopus/celt/rate.h
@@ -96,6 +96,6 @@ static inline int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
 @return Total number of bits allocated
 */
 int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero,
-      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev);
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h
index a6f06d2263..1c093a8cdc 100644
--- a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h
+++ b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h
@@ -146,4 +146,26 @@ extern char *global_stack_top;
 #endif /* VAR_ARRAYS */
+#ifdef ENABLE_VALGRIND /* debug helpers: forward definedness checks to Valgrind Memcheck client requests */
+#include <valgrind/memcheck.h>
+#define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, (len)*sizeof(*(ptr))) /* len elements of *ptr */
+#define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, (len)*sizeof(*(ptr)))
+#define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", (value), __FILE__, __LINE__);}while(0)
+#define OPUS_FPRINTF fprintf
+#else /* no Valgrind: checks evaluate to 0 and printing is a no-op; NOTE(review): the *_COND macros have no fallback here */
+static inline int _opus_false(void) {return 0;}
+#define OPUS_CHECK_ARRAY(ptr, len) _opus_false()
+#define OPUS_CHECK_VALUE(value) _opus_false()
+#define OPUS_PRINT_INT(value) do{}while(0)
+#define OPUS_FPRINTF (void)
+#endif /* ENABLE_VALGRIND */
 #endif /* STACK_ALLOC_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c
index f6b6e4fc64..af991bb052 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.c
+++ b/lib/rbcodec/codecs/libopus/celt/vq.c
@@ -27,7 +27,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "mathops.h"
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.h b/lib/rbcodec/codecs/libopus/celt/vq.h
index 1ceeeeb268..ffdc69cdc4 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.h
+++ b/lib/rbcodec/codecs/libopus/celt/vq.h
@@ -40,11 +40,9 @@
 /** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
  * the pitch and a combination of pulses such that its norm is still equal
  * to 1. This is the function that will typically require the most CPU.
- * @param x Residual signal to quantise/encode (returns quantised version)
+ * @param X Residual signal to quantise/encode (returns quantised version)
- * @param W Perceptual weight to use when optimising (currently unused)
 * @param N Number of samples to encode
 * @param K Number of pulses to use
- * @param p Pitch vector (it is assumed that p+x is a unit vector)
 * @param enc Entropy encoder state
 * @ret A mask indicating which blocks in the band received pulses
 */
@@ -56,10 +54,9 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
      );
 /** Algebraic pulse decoder
- * @param x Decoded normalised spectrum (returned)
+ * @param X Decoded normalised spectrum (returned)
 * @param N Number of samples to decode
 * @param K Number of pulses to use
- * @param p Pitch vector (automatically added to x)
 * @param dec Entropy decoder state
 * @ret A mask indicating which blocks in the band received pulses
 */