Sync opus codec to upstream git

Sync opus codec to upstream commit 02fed471a4568852d6618e041c4f2af0d7730ee2 (August 30 2013) This brings in a lot of optimizations but also makes the diff between our codec and the upstream much smaller as most of our optimizations have been upstreamed or supeceded. Speedups across the board for CELT mode files: 64kbps 128kbps H300 9.82MHz 15.48MHz c200 4.86MHz 9.63MHz fuze v1 10.32MHz 15.92MHz For the silk mode test file (16kbps) arm targets get a speedup of about 2MHz while the H300 is 7.8MHz slower, likely because it's now using the pseudostack more rather than the real stack which is in iram. Patches to get around that are upcomming. Change-Id: Ifecf963e461c51ac42e09dac1e91bc4bc3b12fa3
author: Nils Wallménius <nils@rockbox.org> 2013-05-20 22:25:57 +0200
committer: Nils Wallménius <nils@rockbox.org> 2013-08-31 08:30:51 +0200
commit: 580b307fd791c0997a8831bc800bba87797bfb7e (patch)
tree: 807846056f06fd944a750ce41217a877910ebd59 /lib/rbcodec/codecs/libopus/celt
parent: 74761b70acd96cecc0d35450dd56a98ad9ee7d3d (diff)
download: rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.tar.gz
rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.zip
39 files changed, 3681 insertions, 3968 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
index 63e2548843..866a6520ca 100644
--- a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
+++ b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
@@ -61,76 +61,9 @@
      do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
          (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
-#if defined (CPU_COLDFIRE)
-#   define C_MULC(m,a,b) \
-    { \
-      asm volatile("move.l (%[bp]), %%d2;" \
-                   "clr.l %%d3;" \
-                   "move.w %%d2, %%d3;" \
-                   "swap %%d3;" \
-                   "clr.w %%d2;" \
-                   "movem.l (%[ap]), %%d0-%%d1;" \
-                   "mac.l %%d0, %%d2, %%acc0;" \
-                   "mac.l %%d1, %%d3, %%acc0;" \
-                   "mac.l %%d1, %%d2, %%acc1;" \
-                   "msac.l %%d0, %%d3, %%acc1;" \
-                   "movclr.l %%acc0, %[mr];" \
-                   "movclr.l %%acc1, %[mi];" \
-                   : [mr] "=r" ((m).r), [mi] "=r" ((m).i) \
-                   : [ap] "a" (&(a)), [bp] "a" (&(b)) \
-                   : "d0", "d1", "d2", "d3", "cc"); \
-    }
-#elif defined(CPU_ARM)
-#if (ARM_ARCH < 5)
-#   define C_MULC(m,a,b) \
-    { \
-      asm volatile( \
-                   "ldm %[ap], {r0,r1}                  \n\t" \
-                   "ldrsh r2, [%[bp], #0]                 \n\t" \
-                   "ldrsh r3, [%[bp], #2]                 \n\t" \
-                   \
-                   "smull r4, %[mr], r0, r2               \n\t" \
-                   "smlal r4, %[mr], r1, r3               \n\t" \
-                   "mov   r4, r4, lsr #15                 \n\t" \
-                   "orr   %[mr], r4, %[mr], lsl #17       \n\t" \
-                   \
-                   "smull r4, %[mi], r1, r2               \n\t" \
-                   "rsb   r3, r3, #0                      \n\t" \
-                   "smlal r4, %[mi], r0, r3               \n\t" \
-                   "mov   r4, r4, lsr #15                 \n\t" \
-                   "orr   %[mi], r4, %[mi], lsl #17       \n\t" \
-                   : [mr] "=r" ((m).r), [mi] "=r" ((m).i) \
-                   : [ap] "r" (&(a)), [bp] "r" (&(b)) \
-                   : "r0", "r1", "r2", "r3", "r4"); \
-}
-#else
-/*same as above but using armv5 packed multiplies*/
-#   define C_MULC(m,a,b) \
-    { \
-      asm volatile( \
-                   "ldm %[ap], {r0,r1}            \n\t" \
-                   "ldr r2, [%[bp], #0]                 \n\t" \
-                                \
-                   "smulwb r4, r0, r2               \n\t"  /*r4=a.r*b.r*/    \
-                   "smlawt %[mr], r1, r2, r4        \n\t"  /*m.r=r4+a.i*b.i*/\
-                   "mov   %[mr], %[mr], lsl #1      \n\t"  /*Q15 not Q16*/   \
-                    \
-                   "smulwb r1, r1, r2               \n\t"  /*r1=a.i*b.r*/    \
-                   "smulwt r4, r0, r2               \n\t"  /*r4=a.r*b.i*/    \
-                   "sub %[mi], r1, r4               \n\t" \
-                   "mov   %[mi], %[mi], lsl #1      \n\t" \
-                   : [mr] "=r" ((m).r), [mi] "=r" ((m).i) \
-                   : [ap] "r" (&(a)), [bp] "r" (&(b)) \
-                   : "r0", "r1", "r2", "r4"); \
-}
-#endif /*ARMv5 code*/
-#else
 #   define C_MULC(m,a,b) \
      do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
          (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
-#endif
 #   define C_MUL4(m,a,b) \
      do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \
@@ -161,6 +94,18 @@
    do {(res).r = ADD32((res).r,(a).r);  (res).i = SUB32((res).i,(a).i); \
    }while(0)
+#if defined(ARMv4_ASM)
+#include "arm/kiss_fft_armv4.h"
+#endif
+#if defined(ARMv5E_ASM)
+#include "arm/kiss_fft_armv5e.h"
+#endif
+#if defined(CF_ASM)
+#include "cf/kiss_fft_cf.h"
+#endif
 #else  /* not FIXED_POINT*/
 #   define S_MUL(a,b) ( (a)*(b) )
diff --git a/lib/rbcodec/codecs/libopus/celt/arch.h b/lib/rbcodec/codecs/libopus/celt/arch.h
index 03cda40f69..c0f9413d00 100644
--- a/lib/rbcodec/codecs/libopus/celt/arch.h
+++ b/lib/rbcodec/codecs/libopus/celt/arch.h
@@ -100,6 +100,7 @@ typedef opus_val32 celt_ener;
 #define DB_SHIFT 10
 #define EPSILON 1
+#define VERY_SMALL 0
 #define VERY_LARGE16 ((opus_val16)32767)
 #define Q15_ONE ((opus_val16)32767)
@@ -112,16 +113,18 @@ typedef opus_val32 celt_ener;
 #include "fixed_generic.h"
-#ifdef ARM5E_ASM
+#ifdef ARMv5E_ASM
-#include "fixed_arm5e.h"
+#include "arm/fixed_armv5e.h"
-#elif defined (ARM4_ASM)
+#elif defined (ARMv4_ASM)
-#include "fixed_arm4.h"
+#include "arm/fixed_armv4.h"
 #elif defined (BFIN_ASM)
 #include "fixed_bfin.h"
 #elif defined (TI_C5X_ASM)
 #include "fixed_c5x.h"
 #elif defined (TI_C6X_ASM)
 #include "fixed_c6x.h"
+#elif defined (CF_ASM)
+#include "cf/fixed_cf.h"
 #endif
 #endif
@@ -140,6 +143,7 @@ typedef float celt_ener;
 #define NORM_SCALING 1.f
 #define EPSILON 1e-15f
+#define VERY_SMALL 1e-30f
 #define VERY_LARGE16 1e15f
 #define Q15_ONE ((opus_val16)1.f)
@@ -161,6 +165,7 @@ typedef float celt_ener;
 #define SHR(a,shift)    (a)
 #define SHL(a,shift)    (a)
 #define SATURATE(x,a)   (x)
+#define SATURATE16(x)   (x)
 #define ROUND16(a,shift)  (a)
 #define HALF16(x)       (.5f*(x))
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
new file mode 100644
index 0000000000..bcacc343e8
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
@@ -0,0 +1,76 @@
+/* Copyright (C) 2013 Xiph.Org Foundation and contributors */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef FIXED_ARMv4_H
+#define FIXED_ARMv4_H
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q16
+static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b),"r"(a<<16)
+  );
+  return rd_hi;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b))
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q15
+static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b), "r"(a<<16)
+  );
+  /*We intentionally don't OR in the high bit of rd_lo for speed.*/
+  return rd_hi<<1;
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#undef MULT32_32_Q31
+#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31)
+#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
new file mode 100644
index 0000000000..80632c4a94
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
@@ -0,0 +1,116 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO
+   Copyright (C) 2013      Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef FIXED_ARMv5E_H
+#define FIXED_ARMv5E_H
+#include "fixed_armv4.h"
+/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q16
+static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b),"r"(a)
+  );
+  return res;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
+/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#undef MULT16_32_Q15
+static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(a)
+  );
+  return res<<1;
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+static inline opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_32_Q15\n\t"
+      "smlawb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(b<<1), "r"(a), "r"(c)
+  );
+  return res;
+}
+#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+/** 16x16 multiply-add where the result fits in 32 bits */
+#undef MAC16_16
+static inline opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_16\n\t"
+      "smlabb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b), "r"(c)
+  );
+  return res;
+}
+#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
+/** 16x16 multiplication where the result fits in 32 bits */
+#undef MULT16_16
+static inline opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_16\n\t"
+      "smulbb %0, %1, %2;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
+#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv4.h
new file mode 100644
index 0000000000..e4faad6f2b
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv4.h
@@ -0,0 +1,121 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+#ifndef KISS_FFT_ARMv4_H
+#define KISS_FFT_ARMv4_H
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+#ifdef FIXED_POINT
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL4\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #17\n\t" \
+            "smlal %[br], %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #15\n\t" \
+            "mov %[br], %[br], lsr #17\n\t" \
+            "orr %[mr], %[br], %[mr], lsl #15\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MULC\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mr], r0, %[br]\n\t" \
+            "smlal %[tt], %[mr], r1, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull %[br], %[mi], r1, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal %[br], %[mi], r0, %[bi]\n\t" \
+            "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mi], %[br], %[mi], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+#endif /* FIXED_POINT */
+#endif /* KISS_FFT_ARMv4_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv5e.h
new file mode 100644
index 0000000000..9eca183d77
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/kiss_fft_armv5e.h
@@ -0,0 +1,118 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+#ifndef KISS_FFT_ARMv5E_H
+#define KISS_FFT_ARMv5E_H
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+#ifdef FIXED_POINT
+#if defined(__thumb__)||defined(__thumb2__)
+#define LDRD_CONS "Q"
+#else
+#define LDRD_CONS "Uq"
+#endif
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHL32(mi__, 1); \
+    } \
+    while(0)
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL4\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHR32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHR32(mi__, 1); \
+    } \
+    while(0)
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+        int mr__; \
+        int mi1__; \
+        int mi2__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MULC\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mr], %[aval], %[bval]\n\t" \
+            "smulwb %[mi1], %H[aval], %[bval]\n\t" \
+            "smulwt %[mi2], %[aval], %[bval]\n\t" \
+            "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \
+            : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(mr__, 1); \
+        (m).i = SHL32(SUB32(mi1__, mi2__), 1); \
+    } \
+    while(0)
+#endif /* FIXED_POINT */
+#endif /* KISS_FFT_GUTS_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c
index c7cb0d5500..5c715aff53 100644
--- a/lib/rbcodec/codecs/libopus/celt/bands.c
+++ b/lib/rbcodec/codecs/libopus/celt/bands.c
@@ -28,7 +28,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include <math.h>
@@ -40,6 +40,23 @@
 #include "os_support.h"
 #include "mathops.h"
 #include "rate.h"
+#include "quant_bands.h"
+#include "pitch.h"
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
+{
+   int i;
+   for (i=0;i<N;i++)
+   {
+      if (val < thresholds[i])
+         break;
+   }
+   if (i>prev && val < thresholds[prev]+hysteresis[prev])
+      i=prev;
+   if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
+      i=prev;
+   return i;
+}
 opus_uint32 celt_lcg_rand(opus_uint32 seed)
 {
@@ -174,7 +191,8 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
 #endif
 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
-void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, celt_sig * OPUS_RESTRICT freq, const celt_ener *bandE, int end, int C, int M)
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M)
 {
   int i, c, N;
   const opus_int16 *eBands = m->eBands;
@@ -184,18 +202,39 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, cel
      celt_sig * OPUS_RESTRICT f;
      const celt_norm * OPUS_RESTRICT x;
      f = freq+c*N;
-      x = X+c*N;
+      x = X+c*N+M*eBands[start];
-      for (i=0;i<end;i++)
+      for (i=0;i<M*eBands[start];i++)
+         *f++ = 0;
+      for (i=start;i<end;i++)
      {
         int j, band_end;
-         opus_val32 g = SHR32(bandE[i+c*m->nbEBands],1);
+         opus_val16 g;
+         opus_val16 lg;
+#ifdef FIXED_POINT
+         int shift;
+#endif
         j=M*eBands[i];
         band_end = M*eBands[i+1];
+         lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6));
+#ifdef FIXED_POINT
+         /* Handle the integer part of the log energy */
+         shift = 16-(lg>>DB_SHIFT);
+         if (shift>31)
+         {
+            shift=0;
+            g=0;
+         } else {
+            /* Handle the fractional part. */
+            g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
+         }
+#else
+         g = celt_exp2(lg);
+#endif
         do {
-            *f++ = SHL32(MULT16_32_Q15(*x, g),2);
+            *f++ = SHR32(MULT16_16(*x++, g), shift);
-            x++;
         } while (++j<band_end);
      }
+      celt_assert(start <= end);
      for (i=M*eBands[end];i<N;i++)
         *f++ = 0;
   } while (++c<C);
@@ -347,11 +386,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N)
   opus_val32 t, lgain, rgain;
   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
-   for (j=0;j<N;j++)
+   dual_inner_prod(Y, X, Y, N, &xp, &side);
-   {
-      xp = MAC16_16(xp, X[j], Y[j]);
-      side = MAC16_16(side, Y[j], Y[j]);
-   }
   /* Compensating for the mid normalization */
   xp = MULT16_32_Q15(mid, xp);
   /* mid and side are in Q15, not Q14 like X and Y */
@@ -487,50 +522,6 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average,
 }
 #endif
-#ifdef MEASURE_NORM_MSE
-float MSE[30] = {0};
-int nbMSEBands = 0;
-int MSECount[30] = {0};
-void dump_norm_mse(void)
-{
-   int i;
-   for (i=0;i<nbMSEBands;i++)
-   {
-      printf ("%g ", MSE[i]/MSECount[i]);
-   }
-   printf ("\n");
-}
-void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C)
-{
-   static int init = 0;
-   int i;
-   if (!init)
-   {
-      atexit(dump_norm_mse);
-      init = 1;
-   }
-   for (i=0;i<m->nbEBands;i++)
-   {
-      int j;
-      int c;
-      float g;
-      if (bandE0[i]<10 || (C==2 && bandE0[i+m->nbEBands]<1))
-         continue;
-      c=0; do {
-         g = bandE[i+c*m->nbEBands]/(1e-15+bandE0[i+c*m->nbEBands]);
-         for (j=M*m->eBands[i];j<M*m->eBands[i+1];j++)
-            MSE[i] += (g*X[j+c*N]-X0[j+c*N])*(g*X[j+c*N]-X0[j+c*N]);
-      } while (++c<C);
-      MSECount[i]+=C;
-   }
-   nbMSEBands = m->nbEBands;
-}
-#endif
 /* Indexing table for converting from natural Hadamard to ordery Hadamard
   This is essentially a bit-reversed Gray, on top of which we've added
   an inversion of the order because we want the DC at the end rather than
@@ -633,289 +624,304 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
   return qn;
 }
-/* This function is responsible for encoding and decoding a band for both
+struct band_ctx {
-   the mono and stereo case. Even in the mono case, it can split the band
+   int encode;
-   in two and transmit the energy difference with the two half-bands. It
+   const CELTMode *m;
-   can be called recursively so bands can end up being split in 8 parts. */
+   int i;
-static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_norm *Y,
+   int intensity;
-      int N, int b, int spread, int B, int intensity, int tf_change, celt_norm *lowband, ec_ctx *ec,
+   int spread;
-      opus_int32 *remaining_bits, int LM, celt_norm *lowband_out, const celt_ener *bandE, int level,
+   int tf_change;
-      opus_uint32 *seed, opus_val16 gain, celt_norm *lowband_scratch, int fill)
+   ec_ctx *ec;
+   opus_int32 remaining_bits;
+   const celt_ener *bandE;
+   opus_uint32 seed;
+};
+struct split_ctx {
+   int inv;
+   int imid;
+   int iside;
+   int delta;
+   int itheta;
+   int qalloc;
+};
+static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
+      celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
+      int LM,
+      int stereo, int *fill)
 {
-   const unsigned char *cache;
+   int qn;
-   int q;
+   int itheta=0;
-   int curr_bits;
+   int delta;
-   int stereo, split;
+   int imid, iside;
-   int imid=0, iside=0;
+   int qalloc;
-   int N0=N;
+   int pulse_cap;
-   int N_B=N;
+   int offset;
-   int N_B0;
+   opus_int32 tell;
-   int B0=B;
+   int inv=0;
-   int time_divide=0;
+   int encode;
-   int recombine=0;
+   const CELTMode *m;
-   int inv = 0;
+   int i;
-   opus_val16 mid=0, side=0;
+   int intensity;
-   int longBlocks;
+   ec_ctx *ec;
-   unsigned cm=0;
+   const celt_ener *bandE;
-#ifdef RESYNTH
-   int resynth = 1;
+   encode = ctx->encode;
-#else
+   m = ctx->m;
-   int resynth = !encode;
+   i = ctx->i;
-#endif
+   intensity = ctx->intensity;
+   ec = ctx->ec;
+   bandE = ctx->bandE;
+   /* Decide on the resolution to give to the split parameter theta */
+   pulse_cap = m->logN[i]+LM*(1<<BITRES);
+   offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
+   qn = compute_qn(N, *b, offset, pulse_cap, stereo);
+   if (stereo && i>=intensity)
+      qn = 1;
+   if (encode)
+   {
+      /* theta is the atan() of the ratio between the (normalized)
+         side and mid. With just that parameter, we can re-scale both
+         mid and side because we know that 1) they have unit norm and
+         2) they are orthogonal. */
+      itheta = stereo_itheta(X, Y, stereo, N);
+   }
+   tell = ec_tell_frac(ec);
+   if (qn!=1)
+   {
+      if (encode)
+         itheta = (itheta*qn+8192)>>14;
-   longBlocks = B0==1;
+      /* Entropy coding of the angle. We use a uniform pdf for the
+         time split, a step for stereo, and a triangular one for the rest. */
+      if (stereo && N>2)
+      {
+         int p0 = 3;
+         int x = itheta;
+         int x0 = qn/2;
+         int ft = p0*(x0+1) + x0;
+         /* Use a probability of p0 up to itheta=8192 and then use 1 after */
+         if (encode)
+         {
+            ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+         } else {
+            int fs;
+            fs=ec_decode(ec,ft);
+            if (fs<(x0+1)*p0)
+               x=fs/p0;
+            else
+               x=x0+1+(fs-(x0+1)*p0);
+            ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
+            itheta = x;
+         }
+      } else if (B0>1 || stereo) {
+         /* Uniform pdf */
+         if (encode)
+            ec_enc_uint(ec, itheta, qn+1);
+         else
+            itheta = ec_dec_uint(ec, qn+1);
+      } else {
+         int fs=1, ft;
+         ft = ((qn>>1)+1)*((qn>>1)+1);
+         if (encode)
+         {
+            int fl;
-   N_B /= B;
+            fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
-   N_B0 = N_B;
+            fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
+             ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
-   split = stereo = Y != NULL;
+            ec_encode(ec, fl, fl+fs, ft);
+         } else {
+            /* Triangular pdf */
+            int fl=0;
+            int fm;
+            fm = ec_decode(ec, ft);
-   /* Special case for one sample */
+            if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
-   if (N==1)
-   {
-      int c;
-      celt_norm *x = X;
-      c=0; do {
-         int sign=0;
-         if (*remaining_bits>=1<<BITRES)
-         {
-            if (encode)
            {
-               sign = x[0]<0;
+               itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
-               ec_enc_bits(ec, sign, 1);
+               fs = itheta + 1;
-            } else {
+               fl = itheta*(itheta + 1)>>1;
-               sign = ec_dec_bits(ec, 1);
+            }
+            else
+            {
+               itheta = (2*(qn + 1)
+                - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
+               fs = qn + 1 - itheta;
+               fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
            }
-            *remaining_bits -= 1<<BITRES;
-            b-=1<<BITRES;
-         }
-         if (resynth)
-            x[0] = sign ? -NORM_SCALING : NORM_SCALING;
-         x = Y;
-      } while (++c<1+stereo);
-      if (lowband_out)
-         lowband_out[0] = SHR16(X[0],4);
-      return 1;
-   }
-   if (!stereo && level == 0)
-   {
-      int k;
-      if (tf_change>0)
-         recombine = tf_change;
-      /* Band recombining to increase frequency resolution */
-      if (lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
+            ec_dec_update(ec, fl, fl+fs, ft);
-      {
+         }
-         int j;
-         for (j=0;j<N;j++)
-            lowband_scratch[j] = lowband[j];
-         lowband = lowband_scratch;
      }
+      itheta = (opus_int32)itheta*16384/qn;
-      for (k=0;k<recombine;k++)
+      if (encode && stereo)
      {
-         static const unsigned char bit_interleave_table[16]={
+         if (itheta==0)
-           0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
+            intensity_stereo(m, X, Y, bandE, i, N);
-         };
+         else
-         if (encode)
+            stereo_split(X, Y, N);
-            haar1(X, N>>k, 1<<k);
-         if (lowband)
-            haar1(lowband, N>>k, 1<<k);
-         fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
      }
-      B>>=recombine;
+      /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
-      N_B<<=recombine;
+               Let's do that at higher complexity */
+   } else if (stereo) {
-      /* Increasing the time resolution */
+      if (encode)
-      while ((N_B&1) == 0 && tf_change<0)
      {
-         if (encode)
+         inv = itheta > 8192;
-            haar1(X, N_B, B);
+         if (inv)
-         if (lowband)
+         {
-            haar1(lowband, N_B, B);
+            int j;
-         fill |= fill<<B;
+            for (j=0;j<N;j++)
-         B <<= 1;
+               Y[j] = -Y[j];
-         N_B >>= 1;
+         }
-         time_divide++;
+         intensity_stereo(m, X, Y, bandE, i, N);
-         tf_change++;
      }
-      B0=B;
+      if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
-      N_B0 = N_B;
-      /* Reorganize the samples in time order instead of frequency order */
-      if (B0>1)
      {
         if (encode)
-            deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+            ec_enc_bit_logp(ec, inv, 2);
-         if (lowband)
+         else
-            deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
+            inv = ec_dec_bit_logp(ec, 2);
-      }
+      } else
+         inv = 0;
+      itheta = 0;
   }
+   qalloc = ec_tell_frac(ec) - tell;
+   *b -= qalloc;
-   /* If we need 1.5 more bit than we can produce, split the band in two. */
+   if (itheta == 0)
-   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
-   if (!stereo && LM != -1 && b > cache[cache[0]]+12 && N>2)
   {
-      N >>= 1;
+      imid = 32767;
-      Y = X+N;
+      iside = 0;
-      split = 1;
+      *fill &= (1<<B)-1;
-      LM -= 1;
+      delta = -16384;
-      if (B==1)
+   } else if (itheta == 16384)
-         fill = (fill&1)|(fill<<1);
+   {
-      B = (B+1)>>1;
+      imid = 0;
+      iside = 32767;
+      *fill &= ((1<<B)-1)<<B;
+      delta = 16384;
+   } else {
+      imid = bitexact_cos((opus_int16)itheta);
+      iside = bitexact_cos((opus_int16)(16384-itheta));
+      /* This is the mid vs side allocation that minimizes squared error
+         in that band. */
+      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
   }
-   if (split)
+   sctx->inv = inv;
-   {
+   sctx->imid = imid;
-      int qn;
+   sctx->iside = iside;
-      int itheta=0;
+   sctx->delta = delta;
-      int mbits, sbits, delta;
+   sctx->itheta = itheta;
-      int qalloc;
+   sctx->qalloc = qalloc;
-      int pulse_cap;
+}
-      int offset;
+static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
-      int orig_fill;
+      celt_norm *lowband_out)
-      opus_int32 tell;
+{
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int c;
+   int stereo;
+   celt_norm *x = X;
+   int encode;
+   ec_ctx *ec;
-      /* Decide on the resolution to give to the split parameter theta */
+   encode = ctx->encode;
-      pulse_cap = m->logN[i]+LM*(1<<BITRES);
+   ec = ctx->ec;
-      offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
-      qn = compute_qn(N, b, offset, pulse_cap, stereo);
+   stereo = Y != NULL;
-      if (stereo && i>=intensity)
+   c=0; do {
-         qn = 1;
+      int sign=0;
-      if (encode)
+      if (ctx->remaining_bits>=1<<BITRES)
-      {
-         /* theta is the atan() of the ratio between the (normalized)
-            side and mid. With just that parameter, we can re-scale both
-            mid and side because we know that 1) they have unit norm and
-            2) they are orthogonal. */
-         itheta = stereo_itheta(X, Y, stereo, N);
-      }
-      tell = ec_tell_frac(ec);
-      if (qn!=1)
      {
         if (encode)
-            itheta = (itheta*qn+8192)>>14;
-         /* Entropy coding of the angle. We use a uniform pdf for the
-            time split, a step for stereo, and a triangular one for the rest. */
-         if (stereo && N>2)
         {
-            int p0 = 3;
+            sign = x[0]<0;
-            int x = itheta;
+            ec_enc_bits(ec, sign, 1);
-            int x0 = qn/2;
-            int ft = p0*(x0+1) + x0;
-            /* Use a probability of p0 up to itheta=8192 and then use 1 after */
-            if (encode)
-            {
-               ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
-            } else {
-               int fs;
-               fs=ec_decode(ec,ft);
-               if (fs<(x0+1)*p0)
-                  x=fs/p0;
-               else
-                  x=x0+1+(fs-(x0+1)*p0);
-               ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
-               itheta = x;
-            }
-         } else if (B0>1 || stereo) {
-            /* Uniform pdf */
-            if (encode)
-               ec_enc_uint(ec, itheta, qn+1);
-            else
-               itheta = ec_dec_uint(ec, qn+1);
         } else {
-            int fs=1, ft;
+            sign = ec_dec_bits(ec, 1);
-            ft = ((qn>>1)+1)*((qn>>1)+1);
+         }
-            if (encode)
+         ctx->remaining_bits -= 1<<BITRES;
-            {
+         b-=1<<BITRES;
-               int fl;
+      }
+      if (resynth)
+         x[0] = sign ? -NORM_SCALING : NORM_SCALING;
+      x = Y;
+   } while (++c<1+stereo);
+   if (lowband_out)
+      lowband_out[0] = SHR16(X[0],4);
+   return 1;
+}
-               fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
+/* This function is responsible for encoding and decoding a mono partition.
-               fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
+   It can split the band in two and transmit the energy difference with
-                ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
+   the two half-bands. It can be called recursively so bands can end up being
+   split in 8 parts. */
+static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM,
+      opus_val16 gain, int fill)
+{
+   const unsigned char *cache;
+   int q;
+   int curr_bits;
+   int imid=0, iside=0;
+   int N_B=N;
+   int B0=B;
+   opus_val16 mid=0, side=0;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   celt_norm *Y=NULL;
+   int encode;
+   const CELTMode *m;
+   int i;
+   int spread;
+   ec_ctx *ec;
-               ec_encode(ec, fl, fl+fs, ft);
+   encode = ctx->encode;
-            } else {
+   m = ctx->m;
-               /* Triangular pdf */
+   i = ctx->i;
-               int fl=0;
+   spread = ctx->spread;
-               int fm;
+   ec = ctx->ec;
-               fm = ec_decode(ec, ft);
-               if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
+   N_B /= B;
-               {
-                  itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
-                  fs = itheta + 1;
-                  fl = itheta*(itheta + 1)>>1;
-               }
-               else
-               {
-                  itheta = (2*(qn + 1)
-                   - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
-                  fs = qn + 1 - itheta;
-                  fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
-               }
-               ec_dec_update(ec, fl, fl+fs, ft);
+   /* If we need 1.5 more bit than we can produce, split the band in two. */
-            }
+   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
-         }
+   if (LM != -1 && b > cache[cache[0]]+12 && N>2)
-         itheta = (opus_int32)itheta*16384/qn;
+   {
-         if (encode && stereo)
+      int mbits, sbits, delta;
-         {
+      int itheta;
-            if (itheta==0)
+      int qalloc;
-               intensity_stereo(m, X, Y, bandE, i, N);
+      struct split_ctx sctx;
-            else
+      celt_norm *next_lowband2=NULL;
-               stereo_split(X, Y, N);
+      opus_int32 rebalance;
-         }
-         /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
-                  Let's do that at higher complexity */
-      } else if (stereo) {
-         if (encode)
-         {
-            inv = itheta > 8192;
-            if (inv)
-            {
-               int j;
-               for (j=0;j<N;j++)
-                  Y[j] = -Y[j];
-            }
-            intensity_stereo(m, X, Y, bandE, i, N);
-         }
-         if (b>2<<BITRES && *remaining_bits > 2<<BITRES)
-         {
-            if (encode)
-               ec_enc_bit_logp(ec, inv, 2);
-            else
-               inv = ec_dec_bit_logp(ec, 2);
-         } else
-            inv = 0;
-         itheta = 0;
-      }
-      qalloc = ec_tell_frac(ec) - tell;
-      b -= qalloc;
-      orig_fill = fill;
+      N >>= 1;
-      if (itheta == 0)
+      Y = X+N;
-      {
+      LM -= 1;
-         imid = 32767;
+      if (B==1)
-         iside = 0;
+         fill = (fill&1)|(fill<<1);
-         fill &= (1<<B)-1;
+      B = (B+1)>>1;
-         delta = -16384;
-      } else if (itheta == 16384)
-      {
-         imid = 0;
-         iside = 32767;
-         fill &= ((1<<B)-1)<<B;
-         delta = 16384;
-      } else {
-         imid = bitexact_cos(itheta);
-         iside = bitexact_cos(16384-itheta);
-         /* This is the mid vs side allocation that minimizes squared error
-            in that band. */
-         delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
-      }
+      compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
+            LM, 0, &fill);
+      imid = sctx.imid;
+      iside = sctx.iside;
+      delta = sctx.delta;
+      itheta = sctx.itheta;
+      qalloc = sctx.qalloc;
 #ifdef FIXED_POINT
      mid = imid;
      side = iside;
@@ -924,136 +930,59 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
      side = (1.f/32768)*iside;
 #endif
-      /* This is a special case for N=2 that only works for stereo and takes
+      /* Give more bits to low-energy MDCTs than they would otherwise deserve */
-         advantage of the fact that mid and side are orthogonal to encode
+      if (B0>1 && (itheta&0x3fff))
-         the side with just one bit. */
-      if (N==2 && stereo)
      {
-         int c;
+         if (itheta > 8192)
-         int sign=0;
+            /* Rough approximation for pre-echo masking */
-         celt_norm *x2, *y2;
+            delta -= delta>>(4-LM);
-         mbits = b;
-         sbits = 0;
-         /* Only need one bit for the side */
-         if (itheta != 0 && itheta != 16384)
-            sbits = 1<<BITRES;
-         mbits -= sbits;
-         c = itheta > 8192;
-         *remaining_bits -= qalloc+sbits;
-         x2 = c ? Y : X;
-         y2 = c ? X : Y;
-         if (sbits)
-         {
-            if (encode)
-            {
-               /* Here we only need to encode a sign for the side */
-               sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
-               ec_enc_bits(ec, sign, 1);
-            } else {
-               sign = ec_dec_bits(ec, 1);
-            }
-         }
-         sign = 1-2*sign;
-         /* We use orig_fill here because we want to fold the side, but if
-             itheta==16384, we'll have cleared the low bits of fill. */
-         cm = quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, intensity, tf_change, lowband, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch, orig_fill);
-         /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
-             and there's no need to worry about mixing with the other channel. */
-         y2[0] = -sign*x2[1];
-         y2[1] = sign*x2[0];
-         if (resynth)
-         {
-            celt_norm tmp;
-            X[0] = MULT16_16_Q15(mid, X[0]);
-            X[1] = MULT16_16_Q15(mid, X[1]);
-            Y[0] = MULT16_16_Q15(side, Y[0]);
-            Y[1] = MULT16_16_Q15(side, Y[1]);
-            tmp = X[0];
-            X[0] = SUB16(tmp,Y[0]);
-            Y[0] = ADD16(tmp,Y[0]);
-            tmp = X[1];
-            X[1] = SUB16(tmp,Y[1]);
-            Y[1] = ADD16(tmp,Y[1]);
-         }
-      } else {
-         /* "Normal" split code */
-         celt_norm *next_lowband2=NULL;
-         celt_norm *next_lowband_out1=NULL;
-         int next_level=0;
-         opus_int32 rebalance;
-         /* Give more bits to low-energy MDCTs than they would otherwise deserve */
-         if (B0>1 && !stereo && (itheta&0x3fff))
-         {
-            if (itheta > 8192)
-               /* Rough approximation for pre-echo masking */
-               delta -= delta>>(4-LM);
-            else
-               /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
-               delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
-         }
-         mbits = IMAX(0, IMIN(b, (b-delta)/2));
-         sbits = b-mbits;
-         *remaining_bits -= qalloc;
-         if (lowband && !stereo)
-            next_lowband2 = lowband+N; /* >32-bit split case */
-         /* Only stereo needs to pass on lowband_out. Otherwise, it's
-            handled at the end */
-         if (stereo)
-            next_lowband_out1 = lowband_out;
         else
-            next_level = level+1;
+            /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
+            delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
-         rebalance = *remaining_bits;
-         if (mbits >= sbits)
-         {
-            /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
-               mid for folding later */
-            cm = quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change,
-                  lowband, ec, remaining_bits, LM, next_lowband_out1,
-                  NULL, next_level, seed, stereo ? Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill);
-            rebalance = mbits - (rebalance-*remaining_bits);
-            if (rebalance > 3<<BITRES && itheta!=0)
-               sbits += rebalance - (3<<BITRES);
-            /* For a stereo split, the high bits of fill are always zero, so no
-               folding will be done to the side. */
-            cm |= quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change,
-                  next_lowband2, ec, remaining_bits, LM, NULL,
-                  NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<((B0>>1)&(stereo-1));
-         } else {
-            /* For a stereo split, the high bits of fill are always zero, so no
-               folding will be done to the side. */
-            cm = quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change,
-                  next_lowband2, ec, remaining_bits, LM, NULL,
-                  NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<((B0>>1)&(stereo-1));
-            rebalance = sbits - (rebalance-*remaining_bits);
-            if (rebalance > 3<<BITRES && itheta!=16384)
-               mbits += rebalance - (3<<BITRES);
-            /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
-               mid for folding later */
-            cm |= quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change,
-                  lowband, ec, remaining_bits, LM, next_lowband_out1,
-                  NULL, next_level, seed, stereo ? Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill);
-         }
      }
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+      if (lowband)
+         next_lowband2 = lowband+N; /* >32-bit split case */
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         cm = quant_partition(ctx, X, N, mbits, B,
+               lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, Y, N, sbits, B,
+               next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+      } else {
+         cm = quant_partition(ctx, Y, N, sbits, B,
+               next_lowband2, LM,
+               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         cm |= quant_partition(ctx, X, N, mbits, B,
+               lowband, LM,
+               MULT16_16_P15(gain,mid), fill);
+      }
   } else {
      /* This is the basic no-split case */
      q = bits2pulses(m, i, LM, b);
      curr_bits = pulses2bits(m, i, LM, q);
-      *remaining_bits -= curr_bits;
+      ctx->remaining_bits -= curr_bits;
      /* Ensures we can never bust the budget */
-      while (*remaining_bits < 0 && q > 0)
+      while (ctx->remaining_bits < 0 && q > 0)
      {
-         *remaining_bits += curr_bits;
+         ctx->remaining_bits += curr_bits;
         q--;
         curr_bits = pulses2bits(m, i, LM, q);
-         *remaining_bits -= curr_bits;
+         ctx->remaining_bits -= curr_bits;
      }
      if (q!=0)
@@ -1077,7 +1006,7 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
         if (resynth)
         {
            unsigned cm_mask;
-            /*B can be as large as 16, so this shift might overflow an int on a
+            /* B can be as large as 16, so this shift might overflow an int on a
               16-bit platform; use a long to get defined behavior.*/
            cm_mask = (unsigned)(1UL<<B)-1;
            fill &= cm_mask;
@@ -1091,8 +1020,8 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
                  /* Noise */
                  for (j=0;j<N;j++)
                  {
-                     *seed = celt_lcg_rand(*seed);
+                     ctx->seed = celt_lcg_rand(ctx->seed);
-                     X[j] = (celt_norm)((opus_int32)*seed>>20);
+                     X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
                  }
                  cm = cm_mask;
               } else {
@@ -1100,10 +1029,10 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
                  for (j=0;j<N;j++)
                  {
                     opus_val16 tmp;
-                     *seed = celt_lcg_rand(*seed);
+                     ctx->seed = celt_lcg_rand(ctx->seed);
                     /* About 48 dB below the "normal" folding level */
                     tmp = QCONST16(1.0f/256, 10);
-                     tmp = (*seed)&0x8000 ? tmp : -tmp;
+                     tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
                     X[j] = lowband[j]+tmp;
                  }
                  cm = fill;
@@ -1114,64 +1043,307 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c
      }
   }
+   return cm;
+}
+/* This function is responsible for encoding and decoding a band for the mono case. */
+static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
+      int N, int b, int B, celt_norm *lowband,
+      int LM, celt_norm *lowband_out,
+      opus_val16 gain, celt_norm *lowband_scratch, int fill)
+{
+   int N0=N;
+   int N_B=N;
+   int N_B0;
+   int B0=B;
+   int time_divide=0;
+   int recombine=0;
+   int longBlocks;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int k;
+   int encode;
+   int tf_change;
+   encode = ctx->encode;
+   tf_change = ctx->tf_change;
+   longBlocks = B0==1;
+   N_B /= B;
+   N_B0 = N_B;
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, NULL, b, lowband_out);
+   }
+   if (tf_change>0)
+      recombine = tf_change;
+   /* Band recombining to increase frequency resolution */
+   if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
+   {
+      int j;
+      for (j=0;j<N;j++)
+         lowband_scratch[j] = lowband[j];
+      lowband = lowband_scratch;
+   }
+   for (k=0;k<recombine;k++)
+   {
+      static const unsigned char bit_interleave_table[16]={
+            0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
+      };
+      if (encode)
+         haar1(X, N>>k, 1<<k);
+      if (lowband)
+         haar1(lowband, N>>k, 1<<k);
+      fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
+   }
+   B>>=recombine;
+   N_B<<=recombine;
+   /* Increasing the time resolution */
+   while ((N_B&1) == 0 && tf_change<0)
+   {
+      if (encode)
+         haar1(X, N_B, B);
+      if (lowband)
+         haar1(lowband, N_B, B);
+      fill |= fill<<B;
+      B <<= 1;
+      N_B >>= 1;
+      time_divide++;
+      tf_change++;
+   }
+   B0=B;
+   N_B0 = N_B;
+   /* Reorganize the samples in time order instead of frequency order */
+   if (B0>1)
+   {
+      if (encode)
+         deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+      if (lowband)
+         deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
+   }
+   cm = quant_partition(ctx, X, N, b, B, lowband,
+         LM, gain, fill);
   /* This code is used by the decoder and by the resynthesis-enabled encoder */
   if (resynth)
   {
-      if (stereo)
+      /* Undo the sample reorganization going from time order to frequency order */
+      if (B0>1)
+         interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+      /* Undo time-freq changes that we did earlier */
+      N_B = N_B0;
+      B = B0;
+      for (k=0;k<time_divide;k++)
      {
-         if (N!=2)
+         B >>= 1;
-            stereo_merge(X, Y, mid, N);
+         N_B <<= 1;
-         if (inv)
+         cm |= cm>>B;
-         {
+         haar1(X, N_B, B);
-            int j;
+      }
-            for (j=0;j<N;j++)
-               Y[j] = -Y[j];
+      for (k=0;k<recombine;k++)
-         }
-      } else if (level == 0)
      {
-         int k;
+         static const unsigned char bit_deinterleave_table[16]={
+               0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
+               0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
+         };
+         cm = bit_deinterleave_table[cm];
+         haar1(X, N0>>k, 1<<k);
+      }
+      B<<=recombine;
-         /* Undo the sample reorganization going from time order to frequency order */
+      /* Scale output for later folding */
-         if (B0>1)
+      if (lowband_out)
-            interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
+      {
+         int j;
+         opus_val16 n;
+         n = celt_sqrt(SHL32(EXTEND32(N0),22));
+         for (j=0;j<N0;j++)
+            lowband_out[j] = MULT16_16_Q15(n,X[j]);
+      }
+      cm &= (1<<B)-1;
+   }
+   return cm;
+}
-         /* Undo time-freq changes that we did earlier */
-         N_B = N_B0;
-         B = B0;
-         for (k=0;k<time_divide;k++)
-         {
-            B >>= 1;
-            N_B <<= 1;
-            cm |= cm>>B;
-            haar1(X, N_B, B);
-         }
-         for (k=0;k<recombine;k++)
+/* This function is responsible for encoding and decoding a band for the stereo case. */
-         {
+static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
-            static const unsigned char bit_deinterleave_table[16]={
+      int N, int b, int B, celt_norm *lowband,
-              0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
+      int LM, celt_norm *lowband_out,
-              0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
+      celt_norm *lowband_scratch, int fill)
-            };
+{
-            cm = bit_deinterleave_table[cm];
+   int imid=0, iside=0;
-            haar1(X, N0>>k, 1<<k);
+   int inv = 0;
-         }
+   opus_val16 mid=0, side=0;
-         B<<=recombine;
+   unsigned cm=0;
+#ifdef RESYNTH
+   int resynth = 1;
+#else
+   int resynth = !ctx->encode;
+#endif
+   int mbits, sbits, delta;
+   int itheta;
+   int qalloc;
+   struct split_ctx sctx;
+   int orig_fill;
+   int encode;
+   ec_ctx *ec;
+   encode = ctx->encode;
+   ec = ctx->ec;
+   /* Special case for one sample */
+   if (N==1)
+   {
+      return quant_band_n1(ctx, X, Y, b, lowband_out);
+   }
+   orig_fill = fill;
+   compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
+         LM, 1, &fill);
+   inv = sctx.inv;
+   imid = sctx.imid;
+   iside = sctx.iside;
+   delta = sctx.delta;
+   itheta = sctx.itheta;
+   qalloc = sctx.qalloc;
+#ifdef FIXED_POINT
+   mid = imid;
+   side = iside;
+#else
+   mid = (1.f/32768)*imid;
+   side = (1.f/32768)*iside;
+#endif
-         /* Scale output for later folding */
+   /* This is a special case for N=2 that only works for stereo and takes
-         if (lowband_out)
+      advantage of the fact that mid and side are orthogonal to encode
+      the side with just one bit. */
+   if (N==2)
+   {
+      int c;
+      int sign=0;
+      celt_norm *x2, *y2;
+      mbits = b;
+      sbits = 0;
+      /* Only need one bit for the side. */
+      if (itheta != 0 && itheta != 16384)
+         sbits = 1<<BITRES;
+      mbits -= sbits;
+      c = itheta > 8192;
+      ctx->remaining_bits -= qalloc+sbits;
+      x2 = c ? Y : X;
+      y2 = c ? X : Y;
+      if (sbits)
+      {
+         if (encode)
         {
-            int j;
+            /* Here we only need to encode a sign for the side. */
-            opus_val16 n;
+            sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
-            n = celt_sqrt(SHL32(EXTEND32(N0),22));
+            ec_enc_bits(ec, sign, 1);
-            for (j=0;j<N0;j++)
+         } else {
-               lowband_out[j] = MULT16_16_Q15(n,X[j]);
+            sign = ec_dec_bits(ec, 1);
         }
-         cm &= (1<<B)-1;
+      }
+      sign = 1-2*sign;
+      /* We use orig_fill here because we want to fold the side, but if
+         itheta==16384, we'll have cleared the low bits of fill. */
+      cm = quant_band(ctx, x2, N, mbits, B, lowband,
+            LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
+      /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
+         and there's no need to worry about mixing with the other channel. */
+      y2[0] = -sign*x2[1];
+      y2[1] = sign*x2[0];
+      if (resynth)
+      {
+         celt_norm tmp;
+         X[0] = MULT16_16_Q15(mid, X[0]);
+         X[1] = MULT16_16_Q15(mid, X[1]);
+         Y[0] = MULT16_16_Q15(side, Y[0]);
+         Y[1] = MULT16_16_Q15(side, Y[1]);
+         tmp = X[0];
+         X[0] = SUB16(tmp,Y[0]);
+         Y[0] = ADD16(tmp,Y[0]);
+         tmp = X[1];
+         X[1] = SUB16(tmp,Y[1]);
+         Y[1] = ADD16(tmp,Y[1]);
+      }
+   } else {
+      /* "Normal" split code */
+      opus_int32 rebalance;
+      mbits = IMAX(0, IMIN(b, (b-delta)/2));
+      sbits = b-mbits;
+      ctx->remaining_bits -= qalloc;
+      rebalance = ctx->remaining_bits;
+      if (mbits >= sbits)
+      {
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm = quant_band(ctx, X, N, mbits, B,
+               lowband, LM, lowband_out,
+               Q15ONE, lowband_scratch, fill);
+         rebalance = mbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=0)
+            sbits += rebalance - (3<<BITRES);
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm |= quant_band(ctx, Y, N, sbits, B,
+               NULL, LM, NULL,
+               side, NULL, fill>>B);
+      } else {
+         /* For a stereo split, the high bits of fill are always zero, so no
+            folding will be done to the side. */
+         cm = quant_band(ctx, Y, N, sbits, B,
+               NULL, LM, NULL,
+               side, NULL, fill>>B);
+         rebalance = sbits - (rebalance-ctx->remaining_bits);
+         if (rebalance > 3<<BITRES && itheta!=16384)
+            mbits += rebalance - (3<<BITRES);
+         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
+            mid for folding later. */
+         cm |= quant_band(ctx, X, N, mbits, B,
+               lowband, LM, lowband_out,
+               Q15ONE, lowband_scratch, fill);
+      }
+   }
+   /* This code is used by the decoder and by the resynthesis-enabled encoder */
+   if (resynth)
+   {
+      if (N!=2)
+         stereo_merge(X, Y, mid, N);
+      if (inv)
+      {
+         int j;
+         for (j=0;j<N;j++)
+            Y[j] = -Y[j];
      }
   }
   return cm;
 }
 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
@@ -1182,27 +1354,41 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
   celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
   VARDECL(celt_norm, _norm);
-   VARDECL(celt_norm, lowband_scratch);
+   celt_norm *lowband_scratch;
   int B;
   int M;
   int lowband_offset;
   int update_lowband = 1;
   int C = Y_ != NULL ? 2 : 1;
+   int norm_offset;
 #ifdef RESYNTH
   int resynth = 1;
 #else
   int resynth = !encode;
 #endif
+   struct band_ctx ctx;
   SAVE_STACK;
   M = 1<<LM;
   B = shortBlocks ? M : 1;
-   ALLOC(_norm, C*M*eBands[m->nbEBands], celt_norm);
+   norm_offset = M*eBands[start];
-   ALLOC(lowband_scratch, M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]), celt_norm);
+   /* No need to allocate norm for the last band because we don't need an
+      output in that band. */
+   ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
   norm = _norm;
-   norm2 = norm + M*eBands[m->nbEBands];
+   norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
+   /* We can use the last band as scratch space because we don't need that
+      scratch space for the last band. */
+   lowband_scratch = X_+M*eBands[m->nbEBands-1];
   lowband_offset = 0;
+   ctx.bandE = bandE;
+   ctx.ec = ec;
+   ctx.encode = encode;
+   ctx.intensity = intensity;
+   ctx.m = m;
+   ctx.seed = *seed;
+   ctx.spread = spread;
   for (i=start;i<end;i++)
   {
      opus_int32 tell;
@@ -1214,6 +1400,10 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      int tf_change=0;
      unsigned x_cm;
      unsigned y_cm;
+      int last;
+      ctx.i = i;
+      last = (i==end-1);
      X = X_+M*eBands[i];
      if (Y_!=NULL)
@@ -1227,6 +1417,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      if (i != start)
         balance -= tell;
      remaining_bits = total_bits-tell-1;
+      ctx.remaining_bits = remaining_bits;
      if (i <= codedBands-1)
      {
         curr_balance = balance / IMIN(3, codedBands-i);
@@ -1239,26 +1430,30 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
            lowband_offset = i;
      tf_change = tf_res[i];
+      ctx.tf_change = tf_change;
      if (i>=m->effEBands)
      {
         X=norm;
         if (Y_!=NULL)
            Y = norm;
+         lowband_scratch = NULL;
      }
+      if (i==end-1)
+         lowband_scratch = NULL;
      /* Get a conservative estimate of the collapse_mask's for the bands we're
-          going to be folding from. */
+         going to be folding from. */
      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
      {
         int fold_start;
         int fold_end;
         int fold_i;
         /* This ensures we never repeat spectral content within one band */
-         effective_lowband = IMAX(M*eBands[start], M*eBands[lowband_offset]-N);
+         effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
         fold_start = lowband_offset;
-         while(M*eBands[--fold_start] > effective_lowband);
+         while(M*eBands[--fold_start] > effective_lowband+norm_offset);
         fold_end = lowband_offset-1;
-         while(M*eBands[++fold_end] < effective_lowband+N);
+         while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
         x_cm = y_cm = 0;
         fold_i = fold_start; do {
           x_cm |= collapse_masks[fold_i*C+0];
@@ -1266,7 +1461,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
         } while (++fold_i<fold_end);
      }
      /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
-          always) be non-zero.*/
+         always) be non-zero. */
      else
         x_cm = y_cm = (1<<B)-1;
@@ -1274,33 +1469,42 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      {
         int j;
-         /* Switch off dual stereo to do intensity */
+         /* Switch off dual stereo to do intensity. */
         dual_stereo = 0;
         if (resynth)
-            for (j=M*eBands[start];j<M*eBands[i];j++)
+            for (j=0;j<M*eBands[i]-norm_offset;j++)
               norm[j] = HALF32(norm[j]+norm2[j]);
      }
      if (dual_stereo)
      {
-         x_cm = quant_band(encode, m, i, X, NULL, N, b/2, spread, B, intensity, tf_change,
+         x_cm = quant_band(&ctx, X, N, b/2, B,
-               effective_lowband != -1 ? norm+effective_lowband : NULL, ec, &remaining_bits, LM,
+               effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
-               norm+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, x_cm);
+               last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
-         y_cm = quant_band(encode, m, i, Y, NULL, N, b/2, spread, B, intensity, tf_change,
+         y_cm = quant_band(&ctx, Y, N, b/2, B,
-               effective_lowband != -1 ? norm2+effective_lowband : NULL, ec, &remaining_bits, LM,
+               effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
-               norm2+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, y_cm);
+               last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
      } else {
-         x_cm = quant_band(encode, m, i, X, Y, N, b, spread, B, intensity, tf_change,
+         if (Y!=NULL)
-               effective_lowband != -1 ? norm+effective_lowband : NULL, ec, &remaining_bits, LM,
+         {
-               norm+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, x_cm|y_cm);
+            x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
+                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                        last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
+         } else {
+            x_cm = quant_band(&ctx, X, N, b, B,
+                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
+                        last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
+         }
         y_cm = x_cm;
      }
      collapse_masks[i*C+0] = (unsigned char)x_cm;
      collapse_masks[i*C+C-1] = (unsigned char)y_cm;
      balance += pulses[i] + tell;
-      /* Update the folding position only as long as we have 1 bit/sample depth */
+      /* Update the folding position only as long as we have 1 bit/sample depth. */
      update_lowband = b>(N<<BITRES);
   }
+   *seed = ctx.seed;
   RESTORE_STACK;
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.h b/lib/rbcodec/codecs/libopus/celt/bands.h
index 9ff8ffd7ba..96ba52a649 100644
--- a/lib/rbcodec/codecs/libopus/celt/bands.h
+++ b/lib/rbcodec/codecs/libopus/celt/bands.h
@@ -39,7 +39,7 @@
 /** Compute the amplitude (sqrt energy) in each of the bands
 * @param m Mode data
 * @param X Spectrum
- * @param bands Square root of the energy for each band (returned)
+ * @param bandE Square root of the energy for each band (returned)
 */
 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M);
@@ -49,16 +49,17 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
    equal to 1
 * @param m Mode data
 * @param X Spectrum (returned normalised)
- * @param bands Square root of the energy for each band
+ * @param bandE Square root of the energy for each band
 */
 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M);
 /** Denormalise each band of X to restore full amplitude
 * @param m Mode data
 * @param X Spectrum (returned de-normalised)
- * @param bands Square root of the energy for each band
+ * @param bandE Square root of the energy for each band
 */
-void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, celt_sig * OPUS_RESTRICT freq, const celt_ener *bandE, int end, int C, int M);
+void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
+      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M);
 #define SPREAD_NONE       (0)
 #define SPREAD_LIGHT      (1)
@@ -76,14 +77,30 @@ void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, floa
 void haar1(celt_norm *X, int N0, int stride);
 /** Quantisation/encoding of the residual spectrum
+ * @param encode flag that indicates whether we're encoding (1) or decoding (0)
 * @param m Mode data
+ * @param start First band to process
+ * @param end Last band to process + 1
 * @param X Residual (normalised)
+ * @param Y Residual (normalised) for second channel (or NULL for mono)
+ * @param collapse_masks Anti-collapse tracking mask
+ * @param bandE Square root of the energy for each band
+ * @param pulses Bit allocation (per band) for PVQ
+ * @param shortBlocks Zero for long blocks, non-zero for short blocks
+ * @param spread Amount of spreading to use
+ * @param dual_stereo Zero for MS stereo, non-zero for dual stereo
+ * @param intensity First band to use intensity stereo
+ * @param tf_res Time-frequency resolution change
 * @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
- * @param enc Entropy encoder
+ * @param balance Number of unallocated bits
+ * @param en Entropy coder state
+ * @param LM log2() of the number of 2.5 subframes in the frame
+ * @param codedBands Last band to receive bits + 1
+ * @param seed Random generator seed
 */
 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
      celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses,
-      int time_domain, int fold, int dual_stereo, int intensity, int *tf_res,
+      int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res,
      opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed);
 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
@@ -92,4 +109,6 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
 opus_uint32 celt_lcg_rand(opus_uint32 seed);
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev);
 #endif /* BANDS_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c
index 52a66d1b68..3e0ce6e6a5 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt.c
@@ -28,7 +28,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #define CELT_C
@@ -50,62 +50,12 @@
 #include "celt_lpc.h"
 #include "vq.h"
-#ifndef OPUS_VERSION
+#ifndef PACKAGE_VERSION
-#define OPUS_VERSION "unknown"
+#define PACKAGE_VERSION "unknown"
 #endif
-#ifdef CUSTOM_MODES
-#define OPUS_CUSTOM_NOSTATIC
-#else
-#define OPUS_CUSTOM_NOSTATIC static inline
-#endif
-static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
-/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
-static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
-static const unsigned char tapset_icdf[3]={2,1,0};
-#ifdef CUSTOM_MODES
-static const unsigned char toOpusTable[20] = {
-      0xE0, 0xE8, 0xF0, 0xF8,
-      0xC0, 0xC8, 0xD0, 0xD8,
-      0xA0, 0xA8, 0xB0, 0xB8,
-      0x00, 0x00, 0x00, 0x00,
-      0x80, 0x88, 0x90, 0x98,
-};
-static const unsigned char fromOpusTable[16] = {
-      0x80, 0x88, 0x90, 0x98,
-      0x40, 0x48, 0x50, 0x58,
-      0x20, 0x28, 0x30, 0x38,
-      0x00, 0x08, 0x10, 0x18
-};
-static inline int toOpus(unsigned char c)
-{
-   int ret=0;
-   if (c<0xA0)
-      ret = toOpusTable[c>>3];
-   if (ret == 0)
-      return -1;
-   else
-      return ret|(c&0x7);
-}
-static inline int fromOpus(unsigned char c)
+int resampling_factor(opus_int32 rate)
-{
-   if (c<0x80)
-      return -1;
-   else
-      return fromOpusTable[(c>>3)-16] | (c&0x7);
-}
-#endif /* CUSTOM_MODES */
-#define COMBFILTER_MAXPERIOD 1024
-#define COMBFILTER_MINPERIOD 15
-static int resampling_factor(opus_int32 rate)
 {
   int ret;
   switch (rate)
@@ -135,693 +85,101 @@ static int resampling_factor(opus_int32 rate)
   return ret;
 }
-/** Encoder state
+#ifndef OVERRIDE_COMB_FILTER_CONST
- @brief Encoder state
+static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
- */
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
-struct OpusCustomEncoder {
-   const OpusCustomMode *mode;     /**< Mode used by the encoder */
-   int overlap;
-   int channels;
-   int stream_channels;
-   int force_intra;
-   int clip;
-   int disable_pf;
-   int complexity;
-   int upsample;
-   int start, end;
-   opus_int32 bitrate;
-   int vbr;
-   int signalling;
-   int constrained_vbr;      /* If zero, VBR can do whatever it likes with the rate */
-   int loss_rate;
-   int lsb_depth;
-   /* Everything beyond this point gets cleared on a reset */
-#define ENCODER_RESET_START rng
-   opus_uint32 rng;
-   int spread_decision;
-   opus_val32 delayedIntra;
-   int tonal_average;
-   int lastCodedBands;
-   int hf_average;
-   int tapset_decision;
-   int prefilter_period;
-   opus_val16 prefilter_gain;
-   int prefilter_tapset;
-#ifdef RESYNTH
-   int prefilter_period_old;
-   opus_val16 prefilter_gain_old;
-   int prefilter_tapset_old;
-#endif
-   int consec_transient;
-   opus_val32 preemph_memE[2];
-   opus_val32 preemph_memD[2];
-   /* VBR-related parameters */
-   opus_int32 vbr_reservoir;
-   opus_int32 vbr_drift;
-   opus_int32 vbr_offset;
-   opus_int32 vbr_count;
-#ifdef RESYNTH
-   celt_sig syn_mem[2][2*MAX_PERIOD];
-#endif
-   celt_sig in_mem[1]; /* Size = channels*mode->overlap */
-   /* celt_sig prefilter_mem[],  Size = channels*COMBFILTER_PERIOD */
-   /* celt_sig overlap_mem[],  Size = channels*mode->overlap */
-   /* opus_val16 oldEBands[], Size = 2*channels*mode->nbEBands */
-};
-#if 0
-int celt_encoder_get_size(int channels)
-{
-   CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
-   return opus_custom_encoder_get_size(mode, channels);
-}
-OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels)
-{
-   int size = sizeof(struct CELTEncoder)
-         + (2*channels*mode->overlap-1)*sizeof(celt_sig)
-         + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig)
-         + 3*channels*mode->nbEBands*sizeof(opus_val16);
-   return size;
-}
-#ifdef CUSTOM_MODES
-CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error)
-{
-   int ret;
-   CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels));
-   /* init will handle the NULL case */
-   ret = opus_custom_encoder_init(st, mode, channels);
-   if (ret != OPUS_OK)
-   {
-      opus_custom_encoder_destroy(st);
-      st = NULL;
-   }
-   if (error)
-      *error = ret;
-   return st;
-}
-#endif /* CUSTOM_MODES */
-int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels)
-{
-   int ret;
-   ret = opus_custom_encoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
-   if (ret != OPUS_OK)
-      return ret;
-   st->upsample = resampling_factor(sampling_rate);
-   return OPUS_OK;
-}
-OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
-{
-   if (channels < 0 || channels > 2)
-      return OPUS_BAD_ARG;
-   if (st==NULL || mode==NULL)
-      return OPUS_ALLOC_FAIL;
-   OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
-   st->mode = mode;
-   st->overlap = mode->overlap;
-   st->stream_channels = st->channels = channels;
-   st->upsample = 1;
-   st->start = 0;
-   st->end = st->mode->effEBands;
-   st->signalling = 1;
-   st->constrained_vbr = 1;
-   st->clip = 1;
-   st->bitrate = OPUS_BITRATE_MAX;
-   st->vbr = 0;
-   st->force_intra  = 0;
-   st->complexity = 5;
-   st->lsb_depth=24;
-   opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
-   return OPUS_OK;
-}
-#ifdef CUSTOM_MODES
-void opus_custom_encoder_destroy(CELTEncoder *st)
-{
-   opus_free(st);
-}
-#endif /* CUSTOM_MODES */
-#endif
-static inline opus_val16 SIG2WORD16(celt_sig x)
-{
-#ifdef FIXED_POINT
-   x = PSHR32(x, SIG_SHIFT);
-   x = MAX32(x, -32768);
-   x = MIN32(x, 32767);
-   return EXTRACT16(x);
-#else
-   return (opus_val16)x;
-#endif
-}
-#if 0
-static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
-                              int overlap)
 {
+   opus_val32 x0, x1, x2, x3, x4;
   int i;
-   VARDECL(opus_val16, tmp);
+   x4 = x[-T-2];
-   opus_val32 mem0=0,mem1=0;
+   x3 = x[-T-1];
-   int is_transient = 0;
+   x2 = x[-T];
-   int block;
+   x1 = x[-T+1];
-   int N;
-   VARDECL(opus_val16, bins);
-   SAVE_STACK;
-   ALLOC(tmp, len, opus_val16);
-   block = overlap/2;
-   N=len/block;
-   ALLOC(bins, N, opus_val16);
-   if (C==1)
-   {
-      for (i=0;i<len;i++)
-         tmp[i] = SHR32(in[i],SIG_SHIFT);
-   } else {
-      for (i=0;i<len;i++)
-         tmp[i] = SHR32(ADD32(in[i],in[i+len]), SIG_SHIFT+1);
-   }
-   /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
-   for (i=0;i<len;i++)
-   {
-      opus_val32 x,y;
-      x = tmp[i];
-      y = ADD32(mem0, x);
-#ifdef FIXED_POINT
-      mem0 = mem1 + y - SHL32(x,1);
-      mem1 = x - SHR32(y,1);
-#else
-      mem0 = mem1 + y - 2*x;
-      mem1 = x - .5f*y;
-#endif
-      tmp[i] = EXTRACT16(SHR32(y,2));
-   }
-   /* First few samples are bad because we don't propagate the memory */
-   for (i=0;i<12;i++)
-      tmp[i] = 0;
   for (i=0;i<N;i++)
   {
-      int j;
+      x0=x[i-T+2];
-      opus_val16 max_abs=0;
+      y[i] = x[i]
-      for (j=0;j<block;j++)
+               + MULT16_32_Q15(g10,x2)
-         max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
+               + MULT16_32_Q15(g11,ADD32(x1,x3))
-      bins[i] = max_abs;
+               + MULT16_32_Q15(g12,ADD32(x0,x4));
-   }
+      x4=x3;
-   for (i=0;i<N;i++)
+      x3=x2;
-   {
+      x2=x1;
-      int j;
+      x1=x0;
-      int conseq=0;
-      opus_val16 t1, t2, t3;
-      t1 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
-      t2 = MULT16_16_Q15(QCONST16(.4f, 15), bins[i]);
-      t3 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
-      for (j=0;j<i;j++)
-      {
-         if (bins[j] < t1)
-            conseq++;
-         if (bins[j] < t2)
-            conseq++;
-         else
-            conseq = 0;
-      }
-      if (conseq>=3)
-         is_transient=1;
-      conseq = 0;
-      for (j=i+1;j<N;j++)
-      {
-         if (bins[j] < t3)
-            conseq++;
-         else
-            conseq = 0;
-      }
-      if (conseq>=7)
-         is_transient=1;
-   }
-   RESTORE_STACK;
-#ifdef FUZZING
-   is_transient = rand()&0x1;
-#endif
-   return is_transient;
-}
-/** Apply window and compute the MDCT for all sub-frames and
-    all channels in a frame */
-static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, celt_sig * OPUS_RESTRICT out, int C, int LM)
-{
-   if (C==1 && !shortBlocks)
-   {
-      const int overlap = OVERLAP(mode);
-      clt_mdct_forward(&mode->mdct, in, out, mode->window, overlap, mode->maxLM-LM, 1);
-   } else {
-      const int overlap = OVERLAP(mode);
-      int N = mode->shortMdctSize<<LM;
-      int B = 1;
-      int b, c;
-      if (shortBlocks)
-      {
-         N = mode->shortMdctSize;
-         B = shortBlocks;
-      }
-      c=0; do {
-         for (b=0;b<B;b++)
-         {
-            /* Interleaving the sub-frames while doing the MDCTs */
-            clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM, B);
-         }
-      } while (++c<C);
   }
-}
-#endif
-/** Compute the IMDCT and apply window for all sub-frames and
-    all channels in a frame */
-static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
-      celt_sig * OPUS_RESTRICT out_mem[],
-      celt_sig * OPUS_RESTRICT overlap_mem[], int C, int LM)
-{
-   int c;
-   const int N = mode->shortMdctSize<<LM;
-   const int overlap = OVERLAP(mode);
-   VARDECL(opus_val32, x);
-   SAVE_STACK;
-   ALLOC(x, N+overlap, opus_val32);
-   c=0; do {
-      int j;
-      int b;
-      int N2 = N;
-      int B = 1;
-      if (shortBlocks)
-      {
-         N2 = mode->shortMdctSize;
-         B = shortBlocks;
-      }
-      /* Prevents problems from the imdct doing the overlap-add */
-      OPUS_CLEAR(x, overlap);
-      for (b=0;b<B;b++)
-      {
-         /* IMDCT on the interleaved the sub-frames */
-         clt_mdct_backward(&mode->mdct, &X[b+c*N2*B], x+N2*b, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM, B);
-      }
-      /* overlap can be divided by 4 */
-      for (j=0;j<overlap;j+=4)
-      {
-         out_mem[c][j  ] = x[j  ] + overlap_mem[c][j  ];
-         out_mem[c][j+1] = x[j+1] + overlap_mem[c][j+1];
-         out_mem[c][j+2] = x[j+2] + overlap_mem[c][j+2];
-         out_mem[c][j+3] = x[j+3] + overlap_mem[c][j+3];
-      }
-      OPUS_COPY(out_mem[c]+overlap, x+overlap, N-overlap);
-      OPUS_COPY(overlap_mem[c]    , x+N      ,   overlap);
-   } while (++c<C);
-   RESTORE_STACK;
 }
-static void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, /* int downsample,*/ const opus_val16 *coef, celt_sig *mem)
-{
-   int c;
-/*   int count=0;*/
-   c=0; do {
-      int j;
-      celt_sig * OPUS_RESTRICT x;
-      opus_val16  * OPUS_RESTRICT y;
-      opus_val16 coef0 = coef[0];
-#ifdef CUSTOM_MODES
-      opus_val16 coef1 = coef[1];
-      opus_val16 coef3 = coef[3];
 #endif
-      celt_sig m = mem[c];
-      x =in[c];
-      y = pcm+c;
-      for (j=0;j<N;j++)
-      {
-         celt_sig tmp = *x + m;
-         m = MULT16_32_Q15(coef0, tmp);
-#ifdef CUSTOM_MODES
-         m -= MULT16_32_Q15(coef1, *x);
-         tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2);
-#endif
-         x++;
-         /* Technically the store could be moved outside of the if because
-            the stores we don't want will just be overwritten */
-         /* ROCKBOX: we don't downsample
-         if (count==0) */
-            *y = SCALEOUT(SIG2WORD16(tmp));
-         /* if (++count==downsample) */
-         {
-            y+=C;
-         /*   count=0; */
-         }
-      }
-      mem[c] = m;
-   } while (++c<C);
-}
-static void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
      const opus_val16 *window, int overlap)
 {
-   /* Multiply-adds are only needed if g0 or g1 are non-zero. In all other cases a simple
+   int i;
-    * copy of vector x to y is possible. */
+   /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
-   if (g0!=0 || g1!=0)
+   opus_val16 g00, g01, g02, g10, g11, g12;
-   {
+   opus_val32 x0, x1, x2, x3, x4;
-      int i;
+   static const opus_val16 gains[3][3] = {
-      opus_val16 g00, g01, g02, g10, g11, g12, idx0, idx1;
-      static const opus_val16 gains[3][3] = {
         {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
         {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
         {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
-      g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
-      g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
-      g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
-      g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
-      g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
-      g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
-      /* printf("g0 %d g1 %d\n", g0,g1); */
-      idx0 = -T0;
-      idx1 = -T1;
-      for (i=0;i<overlap;i++,idx0++,idx1++)
-      {
-         opus_val16 f0, f1;
-         f1 = MULT16_16_Q15(window[i],window[i]);
-         f0 = Q15ONE - f1;
-         y[i] = x[i]
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g02), x[idx0-2])
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g01), x[idx0-1])
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g00), x[idx0  ])
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g01), x[idx0+1])
-               + MULT16_32_Q15(MULT16_16_Q15(f0,g02), x[idx0+2])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g12), x[idx1-2])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g11), x[idx1-1])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g10), x[idx1  ])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g11), x[idx1+1])
-               + MULT16_32_Q15(MULT16_16_Q15(f1,g12), x[idx1+2]);
-      }
-      /* No multiply-add required if g1=0 as all multiplicants are =0. */
-      if (g1!=0)
-      {
-         idx1 = overlap-T1;
-         for (i=overlap;i<N;i++,idx1++)
-         {
-            y[i] = x[i]
-                  + MULT16_32_Q15(g12, x[idx1-2])
-                  + MULT16_32_Q15(g11, x[idx1-1])
-                  + MULT16_32_Q15(g10, x[idx1  ])
-                  + MULT16_32_Q15(g11, x[idx1+1])
-                  + MULT16_32_Q15(g12, x[idx1+2]);
-         }
-      }
-      /* Only perform vector copy if source and destination are not same. */
-      else if (x != y)
-      {
-         /* Copy part of vector from x[overlap..N] to y[overlap..N] */
-         OPUS_COPY(y+overlap, x+overlap, N-overlap);
-      }
-   }
-   /* Only perform vector copy if source and destination are not same. */
-   else if (x != y)
-   {
-      /* Copy full vector from x[0..N] to y[0..N] */
-      OPUS_COPY(y, x, N);
-   }
-}
-static const signed char tf_select_table[4][8] = {
+   if (g0==0 && g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y, x, N);
+      return;
+   }
+   g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
+   g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
+   g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
+   g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
+   g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
+   g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
+   x1 = x[-T1+1];
+   x2 = x[-T1  ];
+   x3 = x[-T1-1];
+   x4 = x[-T1-2];
+   for (i=0;i<overlap;i++)
+   {
+      opus_val16 f;
+      x0=x[i-T1+2];
+      f = MULT16_16_Q15(window[i],window[i]);
+      y[i] = x[i]
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
+               + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
+               + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
+               + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
+      x4=x3;
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+   if (g1==0)
+   {
+      /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+      if (x!=y)
+         OPUS_MOVE(y+overlap, x+overlap, N-overlap);
+      return;
+   }
+   /* Compute the part with the constant filter. */
+   comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
+}
+const signed char tf_select_table[4][8] = {
      {0, -1, 0, -1,    0,-1, 0,-1},
      {0, -1, 0, -2,    1, 0, 1,-1},
      {0, -2, 0, -3,    2, 0, 1,-1},
      {0, -2, 0, -3,    3, 0, 1,-1},
 };
-#if 0
-static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, int width)
-{
-   int i, j;
-   static const opus_val16 sqrtM_1[4] = {Q15ONE, QCONST16(.70710678f,15), QCONST16(0.5f,15), QCONST16(0.35355339f,15)};
-   opus_val32 L1;
-   opus_val16 bias;
-   L1=0;
-   for (i=0;i<1<<LM;i++)
-   {
-      opus_val32 L2 = 0;
-      for (j=0;j<N>>LM;j++)
-         L2 = MAC16_16(L2, tmp[(j<<LM)+i], tmp[(j<<LM)+i]);
-      L1 += celt_sqrt(L2);
-   }
-   L1 = MULT16_32_Q15(sqrtM_1[LM], L1);
-   if (width==1)
-      bias = QCONST16(.12f,15)*LM;
-   else if (width==2)
-      bias = QCONST16(.05f,15)*LM;
-   else
-      bias = QCONST16(.02f,15)*LM;
-   L1 = MAC16_32_Q15(L1, bias, L1);
-   return L1;
-}
-static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
+void init_caps(const CELTMode *m,int *cap,int LM,int C)
-      int *tf_res, int nbCompressedBytes, celt_norm *X, int N0, int LM,
-      int *tf_sum)
-{
-   int i;
-   VARDECL(int, metric);
-   int cost0;
-   int cost1;
-   VARDECL(int, path0);
-   VARDECL(int, path1);
-   VARDECL(celt_norm, tmp);
-   int lambda;
-   int tf_select=0;
-   SAVE_STACK;
-   if (nbCompressedBytes<15*C)
-   {
-      *tf_sum = 0;
-      for (i=0;i<len;i++)
-         tf_res[i] = isTransient;
-      return 0;
-   }
-   if (nbCompressedBytes<40)
-      lambda = 12;
-   else if (nbCompressedBytes<60)
-      lambda = 6;
-   else if (nbCompressedBytes<100)
-      lambda = 4;
-   else
-      lambda = 3;
-   ALLOC(metric, len, int);
-   ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
-   ALLOC(path0, len, int);
-   ALLOC(path1, len, int);
-   *tf_sum = 0;
-   for (i=0;i<len;i++)
-   {
-      int j, k, N;
-      opus_val32 L1, best_L1;
-      int best_level=0;
-      N = (m->eBands[i+1]-m->eBands[i])<<LM;
-      for (j=0;j<N;j++)
-         tmp[j] = X[j+(m->eBands[i]<<LM)];
-      /* Just add the right channel if we're in stereo */
-      if (C==2)
-         for (j=0;j<N;j++)
-            tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));
-      L1 = l1_metric(tmp, N, isTransient ? LM : 0, N>>LM);
-      best_L1 = L1;
-      /*printf ("%f ", L1);*/
-      for (k=0;k<LM;k++)
-      {
-         int B;
-         if (isTransient)
-            B = (LM-k-1);
-         else
-            B = k+1;
-         if (isTransient)
-            haar1(tmp, N>>(LM-k), 1<<(LM-k));
-         else
-            haar1(tmp, N>>k, 1<<k);
-         L1 = l1_metric(tmp, N, B, N>>LM);
-         if (L1 < best_L1)
-         {
-            best_L1 = L1;
-            best_level = k+1;
-         }
-      }
-      /*printf ("%d ", isTransient ? LM-best_level : best_level);*/
-      if (isTransient)
-         metric[i] = best_level;
-      else
-         metric[i] = -best_level;
-      *tf_sum += metric[i];
-   }
-   /*printf("\n");*/
-   /* NOTE: Future optimized implementations could detect extreme transients and set
-      tf_select = 1 but so far we have not found a reliable way of making this useful */
-   tf_select = 0;
-   cost0 = 0;
-   cost1 = isTransient ? 0 : lambda;
-   /* Viterbi forward pass */
-   for (i=1;i<len;i++)
-   {
-      int curr0, curr1;
-      int from0, from1;
-      from0 = cost0;
-      from1 = cost1 + lambda;
-      if (from0 < from1)
-      {
-         curr0 = from0;
-         path0[i]= 0;
-      } else {
-         curr0 = from1;
-         path0[i]= 1;
-      }
-      from0 = cost0 + lambda;
-      from1 = cost1;
-      if (from0 < from1)
-      {
-         curr1 = from0;
-         path1[i]= 0;
-      } else {
-         curr1 = from1;
-         path1[i]= 1;
-      }
-      cost0 = curr0 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+0]);
-      cost1 = curr1 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+1]);
-   }
-   tf_res[len-1] = cost0 < cost1 ? 0 : 1;
-   /* Viterbi backward pass to check the decisions */
-   for (i=len-2;i>=0;i--)
-   {
-      if (tf_res[i+1] == 1)
-         tf_res[i] = path1[i+1];
-      else
-         tf_res[i] = path0[i+1];
-   }
-   RESTORE_STACK;
-#ifdef FUZZING
-   tf_select = rand()&0x1;
-   tf_res[0] = rand()&0x1;
-   for (i=1;i<len;i++)
-      tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0);
-#endif
-   return tf_select;
-}
-static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
-{
-   int curr, i;
-   int tf_select_rsv;
-   int tf_changed;
-   int logp;
-   opus_uint32 budget;
-   opus_uint32 tell;
-   budget = enc->storage*8;
-   tell = ec_tell(enc);
-   logp = isTransient ? 2 : 4;
-   /* Reserve space to code the tf_select decision. */
-   tf_select_rsv = LM>0 && tell+logp+1 <= budget;
-   budget -= tf_select_rsv;
-   curr = tf_changed = 0;
-   for (i=start;i<end;i++)
-   {
-      if (tell+logp<=budget)
-      {
-         ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp);
-         tell = ec_tell(enc);
-         curr = tf_res[i];
-         tf_changed |= curr;
-      }
-      else
-         tf_res[i] = curr;
-      logp = isTransient ? 4 : 5;
-   }
-   /* Only code tf_select if it would actually make a difference. */
-   if (tf_select_rsv &&
-         tf_select_table[LM][4*isTransient+0+tf_changed]!=
-         tf_select_table[LM][4*isTransient+2+tf_changed])
-      ec_enc_bit_logp(enc, tf_select, 1);
-   else
-      tf_select = 0;
-   for (i=start;i<end;i++)
-      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
-   /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/
-}
-#endif
-static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
-{
-   int i, curr, tf_select;
-   int tf_select_rsv;
-   int tf_changed;
-   int logp;
-   opus_uint32 budget;
-   opus_uint32 tell;
-   budget = dec->storage*8;
-   tell = ec_tell(dec);
-   logp = isTransient ? 2 : 4;
-   tf_select_rsv = LM>0 && tell+logp+1<=budget;
-   budget -= tf_select_rsv;
-   tf_changed = curr = 0;
-   for (i=start;i<end;i++)
-   {
-      if (tell+logp<=budget)
-      {
-         curr ^= ec_dec_bit_logp(dec, logp);
-         tell = ec_tell(dec);
-         tf_changed |= curr;
-      }
-      tf_res[i] = curr;
-      logp = isTransient ? 4 : 5;
-   }
-   tf_select = 0;
-   if (tf_select_rsv &&
-     tf_select_table[LM][4*isTransient+0+tf_changed] !=
-     tf_select_table[LM][4*isTransient+2+tf_changed])
-   {
-      tf_select = ec_dec_bit_logp(dec, 1);
-   }
-   for (i=start;i<end;i++)
-   {
-      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
-   }
-}
-static void init_caps(const CELTMode *m,int *cap,int LM,int C)
 {
   int i;
   for (i=0;i<m->nbEBands;i++)
@@ -832,2070 +190,6 @@ static void init_caps(const CELTMode *m,int *cap,int LM,int C)
   }
 }
-#if 0
-static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
-      const opus_val16 *bandLogE, int end, int LM, int C, int N0)
-{
-   int i;
-   opus_val32 diff=0;
-   int c;
-   int trim_index = 5;
-   if (C==2)
-   {
-      opus_val16 sum = 0; /* Q10 */
-      /* Compute inter-channel correlation for low frequencies */
-      for (i=0;i<8;i++)
-      {
-         int j;
-         opus_val32 partial = 0;
-         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
-            partial = MAC16_16(partial, X[j], X[N0+j]);
-         sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
-      }
-      sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
-      /*printf ("%f\n", sum);*/
-      if (sum > QCONST16(.995f,10))
-         trim_index-=4;
-      else if (sum > QCONST16(.92f,10))
-         trim_index-=3;
-      else if (sum > QCONST16(.85f,10))
-         trim_index-=2;
-      else if (sum > QCONST16(.8f,10))
-         trim_index-=1;
-   }
-   /* Estimate spectral tilt */
-   c=0; do {
-      for (i=0;i<end-1;i++)
-      {
-         diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-m->nbEBands);
-      }
-   } while (++c<C);
-   /* We divide by two here to avoid making the tilt larger for stereo as a
-      result of a bug in the loop above */
-   diff /= 2*C*(end-1);
-   /*printf("%f\n", diff);*/
-   if (diff > QCONST16(2.f, DB_SHIFT))
-      trim_index--;
-   if (diff > QCONST16(8.f, DB_SHIFT))
-      trim_index--;
-   if (diff < -QCONST16(4.f, DB_SHIFT))
-      trim_index++;
-   if (diff < -QCONST16(10.f, DB_SHIFT))
-      trim_index++;
-   if (trim_index<0)
-      trim_index = 0;
-   if (trim_index>10)
-      trim_index = 10;
-#ifdef FUZZING
-   trim_index = rand()%11;
-#endif
-   return trim_index;
-}
-static int stereo_analysis(const CELTMode *m, const celt_norm *X,
-      int LM, int N0)
-{
-   int i;
-   int thetas;
-   opus_val32 sumLR = EPSILON, sumMS = EPSILON;
-   /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */
-   for (i=0;i<13;i++)
-   {
-      int j;
-      for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
-      {
-         opus_val32 L, R, M, S;
-         /* We cast to 32-bit first because of the -32768 case */
-         L = EXTEND32(X[j]);
-         R = EXTEND32(X[N0+j]);
-         M = ADD32(L, R);
-         S = SUB32(L, R);
-         sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R)));
-         sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S)));
-      }
-   }
-   sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS);
-   thetas = 13;
-   /* We don't need thetas for lower bands with LM<=1 */
-   if (LM<=1)
-      thetas -= 8;
-   return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS)
-         > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
-}
-int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
-{
-   int i, c, N;
-   opus_int32 bits;
-   ec_enc _enc;
-   VARDECL(celt_sig, in);
-   VARDECL(celt_sig, freq);
-   VARDECL(celt_norm, X);
-   VARDECL(celt_ener, bandE);
-   VARDECL(opus_val16, bandLogE);
-   VARDECL(int, fine_quant);
-   VARDECL(opus_val16, error);
-   VARDECL(int, pulses);
-   VARDECL(int, cap);
-   VARDECL(int, offsets);
-   VARDECL(int, fine_priority);
-   VARDECL(int, tf_res);
-   VARDECL(unsigned char, collapse_masks);
-   celt_sig *prefilter_mem;
-   opus_val16 *oldBandE, *oldLogE, *oldLogE2;
-   int shortBlocks=0;
-   int isTransient=0;
-   const int CC = st->channels;
-   const int C = st->stream_channels;
-   int LM, M;
-   int tf_select;
-   int nbFilledBytes, nbAvailableBytes;
-   int effEnd;
-   int codedBands;
-   int tf_sum;
-   int alloc_trim;
-   int pitch_index=COMBFILTER_MINPERIOD;
-   opus_val16 gain1 = 0;
-   int intensity=0;
-   int dual_stereo=0;
-   int effectiveBytes;
-   opus_val16 pf_threshold;
-   int dynalloc_logp;
-   opus_int32 vbr_rate;
-   opus_int32 total_bits;
-   opus_int32 total_boost;
-   opus_int32 balance;
-   opus_int32 tell;
-   int prefilter_tapset=0;
-   int pf_on;
-   int anti_collapse_rsv;
-   int anti_collapse_on=0;
-   int silence=0;
-   ALLOC_STACK;
-   if (nbCompressedBytes<2 || pcm==NULL)
-     return OPUS_BAD_ARG;
-   frame_size *= st->upsample;
-   for (LM=0;LM<=st->mode->maxLM;LM++)
-      if (st->mode->shortMdctSize<<LM==frame_size)
-         break;
-   if (LM>st->mode->maxLM)
-      return OPUS_BAD_ARG;
-   M=1<<LM;
-   N = M*st->mode->shortMdctSize;
-   prefilter_mem = st->in_mem+CC*(st->overlap);
-   oldBandE = (opus_val16*)(st->in_mem+CC*(2*st->overlap+COMBFILTER_MAXPERIOD));
-   oldLogE = oldBandE + CC*st->mode->nbEBands;
-   oldLogE2 = oldLogE + CC*st->mode->nbEBands;
-   if (enc==NULL)
-   {
-      tell=1;
-      nbFilledBytes=0;
-   } else {
-      tell=ec_tell(enc);
-      nbFilledBytes=(tell+4)>>3;
-   }
-#ifdef CUSTOM_MODES
-   if (st->signalling && enc==NULL)
-   {
-      int tmp = (st->mode->effEBands-st->end)>>1;
-      st->end = IMAX(1, st->mode->effEBands-tmp);
-      compressed[0] = tmp<<5;
-      compressed[0] |= LM<<3;
-      compressed[0] |= (C==2)<<2;
-      /* Convert "standard mode" to Opus header */
-      if (st->mode->Fs==48000 && st->mode->shortMdctSize==120)
-      {
-         int c0 = toOpus(compressed[0]);
-         if (c0<0)
-            return OPUS_BAD_ARG;
-         compressed[0] = c0;
-      }
-      compressed++;
-      nbCompressedBytes--;
-   }
-#else
-   celt_assert(st->signalling==0);
-#endif
-   /* Can't produce more than 1275 output bytes */
-   nbCompressedBytes = IMIN(nbCompressedBytes,1275);
-   nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
-   if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX)
-   {
-      opus_int32 den=st->mode->Fs>>BITRES;
-      vbr_rate=(st->bitrate*frame_size+(den>>1))/den;
-#ifdef CUSTOM_MODES
-      if (st->signalling)
-         vbr_rate -= 8<<BITRES;
-#endif
-      effectiveBytes = vbr_rate>>(3+BITRES);
-   } else {
-      opus_int32 tmp;
-      vbr_rate = 0;
-      tmp = st->bitrate*frame_size;
-      if (tell>1)
-         tmp += tell;
-      if (st->bitrate!=OPUS_BITRATE_MAX)
-         nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
-               (tmp+4*st->mode->Fs)/(8*st->mode->Fs)-!!st->signalling));
-      effectiveBytes = nbCompressedBytes;
-   }
-   if (enc==NULL)
-   {
-      ec_enc_init(&_enc, compressed, nbCompressedBytes);
-      enc = &_enc;
-   }
-   if (vbr_rate>0)
-   {
-      /* Computes the max bit-rate allowed in VBR mode to avoid violating the
-          target rate and buffering.
-         We must do this up front so that bust-prevention logic triggers
-          correctly if we don't have enough bits. */
-      if (st->constrained_vbr)
-      {
-         opus_int32 vbr_bound;
-         opus_int32 max_allowed;
-         /* We could use any multiple of vbr_rate as bound (depending on the
-             delay).
-            This is clamped to ensure we use at least two bytes if the encoder
-             was entirely empty, but to allow 0 in hybrid mode. */
-         vbr_bound = vbr_rate;
-         max_allowed = IMIN(IMAX(tell==1?2:0,
-               (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)),
-               nbAvailableBytes);
-         if(max_allowed < nbAvailableBytes)
-         {
-            nbCompressedBytes = nbFilledBytes+max_allowed;
-            nbAvailableBytes = max_allowed;
-            ec_enc_shrink(enc, nbCompressedBytes);
-         }
-      }
-   }
-   total_bits = nbCompressedBytes*8;
-   effEnd = st->end;
-   if (effEnd > st->mode->effEBands)
-      effEnd = st->mode->effEBands;
-   ALLOC(in, CC*(N+st->overlap), celt_sig);
-   /* Find pitch period and gain */
-   {
-      VARDECL(celt_sig, _pre);
-      celt_sig *pre[2];
-      SAVE_STACK;
-      ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
-      pre[0] = _pre;
-      pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
-      silence = 1;
-      c=0; do {
-         int count = 0;
-         const opus_val16 * OPUS_RESTRICT pcmp = pcm+c;
-         celt_sig * OPUS_RESTRICT inp = in+c*(N+st->overlap)+st->overlap;
-         for (i=0;i<N;i++)
-         {
-            celt_sig x, tmp;
-            x = SCALEIN(*pcmp);
-#ifndef FIXED_POINT
-            if (!(x==x))
-               x = 0;
-            if (st->clip)
-               x = MAX32(-65536.f, MIN32(65536.f,x));
-#endif
-            if (++count==st->upsample)
-            {
-               count=0;
-               pcmp+=CC;
-            } else {
-               x = 0;
-            }
-            /* Apply pre-emphasis */
-            tmp = MULT16_16(st->mode->preemph[2], x);
-            *inp = tmp + st->preemph_memE[c];
-            st->preemph_memE[c] = MULT16_32_Q15(st->mode->preemph[1], *inp)
-                                   - MULT16_32_Q15(st->mode->preemph[0], tmp);
-            silence = silence && *inp == 0;
-            inp++;
-         }
-         OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
-         OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
-      } while (++c<CC);
-#ifdef FUZZING
-      if ((rand()&0x3F)==0)
-         silence = 1;
-#endif
-      if (tell==1)
-         ec_enc_bit_logp(enc, silence, 15);
-      else
-         silence=0;
-      if (silence)
-      {
-         /*In VBR mode there is no need to send more than the minimum. */
-         if (vbr_rate>0)
-         {
-            effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2);
-            total_bits=nbCompressedBytes*8;
-            nbAvailableBytes=2;
-            ec_enc_shrink(enc, nbCompressedBytes);
-         }
-         /* Pretend we've filled all the remaining bits with zeros
-            (that's what the initialiser did anyway) */
-         tell = nbCompressedBytes*8;
-         enc->nbits_total+=tell-ec_tell(enc);
-      }
-      if (nbAvailableBytes>12*C && st->start==0 && !silence && !st->disable_pf && st->complexity >= 5)
-      {
-         VARDECL(opus_val16, pitch_buf);
-         ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
-         pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
-         pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
-               COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
-         pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
-         gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
-               N, &pitch_index, st->prefilter_period, st->prefilter_gain);
-         if (pitch_index > COMBFILTER_MAXPERIOD-2)
-            pitch_index = COMBFILTER_MAXPERIOD-2;
-         gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
-         if (st->loss_rate>2)
-            gain1 = HALF32(gain1);
-         if (st->loss_rate>4)
-            gain1 = HALF32(gain1);
-         if (st->loss_rate>8)
-            gain1 = 0;
-         prefilter_tapset = st->tapset_decision;
-      } else {
-         gain1 = 0;
-      }
-      /* Gain threshold for enabling the prefilter/postfilter */
-      pf_threshold = QCONST16(.2f,15);
-      /* Adjusting the threshold based on rate and continuity */
-      if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
-         pf_threshold += QCONST16(.2f,15);
-      if (nbAvailableBytes<25)
-         pf_threshold += QCONST16(.1f,15);
-      if (nbAvailableBytes<35)
-         pf_threshold += QCONST16(.1f,15);
-      if (st->prefilter_gain > QCONST16(.4f,15))
-         pf_threshold -= QCONST16(.1f,15);
-      if (st->prefilter_gain > QCONST16(.55f,15))
-         pf_threshold -= QCONST16(.1f,15);
-      /* Hard threshold at 0.2 */
-      pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
-      if (gain1<pf_threshold)
-      {
-         if(st->start==0 && tell+16<=total_bits)
-            ec_enc_bit_logp(enc, 0, 1);
-         gain1 = 0;
-         pf_on = 0;
-      } else {
-         /*This block is not gated by a total bits check only because
-           of the nbAvailableBytes check above.*/
-         int qg;
-         int octave;
-         if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15))
-            gain1=st->prefilter_gain;
-#ifdef FIXED_POINT
-         qg = ((gain1+1536)>>10)/3-1;
-#else
-         qg = (int)floor(.5f+gain1*32/3)-1;
-#endif
-         qg = IMAX(0, IMIN(7, qg));
-         ec_enc_bit_logp(enc, 1, 1);
-         pitch_index += 1;
-         octave = EC_ILOG(pitch_index)-5;
-         ec_enc_uint(enc, octave, 6);
-         ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
-         pitch_index -= 1;
-         ec_enc_bits(enc, qg, 3);
-         if (ec_tell(enc)+2<=total_bits)
-            ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2);
-         else
-           prefilter_tapset = 0;
-         gain1 = QCONST16(0.09375f,15)*(qg+1);
-         pf_on = 1;
-      }
-      /*printf("%d %f\n", pitch_index, gain1);*/
-      c=0; do {
-         int offset = st->mode->shortMdctSize-st->mode->overlap;
-         st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
-         OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap);
-         if (offset)
-            comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD,
-                  st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
-                  st->prefilter_tapset, st->prefilter_tapset, NULL, 0);
-         comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
-               st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
-               st->prefilter_tapset, prefilter_tapset, st->mode->window, st->mode->overlap);
-         OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap);
-         if (N>COMBFILTER_MAXPERIOD)
-         {
-            OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
-         } else {
-            OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
-            OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
-         }
-      } while (++c<CC);
-      RESTORE_STACK;
-   }
-   isTransient = 0;
-   shortBlocks = 0;
-   if (LM>0 && ec_tell(enc)+3<=total_bits)
-   {
-      if (st->complexity > 1)
-      {
-         isTransient = transient_analysis(in, N+st->overlap, CC,
-                  st->overlap);
-         if (isTransient)
-            shortBlocks = M;
-      }
-      ec_enc_bit_logp(enc, isTransient, 3);
-   }
-   ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
-   ALLOC(bandE,st->mode->nbEBands*CC, celt_ener);
-   ALLOC(bandLogE,st->mode->nbEBands*CC, opus_val16);
-   /* Compute MDCTs */
-   compute_mdcts(st->mode, shortBlocks, in, freq, CC, LM);
-   if (CC==2&&C==1)
-   {
-      for (i=0;i<N;i++)
-         freq[i] = ADD32(HALF32(freq[i]), HALF32(freq[N+i]));
-   }
-   if (st->upsample != 1)
-   {
-      c=0; do
-      {
-         int bound = N/st->upsample;
-         for (i=0;i<bound;i++)
-            freq[c*N+i] *= st->upsample;
-         for (;i<N;i++)
-            freq[c*N+i] = 0;
-      } while (++c<C);
-   }
-   ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
-   compute_band_energies(st->mode, freq, bandE, effEnd, C, M);
-   amp2Log2(st->mode, effEnd, st->end, bandE, bandLogE, C);
-   /* Band normalisation */
-   normalise_bands(st->mode, freq, X, bandE, effEnd, C, M);
-   ALLOC(tf_res, st->mode->nbEBands, int);
-   tf_select = tf_analysis(st->mode, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum);
-   for (i=effEnd;i<st->end;i++)
-      tf_res[i] = tf_res[effEnd-1];
-   ALLOC(error, C*st->mode->nbEBands, opus_val16);
-   quant_coarse_energy(st->mode, st->start, st->end, effEnd, bandLogE,
-         oldBandE, total_bits, error, enc,
-         C, LM, nbAvailableBytes, st->force_intra,
-         &st->delayedIntra, st->complexity >= 4, st->loss_rate);
-   tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
-   st->spread_decision = SPREAD_NORMAL;
-   if (ec_tell(enc)+4<=total_bits)
-   {
-      if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
-      {
-         if (st->complexity == 0)
-            st->spread_decision = SPREAD_NONE;
-      } else {
-         st->spread_decision = spreading_decision(st->mode, X,
-               &st->tonal_average, st->spread_decision, &st->hf_average,
-               &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
-      }
-      ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
-   }
-   ALLOC(cap, st->mode->nbEBands, int);
-   ALLOC(offsets, st->mode->nbEBands, int);
-   init_caps(st->mode,cap,LM,C);
-   for (i=0;i<st->mode->nbEBands;i++)
-      offsets[i] = 0;
-   /* Dynamic allocation code */
-   /* Make sure that dynamic allocation can't make us bust the budget */
-   if (effectiveBytes > 50 && LM>=1)
-   {
-      int t1, t2;
-      if (LM <= 1)
-      {
-         t1 = 3;
-         t2 = 5;
-      } else {
-         t1 = 2;
-         t2 = 4;
-      }
-      for (i=st->start+1;i<st->end-1;i++)
-      {
-         opus_val32 d2;
-         d2 = 2*bandLogE[i]-bandLogE[i-1]-bandLogE[i+1];
-         if (C==2)
-            d2 = HALF32(d2 + 2*bandLogE[i+st->mode->nbEBands]-
-                  bandLogE[i-1+st->mode->nbEBands]-bandLogE[i+1+st->mode->nbEBands]);
-#ifdef FUZZING
-         if((rand()&0xF)==0)
-         {
-            offsets[i] += 1;
-            if((rand()&0x3)==0)
-               offsets[i] += 1+(rand()&0x3);
-         }
-#else
-         if (d2 > SHL16(t1,DB_SHIFT))
-            offsets[i] += 1;
-         if (d2 > SHL16(t2,DB_SHIFT))
-            offsets[i] += 1;
-#endif
-      }
-   }
-   dynalloc_logp = 6;
-   total_bits<<=BITRES;
-   total_boost = 0;
-   tell = ec_tell_frac(enc);
-   for (i=st->start;i<st->end;i++)
-   {
-      int width, quanta;
-      int dynalloc_loop_logp;
-      int boost;
-      int j;
-      width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
-      /* quanta is 6 bits, but no more than 1 bit/sample
-         and no less than 1/8 bit/sample */
-      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
-      dynalloc_loop_logp = dynalloc_logp;
-      boost = 0;
-      for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost
-            && boost < cap[i]; j++)
-      {
-         int flag;
-         flag = j<offsets[i];
-         ec_enc_bit_logp(enc, flag, dynalloc_loop_logp);
-         tell = ec_tell_frac(enc);
-         if (!flag)
-            break;
-         boost += quanta;
-         total_boost += quanta;
-         dynalloc_loop_logp = 1;
-      }
-      /* Making dynalloc more likely */
-      if (j)
-         dynalloc_logp = IMAX(2, dynalloc_logp-1);
-      offsets[i] = boost;
-   }
-   alloc_trim = 5;
-   if (tell+(6<<BITRES) <= total_bits - total_boost)
-   {
-      alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
-            st->end, LM, C, N);
-      ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
-      tell = ec_tell_frac(enc);
-   }
-   /* Variable bitrate */
-   if (vbr_rate>0)
-   {
-     opus_val16 alpha;
-     opus_int32 delta;
-     /* The target rate in 8th bits per frame */
-     opus_int32 target;
-     opus_int32 min_allowed;
-     int lm_diff = st->mode->maxLM - LM;
-     /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
-        The CELT allocator will just not be able to use more than that anyway. */
-     nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
-     target = vbr_rate + (st->vbr_offset>>lm_diff) - ((40*C+20)<<BITRES);
-     /* Shortblocks get a large boost in bitrate, but since they
-        are uncommon long blocks are not greatly affected */
-     if (shortBlocks || tf_sum < -2*(st->end-st->start))
-        target = 7*target/4;
-     else if (tf_sum < -(st->end-st->start))
-        target = 3*target/2;
-     else if (M > 1)
-        target-=(target+14)/28;
-     /* The current offset is removed from the target and the space used
-        so far is added*/
-     target=target+tell;
-     /* In VBR mode the frame size must not be reduced so much that it would
-         result in the encoder running out of bits.
-        The margin of 2 bytes ensures that none of the bust-prevention logic
-         in the decoder will have triggered so far. */
-     min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes;
-     nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
-     nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
-     nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes;
-     /* By how much did we "miss" the target on that frame */
-     delta = target - vbr_rate;
-     target=nbAvailableBytes<<(BITRES+3);
-     /*If the frame is silent we don't adjust our drift, otherwise
-       the encoder will shoot to very high rates after hitting a
-       span of silence, but we do allow the bitres to refill.
-       This means that we'll undershoot our target in CVBR/VBR modes
-       on files with lots of silence. */
-     if(silence)
-     {
-       nbAvailableBytes = 2;
-       target = 2*8<<BITRES;
-       delta = 0;
-     }
-     if (st->vbr_count < 970)
-     {
-        st->vbr_count++;
-        alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
-     } else
-        alpha = QCONST16(.001f,15);
-     /* How many bits have we used in excess of what we're allowed */
-     if (st->constrained_vbr)
-        st->vbr_reservoir += target - vbr_rate;
-     /*printf ("%d\n", st->vbr_reservoir);*/
-     /* Compute the offset we need to apply in order to reach the target */
-     st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
-     st->vbr_offset = -st->vbr_drift;
-     /*printf ("%d\n", st->vbr_drift);*/
-     if (st->constrained_vbr && st->vbr_reservoir < 0)
-     {
-        /* We're under the min value -- increase rate */
-        int adjust = (-st->vbr_reservoir)/(8<<BITRES);
-        /* Unless we're just coding silence */
-        nbAvailableBytes += silence?0:adjust;
-        st->vbr_reservoir = 0;
-        /*printf ("+%d\n", adjust);*/
-     }
-     nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
-     /* This moves the raw bits to take into account the new compressed size */
-     ec_enc_shrink(enc, nbCompressedBytes);
-   }
-   if (C==2)
-   {
-      int effectiveRate;
-      /* Always use MS for 2.5 ms frames until we can do a better analysis */
-      if (LM!=0)
-         dual_stereo = stereo_analysis(st->mode, X, LM, N);
-      /* Account for coarse energy */
-      effectiveRate = (8*effectiveBytes - 80)>>LM;
-      /* effectiveRate in kb/s */
-      effectiveRate = 2*effectiveRate/5;
-      if (effectiveRate<35)
-         intensity = 8;
-      else if (effectiveRate<50)
-         intensity = 12;
-      else if (effectiveRate<68)
-         intensity = 16;
-      else if (effectiveRate<84)
-         intensity = 18;
-      else if (effectiveRate<102)
-         intensity = 19;
-      else if (effectiveRate<130)
-         intensity = 20;
-      else
-         intensity = 100;
-      intensity = IMIN(st->end,IMAX(st->start, intensity));
-   }
-   /* Bit allocation */
-   ALLOC(fine_quant, st->mode->nbEBands, int);
-   ALLOC(pulses, st->mode->nbEBands, int);
-   ALLOC(fine_priority, st->mode->nbEBands, int);
-   /* bits =           packet size                    - where we are - safety*/
-   bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
-   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
-   bits -= anti_collapse_rsv;
-   codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap,
-         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
-         fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands);
-   st->lastCodedBands = codedBands;
-   quant_fine_energy(st->mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
-#ifdef MEASURE_NORM_MSE
-   float X0[3000];
-   float bandE0[60];
-   c=0; do
-      for (i=0;i<N;i++)
-         X0[i+c*N] = X[i+c*N];
-   while (++c<C);
-   for (i=0;i<C*st->mode->nbEBands;i++)
-      bandE0[i] = bandE[i];
-#endif
-   /* Residual quantisation */
-   ALLOC(collapse_masks, C*st->mode->nbEBands, unsigned char);
-   quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
-         bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res,
-         nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
-   if (anti_collapse_rsv > 0)
-   {
-      anti_collapse_on = st->consec_transient<2;
-#ifdef FUZZING
-      anti_collapse_on = rand()&0x1;
-#endif
-      ec_enc_bits(enc, anti_collapse_on, 1);
-   }
-   quant_energy_finalise(st->mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
-   if (silence)
-   {
-      for (i=0;i<C*st->mode->nbEBands;i++)
-         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
-   }
-#ifdef RESYNTH
-   /* Re-synthesis of the coded audio if required */
-   {
-      celt_sig *out_mem[2];
-      celt_sig *overlap_mem[2];
-      log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C);
-      if (silence)
-      {
-         for (i=0;i<C*st->mode->nbEBands;i++)
-            bandE[i] = 0;
-      }
-#ifdef MEASURE_NORM_MSE
-      measure_norm_mse(st->mode, X, X0, bandE, bandE0, M, N, C);
-#endif
-      if (anti_collapse_on)
-      {
-         anti_collapse(st->mode, X, collapse_masks, LM, C, N,
-               st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
-      }
-      /* Synthesis */
-      denormalise_bands(st->mode, X, freq, bandE, effEnd, C, M);
-      OPUS_MOVE(st->syn_mem[0], st->syn_mem[0]+N, MAX_PERIOD);
-      if (CC==2)
-         OPUS_MOVE(st->syn_mem[1], st->syn_mem[1]+N, MAX_PERIOD);
-      c=0; do
-         for (i=0;i<M*st->mode->eBands[st->start];i++)
-            freq[c*N+i] = 0;
-      while (++c<C);
-      c=0; do
-         for (i=M*st->mode->eBands[st->end];i<N;i++)
-            freq[c*N+i] = 0;
-      while (++c<C);
-      if (CC==2&&C==1)
-      {
-         for (i=0;i<N;i++)
-            freq[N+i] = freq[i];
-      }
-      out_mem[0] = st->syn_mem[0]+MAX_PERIOD;
-      if (CC==2)
-         out_mem[1] = st->syn_mem[1]+MAX_PERIOD;
-      overlap_mem[0] = prefilter_mem+CC*COMBFILTER_MAXPERIOD;
-      if (CC==2)
-         overlap_mem[1] = overlap_mem[0] + st->overlap;
-      compute_inv_mdcts(st->mode, shortBlocks, freq, out_mem, overlap_mem, CC, LM);
-      c=0; do {
-         st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
-         st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
-         comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, st->mode->shortMdctSize,
-               st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
-               st->mode->window, st->overlap);
-         if (LM!=0)
-            comb_filter(out_mem[c]+st->mode->shortMdctSize, out_mem[c]+st->mode->shortMdctSize, st->prefilter_period, pitch_index, N-st->mode->shortMdctSize,
-                  st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
-                  st->mode->window, st->mode->overlap);
-      } while (++c<CC);
-      deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, st->mode->preemph, st->preemph_memD);
-      st->prefilter_period_old = st->prefilter_period;
-      st->prefilter_gain_old = st->prefilter_gain;
-      st->prefilter_tapset_old = st->prefilter_tapset;
-   }
-#endif
-   st->prefilter_period = pitch_index;
-   st->prefilter_gain = gain1;
-   st->prefilter_tapset = prefilter_tapset;
-#ifdef RESYNTH
-   if (LM!=0)
-   {
-      st->prefilter_period_old = st->prefilter_period;
-      st->prefilter_gain_old = st->prefilter_gain;
-      st->prefilter_tapset_old = st->prefilter_tapset;
-   }
-#endif
-   if (CC==2&&C==1) {
-      for (i=0;i<st->mode->nbEBands;i++)
-         oldBandE[st->mode->nbEBands+i]=oldBandE[i];
-   }
-   if (!isTransient)
-   {
-      for (i=0;i<CC*st->mode->nbEBands;i++)
-         oldLogE2[i] = oldLogE[i];
-      for (i=0;i<CC*st->mode->nbEBands;i++)
-         oldLogE[i] = oldBandE[i];
-   } else {
-      for (i=0;i<CC*st->mode->nbEBands;i++)
-         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
-   }
-   /* In case start or end were to change */
-   c=0; do
-   {
-      for (i=0;i<st->start;i++)
-      {
-         oldBandE[c*st->mode->nbEBands+i]=0;
-         oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
-      }
-      for (i=st->end;i<st->mode->nbEBands;i++)
-      {
-         oldBandE[c*st->mode->nbEBands+i]=0;
-         oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
-      }
-   } while (++c<CC);
-   if (isTransient)
-      st->consec_transient++;
-   else
-      st->consec_transient=0;
-   st->rng = enc->rng;
-   /* If there's any room left (can only happen for very high rates),
-      it's already filled with zeros */
-   ec_enc_done(enc);
-#ifdef CUSTOM_MODES
-   if (st->signalling)
-      nbCompressedBytes++;
-#endif
-   RESTORE_STACK;
-   if (ec_get_error(enc))
-      return OPUS_INTERNAL_ERROR;
-   else
-      return nbCompressedBytes;
-}
-#ifdef CUSTOM_MODES
-#ifdef FIXED_POINT
-int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
-}
-#ifndef DISABLE_FLOAT_API
-int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   int j, ret, C, N;
-   VARDECL(opus_int16, in);
-   ALLOC_STACK;
-   if (pcm==NULL)
-      return OPUS_BAD_ARG;
-   C = st->channels;
-   N = frame_size;
-   ALLOC(in, C*N, opus_int16);
-   for (j=0;j<C*N;j++)
-     in[j] = FLOAT2INT16(pcm[j]);
-   ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
-#ifdef RESYNTH
-   for (j=0;j<C*N;j++)
-      ((float*)pcm)[j]=in[j]*(1.f/32768.f);
-#endif
-   RESTORE_STACK;
-   return ret;
-}
-#endif /* DISABLE_FLOAT_API */
-#else
-int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   int j, ret, C, N;
-   VARDECL(celt_sig, in);
-   ALLOC_STACK;
-   if (pcm==NULL)
-      return OPUS_BAD_ARG;
-   C=st->channels;
-   N=frame_size;
-   ALLOC(in, C*N, celt_sig);
-   for (j=0;j<C*N;j++) {
-     in[j] = SCALEOUT(pcm[j]);
-   }
-   ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
-#ifdef RESYNTH
-   for (j=0;j<C*N;j++)
-      ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]);
-#endif
-   RESTORE_STACK;
-   return ret;
-}
-int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
-{
-   return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
-}
-#endif
-#endif /* CUSTOM_MODES */
-int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
-{
-   va_list ap;
-   va_start(ap, request);
-   switch (request)
-   {
-      case OPUS_SET_COMPLEXITY_REQUEST:
-      {
-         int value = va_arg(ap, opus_int32);
-         if (value<0 || value>10)
-            goto bad_arg;
-         st->complexity = value;
-      }
-      break;
-      case CELT_SET_START_BAND_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<0 || value>=st->mode->nbEBands)
-            goto bad_arg;
-         st->start = value;
-      }
-      break;
-      case CELT_SET_END_BAND_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<1 || value>st->mode->nbEBands)
-            goto bad_arg;
-         st->end = value;
-      }
-      break;
-      case CELT_SET_PREDICTION_REQUEST:
-      {
-         int value = va_arg(ap, opus_int32);
-         if (value<0 || value>2)
-            goto bad_arg;
-         st->disable_pf = value<=1;
-         st->force_intra = value==0;
-      }
-      break;
-      case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
-      {
-         int value = va_arg(ap, opus_int32);
-         if (value<0 || value>100)
-            goto bad_arg;
-         st->loss_rate = value;
-      }
-      break;
-      case OPUS_SET_VBR_CONSTRAINT_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->constrained_vbr = value;
-      }
-      break;
-      case OPUS_SET_VBR_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->vbr = value;
-      }
-      break;
-      case OPUS_SET_BITRATE_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<=500 && value!=OPUS_BITRATE_MAX)
-            goto bad_arg;
-         value = IMIN(value, 260000*st->channels);
-         st->bitrate = value;
-      }
-      break;
-      case CELT_SET_CHANNELS_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<1 || value>2)
-            goto bad_arg;
-         st->stream_channels = value;
-      }
-      break;
-      case OPUS_SET_LSB_DEPTH_REQUEST:
-      {
-          opus_int32 value = va_arg(ap, opus_int32);
-          if (value<8 || value>24)
-             goto bad_arg;
-          st->lsb_depth=value;
-      }
-      break;
-      case OPUS_GET_LSB_DEPTH_REQUEST:
-      {
-          opus_int32 *value = va_arg(ap, opus_int32*);
-          *value=st->lsb_depth;
-      }
-      break;
-      case OPUS_RESET_STATE:
-      {
-         int i;
-         opus_val16 *oldBandE, *oldLogE, *oldLogE2;
-         oldBandE = (opus_val16*)(st->in_mem+st->channels*(2*st->overlap+COMBFILTER_MAXPERIOD));
-         oldLogE = oldBandE + st->channels*st->mode->nbEBands;
-         oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
-         OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
-               opus_custom_encoder_get_size(st->mode, st->channels)-
-               ((char*)&st->ENCODER_RESET_START - (char*)st));
-         for (i=0;i<st->channels*st->mode->nbEBands;i++)
-            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
-         st->vbr_offset = 0;
-         st->delayedIntra = 1;
-         st->spread_decision = SPREAD_NORMAL;
-         st->tonal_average = 256;
-         st->hf_average = 0;
-         st->tapset_decision = 0;
-      }
-      break;
-#ifdef CUSTOM_MODES
-      case CELT_SET_INPUT_CLIPPING_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->clip = value;
-      }
-      break;
-#endif
-      case CELT_SET_SIGNALLING_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->signalling = value;
-      }
-      break;
-      case CELT_GET_MODE_REQUEST:
-      {
-         const CELTMode ** value = va_arg(ap, const CELTMode**);
-         if (value==0)
-            goto bad_arg;
-         *value=st->mode;
-      }
-      break;
-      case OPUS_GET_FINAL_RANGE_REQUEST:
-      {
-         opus_uint32 * value = va_arg(ap, opus_uint32 *);
-         if (value==0)
-            goto bad_arg;
-         *value=st->rng;
-      }
-      break;
-      default:
-         goto bad_request;
-   }
-   va_end(ap);
-   return OPUS_OK;
-bad_arg:
-   va_end(ap);
-   return OPUS_BAD_ARG;
-bad_request:
-   va_end(ap);
-   return OPUS_UNIMPLEMENTED;
-}
-#endif
-/**********************************************************************/
-/*                                                                    */
-/*                             DECODER                                */
-/*                                                                    */
-/**********************************************************************/
-#define DECODE_BUFFER_SIZE 2048
-/** Decoder state
- @brief Decoder state
- */
-struct OpusCustomDecoder {
-   const OpusCustomMode *mode;
-   int overlap;
-   int channels;
-   int stream_channels;
-   int downsample;
-   int start, end;
-   int signalling;
-   /* Everything beyond this point gets cleared on a reset */
-#define DECODER_RESET_START rng
-   opus_uint32 rng;
-   int error;
-   int last_pitch_index;
-   int loss_count;
-   int postfilter_period;
-   int postfilter_period_old;
-   opus_val16 postfilter_gain;
-   opus_val16 postfilter_gain_old;
-   int postfilter_tapset;
-   int postfilter_tapset_old;
-   celt_sig preemph_memD[2];
-   celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
-   /* opus_val16 lpc[],  Size = channels*LPC_ORDER */
-   /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
-   /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
-   /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
-   /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
-};
-int celt_decoder_get_size(int channels)
-{
-   const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
-   return opus_custom_decoder_get_size(mode, channels);
-}
-OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels)
-{
-   int size = sizeof(struct CELTDecoder)
-            + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
-            + channels*LPC_ORDER*sizeof(opus_val16)
-            + 4*2*mode->nbEBands*sizeof(opus_val16);
-   return size;
-}
-#ifdef CUSTOM_MODES
-CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error)
-{
-   int ret;
-   CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels));
-   ret = opus_custom_decoder_init(st, mode, channels);
-   if (ret != OPUS_OK)
-   {
-      opus_custom_decoder_destroy(st);
-      st = NULL;
-   }
-   if (error)
-      *error = ret;
-   return st;
-}
-#endif /* CUSTOM_MODES */
-int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels)
-{
-   int ret;
-   ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
-   if (ret != OPUS_OK)
-      return ret;
-   st->downsample = resampling_factor(sampling_rate);
-   if (st->downsample==0)
-      return OPUS_BAD_ARG;
-   else
-      return OPUS_OK;
-}
-OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels)
-{
-   if (channels < 0 || channels > 2)
-      return OPUS_BAD_ARG;
-   if (st==NULL)
-      return OPUS_ALLOC_FAIL;
-   OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels));
-   st->mode = mode;
-   st->overlap = mode->overlap;
-   st->stream_channels = st->channels = channels;
-   st->downsample = 1;
-   st->start = 0;
-   st->end = st->mode->effEBands;
-   st->signalling = 1;
-   st->loss_count = 0;
-   opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
-   return OPUS_OK;
-}
-#ifdef CUSTOM_MODES
-void opus_custom_decoder_destroy(CELTDecoder *st)
-{
-   opus_free(st);
-}
-#endif /* CUSTOM_MODES */
-static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
-{
-   int c;
-   int pitch_index;
-   int overlap = st->mode->overlap;
-   opus_val16 fade = Q15ONE;
-   int i, len;
-   const int C = st->channels;
-   int offset;
-   celt_sig *out_mem[2];
-   celt_sig *decode_mem[2];
-   celt_sig *overlap_mem[2];
-   opus_val16 *lpc;
-   opus_val32 *out_syn[2];
-   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
-   SAVE_STACK;
-   c=0; do {
-      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap);
-      out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
-      overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE;
-   } while (++c<C);
-   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*C);
-   oldBandE = lpc+C*LPC_ORDER;
-   oldLogE = oldBandE + 2*st->mode->nbEBands;
-   oldLogE2 = oldLogE + 2*st->mode->nbEBands;
-   backgroundLogE = oldLogE2  + 2*st->mode->nbEBands;
-   out_syn[0] = out_mem[0]+MAX_PERIOD-N;
-   if (C==2)
-      out_syn[1] = out_mem[1]+MAX_PERIOD-N;
-   len = N+st->mode->overlap;
-   if (st->loss_count >= 5 || st->start!=0)
-   {
-      /* Noise-based PLC/CNG */
-      VARDECL(celt_sig, freq);
-      VARDECL(celt_norm, X);
-      VARDECL(celt_ener, bandE);
-      opus_uint32 seed;
-      int effEnd;
-      effEnd = st->end;
-      if (effEnd > st->mode->effEBands)
-         effEnd = st->mode->effEBands;
-      ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */
-      ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
-      ALLOC(bandE, st->mode->nbEBands*C, celt_ener);
-      if (st->loss_count >= 5)
-         log2Amp(st->mode, st->start, st->end, bandE, backgroundLogE, C);
-      else {
-         /* Energy decay */
-         opus_val16 decay = st->loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
-         c=0; do
-         {
-            for (i=st->start;i<st->end;i++)
-               oldBandE[c*st->mode->nbEBands+i] -= decay;
-         } while (++c<C);
-         log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C);
-      }
-      seed = st->rng;
-      for (c=0;c<C;c++)
-      {
-         for (i=0;i<(st->mode->eBands[st->start]<<LM);i++)
-            X[c*N+i] = 0;
-         for (i=st->start;i<st->mode->effEBands;i++)
-         {
-            int j;
-            int boffs;
-            int blen;
-            boffs = N*c+(st->mode->eBands[i]<<LM);
-            blen = (st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
-            for (j=0;j<blen;j++)
-            {
-               seed = celt_lcg_rand(seed);
-               X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
-            }
-            renormalise_vector(X+boffs, blen, Q15ONE);
-         }
-         for (i=(st->mode->eBands[st->end]<<LM);i<N;i++)
-            X[c*N+i] = 0;
-      }
-      st->rng = seed;
-      denormalise_bands(st->mode, X, freq, bandE, st->mode->effEBands, C, 1<<LM);
-      c=0; do
-         for (i=0;i<st->mode->eBands[st->start]<<LM;i++)
-            freq[c*N+i] = 0;
-      while (++c<C);
-      c=0; do {
-         int bound = st->mode->eBands[effEnd]<<LM;
-         if (st->downsample!=1)
-            bound = IMIN(bound, N/st->downsample);
-         for (i=bound;i<N;i++)
-            freq[c*N+i] = 0;
-      } while (++c<C);
-      compute_inv_mdcts(st->mode, 0, freq, out_syn, overlap_mem, C, LM);
-   } else {
-      /* Pitch-based PLC */
-      if (st->loss_count == 0)
-      {
-         opus_val16 pitch_buf[DECODE_BUFFER_SIZE>>1];
-         /* Corresponds to a min pitch of 67 Hz. It's possible to save CPU in this
-         search by using only part of the decode buffer */
-         int poffset = 720;
-         pitch_downsample(decode_mem, pitch_buf, DECODE_BUFFER_SIZE, C);
-         /* Max pitch is 100 samples (480 Hz) */
-         pitch_search(pitch_buf+((poffset)>>1), pitch_buf, DECODE_BUFFER_SIZE-poffset,
-               poffset-100, &pitch_index);
-         pitch_index = poffset-pitch_index;
-         st->last_pitch_index = pitch_index;
-      } else {
-         pitch_index = st->last_pitch_index;
-         fade = QCONST16(.8f,15);
-      }
-      c=0; do {
-         VARDECL(opus_val32, e);
-         opus_val16 exc[MAX_PERIOD];
-         opus_val32 ac[LPC_ORDER+1];
-         opus_val16 decay = 1;
-         opus_val32 S1=0;
-         opus_val16 mem[LPC_ORDER]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-         ALLOC(e, MAX_PERIOD+2*st->mode->overlap, opus_val32);
-         offset = MAX_PERIOD-pitch_index;
-         for (i=0;i<MAX_PERIOD;i++)
-            exc[i] = ROUND16(out_mem[c][i], SIG_SHIFT);
-         if (st->loss_count == 0)
-         {
-            _celt_autocorr(exc, ac, st->mode->window, st->mode->overlap,
-                  LPC_ORDER, MAX_PERIOD);
-            /* Noise floor -40 dB */
-#ifdef FIXED_POINT
-            ac[0] += SHR32(ac[0],13);
-#else
-            ac[0] *= 1.0001f;
-#endif
-            /* Lag windowing */
-            for (i=1;i<=LPC_ORDER;i++)
-            {
-               /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
-#ifdef FIXED_POINT
-               ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
-#else
-               ac[i] -= ac[i]*(.008f*i)*(.008f*i);
-#endif
-            }
-            _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
-         }
-         for (i=0;i<LPC_ORDER;i++)
-            mem[i] = ROUND16(out_mem[c][MAX_PERIOD-1-i], SIG_SHIFT);
-         celt_fir(exc, lpc+c*LPC_ORDER, exc, MAX_PERIOD, LPC_ORDER, mem);
-         /*for (i=0;i<MAX_PERIOD;i++)printf("%d ", exc[i]); printf("\n");*/
-         /* Check if the waveform is decaying (and if so how fast) */
-         {
-            opus_val32 E1=1, E2=1;
-            int period;
-            if (pitch_index <= MAX_PERIOD/2)
-               period = pitch_index;
-            else
-               period = MAX_PERIOD/2;
-            for (i=0;i<period;i++)
-            {
-               E1 += SHR32(MULT16_16(exc[MAX_PERIOD-period+i],exc[MAX_PERIOD-period+i]),8);
-               E2 += SHR32(MULT16_16(exc[MAX_PERIOD-2*period+i],exc[MAX_PERIOD-2*period+i]),8);
-            }
-            if (E1 > E2)
-               E1 = E2;
-            decay = celt_sqrt(frac_div32(SHR32(E1,1),E2));
-         }
-         /* Copy excitation, taking decay into account */
-         for (i=0;i<len+st->mode->overlap;i++)
-         {
-            opus_val16 tmp;
-            if (offset+i >= MAX_PERIOD)
-            {
-               offset -= pitch_index;
-               decay = MULT16_16_Q15(decay, decay);
-            }
-            e[i] = SHL32(EXTEND32(MULT16_16_Q15(decay, exc[offset+i])), SIG_SHIFT);
-            tmp = ROUND16(out_mem[c][offset+i],SIG_SHIFT);
-            S1 += SHR32(MULT16_16(tmp,tmp),8);
-         }
-         for (i=0;i<LPC_ORDER;i++)
-            mem[i] = ROUND16(out_mem[c][MAX_PERIOD-1-i], SIG_SHIFT);
-         for (i=0;i<len+st->mode->overlap;i++)
-            e[i] = MULT16_32_Q15(fade, e[i]);
-         celt_iir(e, lpc+c*LPC_ORDER, e, len+st->mode->overlap, LPC_ORDER, mem);
-         {
-            opus_val32 S2=0;
-            for (i=0;i<len+overlap;i++)
-            {
-               opus_val16 tmp = ROUND16(e[i],SIG_SHIFT);
-               S2 += SHR32(MULT16_16(tmp,tmp),8);
-            }
-            /* This checks for an "explosion" in the synthesis */
-#ifdef FIXED_POINT
-            if (!(S1 > SHR32(S2,2)))
-#else
-               /* Float test is written this way to catch NaNs at the same time */
-               if (!(S1 > 0.2f*S2))
-#endif
-               {
-                  for (i=0;i<len+overlap;i++)
-                     e[i] = 0;
-               } else if (S1 < S2)
-               {
-                  opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
-                  for (i=0;i<len+overlap;i++)
-                     e[i] = MULT16_32_Q15(ratio, e[i]);
-               }
-         }
-         /* Apply post-filter to the MDCT overlap of the previous frame */
-         comb_filter(out_mem[c]+MAX_PERIOD, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap,
-               st->postfilter_gain, st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset,
-               NULL, 0);
-         for (i=0;i<MAX_PERIOD+st->mode->overlap-N;i++)
-            out_mem[c][i] = out_mem[c][N+i];
-         /* Apply TDAC to the concealed audio so that it blends with the
-         previous and next frames */
-         for (i=0;i<overlap/2;i++)
-         {
-            opus_val32 tmp;
-            tmp = MULT16_32_Q15(st->mode->window[i],           e[N+overlap-1-i]) +
-                  MULT16_32_Q15(st->mode->window[overlap-i-1], e[N+i          ]);
-            out_mem[c][MAX_PERIOD+i] = MULT16_32_Q15(st->mode->window[overlap-i-1], tmp);
-            out_mem[c][MAX_PERIOD+overlap-i-1] = MULT16_32_Q15(st->mode->window[i], tmp);
-         }
-         for (i=0;i<N;i++)
-            out_mem[c][MAX_PERIOD-N+i] = e[i];
-         /* Apply pre-filter to the MDCT overlap for the next frame (post-filter will be applied then) */
-         comb_filter(e, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap,
-               -st->postfilter_gain, -st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset,
-               NULL, 0);
-         for (i=0;i<overlap;i++)
-            out_mem[c][MAX_PERIOD+i] = e[i];
-      } while (++c<C);
-   }
-   deemphasis(out_syn, pcm, N, C, /*st->downsample,*/ st->mode->preemph, st->preemph_memD);
-   st->loss_count++;
-   RESTORE_STACK;
-}
-#define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */
-static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */
-static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */
-int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
-{
-   int c, i, N;
-   int spread_decision;
-   opus_int32 bits;
-   ec_dec _dec;
-   VARDECL(celt_sig, freq);
-   VARDECL(celt_norm, X);
-   VARDECL(celt_ener, bandE);
-   VARDECL(int, fine_quant);
-   VARDECL(int, pulses);
-   VARDECL(int, cap);
-   VARDECL(int, offsets);
-   VARDECL(int, fine_priority);
-   VARDECL(int, tf_res);
-   VARDECL(unsigned char, collapse_masks);
-   celt_sig *out_mem[2];
-   celt_sig *decode_mem[2];
-   celt_sig *overlap_mem[2];
-   celt_sig *out_syn[2];
-   opus_val16 *lpc;
-   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
-   int shortBlocks;
-   int isTransient;
-   int intra_ener;
-   const int CC = st->channels;
-   int LM, M;
-   int effEnd;
-   int codedBands;
-   int alloc_trim;
-   int postfilter_pitch;
-   opus_val16 postfilter_gain;
-   int intensity=0;
-   int dual_stereo=0;
-   opus_int32 total_bits;
-   opus_int32 balance;
-   opus_int32 tell;
-   int dynalloc_logp;
-   int postfilter_tapset;
-   int anti_collapse_rsv;
-   int anti_collapse_on=0;
-   int silence;
-   int C = st->stream_channels;
-   ALLOC_STACK;
-   frame_size *= st->downsample;
-   c=0; do {
-      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap);
-      out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
-      overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE;
-   } while (++c<CC);
-   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*CC);
-   oldBandE = lpc+CC*LPC_ORDER;
-   oldLogE = oldBandE + 2*st->mode->nbEBands;
-   oldLogE2 = oldLogE + 2*st->mode->nbEBands;
-   backgroundLogE = oldLogE2  + 2*st->mode->nbEBands;
-#ifdef CUSTOM_MODES
-   if (st->signalling && data!=NULL)
-   {
-      int data0=data[0];
-      /* Convert "standard mode" to Opus header */
-      if (st->mode->Fs==48000 && st->mode->shortMdctSize==120)
-      {
-         data0 = fromOpus(data0);
-         if (data0<0)
-            return OPUS_INVALID_PACKET;
-      }
-      st->end = IMAX(1, st->mode->effEBands-2*(data0>>5));
-      LM = (data0>>3)&0x3;
-      C = 1 + ((data0>>2)&0x1);
-      data++;
-      len--;
-      if (LM>st->mode->maxLM)
-         return OPUS_INVALID_PACKET;
-      if (frame_size < st->mode->shortMdctSize<<LM)
-         return OPUS_BUFFER_TOO_SMALL;
-      else
-         frame_size = st->mode->shortMdctSize<<LM;
-   } else {
-#else
-   {
-#endif
-      for (LM=0;LM<=st->mode->maxLM;LM++)
-         if (st->mode->shortMdctSize<<LM==frame_size)
-            break;
-      if (LM>st->mode->maxLM)
-         return OPUS_BAD_ARG;
-   }
-   M=1<<LM;
-   if (len<0 || len>1275 || pcm==NULL)
-      return OPUS_BAD_ARG;
-   N = M*st->mode->shortMdctSize;
-   effEnd = st->end;
-   if (effEnd > st->mode->effEBands)
-      effEnd = st->mode->effEBands;
-   /**< Interleaved signal MDCTs */
-   if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N)
-      freq = s_freq;
-   else
-      ALLOC(freq, IMAX(CC,C)*N, celt_sig);
-   /**< Interleaved normalised MDCTs */
-   if (FREQ_X_BUF_SIZE >= C*N)
-      X = s_X;
-   else
-      ALLOC(X, C*N, celt_norm);
-   ALLOC(bandE, st->mode->nbEBands*C, celt_ener);
-   c=0; do
-      for (i=0;i<M*st->mode->eBands[st->start];i++)
-         X[c*N+i] = 0;
-   while (++c<C);
-   c=0; do
-      for (i=M*st->mode->eBands[effEnd];i<N;i++)
-         X[c*N+i] = 0;
-   while (++c<C);
-   if (data == NULL || len<=1)
-   {
-      celt_decode_lost(st, pcm, N, LM);
-      RESTORE_STACK;
-      return frame_size/st->downsample;
-   }
-   if (dec == NULL)
-   {
-      ec_dec_init(&_dec,(unsigned char*)data,len);
-      dec = &_dec;
-   }
-   if (C==1)
-   {
-      for (i=0;i<st->mode->nbEBands;i++)
-         oldBandE[i]=MAX16(oldBandE[i],oldBandE[st->mode->nbEBands+i]);
-   }
-   total_bits = len*8;
-   tell = ec_tell(dec);
-   if (tell >= total_bits)
-      silence = 1;
-   else if (tell==1)
-      silence = ec_dec_bit_logp(dec, 15);
-   else
-      silence = 0;
-   if (silence)
-   {
-      /* Pretend we've read all the remaining bits */
-      tell = len*8;
-      dec->nbits_total+=tell-ec_tell(dec);
-   }
-   postfilter_gain = 0;
-   postfilter_pitch = 0;
-   postfilter_tapset = 0;
-   if (st->start==0 && tell+16 <= total_bits)
-   {
-      if(ec_dec_bit_logp(dec, 1))
-      {
-         int qg, octave;
-         octave = ec_dec_uint(dec, 6);
-         postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1;
-         qg = ec_dec_bits(dec, 3);
-         if (ec_tell(dec)+2<=total_bits)
-            postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2);
-         postfilter_gain = QCONST16(.09375f,15)*(qg+1);
-      }
-      tell = ec_tell(dec);
-   }
-   if (LM > 0 && tell+3 <= total_bits)
-   {
-      isTransient = ec_dec_bit_logp(dec, 3);
-      tell = ec_tell(dec);
-   }
-   else
-      isTransient = 0;
-   if (isTransient)
-      shortBlocks = M;
-   else
-      shortBlocks = 0;
-   /* Decode the global flags (first symbols in the stream) */
-   intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
-   /* Get band energies */
-   unquant_coarse_energy(st->mode, st->start, st->end, oldBandE,
-         intra_ener, dec, C, LM);
-   ALLOC(tf_res, st->mode->nbEBands, int);
-   tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
-   tell = ec_tell(dec);
-   spread_decision = SPREAD_NORMAL;
-   if (tell+4 <= total_bits)
-      spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
-   ALLOC(pulses, st->mode->nbEBands, int);
-   ALLOC(cap, st->mode->nbEBands, int);
-   ALLOC(offsets, st->mode->nbEBands, int);
-   ALLOC(fine_priority, st->mode->nbEBands, int);
-   init_caps(st->mode,cap,LM,C);
-   dynalloc_logp = 6;
-   total_bits<<=BITRES;
-   tell = ec_tell_frac(dec);
-   for (i=st->start;i<st->end;i++)
-   {
-      int width, quanta;
-      int dynalloc_loop_logp;
-      int boost;
-      width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
-      /* quanta is 6 bits, but no more than 1 bit/sample
-         and no less than 1/8 bit/sample */
-      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
-      dynalloc_loop_logp = dynalloc_logp;
-      boost = 0;
-      while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i])
-      {
-         int flag;
-         flag = ec_dec_bit_logp(dec, dynalloc_loop_logp);
-         tell = ec_tell_frac(dec);
-         if (!flag)
-            break;
-         boost += quanta;
-         total_bits -= quanta;
-         dynalloc_loop_logp = 1;
-      }
-      offsets[i] = boost;
-      /* Making dynalloc more likely */
-      if (boost>0)
-         dynalloc_logp = IMAX(2, dynalloc_logp-1);
-   }
-   ALLOC(fine_quant, st->mode->nbEBands, int);
-   alloc_trim = tell+(6<<BITRES) <= total_bits ?
-         ec_dec_icdf(dec, trim_icdf, 7) : 5;
-   bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
-   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
-   bits -= anti_collapse_rsv;
-   codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap,
-         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
-         fine_quant, fine_priority, C, LM, dec, 0, 0);
-   unquant_fine_energy(st->mode, st->start, st->end, oldBandE, fine_quant, dec, C);
-   /* Decode fixed codebook */
-   ALLOC(collapse_masks, C*st->mode->nbEBands, unsigned char);
-   quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
-         NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
-         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
-   if (anti_collapse_rsv > 0)
-   {
-      anti_collapse_on = ec_dec_bits(dec, 1);
-   }
-   unquant_energy_finalise(st->mode, st->start, st->end, oldBandE,
-         fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
-   if (anti_collapse_on)
-      anti_collapse(st->mode, X, collapse_masks, LM, C, N,
-            st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
-   log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C);
-   if (silence)
-   {
-      for (i=0;i<C*st->mode->nbEBands;i++)
-      {
-         bandE[i] = 0;
-         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
-      }
-   }
-   /* Synthesis */
-   denormalise_bands(st->mode, X, freq, bandE, effEnd, C, M);
-   OPUS_MOVE(decode_mem[0], decode_mem[0]+N, DECODE_BUFFER_SIZE-N);
-   if (CC==2)
-      OPUS_MOVE(decode_mem[1], decode_mem[1]+N, DECODE_BUFFER_SIZE-N);
-   c=0; do
-      for (i=0;i<M*st->mode->eBands[st->start];i++)
-         freq[c*N+i] = 0;
-   while (++c<C);
-   c=0; do {
-      int bound = M*st->mode->eBands[effEnd];
-      if (st->downsample!=1)
-         bound = IMIN(bound, N/st->downsample);
-      for (i=bound;i<N;i++)
-         freq[c*N+i] = 0;
-   } while (++c<C);
-   out_syn[0] = out_mem[0]+MAX_PERIOD-N;
-   if (CC==2)
-      out_syn[1] = out_mem[1]+MAX_PERIOD-N;
-   if (CC==2&&C==1)
-   {
-      for (i=0;i<N;i++)
-         freq[N+i] = freq[i];
-   }
-   if (CC==1&&C==2)
-   {
-      for (i=0;i<N;i++)
-         freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
-   }
-   /* Compute inverse MDCTs */
-   compute_inv_mdcts(st->mode, shortBlocks, freq, out_syn, overlap_mem, CC, LM);
-   c=0; do {
-      st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
-      st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
-      comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, st->mode->shortMdctSize,
-            st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
-            st->mode->window, st->overlap);
-      if (LM!=0)
-         comb_filter(out_syn[c]+st->mode->shortMdctSize, out_syn[c]+st->mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-st->mode->shortMdctSize,
-               st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
-               st->mode->window, st->mode->overlap);
-   } while (++c<CC);
-   st->postfilter_period_old = st->postfilter_period;
-   st->postfilter_gain_old = st->postfilter_gain;
-   st->postfilter_tapset_old = st->postfilter_tapset;
-   st->postfilter_period = postfilter_pitch;
-   st->postfilter_gain = postfilter_gain;
-   st->postfilter_tapset = postfilter_tapset;
-   if (LM!=0)
-   {
-      st->postfilter_period_old = st->postfilter_period;
-      st->postfilter_gain_old = st->postfilter_gain;
-      st->postfilter_tapset_old = st->postfilter_tapset;
-   }
-   if (C==1) {
-      for (i=0;i<st->mode->nbEBands;i++)
-         oldBandE[st->mode->nbEBands+i]=oldBandE[i];
-   }
-   /* In case start or end were to change */
-   if (!isTransient)
-   {
-      for (i=0;i<2*st->mode->nbEBands;i++)
-         oldLogE2[i] = oldLogE[i];
-      for (i=0;i<2*st->mode->nbEBands;i++)
-         oldLogE[i] = oldBandE[i];
-      for (i=0;i<2*st->mode->nbEBands;i++)
-         backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
-   } else {
-      for (i=0;i<2*st->mode->nbEBands;i++)
-         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
-   }
-   c=0; do
-   {
-      for (i=0;i<st->start;i++)
-      {
-         oldBandE[c*st->mode->nbEBands+i]=0;
-         oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
-      }
-      for (i=st->end;i<st->mode->nbEBands;i++)
-      {
-         oldBandE[c*st->mode->nbEBands+i]=0;
-         oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
-      }
-   } while (++c<2);
-   st->rng = dec->rng;
-   deemphasis(out_syn, pcm, N, CC, /*st->downsample,*/ st->mode->preemph, st->preemph_memD);
-   st->loss_count = 0;
-   RESTORE_STACK;
-   if (ec_tell(dec) > 8*len)
-      return OPUS_INTERNAL_ERROR;
-   if(ec_get_error(dec))
-      st->error = 1;
-   return frame_size/st->downsample;
-}
-#ifdef CUSTOM_MODES
-#ifdef FIXED_POINT
-int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
-{
-   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
-}
-#ifndef DISABLE_FLOAT_API
-int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
-{
-   int j, ret, C, N;
-   VARDECL(opus_int16, out);
-   ALLOC_STACK;
-   if (pcm==NULL)
-      return OPUS_BAD_ARG;
-   C = st->channels;
-   N = frame_size;
-   ALLOC(out, C*N, opus_int16);
-   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
-   if (ret>0)
-      for (j=0;j<C*ret;j++)
-         pcm[j]=out[j]*(1.f/32768.f);
-   RESTORE_STACK;
-   return ret;
-}
-#endif /* DISABLE_FLOAT_API */
-#else
-int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
-{
-   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
-}
-int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
-{
-   int j, ret, C, N;
-   VARDECL(celt_sig, out);
-   ALLOC_STACK;
-   if (pcm==NULL)
-      return OPUS_BAD_ARG;
-   C = st->channels;
-   N = frame_size;
-   ALLOC(out, C*N, celt_sig);
-   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
-   if (ret>0)
-      for (j=0;j<C*ret;j++)
-         pcm[j] = FLOAT2INT16 (out[j]);
-   RESTORE_STACK;
-   return ret;
-}
-#endif
-#endif /* CUSTOM_MODES */
-int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
-{
-   va_list ap;
-   va_start(ap, request);
-   switch (request)
-   {
-      case CELT_SET_START_BAND_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<0 || value>=st->mode->nbEBands)
-            goto bad_arg;
-         st->start = value;
-      }
-      break;
-      case CELT_SET_END_BAND_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<1 || value>st->mode->nbEBands)
-            goto bad_arg;
-         st->end = value;
-      }
-      break;
-      case CELT_SET_CHANNELS_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         if (value<1 || value>2)
-            goto bad_arg;
-         st->stream_channels = value;
-      }
-      break;
-      case CELT_GET_AND_CLEAR_ERROR_REQUEST:
-      {
-         opus_int32 *value = va_arg(ap, opus_int32*);
-         if (value==NULL)
-            goto bad_arg;
-         *value=st->error;
-         st->error = 0;
-      }
-      break;
-      case OPUS_GET_LOOKAHEAD_REQUEST:
-      {
-         opus_int32 *value = va_arg(ap, opus_int32*);
-         if (value==NULL)
-            goto bad_arg;
-         *value = st->overlap/st->downsample;
-      }
-      break;
-      case OPUS_RESET_STATE:
-      {
-         int i;
-         opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
-         lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
-         oldBandE = lpc+st->channels*LPC_ORDER;
-         oldLogE = oldBandE + 2*st->mode->nbEBands;
-         oldLogE2 = oldLogE + 2*st->mode->nbEBands;
-         OPUS_CLEAR((char*)&st->DECODER_RESET_START,
-               opus_custom_decoder_get_size(st->mode, st->channels)-
-               ((char*)&st->DECODER_RESET_START - (char*)st));
-         for (i=0;i<2*st->mode->nbEBands;i++)
-            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
-      }
-      break;
-      case OPUS_GET_PITCH_REQUEST:
-      {
-         opus_int32 *value = va_arg(ap, opus_int32*);
-         if (value==NULL)
-            goto bad_arg;
-         *value = st->postfilter_period;
-      }
-      break;
-#ifdef OPUS_BUILD
-      case CELT_GET_MODE_REQUEST:
-      {
-         const CELTMode ** value = va_arg(ap, const CELTMode**);
-         if (value==0)
-            goto bad_arg;
-         *value=st->mode;
-      }
-      break;
-      case CELT_SET_SIGNALLING_REQUEST:
-      {
-         opus_int32 value = va_arg(ap, opus_int32);
-         st->signalling = value;
-      }
-      break;
-      case OPUS_GET_FINAL_RANGE_REQUEST:
-      {
-         opus_uint32 * value = va_arg(ap, opus_uint32 *);
-         if (value==0)
-            goto bad_arg;
-         *value=st->rng;
-      }
-      break;
-#endif
-      default:
-         goto bad_request;
-   }
-   va_end(ap);
-   return OPUS_OK;
-bad_arg:
-   va_end(ap);
-   return OPUS_BAD_ARG;
-bad_request:
-      va_end(ap);
-  return OPUS_UNIMPLEMENTED;
-}
 const char *opus_strerror(int error)
@@ -2918,7 +212,7 @@ const char *opus_strerror(int error)
 const char *opus_get_version_string(void)
 {
-    return "libopus " OPUS_VERSION
+    return "libopus " PACKAGE_VERSION
 #ifdef FIXED_POINT
          "-fixed"
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.h b/lib/rbcodec/codecs/libopus/celt/celt.h
index 218cd883df..0911c72f72 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.h
+++ b/lib/rbcodec/codecs/libopus/celt/celt.h
@@ -50,7 +50,19 @@ extern "C" {
 #define CELTDecoder OpusCustomDecoder
 #define CELTMode OpusCustomMode
-#define _celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+typedef struct {
+   int valid;
+   opus_val16 tonality;
+   opus_val16 tonality_slope;
+   opus_val16 noisiness;
+   opus_val16 activity;
+   opus_val16 music_prob;
+   int        bandwidth;
+}AnalysisInfo;
+#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
 /* Encoder/decoder Requests */
@@ -81,12 +93,27 @@ extern "C" {
 #define CELT_GET_MODE_REQUEST    10015
 /** Get the CELTMode used by an encoder or decoder */
-#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, _celt_check_mode_ptr_ptr(x)
+#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x)
 #define CELT_SET_SIGNALLING_REQUEST    10016
 #define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x)
+#define CELT_SET_TONALITY_REQUEST    10018
+#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x)
+#define CELT_SET_TONALITY_SLOPE_REQUEST    10020
+#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x)
+#define CELT_SET_ANALYSIS_REQUEST    10022
+#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x)
+#define OPUS_SET_LFE_REQUEST    10024
+#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
+#define OPUS_SET_ENERGY_SAVE_REQUEST    10026
+#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x)
+#define OPUS_SET_ENERGY_MASK_REQUEST    10028
+#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
 /* Encoder stuff */
@@ -110,6 +137,75 @@ int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned cha
 #define celt_encoder_ctl opus_custom_encoder_ctl
 #define celt_decoder_ctl opus_custom_decoder_ctl
+#ifdef CUSTOM_MODES
+#define OPUS_CUSTOM_NOSTATIC
+#else
+#define OPUS_CUSTOM_NOSTATIC static inline
+#endif
+static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0};
+/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */
+static const unsigned char spread_icdf[4] = {25, 23, 2, 0};
+static const unsigned char tapset_icdf[3]={2,1,0};
+#ifdef CUSTOM_MODES
+static const unsigned char toOpusTable[20] = {
+      0xE0, 0xE8, 0xF0, 0xF8,
+      0xC0, 0xC8, 0xD0, 0xD8,
+      0xA0, 0xA8, 0xB0, 0xB8,
+      0x00, 0x00, 0x00, 0x00,
+      0x80, 0x88, 0x90, 0x98,
+};
+static const unsigned char fromOpusTable[16] = {
+      0x80, 0x88, 0x90, 0x98,
+      0x40, 0x48, 0x50, 0x58,
+      0x20, 0x28, 0x30, 0x38,
+      0x00, 0x08, 0x10, 0x18
+};
+static inline int toOpus(unsigned char c)
+{
+   int ret=0;
+   if (c<0xA0)
+      ret = toOpusTable[c>>3];
+   if (ret == 0)
+      return -1;
+   else
+      return ret|(c&0x7);
+}
+static inline int fromOpus(unsigned char c)
+{
+   if (c<0x80)
+      return -1;
+   else
+      return fromOpusTable[(c>>3)-16] | (c&0x7);
+}
+#endif /* CUSTOM_MODES */
+#define COMBFILTER_MAXPERIOD 1024
+#define COMBFILTER_MINPERIOD 15
+extern const signed char tf_select_table[4][8];
+int resampling_factor(opus_int32 rate);
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+      const opus_val16 *window, int overlap);
+void init_caps(const CELTMode *m,int *cap,int LM,int C);
+#ifdef RESYNTH
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM);
+#endif
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
new file mode 100644
index 0000000000..929d1d441b
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
@@ -0,0 +1,1207 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2010 Xiph.Org Foundation
+   Copyright (c) 2008 Gregory Maxwell
+   Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#define CELT_DECODER_C
+#include "cpu_support.h"
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+/**********************************************************************/
+/*                                                                    */
+/*                             DECODER                                */
+/*                                                                    */
+/**********************************************************************/
+#define DECODE_BUFFER_SIZE 2048
+/** Decoder state
+ @brief Decoder state
+ */
+struct OpusCustomDecoder {
+   const OpusCustomMode *mode;
+   int overlap;
+   int channels;
+   int stream_channels;
+   int downsample;
+   int start, end;
+   int signalling;
+   int arch;
+   /* Everything beyond this point gets cleared on a reset */
+#define DECODER_RESET_START rng
+   opus_uint32 rng;
+   int error;
+   int last_pitch_index;
+   int loss_count;
+   int postfilter_period;
+   int postfilter_period_old;
+   opus_val16 postfilter_gain;
+   opus_val16 postfilter_gain_old;
+   int postfilter_tapset;
+   int postfilter_tapset_old;
+   celt_sig preemph_memD[2];
+   celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
+   /* opus_val16 lpc[],  Size = channels*LPC_ORDER */
+   /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
+   /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
+   /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
+};
+int celt_decoder_get_size(int channels)
+{
+   const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
+   return opus_custom_decoder_get_size(mode, channels);
+}
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels)
+{
+   int size = sizeof(struct CELTDecoder)
+            + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
+            + channels*LPC_ORDER*sizeof(opus_val16)
+            + 4*2*mode->nbEBands*sizeof(opus_val16);
+   return size;
+}
+#ifdef CUSTOM_MODES
+CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error)
+{
+   int ret;
+   CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels));
+   ret = opus_custom_decoder_init(st, mode, channels);
+   if (ret != OPUS_OK)
+   {
+      opus_custom_decoder_destroy(st);
+      st = NULL;
+   }
+   if (error)
+      *error = ret;
+   return st;
+}
+#endif /* CUSTOM_MODES */
+int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels)
+{
+   int ret;
+   ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels);
+   if (ret != OPUS_OK)
+      return ret;
+   st->downsample = resampling_factor(sampling_rate);
+   if (st->downsample==0)
+      return OPUS_BAD_ARG;
+   else
+      return OPUS_OK;
+}
+OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels)
+{
+   if (channels < 0 || channels > 2)
+      return OPUS_BAD_ARG;
+   if (st==NULL)
+      return OPUS_ALLOC_FAIL;
+   OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels));
+   st->mode = mode;
+   st->overlap = mode->overlap;
+   st->stream_channels = st->channels = channels;
+   st->downsample = 1;
+   st->start = 0;
+   st->end = st->mode->effEBands;
+   st->signalling = 1;
+   st->arch = opus_select_arch();
+   st->loss_count = 0;
+   opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
+   return OPUS_OK;
+}
+#ifdef CUSTOM_MODES
+void opus_custom_decoder_destroy(CELTDecoder *st)
+{
+   opus_free(st);
+}
+#endif /* CUSTOM_MODES */
+static inline opus_val16 SIG2WORD16(celt_sig x)
+{
+#ifdef FIXED_POINT
+   x = PSHR32(x, SIG_SHIFT);
+   x = MAX32(x, -32768);
+   x = MIN32(x, 32767);
+   return EXTRACT16(x);
+#else
+   return (opus_val16)x;
+#endif
+}
+#ifndef RESYNTH
+static
+#endif
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch)
+{
+   int c;
+   int Nd;
+   int apply_downsampling=0;
+   opus_val16 coef0;
+   coef0 = coef[0];
+   Nd = N/downsample;
+   c=0; do {
+      int j;
+      celt_sig * OPUS_RESTRICT x;
+      opus_val16  * OPUS_RESTRICT y;
+      celt_sig m = mem[c];
+      x =in[c];
+      y = pcm+c;
+#ifdef CUSTOM_MODES
+      if (coef[1] != 0)
+      {
+         opus_val16 coef1 = coef[1];
+         opus_val16 coef3 = coef[3];
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m;
+            m = MULT16_32_Q15(coef0, tmp)
+                          - MULT16_32_Q15(coef1, x[j]);
+            tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else
+#endif
+      if (downsample>1)
+      {
+         /* Shortcut for the standard (non-custom modes) case */
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m;
+            m = MULT16_32_Q15(coef0, tmp);
+            scratch[j] = tmp;
+         }
+         apply_downsampling=1;
+      } else {
+         /* Shortcut for the standard (non-custom modes) case */
+         for (j=0;j<N;j++)
+         {
+            celt_sig tmp = x[j] + m + VERY_SMALL;
+            m = MULT16_32_Q15(coef0, tmp);
+            y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+         }
+      }
+      mem[c] = m;
+      if (apply_downsampling)
+      {
+         /* Perform down-sampling */
+         for (j=0;j<Nd;j++)
+            y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+      }
+   } while (++c<C);
+}
+/** Compute the IMDCT and apply window for all sub-frames and
+    all channels in a frame */
+#ifndef RESYNTH
+static
+#endif
+void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X,
+      celt_sig * OPUS_RESTRICT out_mem[], int C, int LM)
+{
+   int b, c;
+   int B;
+   int N;
+   int shift;
+   const int overlap = OVERLAP(mode);
+   if (shortBlocks)
+   {
+      B = shortBlocks;
+      N = mode->shortMdctSize;
+      shift = mode->maxLM;
+   } else {
+      B = 1;
+      N = mode->shortMdctSize<<LM;
+      shift = mode->maxLM-LM;
+   }
+   c=0; do {
+      /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */
+      for (b=0;b<B;b++)
+         clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B);
+   } while (++c<C);
+}
+static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
+{
+   int i, curr, tf_select;
+   int tf_select_rsv;
+   int tf_changed;
+   int logp;
+   opus_uint32 budget;
+   opus_uint32 tell;
+   budget = dec->storage*8;
+   tell = ec_tell(dec);
+   logp = isTransient ? 2 : 4;
+   tf_select_rsv = LM>0 && tell+logp+1<=budget;
+   budget -= tf_select_rsv;
+   tf_changed = curr = 0;
+   for (i=start;i<end;i++)
+   {
+      if (tell+logp<=budget)
+      {
+         curr ^= ec_dec_bit_logp(dec, logp);
+         tell = ec_tell(dec);
+         tf_changed |= curr;
+      }
+      tf_res[i] = curr;
+      logp = isTransient ? 4 : 5;
+   }
+   tf_select = 0;
+   if (tf_select_rsv &&
+     tf_select_table[LM][4*isTransient+0+tf_changed] !=
+     tf_select_table[LM][4*isTransient+2+tf_changed])
+   {
+      tf_select = ec_dec_bit_logp(dec, 1);
+   }
+   for (i=start;i<end;i++)
+   {
+      tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
+   }
+}
+/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
+   CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
+   current value corresponds to a pitch of 66.67 Hz. */
+#define PLC_PITCH_LAG_MAX (720)
+/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
+   pitch of 480 Hz. */
+#define PLC_PITCH_LAG_MIN (100)
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM)
+{
+   int c;
+   int i;
+   const int C = st->channels;
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   int start;
+   int downsample;
+   int loss_count;
+   int noise_based;
+   const opus_int16 *eBands;
+   VARDECL(celt_sig, scratch);
+   SAVE_STACK;
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+      out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
+   } while (++c<C);
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
+   oldBandE = lpc+C*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+   loss_count = st->loss_count;
+   start = st->start;
+   downsample = st->downsample;
+   noise_based = loss_count >= 5 || start != 0;
+   ALLOC(scratch, noise_based?N*C:N, celt_sig);
+   if (noise_based)
+   {
+      /* Noise-based PLC/CNG */
+      celt_sig *freq;
+      VARDECL(celt_norm, X);
+      opus_uint32 seed;
+      opus_val16 *plcLogE;
+      int end;
+      int effEnd;
+      end = st->end;
+      effEnd = IMAX(start, IMIN(end, mode->effEBands));
+      /* Share the interleaved signal MDCT coefficient buffer with the
+         deemphasis scratch buffer. */
+      freq = scratch;
+      ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
+      if (loss_count >= 5)
+         plcLogE = backgroundLogE;
+      else {
+         /* Energy decay */
+         opus_val16 decay = loss_count==0 ?
+               QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
+         c=0; do
+         {
+            for (i=start;i<end;i++)
+               oldBandE[c*nbEBands+i] -= decay;
+         } while (++c<C);
+         plcLogE = oldBandE;
+      }
+      seed = st->rng;
+      for (c=0;c<C;c++)
+      {
+         for (i=start;i<effEnd;i++)
+         {
+            int j;
+            int boffs;
+            int blen;
+            boffs = N*c+(eBands[i]<<LM);
+            blen = (eBands[i+1]-eBands[i])<<LM;
+            for (j=0;j<blen;j++)
+            {
+               seed = celt_lcg_rand(seed);
+               X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
+            }
+            renormalise_vector(X+boffs, blen, Q15ONE);
+         }
+      }
+      st->rng = seed;
+      denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
+      c=0; do {
+         int bound = eBands[effEnd]<<LM;
+         if (downsample!=1)
+            bound = IMIN(bound, N/downsample);
+         for (i=bound;i<N;i++)
+            freq[c*N+i] = 0;
+      } while (++c<C);
+      c=0; do {
+         OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
+               DECODE_BUFFER_SIZE-N+(overlap>>1));
+      } while (++c<C);
+      compute_inv_mdcts(mode, 0, freq, out_syn, C, LM);
+   } else {
+      /* Pitch-based PLC */
+      const opus_val16 *window;
+      opus_val16 fade = Q15ONE;
+      int pitch_index;
+      VARDECL(opus_val32, etmp);
+      VARDECL(opus_val16, exc);
+      if (loss_count == 0)
+      {
+         VARDECL( opus_val16, lp_pitch_buf );
+         ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
+         pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C);
+         pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
+               DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
+               PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index);
+         pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
+         st->last_pitch_index = pitch_index;
+      } else {
+         pitch_index = st->last_pitch_index;
+         fade = QCONST16(.8f,15);
+      }
+      ALLOC(etmp, overlap, opus_val32);
+      ALLOC(exc, MAX_PERIOD, opus_val16);
+      window = mode->window;
+      c=0; do {
+         opus_val16 decay;
+         opus_val16 attenuation;
+         opus_val32 S1=0;
+         celt_sig *buf;
+         int extrapolation_offset;
+         int extrapolation_len;
+         int exc_length;
+         int j;
+         buf = decode_mem[c];
+         for (i=0;i<MAX_PERIOD;i++) {
+            exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT);
+         }
+         if (loss_count == 0)
+         {
+            opus_val32 ac[LPC_ORDER+1];
+            /* Compute LPC coefficients for the last MAX_PERIOD samples before
+               the first loss so we can work in the excitation-filter domain. */
+            _celt_autocorr(exc, ac, window, overlap, LPC_ORDER, MAX_PERIOD);
+            /* Add a noise floor of -40 dB. */
+#ifdef FIXED_POINT
+            ac[0] += SHR32(ac[0],13);
+#else
+            ac[0] *= 1.0001f;
+#endif
+            /* Use lag windowing to stabilize the Levinson-Durbin recursion. */
+            for (i=1;i<=LPC_ORDER;i++)
+            {
+               /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
+#ifdef FIXED_POINT
+               ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
+#else
+               ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
+#endif
+            }
+            _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
+         }
+         /* We want the excitation for 2 pitch periods in order to look for a
+            decaying signal, but we can't get more than MAX_PERIOD. */
+         exc_length = IMIN(2*pitch_index, MAX_PERIOD);
+         /* Initialize the LPC history with the samples just before the start
+            of the region for which we're computing the excitation. */
+         {
+            opus_val16 lpc_mem[LPC_ORDER];
+            for (i=0;i<LPC_ORDER;i++)
+            {
+               lpc_mem[i] =
+                     ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
+            }
+            /* Compute the excitation for exc_length samples before the loss. */
+            celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+                  exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
+         }
+         /* Check if the waveform is decaying, and if so how fast.
+            We do this to avoid adding energy when concealing in a segment
+            with decaying energy. */
+         {
+            opus_val32 E1=1, E2=1;
+            int decay_length;
+#ifdef FIXED_POINT
+            int shift = IMAX(0,2*celt_zlog2(celt_maxabs16(&exc[MAX_PERIOD-exc_length], exc_length))-20);
+#endif
+            decay_length = exc_length>>1;
+            for (i=0;i<decay_length;i++)
+            {
+               opus_val16 e;
+               e = exc[MAX_PERIOD-decay_length+i];
+               E1 += SHR32(MULT16_16(e, e), shift);
+               e = exc[MAX_PERIOD-2*decay_length+i];
+               E2 += SHR32(MULT16_16(e, e), shift);
+            }
+            E1 = MIN32(E1, E2);
+            decay = celt_sqrt(frac_div32(SHR32(E1, 1), E2));
+         }
+         /* Move the decoder memory one frame to the left to give us room to
+            add the data for the new frame. We ignore the overlap that extends
+            past the end of the buffer, because we aren't going to use it. */
+         OPUS_MOVE(buf, buf+N, DECODE_BUFFER_SIZE-N);
+         /* Extrapolate from the end of the excitation with a period of
+            "pitch_index", scaling down each period by an additional factor of
+            "decay". */
+         extrapolation_offset = MAX_PERIOD-pitch_index;
+         /* We need to extrapolate enough samples to cover a complete MDCT
+            window (including overlap/2 samples on both sides). */
+         extrapolation_len = N+overlap;
+         /* We also apply fading if this is not the first loss. */
+         attenuation = MULT16_16_Q15(fade, decay);
+         for (i=j=0;i<extrapolation_len;i++,j++)
+         {
+            opus_val16 tmp;
+            if (j >= pitch_index) {
+               j -= pitch_index;
+               attenuation = MULT16_16_Q15(attenuation, decay);
+            }
+            buf[DECODE_BUFFER_SIZE-N+i] =
+                  SHL32(EXTEND32(MULT16_16_Q15(attenuation,
+                        exc[extrapolation_offset+j])), SIG_SHIFT);
+            /* Compute the energy of the previously decoded signal whose
+               excitation we're copying. */
+            tmp = ROUND16(
+                  buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
+                  SIG_SHIFT);
+            S1 += SHR32(MULT16_16(tmp, tmp), 8);
+         }
+         {
+            opus_val16 lpc_mem[LPC_ORDER];
+            /* Copy the last decoded samples (prior to the overlap region) to
+               synthesis filter memory so we can have a continuous signal. */
+            for (i=0;i<LPC_ORDER;i++)
+               lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
+            /* Apply the synthesis filter to convert the excitation back into
+               the signal domain. */
+            celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
+                  buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
+                  lpc_mem);
+         }
+         /* Check if the synthesis energy is higher than expected, which can
+            happen with the signal changes during our window. If so,
+            attenuate. */
+         {
+            opus_val32 S2=0;
+            for (i=0;i<extrapolation_len;i++)
+            {
+               opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
+               S2 += SHR32(MULT16_16(tmp, tmp), 8);
+            }
+            /* This checks for an "explosion" in the synthesis. */
+#ifdef FIXED_POINT
+            if (!(S1 > SHR32(S2,2)))
+#else
+            /* The float test is written this way to catch NaNs in the output
+               of the IIR filter at the same time. */
+            if (!(S1 > 0.2f*S2))
+#endif
+            {
+               for (i=0;i<extrapolation_len;i++)
+                  buf[DECODE_BUFFER_SIZE-N+i] = 0;
+            } else if (S1 < S2)
+            {
+               opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1));
+               for (i=0;i<overlap;i++)
+               {
+                  opus_val16 tmp_g = Q15ONE
+                        - MULT16_16_Q15(window[i], Q15ONE-ratio);
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+               for (i=overlap;i<extrapolation_len;i++)
+               {
+                  buf[DECODE_BUFFER_SIZE-N+i] =
+                        MULT16_32_Q15(ratio, buf[DECODE_BUFFER_SIZE-N+i]);
+               }
+            }
+         }
+         /* Apply the pre-filter to the MDCT overlap for the next frame because
+            the post-filter will be re-applied in the decoder after the MDCT
+            overlap. */
+         comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
+              st->postfilter_period, st->postfilter_period, overlap,
+              -st->postfilter_gain, -st->postfilter_gain,
+              st->postfilter_tapset, st->postfilter_tapset, NULL, 0);
+         /* Simulate TDAC on the concealed audio so that it blends with the
+            MDCT of the next frame. */
+         for (i=0;i<overlap/2;i++)
+         {
+            buf[DECODE_BUFFER_SIZE+i] =
+               MULT16_32_Q15(window[i], etmp[overlap-1-i])
+               + MULT16_32_Q15(window[overlap-i-1], etmp[i]);
+         }
+      } while (++c<C);
+   }
+   deemphasis(out_syn, pcm, N, C, downsample,
+         mode->preemph, st->preemph_memD, scratch);
+   st->loss_count = loss_count+1;
+   RESTORE_STACK;
+}
+#define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */
+static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */
+static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
+{
+   int c, i, N;
+   int spread_decision;
+   opus_int32 bits;
+   ec_dec _dec;
+   VARDECL(celt_sig, freq);
+   VARDECL(celt_norm, X);
+   VARDECL(int, fine_quant);
+   VARDECL(int, pulses);
+   VARDECL(int, cap);
+   VARDECL(int, offsets);
+   VARDECL(int, fine_priority);
+   VARDECL(int, tf_res);
+   VARDECL(unsigned char, collapse_masks);
+   celt_sig *out_mem[2];
+   celt_sig *decode_mem[2];
+   celt_sig *out_syn[2];
+   opus_val16 *lpc;
+   opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
+   int shortBlocks;
+   int isTransient;
+   int intra_ener;
+   const int CC = st->channels;
+   int LM, M;
+   int effEnd;
+   int codedBands;
+   int alloc_trim;
+   int postfilter_pitch;
+   opus_val16 postfilter_gain;
+   int intensity=0;
+   int dual_stereo=0;
+   opus_int32 total_bits;
+   opus_int32 balance;
+   opus_int32 tell;
+   int dynalloc_logp;
+   int postfilter_tapset;
+   int anti_collapse_rsv;
+   int anti_collapse_on=0;
+   int silence;
+   int C = st->stream_channels;
+   const OpusCustomMode *mode;
+   int nbEBands;
+   int overlap;
+   const opus_int16 *eBands;
+   ALLOC_STACK;
+   mode = st->mode;
+   nbEBands = mode->nbEBands;
+   overlap = mode->overlap;
+   eBands = mode->eBands;
+   frame_size *= st->downsample;
+   c=0; do {
+      decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+      out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD;
+   } while (++c<CC);
+   lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
+   oldBandE = lpc+CC*LPC_ORDER;
+   oldLogE = oldBandE + 2*nbEBands;
+   oldLogE2 = oldLogE + 2*nbEBands;
+   backgroundLogE = oldLogE2  + 2*nbEBands;
+#ifdef CUSTOM_MODES
+   if (st->signalling && data!=NULL)
+   {
+      int data0=data[0];
+      /* Convert "standard mode" to Opus header */
+      if (mode->Fs==48000 && mode->shortMdctSize==120)
+      {
+         data0 = fromOpus(data0);
+         if (data0<0)
+            return OPUS_INVALID_PACKET;
+      }
+      st->end = IMAX(1, mode->effEBands-2*(data0>>5));
+      LM = (data0>>3)&0x3;
+      C = 1 + ((data0>>2)&0x1);
+      data++;
+      len--;
+      if (LM>mode->maxLM)
+         return OPUS_INVALID_PACKET;
+      if (frame_size < mode->shortMdctSize<<LM)
+         return OPUS_BUFFER_TOO_SMALL;
+      else
+         frame_size = mode->shortMdctSize<<LM;
+   } else {
+#else
+   {
+#endif
+      for (LM=0;LM<=mode->maxLM;LM++)
+         if (mode->shortMdctSize<<LM==frame_size)
+            break;
+      if (LM>mode->maxLM)
+         return OPUS_BAD_ARG;
+   }
+   M=1<<LM;
+   if (len<0 || len>1275 || pcm==NULL)
+      return OPUS_BAD_ARG;
+   N = M*mode->shortMdctSize;
+   effEnd = st->end;
+   if (effEnd > mode->effEBands)
+      effEnd = mode->effEBands;
+   if (data == NULL || len<=1)
+   {
+      celt_decode_lost(st, pcm, N, LM);
+      RESTORE_STACK;
+      return frame_size/st->downsample;
+   }
+   if (dec == NULL)
+   {
+      ec_dec_init(&_dec,(unsigned char*)data,len);
+      dec = &_dec;
+   }
+   if (C==1)
+   {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[i]=MAX16(oldBandE[i],oldBandE[nbEBands+i]);
+   }
+   total_bits = len*8;
+   tell = ec_tell(dec);
+   if (tell >= total_bits)
+      silence = 1;
+   else if (tell==1)
+      silence = ec_dec_bit_logp(dec, 15);
+   else
+      silence = 0;
+   if (silence)
+   {
+      /* Pretend we've read all the remaining bits */
+      tell = len*8;
+      dec->nbits_total+=tell-ec_tell(dec);
+   }
+   postfilter_gain = 0;
+   postfilter_pitch = 0;
+   postfilter_tapset = 0;
+   if (st->start==0 && tell+16 <= total_bits)
+   {
+      if(ec_dec_bit_logp(dec, 1))
+      {
+         int qg, octave;
+         octave = ec_dec_uint(dec, 6);
+         postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1;
+         qg = ec_dec_bits(dec, 3);
+         if (ec_tell(dec)+2<=total_bits)
+            postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2);
+         postfilter_gain = QCONST16(.09375f,15)*(qg+1);
+      }
+      tell = ec_tell(dec);
+   }
+   if (LM > 0 && tell+3 <= total_bits)
+   {
+      isTransient = ec_dec_bit_logp(dec, 3);
+      tell = ec_tell(dec);
+   }
+   else
+      isTransient = 0;
+   if (isTransient)
+      shortBlocks = M;
+   else
+      shortBlocks = 0;
+   /* Decode the global flags (first symbols in the stream) */
+   intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
+   /* Get band energies */
+   unquant_coarse_energy(mode, st->start, st->end, oldBandE,
+         intra_ener, dec, C, LM);
+   ALLOC(tf_res, nbEBands, int);
+   tf_decode(st->start, st->end, isTransient, tf_res, LM, dec);
+   tell = ec_tell(dec);
+   spread_decision = SPREAD_NORMAL;
+   if (tell+4 <= total_bits)
+      spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
+   ALLOC(cap, nbEBands, int);
+   init_caps(mode,cap,LM,C);
+   ALLOC(offsets, nbEBands, int);
+   dynalloc_logp = 6;
+   total_bits<<=BITRES;
+   tell = ec_tell_frac(dec);
+   for (i=st->start;i<st->end;i++)
+   {
+      int width, quanta;
+      int dynalloc_loop_logp;
+      int boost;
+      width = C*(eBands[i+1]-eBands[i])<<LM;
+      /* quanta is 6 bits, but no more than 1 bit/sample
+         and no less than 1/8 bit/sample */
+      quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
+      dynalloc_loop_logp = dynalloc_logp;
+      boost = 0;
+      while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i])
+      {
+         int flag;
+         flag = ec_dec_bit_logp(dec, dynalloc_loop_logp);
+         tell = ec_tell_frac(dec);
+         if (!flag)
+            break;
+         boost += quanta;
+         total_bits -= quanta;
+         dynalloc_loop_logp = 1;
+      }
+      offsets[i] = boost;
+      /* Making dynalloc more likely */
+      if (boost>0)
+         dynalloc_logp = IMAX(2, dynalloc_logp-1);
+   }
+   ALLOC(fine_quant, nbEBands, int);
+   alloc_trim = tell+(6<<BITRES) <= total_bits ?
+         ec_dec_icdf(dec, trim_icdf, 7) : 5;
+   bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
+   anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
+   bits -= anti_collapse_rsv;
+   ALLOC(pulses, nbEBands, int);
+   ALLOC(fine_priority, nbEBands, int);
+   codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
+         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+         fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
+   unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C);
+   /* Decode fixed codebook */
+   ALLOC(collapse_masks, C*nbEBands, unsigned char);
+   /**< Interleaved normalised MDCTs */
+   if (FREQ_X_BUF_SIZE >= C*N)
+      X = s_X;
+   else
+      ALLOC(X, C*N, celt_norm);
+   quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
+         NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
+         len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
+   if (anti_collapse_rsv > 0)
+   {
+      anti_collapse_on = ec_dec_bits(dec, 1);
+   }
+   unquant_energy_finalise(mode, st->start, st->end, oldBandE,
+         fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
+   if (anti_collapse_on)
+      anti_collapse(mode, X, collapse_masks, LM, C, N,
+            st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+   /**< Interleaved signal MDCTs */
+   if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N)
+      freq = s_freq;
+   else
+      ALLOC(freq, IMAX(CC,C)*N, celt_sig);
+   if (silence)
+   {
+      for (i=0;i<C*nbEBands;i++)
+         oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
+      for (i=0;i<C*N;i++)
+         freq[i] = 0;
+   } else {
+      /* Synthesis */
+      denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
+   }
+   c=0; do {
+      OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+   } while (++c<CC);
+   c=0; do {
+      int bound = M*eBands[effEnd];
+      if (st->downsample!=1)
+         bound = IMIN(bound, N/st->downsample);
+      for (i=bound;i<N;i++)
+         freq[c*N+i] = 0;
+   } while (++c<C);
+   c=0; do {
+      out_syn[c] = out_mem[c]+MAX_PERIOD-N;
+   } while (++c<CC);
+   if (CC==2&&C==1)
+   {
+      for (i=0;i<N;i++)
+         freq[N+i] = freq[i];
+   }
+   if (CC==1&&C==2)
+   {
+      for (i=0;i<N;i++)
+         freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
+   }
+   /* Compute inverse MDCTs */
+   compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
+   c=0; do {
+      st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
+      st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
+      comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
+            st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
+            mode->window, overlap);
+      if (LM!=0)
+         comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
+               st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
+               mode->window, overlap);
+   } while (++c<CC);
+   st->postfilter_period_old = st->postfilter_period;
+   st->postfilter_gain_old = st->postfilter_gain;
+   st->postfilter_tapset_old = st->postfilter_tapset;
+   st->postfilter_period = postfilter_pitch;
+   st->postfilter_gain = postfilter_gain;
+   st->postfilter_tapset = postfilter_tapset;
+   if (LM!=0)
+   {
+      st->postfilter_period_old = st->postfilter_period;
+      st->postfilter_gain_old = st->postfilter_gain;
+      st->postfilter_tapset_old = st->postfilter_tapset;
+   }
+   if (C==1) {
+      for (i=0;i<nbEBands;i++)
+         oldBandE[nbEBands+i]=oldBandE[i];
+   }
+   /* In case start or end were to change */
+   if (!isTransient)
+   {
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE2[i] = oldLogE[i];
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE[i] = oldBandE[i];
+      for (i=0;i<2*nbEBands;i++)
+         backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
+   } else {
+      for (i=0;i<2*nbEBands;i++)
+         oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
+   }
+   c=0; do
+   {
+      for (i=0;i<st->start;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      for (i=st->end;i<nbEBands;i++)
+      {
+         oldBandE[c*nbEBands+i]=0;
+         oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
+      }
+   } while (++c<2);
+   st->rng = dec->rng;
+   /* We reuse freq[] as scratch space for the de-emphasis */
+   deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
+   st->loss_count = 0;
+   RESTORE_STACK;
+   if (ec_tell(dec) > 8*len)
+      return OPUS_INTERNAL_ERROR;
+   if(ec_get_error(dec))
+      st->error = 1;
+   return frame_size/st->downsample;
+}
+#ifdef CUSTOM_MODES
+#ifdef FIXED_POINT
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+}
+#ifndef DISABLE_FLOAT_API
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   int j, ret, C, N;
+   VARDECL(opus_int16, out);
+   ALLOC_STACK;
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+   C = st->channels;
+   N = frame_size;
+   ALLOC(out, C*N, opus_int16);
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   if (ret>0)
+      for (j=0;j<C*ret;j++)
+         pcm[j]=out[j]*(1.f/32768.f);
+   RESTORE_STACK;
+   return ret;
+}
+#endif /* DISABLE_FLOAT_API */
+#else
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL);
+}
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{
+   int j, ret, C, N;
+   VARDECL(celt_sig, out);
+   ALLOC_STACK;
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+   C = st->channels;
+   N = frame_size;
+   ALLOC(out, C*N, celt_sig);
+   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL);
+   if (ret>0)
+      for (j=0;j<C*ret;j++)
+         pcm[j] = FLOAT2INT16 (out[j]);
+   RESTORE_STACK;
+   return ret;
+}
+#endif
+#endif /* CUSTOM_MODES */
+int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
+{
+   va_list ap;
+   va_start(ap, request);
+   switch (request)
+   {
+      case CELT_SET_START_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<0 || value>=st->mode->nbEBands)
+            goto bad_arg;
+         st->start = value;
+      }
+      break;
+      case CELT_SET_END_BAND_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>st->mode->nbEBands)
+            goto bad_arg;
+         st->end = value;
+      }
+      break;
+      case CELT_SET_CHANNELS_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         if (value<1 || value>2)
+            goto bad_arg;
+         st->stream_channels = value;
+      }
+      break;
+      case CELT_GET_AND_CLEAR_ERROR_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value=st->error;
+         st->error = 0;
+      }
+      break;
+      case OPUS_GET_LOOKAHEAD_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->overlap/st->downsample;
+      }
+      break;
+      case OPUS_RESET_STATE:
+      {
+         int i;
+         opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
+         lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
+         oldBandE = lpc+st->channels*LPC_ORDER;
+         oldLogE = oldBandE + 2*st->mode->nbEBands;
+         oldLogE2 = oldLogE + 2*st->mode->nbEBands;
+         OPUS_CLEAR((char*)&st->DECODER_RESET_START,
+               opus_custom_decoder_get_size(st->mode, st->channels)-
+               ((char*)&st->DECODER_RESET_START - (char*)st));
+         for (i=0;i<2*st->mode->nbEBands;i++)
+            oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+      }
+      break;
+      case OPUS_GET_PITCH_REQUEST:
+      {
+         opus_int32 *value = va_arg(ap, opus_int32*);
+         if (value==NULL)
+            goto bad_arg;
+         *value = st->postfilter_period;
+      }
+      break;
+      case CELT_GET_MODE_REQUEST:
+      {
+         const CELTMode ** value = va_arg(ap, const CELTMode**);
+         if (value==0)
+            goto bad_arg;
+         *value=st->mode;
+      }
+      break;
+      case CELT_SET_SIGNALLING_REQUEST:
+      {
+         opus_int32 value = va_arg(ap, opus_int32);
+         st->signalling = value;
+      }
+      break;
+      case OPUS_GET_FINAL_RANGE_REQUEST:
+      {
+         opus_uint32 * value = va_arg(ap, opus_uint32 *);
+         if (value==0)
+            goto bad_arg;
+         *value=st->rng;
+      }
+      break;
+      default:
+         goto bad_request;
+   }
+   va_end(ap);
+   return OPUS_OK;
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+bad_request:
+      va_end(ap);
+  return OPUS_UNIMPLEMENTED;
+}
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
index 66aed1de09..7ffe90a357 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
@@ -26,12 +26,13 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "celt_lpc.h"
 #include "stack_alloc.h"
 #include "mathops.h"
+#include "pitch.h"
 void _celt_lpc(
      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
@@ -87,42 +88,71 @@ int          p
 #endif
 }
-void celt_fir(const opus_val16 *x,
+void celt_fir(const opus_val16 *_x,
         const opus_val16 *num,
-         opus_val16 *y,
+         opus_val16 *_y,
         int N,
         int ord,
         opus_val16 *mem)
 {
   int i,j;
+   VARDECL(opus_val16, rnum);
+   VARDECL(opus_val16, x);
+   SAVE_STACK;
+   ALLOC(rnum, ord, opus_val16);
+   ALLOC(x, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   for(i=0;i<ord;i++)
+      x[i] = mem[ord-i-1];
+   for (i=0;i<N;i++)
+      x[i+ord]=_x[i];
+   for(i=0;i<ord;i++)
+      mem[i] = _x[N-i-1];
+#ifdef SMALL_FOOTPRINT
   for (i=0;i<N;i++)
   {
-      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
      for (j=0;j<ord;j++)
      {
-         sum += MULT16_16(num[j],mem[j]);
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
-      }
-      for (j=ord-1;j>=1;j--)
-      {
-         mem[j]=mem[j-1];
      }
-      mem[0] = x[i];
+      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
-      y[i] = ROUND16(sum, SIG_SHIFT);
   }
+#else
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(rnum, x+i, sum, ord);
+      _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
+      _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
+      _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
+      _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
+      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
+   }
+#endif
+   RESTORE_STACK;
 }
-void celt_iir(const opus_val32 *x,
+void celt_iir(const opus_val32 *_x,
         const opus_val16 *den,
-         opus_val32 *y,
+         opus_val32 *_y,
         int N,
         int ord,
         opus_val16 *mem)
 {
+#ifdef SMALL_FOOTPRINT
   int i,j;
   for (i=0;i<N;i++)
   {
-      opus_val32 sum = x[i];
+      opus_val32 sum = _x[i];
      for (j=0;j<ord;j++)
      {
         sum -= MULT16_16(den[j],mem[j]);
@@ -132,11 +162,65 @@ void celt_iir(const opus_val32 *x,
         mem[j]=mem[j-1];
      }
      mem[0] = ROUND16(sum,SIG_SHIFT);
-      y[i] = sum;
+      _y[i] = sum;
   }
+#else
+   int i,j;
+   VARDECL(opus_val16, rden);
+   VARDECL(opus_val16, y);
+   SAVE_STACK;
+   celt_assert((ord&3)==0);
+   ALLOC(rden, ord, opus_val16);
+   ALLOC(y, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rden[i] = den[ord-i-1];
+   for(i=0;i<ord;i++)
+      y[i] = -mem[ord-i-1];
+   for(;i<N+ord;i++)
+      y[i]=0;
+   for (i=0;i<N-3;i+=4)
+   {
+      /* Unroll by 4 as if it were an FIR filter */
+      opus_val32 sum[4];
+      sum[0]=_x[i];
+      sum[1]=_x[i+1];
+      sum[2]=_x[i+2];
+      sum[3]=_x[i+3];
+      xcorr_kernel(rden, y+i, sum, ord);
+      /* Patch up the result to compensate for the fact that this is an IIR */
+      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
+      _y[i  ] = sum[0];
+      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
+      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
+      _y[i+1] = sum[1];
+      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
+      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
+      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
+      _y[i+2] = sum[2];
+      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
+      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
+      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
+      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
+      _y[i+3] = sum[3];
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+         sum -= MULT16_16(rden[j],y[i+j]);
+      y[i+ord] = ROUND16(sum,SIG_SHIFT);
+      _y[i] = sum;
+   }
+   for(i=0;i<ord;i++)
+      mem[i] = _y[N-i-1];
+   RESTORE_STACK;
+#endif
 }
-void _celt_autocorr(
+int _celt_autocorr(
                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
                   const opus_val16       *window,
@@ -146,43 +230,79 @@ void _celt_autocorr(
                  )
 {
   opus_val32 d;
-   int i;
+   int i, k;
+   int fastN=n-lag;
+   int shift;
+   const opus_val16 *xptr;
   VARDECL(opus_val16, xx);
   SAVE_STACK;
   ALLOC(xx, n, opus_val16);
   celt_assert(n>0);
   celt_assert(overlap>=0);
-   for (i=0;i<n;i++)
+   if (overlap == 0)
-      xx[i] = x[i];
-   for (i=0;i<overlap;i++)
   {
-      xx[i] = MULT16_16_Q15(x[i],window[i]);
+      xptr = x;
-      xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+   } else {
+      for (i=0;i<n;i++)
+         xx[i] = x[i];
+      for (i=0;i<overlap;i++)
+      {
+         xx[i] = MULT16_16_Q15(x[i],window[i]);
+         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+      }
+      xptr = xx;
   }
+   shift=0;
 #ifdef FIXED_POINT
   {
-      opus_val32 ac0=0;
+      opus_val32 ac0;
-      int shift;
+      ac0 = 1+(n<<7);
-      for(i=0;i<n;i++)
+      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
-         ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);
+      for(i=(n&1);i<n;i+=2)
-      ac0 += 1+n;
+      {
+         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
+         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
+      }
      shift = celt_ilog2(ac0)-30+10;
-      shift = (shift+1)/2;
+      shift = (shift)/2;
-      for(i=0;i<n;i++)
+      if (shift>0)
-         xx[i] = VSHR32(xx[i], shift);
+      {
+         for(i=0;i<n;i++)
+            xx[i] = PSHR32(xptr[i], shift);
+         xptr = xx;
+      } else
+         shift = 0;
   }
 #endif
-   while (lag>=0)
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
+   for (k=0;k<=lag;k++)
   {
-      for (i = lag, d = 0; i < n; i++)
+      for (i = k+fastN, d = 0; i < n; i++)
-         d += xx[i] * xx[i-lag];
+         d = MAC16_16(d, xptr[i], xptr[i-k]);
-      ac[lag] = d;
+      ac[k] += d;
-      /*printf ("%f ", ac[lag]);*/
-      lag--;
   }
-   /*printf ("\n");*/
+#ifdef FIXED_POINT
-   ac[0] += 10;
+   shift = 2*shift;
+   if (shift<=0)
+      ac[0] += SHL32((opus_int32)1, -shift);
+   if (ac[0] < 268435456)
+   {
+      int shift2 = 29 - EC_ILOG(ac[0]);
+      for (i=0;i<=lag;i++)
+         ac[i] = SHL32(ac[i], shift2);
+      shift -= shift2;
+   } else if (ac[0] >= 536870912)
+   {
+      int shift2=1;
+      if (ac[0] >= 1073741824)
+         shift2++;
+      for (i=0;i<=lag;i++)
+         ac[i] = SHR32(ac[i], shift2);
+      shift += shift2;
+   }
+#endif
   RESTORE_STACK;
+   return shift;
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.h b/lib/rbcodec/codecs/libopus/celt/celt_lpc.h
index 2baa77edf8..19279a0ed6 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.h
+++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.h
@@ -48,6 +48,6 @@ void celt_iir(const opus_val32 *x,
         int ord,
         opus_val16 *mem);
-void _celt_autocorr(const opus_val16 *x, opus_val32 *ac, const opus_val16 *window, int overlap, int lag, int n);
+int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, const opus_val16 *window, int overlap, int lag, int n);
 #endif /* PLC_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/cf/fixed_cf.h b/lib/rbcodec/codecs/libopus/celt/cf/fixed_cf.h
new file mode 100644
index 0000000000..c442a55663
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/cf/fixed_cf.h
@@ -0,0 +1,56 @@
+/* Copyright (C) 2013 Nils Wallménius */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef FIXED_CF_H
+#define FIXED_CF_H
+#undef MULT16_32_Q15
+static inline int32_t MULT16_32_Q15_cf(int32_t a, int32_t b)
+{
+  int32_t r;
+  asm volatile ("mac.l %[a], %[b], %%acc0;"
+                "movclr.l %%acc0, %[r];"
+                : [r] "=r" (r)
+                : [a] "r" (a<<16), [b] "r" (b)
+                : "cc");
+  return r;
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_cf(a, b))
+#undef MULT32_32_Q31
+static inline int32_t MULT32_32_Q31_cf(int32_t a, int32_t b)
+{
+  int32_t r;
+  asm volatile ("mac.l %[a], %[b], %%acc0;"
+                "movclr.l %%acc0, %[r];"
+                : [r] "=r" (r)
+                : [a] "r" (a), [b] "r" (b)
+                : "cc");
+  return r;
+}
+#define MULT32_32_Q31(a, b) (MULT32_32_Q31_cf(a, b))
+#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/cf/kiss_fft_cf.h b/lib/rbcodec/codecs/libopus/celt/cf/kiss_fft_cf.h
new file mode 100644
index 0000000000..fe0fe2f3ca
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/cf/kiss_fft_cf.h
@@ -0,0 +1,57 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+  All rights reserved.
+  Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+  POSSIBILITY OF SUCH DAMAGE.*/
+#ifndef KISS_FFT_CF_H
+#define KISS_FFT_CF_H
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+#ifdef FIXED_POINT
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    { \
+      asm volatile("move.l (%[bp]), %%d2;" \
+                   "clr.l %%d3;" \
+                   "move.w %%d2, %%d3;" \
+                   "swap %%d3;" \
+                   "clr.w %%d2;" \
+                   "movem.l (%[ap]), %%d0-%%d1;" \
+                   "mac.l %%d0, %%d2, %%acc0;" \
+                   "mac.l %%d1, %%d3, %%acc0;" \
+                   "mac.l %%d1, %%d2, %%acc1;" \
+                   "msac.l %%d0, %%d3, %%acc1;" \
+                   "movclr.l %%acc0, %[mr];" \
+                   "movclr.l %%acc1, %[mi];" \
+                   : [mr] "=r" ((m).r), [mi] "=r" ((m).i) \
+                   : [ap] "a" (&(a)), [bp] "a" (&(b)) \
+                   : "d0", "d1", "d2", "d3", "cc"); \
+    }
+#endif /* FIXED_POINT */
+#endif /* KISS_FFT_CF_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/cpu_support.h b/lib/rbcodec/codecs/libopus/celt/cpu_support.h
new file mode 100644
index 0000000000..41481feb9c
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/cpu_support.h
@@ -0,0 +1,51 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+#if defined(OPUS_HAVE_RTCD) && defined(ARMv4_ASM)
+#include "arm/armcpu.h"
+/* We currently support 4 ARM variants:
+ * arch[0] -> ARMv4
+ * arch[1] -> ARMv5E
+ * arch[2] -> ARMv6
+ * arch[3] -> NEON
+ */
+#define OPUS_ARCHMASK 3
+#else
+#define OPUS_ARCHMASK 0
+static inline int opus_select_arch(void)
+{
+  return 0;
+}
+#endif
+#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.c b/lib/rbcodec/codecs/libopus/celt/cwrs.c
index b8ade96fce..eb8fa1c807 100644
--- a/lib/rbcodec/codecs/libopus/celt/cwrs.c
+++ b/lib/rbcodec/codecs/libopus/celt/cwrs.c
@@ -28,14 +28,13 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "os_support.h"
 #include "cwrs.h"
 #include "mathops.h"
 #include "arch.h"
-#include "rate.h"
 #ifdef CUSTOM_MODES
@@ -72,64 +71,6 @@ int log2_frac(opus_uint32 val, int frac)
 }
 #endif
-#ifndef SMALL_FOOTPRINT
-#define MASK32 (0xFFFFFFFF)
-/*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/
-static const opus_uint32 INV_TABLE[53]={
-  0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7,
-  0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF,
-  0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7,
-  0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF,
-  0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97,
-  0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF,
-  0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587,
-  0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF,
-  0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977,
-  0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF,
-  0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67,
-  0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F,
-  0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57,
-  0xD8FD8FD9,
-};
-/*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact.
-  _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result
-   fits in 32 bits, but currently the table for multiplicative inverses is only
-   valid for _d<=52.*/
-static inline opus_uint32 imusdiv32odd(opus_uint32 _a,opus_uint32 _b,
- opus_uint32 _c,int _d){
-  celt_assert(_d<=52);
-  return (_a*_b-_c)*INV_TABLE[_d]&MASK32;
-}
-/*Computes (_a*_b-_c)/_d when the quotient is known to be exact.
-  _d does not actually have to be even, but imusdiv32odd will be faster when
-   it's odd, so you should use that instead.
-  _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the
-   table for multiplicative inverses is only valid for _d<=54).
-  _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in
-   32 bits.*/
-static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b,
- opus_uint32 _c,int _d){
-  opus_uint32 inv;
-  int           mask;
-  int           shift;
-  int           one;
-  celt_assert(_d>0);
-  celt_assert(_d<=54);
-  shift=EC_ILOG(_d^(_d-1));
-  inv=INV_TABLE[(_d-1)>>shift];
-  shift--;
-  one=1<<shift;
-  mask=one-1;
-  return (_a*(_b>>shift)-(_c>>shift)+
-   ((_a*(_b&mask)+one-(_c&mask))>>shift)-1)*inv&MASK32;
-}
-#endif /* SMALL_FOOTPRINT */
 /*Although derived separately, the pulse vector coding scheme is equivalent to
   a Pyramid Vector Quantizer \cite{Fis86}.
  Some additional notes about an early version appear at
@@ -249,46 +190,346 @@ static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b,
    year=1986
  }*/
-#ifndef SMALL_FOOTPRINT
+#if !defined(SMALL_FOOTPRINT)
-/*Compute U(2,_k).
-  Note that this may be called with _k=32768 (maxK[2]+1).*/
+/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?1:0*/
-static inline unsigned ucwrs2(unsigned _k){
+# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)])
-  celt_assert(_k>0);
+/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N
-  return _k+(_k-1);
+   with K pulses allocated to it.*/
-}
+# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1))
+/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)).
+  Thus, the number of entries in row I is the larger of the maximum number of
+   pulses we will ever allocate for a given N=I (K=128, or however many fit in
+   32 bits, whichever is smaller), plus one, and the maximum N for which
+   K=I-1 pulses fit in 32 bits.
+  The largest band size in an Opus Custom mode is 208.
+  Otherwise, we can limit things to the set of N which can be achieved by
+   splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48,
+   44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/
+#if defined(CUSTOM_MODES)
+static const opus_uint32 CELT_PVQ_U_DATA[1488]={
+#else
+static const opus_uint32 CELT_PVQ_U_DATA[1272] ICONST_ATTR ={
+#endif
+  /*N=0, K=0...176:*/
+  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0,
+#endif
+  /*N=1, K=1...176:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1,
+#endif
+  /*N=2, K=2...176:*/
+  3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
+  43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+  81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113,
+  115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143,
+  145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173,
+  175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203,
+  205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233,
+  235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263,
+  265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293,
+  295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323,
+  325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381,
+  383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411,
+  413, 415,
+#endif
+  /*N=3, K=3...176:*/
+  13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613,
+  685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861,
+  1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785,
+  3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385,
+  6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661,
+  9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961,
+  13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745,
+  17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013,
+  21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765,
+  26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001,
+  31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721,
+  37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925,
+  43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613,
+  50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785,
+  57461, 58141, 58825, 59513, 60205, 60901, 61601,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565,
+  70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013,
+  78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113,
+#endif
+  /*N=4, K=4...176:*/
+  63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017,
+  7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775,
+  30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153,
+  82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193,
+  161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575,
+  267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217,
+  410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951,
+  597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609,
+  833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023,
+  1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407,
+  1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759,
+  1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175,
+  2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751,
+  2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583,
+  3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767,
+  3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399,
+  4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575,
+  5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391,
+  6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943,
+  7085049, 7207551,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783,
+  8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967,
+  9511425, 9660423, 9810969, 9963071, 10116737, 10271975, 10428793, 10587199,
+  10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177,
+  11912575,
+#endif
+  /*N=5, K=5...176:*/
+  321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041,
+  50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401,
+  330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241,
+  1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241,
+  2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801,
+  4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849,
+  8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849,
+  13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809,
+  20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881,
+  29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641,
+  40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081,
+  55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609,
+  73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049,
+  95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641,
+  122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041,
+  155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321,
+  193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969,
+  238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889,
+  290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401,
+  351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241,
+  420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561,
+  500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929,
+  590359041, 604167209, 618216201, 632508801,
+#if defined(CUSTOM_MODES)
+  /*...208:*/
+  647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241,
+  755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161,
+  878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329,
+  1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041,
+  1143412929, 1166053121, 1189027881, 1212340489, 1235994241,
+#endif
+  /*N=6, K=6...96:*/
+  1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047,
+  335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409,
+  2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793,
+  11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455,
+  29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189,
+  64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651,
+  128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185,
+  235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647,
+  402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229,
+  655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283,
+  1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135,
+  1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187,
+  2011371957, 2120032959,
+#if defined(CUSTOM_MODES)
+  /*...109:*/
+  2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U,
+  3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U,
+  4012305913U,
+#endif
+  /*N=7, K=7...54*/
+  8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777,
+  1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233,
+  19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013,
+  88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805,
+  292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433,
+  793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821,
+  1667010073, 1870535785, 2094367717,
+#if defined(CUSTOM_MODES)
+  /*...60:*/
+  2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U,
+#endif
+  /*N=8, K=8...37*/
+  48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767,
+  9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017,
+  104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351,
+  638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615,
+  2229491905U,
+#if defined(CUSTOM_MODES)
+  /*...40:*/
+  2691463695U, 3233240945U, 3866006015U,
+#endif
+  /*N=9, K=9...28:*/
+  265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777,
+  39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145,
+  628496897, 872893441, 1196924561, 1621925137, 2173806145U,
+#if defined(CUSTOM_MODES)
+  /*...29:*/
+  2883810113U,
+#endif
+  /*N=10, K=10...24:*/
+  1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073,
+  254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U,
+  3375210671U,
+  /*N=11, K=11...19:*/
+  8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585,
+  948062325, 1616336765,
+#if defined(CUSTOM_MODES)
+  /*...20:*/
+  2684641785U,
+#endif
+  /*N=12, K=12...18:*/
+  45046719, 103274625, 224298231, 464387817, 921406335, 1759885185,
+  3248227095U,
+  /*N=13, K=13...16:*/
+  251595969, 579168825, 1267854873, 2653649025U,
+  /*N=14, K=14:*/
+  1409933619
+};
-/*Compute V(2,_k).*/
+#if defined(CUSTOM_MODES)
-static inline opus_uint32 ncwrs2(int _k){
+const opus_uint32 *const CELT_PVQ_U_ROW[15]={
-  celt_assert(_k>0);
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415,
-  return 4*(opus_uint32)_k;
+  CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030,
+  CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389,
+  CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455,
+  CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
+};
+#else
+const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+  CELT_PVQ_U_DATA+   0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
+  CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
+  CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
+  CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240,
+  CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257
+};
+#endif
+#if defined(CUSTOM_MODES)
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
+  int k;
+  /*_maxk==0 => there's nothing to do.*/
+  celt_assert(_maxk>0);
+  _bits[0]=0;
+  for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac);
 }
+#endif
-/*Compute U(3,_k).
+static opus_uint32 icwrs(int _n,const int *_y){
-  Note that this may be called with _k=32768 (maxK[3]+1).*/
+  opus_uint32 i;
-static inline opus_uint32 ucwrs3(unsigned _k){
+  int         j;
-  celt_assert(_k>0);
+  int         k;
-  return (2*(opus_uint32)_k-2)*_k+1;
+  celt_assert(_n>=2);
+  j=_n-1;
+  i=_y[j]<0;
+  k=abs(_y[j]);
+  do{
+    j--;
+    i+=CELT_PVQ_U(_n-j,k);
+    k+=abs(_y[j]);
+    if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1);
+  }
+  while(j>0);
+  return i;
 }
-/*Compute V(3,_k).*/
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
-static inline opus_uint32 ncwrs3(int _k){
  celt_assert(_k>0);
-  return 2*(2*(unsigned)_k*(opus_uint32)_k+1);
+  ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
 }
-/*Compute U(4,_k).*/
+static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
-static inline opus_uint32 ucwrs4(int _k){
+  opus_uint32 p;
+  int         s;
+  int         k0;
  celt_assert(_k>0);
-  return imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1);
+  celt_assert(_n>1);
+  while(_n>2){
+    opus_uint32 q;
+    /*Lots of pulses case:*/
+    if(_k>=_n){
+      const opus_uint32 *row;
+      row=CELT_PVQ_U_ROW[_n];
+      /*Are the pulses in this dimension negative?*/
+      p=row[_k+1];
+      s=-(_i>=p);
+      _i-=p&s;
+      /*Count how many pulses were placed in this dimension.*/
+      k0=_k;
+      q=row[_n];
+      if(q>_i){
+        celt_assert(p>q);
+        _k=_n;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+      }
+      else for(p=row[_k];p>_i;p=row[_k])_k--;
+      _i-=p;
+      *_y++=(k0-_k+s)^s;
+    }
+    /*Lots of dimensions case:*/
+    else{
+      /*Are there any pulses in this dimension at all?*/
+      p=CELT_PVQ_U_ROW[_k][_n];
+      q=CELT_PVQ_U_ROW[_k+1][_n];
+      if(p<=_i&&_i<q){
+        _i-=p;
+        *_y++=0;
+      }
+      else{
+        /*Are the pulses in this dimension negative?*/
+        s=-(_i>=q);
+        _i-=q&s;
+        /*Count how many pulses were placed in this dimension.*/
+        k0=_k;
+        do p=CELT_PVQ_U_ROW[--_k][_n];
+        while(p>_i);
+        _i-=p;
+        *_y++=(k0-_k+s)^s;
+      }
+    }
+    _n--;
+  }
+  /*_n==2*/
+  p=2*_k+1;
+  s=-(_i>=p);
+  _i-=p&s;
+  k0=_k;
+  _k=(_i+1)>>1;
+  if(_k)_i-=2*_k-1;
+  *_y++=(k0-_k+s)^s;
+  /*_n==1*/
+  s=-(int)_i;
+  *_y=(_k+s)^s;
 }
-/*Compute V(4,_k).*/
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
-static inline opus_uint32 ncwrs4(int _k){
+  cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
-  celt_assert(_k>0);
-  return ((_k*(opus_uint32)_k+2)*_k)/3<<3;
 }
-#endif /* SMALL_FOOTPRINT */
+#else /* SMALL_FOOTPRINT */
 /*Computes the next row/column of any recurrence that obeys the relation
   u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
@@ -333,125 +574,18 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
  celt_assert(len>=3);
  _u[0]=0;
  _u[1]=um2=1;
-#ifndef SMALL_FOOTPRINT
+  /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
-  /*_k>52 doesn't work in the false branch due to the limits of INV_TABLE,
+  /*If _n==1, _u[i] should be 1 for i>1.*/
-    but _k isn't tested here because k<=52 for n=7*/
+  celt_assert(_n>=2);
-  if(_n<=6)
+  /*If _k==0, the following do-while loop will overflow the buffer.*/
-#endif
+  celt_assert(_k>0);
- {
+  k=2;
-    /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
+  do _u[k]=(k<<1)-1;
-    /*If _n==1, _u[i] should be 1 for i>1.*/
+  while(++k<len);
-    celt_assert(_n>=2);
+  for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
-    /*If _k==0, the following do-while loop will overflow the buffer.*/
-    celt_assert(_k>0);
-    k=2;
-    do _u[k]=(k<<1)-1;
-    while(++k<len);
-    for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
-  }
-#ifndef SMALL_FOOTPRINT
-  else{
-    opus_uint32 um1;
-    opus_uint32 n2m1;
-    _u[2]=n2m1=um1=(_n<<1)-1;
-    for(k=3;k<len;k++){
-      /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/
-      _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
-      if(++k>=len)break;
-      _u[k]=um1=imusdiv32odd(n2m1,um2,um1,(k-1)>>1)+um1;
-    }
-  }
-#endif /* SMALL_FOOTPRINT */
  return _u[_k]+_u[_k+1];
 }
-#ifndef SMALL_FOOTPRINT
-/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
-   set of size 1 with associated sign bits.
-  _y: Returns the vector of pulses.*/
-static inline void cwrsi1(int _k,opus_uint32 _i,int *_y){
-  int s;
-  s=-(int)_i;
-  _y[0]=(_k+s)^s;
-}
-/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
-   set of size 2 with associated sign bits.
-  _y: Returns the vector of pulses.*/
-static inline void cwrsi2(int _k,opus_uint32 _i,int *_y){
-  opus_uint32 p;
-  int           s;
-  int           yj;
-  p=ucwrs2(_k+1U);
-  s=-(_i>=p);
-  _i-=p&s;
-  yj=_k;
-  _k=(_i+1)>>1;
-  p=_k?ucwrs2(_k):0;
-  _i-=p;
-  yj-=_k;
-  _y[0]=(yj+s)^s;
-  cwrsi1(_k,_i,_y+1);
-}
-/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
-   set of size 3 with associated sign bits.
-  _y: Returns the vector of pulses.*/
-static void cwrsi3(int _k,opus_uint32 _i,int *_y){
-  opus_uint32 p;
-  int           s;
-  int           yj;
-  p=ucwrs3(_k+1U);
-  s=-(_i>=p);
-  _i-=p&s;
-  yj=_k;
-  /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
-     _i<2147418113=U(3,32768)).*/
-  _k=_i>0?(isqrt32(2*_i-1)+1)>>1:0;
-  p=_k?ucwrs3(_k):0;
-  _i-=p;
-  yj-=_k;
-  _y[0]=(yj+s)^s;
-  cwrsi2(_k,_i,_y+1);
-}
-/*Returns the _i'th combination of _k elements (at most 1172) chosen from a set
-   of size 4 with associated sign bits.
-  _y: Returns the vector of pulses.*/
-static void cwrsi4(int _k,opus_uint32 _i,int *_y){
-  opus_uint32 p;
-  int           s;
-  int           yj;
-  int           kl;
-  int           kr;
-  p=ucwrs4(_k+1);
-  s=-(_i>=p);
-  _i-=p&s;
-  yj=_k;
-  /*We could solve a cubic for k here, but the form of the direct solution does
-     not lend itself well to exact integer arithmetic.
-    Instead we do a binary search on U(4,K).*/
-  kl=0;
-  kr=_k;
-  for(;;){
-    _k=(kl+kr)>>1;
-    p=_k?ucwrs4(_k):0;
-    if(p<_i){
-      if(_k>=kr)break;
-      kl=_k+1;
-    }
-    else if(p>_i)kr=_k-1;
-    else break;
-  }
-  _i-=p;
-  yj-=_k;
-  _y[0]=(yj+s)^s;
-  cwrsi3(_k,_i,_y+1);
-}
-#endif /* SMALL_FOOTPRINT */
 /*Returns the _i'th combination of _k elements chosen from a set of size _n
   with associated sign bits.
  _y: Returns the vector of pulses.
@@ -488,55 +622,6 @@ static inline opus_uint32 icwrs1(const int *_y,int *_k){
  return _y[0]<0;
 }
-#ifndef SMALL_FOOTPRINT
-/*Returns the index of the given combination of K elements chosen from a set
-   of size 2 with associated sign bits.
-  _y: The vector of pulses, whose sum of absolute values is K.
-  _k: Returns K.*/
-static inline opus_uint32 icwrs2(const int *_y,int *_k){
-  opus_uint32 i;
-  int           k;
-  i=icwrs1(_y+1,&k);
-  i+=k?ucwrs2(k):0;
-  k+=abs(_y[0]);
-  if(_y[0]<0)i+=ucwrs2(k+1U);
-  *_k=k;
-  return i;
-}
-/*Returns the index of the given combination of K elements chosen from a set
-   of size 3 with associated sign bits.
-  _y: The vector of pulses, whose sum of absolute values is K.
-  _k: Returns K.*/
-static inline opus_uint32 icwrs3(const int *_y,int *_k){
-  opus_uint32 i;
-  int           k;
-  i=icwrs2(_y+1,&k);
-  i+=k?ucwrs3(k):0;
-  k+=abs(_y[0]);
-  if(_y[0]<0)i+=ucwrs3(k+1U);
-  *_k=k;
-  return i;
-}
-/*Returns the index of the given combination of K elements chosen from a set
-   of size 4 with associated sign bits.
-  _y: The vector of pulses, whose sum of absolute values is K.
-  _k: Returns K.*/
-static inline opus_uint32 icwrs4(const int *_y,int *_k){
-  opus_uint32 i;
-  int           k;
-  i=icwrs3(_y+1,&k);
-  i+=k?ucwrs4(k):0;
-  k+=abs(_y[0]);
-  if(_y[0]<0)i+=ucwrs4(k+1);
-  *_k=k;
-  return i;
-}
-#endif /* SMALL_FOOTPRINT */
 /*Returns the index of the given combination of K elements chosen from a set
   of size _n with associated sign bits.
  _y:  The vector of pulses, whose sum of absolute values must be _k.
@@ -544,8 +629,8 @@ static inline opus_uint32 icwrs4(const int *_y,int *_k){
 static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
 opus_uint32 *_u){
  opus_uint32 i;
-  int           j;
+  int         j;
-  int           k;
+  int         k;
  /*We can't unroll the first two iterations of the loop unless _n>=2.*/
  celt_assert(_n>=2);
  _u[0]=0;
@@ -590,58 +675,23 @@ void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
 void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
  opus_uint32 i;
+  VARDECL(opus_uint32,u);
+  opus_uint32 nc;
+  SAVE_STACK;
  celt_assert(_k>0);
-#ifndef SMALL_FOOTPRINT
+  ALLOC(u,_k+2U,opus_uint32);
-  switch(_n){
+  i=icwrs(_n,_k,&nc,_y,u);
-    case 2:{
+  ec_enc_uint(_enc,i,nc);
-      i=icwrs2(_y,&_k);
+  RESTORE_STACK;
-      ec_enc_uint(_enc,i,ncwrs2(_k));
-    }break;
-    case 3:{
-      i=icwrs3(_y,&_k);
-      ec_enc_uint(_enc,i,ncwrs3(_k));
-    }break;
-    case 4:{
-      i=icwrs4(_y,&_k);
-      ec_enc_uint(_enc,i,ncwrs4(_k));
-    }break;
-     default:
-    {
-#endif
-      VARDECL(opus_uint32,u);
-      opus_uint32 nc;
-      SAVE_STACK;
-      ALLOC(u,_k+2U,opus_uint32);
-      i=icwrs(_n,_k,&nc,_y,u);
-      ec_enc_uint(_enc,i,nc);
-      RESTORE_STACK;
-#ifndef SMALL_FOOTPRINT
-    }
-    break;
-  }
-#endif
 }
-void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
-{
+  VARDECL(opus_uint32,u);
+  SAVE_STACK;
  celt_assert(_k>0);
-#ifndef SMALL_FOOTPRINT
+  ALLOC(u,_k+2U,opus_uint32);
-   switch(_n){
+  cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
-    case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
+  RESTORE_STACK;
-    case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break;
-    case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break;
-    default:
-    {
-#endif
-/*      VARDECL(opus_uint32,u);
-      SAVE_STACK;
-      ALLOC(u,_k+2U,opus_uint32); */
-      opus_uint32 u[MAX_PULSES+2];
-      cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
-/*      RESTORE_STACK; */
-#ifndef SMALL_FOOTPRINT
-    }
-    break;
-  }
-#endif
 }
+#endif /* SMALL_FOOTPRINT */
diff --git a/lib/rbcodec/codecs/libopus/celt/ecintrin.h b/lib/rbcodec/codecs/libopus/celt/ecintrin.h
index 3dffa5f95c..be57dd40de 100644
--- a/lib/rbcodec/codecs/libopus/celt/ecintrin.h
+++ b/lib/rbcodec/codecs/libopus/celt/ecintrin.h
@@ -48,7 +48,7 @@
 /*Count leading zeros.
  This macro should only be used for implementing ec_ilog(), if it is defined.
  All other code should use EC_ILOG() instead.*/
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && (_MSC_VER >= 1400)
 # include <intrin.h>
 /*In _DEBUG mode this is not an intrinsic by default.*/
 # pragma intrinsic(_BitScanReverse)
diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.c b/lib/rbcodec/codecs/libopus/celt/entcode.c
index 80e64fefaa..fa5d7c7c2c 100644
--- a/lib/rbcodec/codecs/libopus/celt/entcode.c
+++ b/lib/rbcodec/codecs/libopus/celt/entcode.c
@@ -26,13 +26,18 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "entcode.h"
 #include "arch.h"
 #if !defined(EC_CLZ)
+/*This is a fallback for systems where we don't know how to access
+   a BSR or CLZ instruction (see ecintrin.h).
+  If you are optimizing Opus on a new platform and it has a native CLZ or
+   BZR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be
+   an easy performance win.*/
 int ec_ilog(opus_uint32 _v){
  /*On a Pentium M, this branchless version tested as the fastest on
     1,000,000,000 random 32-bit integers, edging out a similar version with
diff --git a/lib/rbcodec/codecs/libopus/celt/entdec.c b/lib/rbcodec/codecs/libopus/celt/entdec.c
index ff8442d534..3c264685c2 100644
--- a/lib/rbcodec/codecs/libopus/celt/entdec.c
+++ b/lib/rbcodec/codecs/libopus/celt/entdec.c
@@ -26,7 +26,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include <stddef.h>
@@ -85,7 +85,7 @@
   number=3,
   pages="256--294",
   month=Jul,
-   URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf"
+   URL="http://www.stanford.edu/class/ee398a/handouts/papers/Moffat98ArithmCoding.pdf"
  }*/
 static int ec_read_byte(ec_dec *_this){
diff --git a/lib/rbcodec/codecs/libopus/celt/entenc.c b/lib/rbcodec/codecs/libopus/celt/entenc.c
index 0ec6e91fd7..a7e34ecef9 100644
--- a/lib/rbcodec/codecs/libopus/celt/entenc.c
+++ b/lib/rbcodec/codecs/libopus/celt/entenc.c
@@ -26,7 +26,7 @@
 */
 #if defined(HAVE_CONFIG_H)
-# include "opus_config.h"
+# include "config.h"
 #endif
 #include "os_support.h"
 #include "arch.h"
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
index 28a1598d3e..0e77976e83 100644
--- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
@@ -42,64 +42,12 @@
 /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
 #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16((a),((b)&0x0000ffff)),16))
-#if defined(CPU_COLDFIRE)
-static inline int32_t MULT16_32_Q15(int32_t a, int32_t b)
-{
-  int32_t r;
-  asm volatile ("mac.l %[a], %[b], %%acc0;"
-                "movclr.l %%acc0, %[r];"
-                : [r] "=r" (r)
-                : [a] "r" (a<<16), [b] "r" (b)
-                : "cc");
-  return r;
-}
-#elif defined(CPU_ARM)
-static inline int32_t MULT16_32_Q15(int32_t a, int32_t b)
-{
-  int32_t lo, hi;
-  asm volatile("smull %[lo], %[hi], %[b], %[a] \n\t"
-               "mov %[lo], %[lo], lsr #15 \n\t"
-               "orr %[hi], %[lo], %[hi], lsl #17 \n\t"
-               : [lo] "=&r" (lo), [hi] "=&r" (hi)
-               : [a] "r" (a), [b] "r" (b) );
-  return(hi);
-}
-#else
 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
-#endif
-#if defined(CPU_COLDFIRE)
-static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
-{
-  int32_t r;
-  asm volatile ("mac.l %[a], %[b], %%acc0;"
-                "movclr.l %%acc0, %[r];"
-                : [r] "=r" (r)
-                : [a] "r" (a), [b] "r" (b)
-                : "cc");
-  return r;
-}
-#elif defined(CPU_ARM)
-static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
-{
-  int32_t lo, hi;
-  asm volatile("smull %[lo], %[hi], %[a], %[b] \n\t"
-               "mov %[lo], %[lo], lsr #31 \n\t"
-               "orr %[hi], %[lo], %[hi], lsl #1 \n\t"
-               : [lo] "=&r" (lo), [hi] "=&r" (hi)
-               : [a] "r" (a), [b] "r" (b) );
-  return(hi);
-}
-#else
 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
-//#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
-#define MULT32_32_Q31(a,b) (opus_val32)((((int64_t)(a)) * ((int64_t)(b)))>>31)
-#endif
 /** Compile-time conversion of float constant to 16-bit value */
 #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
@@ -136,6 +84,8 @@ static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
 #define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift))
 #define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
+#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x)))
 /** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */
 #define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
 /** Divide by two */
@@ -160,7 +110,9 @@ static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
 /** 16x16 multiply-add where the result fits in 32 bits */
 #define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
-/** 16x32 multiply-add, followed by a 15-bit shift right. Results fits in 32 bits */
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
 #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
 #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
index 01049d5344..e2b8f3b3da 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
@@ -31,7 +31,7 @@
 #ifndef SKIP_CONFIG_H
 #  ifdef HAVE_CONFIG_H
-#    include "opus_config.h"
+#    include "config.h"
 #  endif
 #endif
@@ -40,7 +40,6 @@
 #include "os_support.h"
 #include "mathops.h"
 #include "stack_alloc.h"
-#include "os_support.h"
 /* The guts header contains all the multiplication and addition macros that are defined for
   complex numbers.  It also delares the kf_ internal functions.
@@ -145,8 +144,6 @@ static void kf_bfly4(
         C_ADDTO(*Fout, scratch[1]);
         C_ADD( scratch[3] , scratch[0] , scratch[2] );
         C_SUB( scratch[4] , scratch[0] , scratch[2] );
-         Fout[m2].r = PSHR32(Fout[m2].r, 2);
-         Fout[m2].i = PSHR32(Fout[m2].i, 2);
         C_SUB( Fout[m2], *Fout, scratch[3] );
         tw1 += fstride;
         tw2 += fstride*2;
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
index c6bb4bfd45..66cf1f2126 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
@@ -128,7 +128,14 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
    f[k].r and f[k].i
 * */
 void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
-void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) ICODE_ATTR;
+#if defined(CPU_COLDFIRE)
+#define IFFT_ICODE ICODE_ATTR
+#else
+#define IFFT_ICODE
+#endif
+void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) IFFT_ICODE;
 void opus_fft_free(const kiss_fft_state *cfg);
diff --git a/lib/rbcodec/codecs/libopus/celt/laplace.c b/lib/rbcodec/codecs/libopus/celt/laplace.c
index 6fa4009d57..a7bca874b6 100644
--- a/lib/rbcodec/codecs/libopus/celt/laplace.c
+++ b/lib/rbcodec/codecs/libopus/celt/laplace.c
@@ -27,7 +27,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "laplace.h"
diff --git a/lib/rbcodec/codecs/libopus/celt/mathops.c b/lib/rbcodec/codecs/libopus/celt/mathops.c
index 1af6672592..21fd942960 100644
--- a/lib/rbcodec/codecs/libopus/celt/mathops.c
+++ b/lib/rbcodec/codecs/libopus/celt/mathops.c
@@ -32,7 +32,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "mathops.h"
@@ -123,6 +123,8 @@ opus_val32 celt_sqrt(opus_val32 x)
   static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
   if (x==0)
      return 0;
+   else if (x>=1073741824)
+      return 32767;
   k = (celt_ilog2(x)>>1)-7;
   x = VSHR32(x, 2*k);
   n = x-32768;
diff --git a/lib/rbcodec/codecs/libopus/celt/mathops.h b/lib/rbcodec/codecs/libopus/celt/mathops.h
index 4e97795606..44fa97c697 100644
--- a/lib/rbcodec/codecs/libopus/celt/mathops.h
+++ b/lib/rbcodec/codecs/libopus/celt/mathops.h
@@ -43,6 +43,41 @@
 unsigned isqrt32(opus_uint32 _val);
+#ifndef OVERRIDE_CELT_MAXABS16
+static inline opus_val32 celt_maxabs16(const opus_val16 *x, int len)
+{
+   int i;
+   opus_val16 maxval = 0;
+   opus_val16 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX16(maxval, x[i]);
+      minval = MIN16(minval, x[i]);
+   }
+   return MAX32(EXTEND32(maxval),-EXTEND32(minval));
+}
+#endif
+#ifndef OVERRIDE_CELT_MAXABS32
+#ifdef FIXED_POINT
+static inline opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+{
+   int i;
+   opus_val32 maxval = 0;
+   opus_val32 minval = 0;
+   for (i=0;i<len;i++)
+   {
+      maxval = MAX32(maxval, x[i]);
+      minval = MIN32(minval, x[i]);
+   }
+   return MAX32(maxval, -minval);
+}
+#else
+#define celt_maxabs32(x,len) celt_maxabs16(x,len)
+#endif
+#endif
 #ifndef FIXED_POINT
 #define PI 3.141592653f
@@ -117,27 +152,6 @@ static inline opus_int16 celt_ilog2(opus_int32 x)
 }
 #endif
-#ifndef OVERRIDE_CELT_MAXABS16
-static inline opus_val16 celt_maxabs16(opus_val16 *x, int len)
-{
-   int i;
-   opus_val16 maxval = 0;
-   for (i=0;i<len;i++)
-      maxval = MAX16(maxval, ABS16(x[i]));
-   return maxval;
-}
-#endif
-#ifndef OVERRIDE_CELT_MAXABS32
-static inline opus_val32 celt_maxabs32(opus_val32 *x, int len)
-{
-   int i;
-   opus_val32 maxval = 0;
-   for (i=0;i<len;i++)
-      maxval = MAX32(maxval, ABS32(x[i]));
-   return maxval;
-}
-#endif
 /** Integer log in base2. Defined for zero, but not for negative numbers */
 static inline opus_int16 celt_zlog2(opus_val32 x)
@@ -176,6 +190,13 @@ static inline opus_val16 celt_log2(opus_val32 x)
 #define D1 22804
 #define D2 14819
 #define D3 10204
+static inline opus_val32 celt_exp2_frac(opus_val16 x)
+{
+   opus_val16 frac;
+   frac = SHL16(x, 4);
+   return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
+}
 /** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
 static inline opus_val32 celt_exp2(opus_val16 x)
 {
@@ -186,8 +207,7 @@ static inline opus_val32 celt_exp2(opus_val16 x)
      return 0x7f000000;
   else if (integer < -15)
      return 0;
-   frac = SHL16(x-SHL16(integer,10),4);
+   frac = celt_exp2_frac(x-SHL16(integer,10));
-   frac = ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
   return VSHR32(EXTEND32(frac), -integer-2);
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c
index 0df77fd5ec..72ea180568 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.c
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.c
@@ -41,7 +41,7 @@
 #ifndef SKIP_CONFIG_H
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #endif
@@ -110,12 +110,14 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
   int N, N2, N4;
   kiss_twiddle_scalar sine;
   VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_scalar, f2);
   SAVE_STACK;
   N = l->n;
   N >>= shift;
   N2 = N>>1;
   N4 = N>>2;
   ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N2, kiss_fft_scalar);
   /* sin(x) ~= x here */
 #ifdef FIXED_POINT
   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
@@ -132,7 +134,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
      kiss_fft_scalar * OPUS_RESTRICT yp = f;
      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
-      for(i=0;i<(overlap>>2);i++)
+      for(i=0;i<((overlap+3)>>2);i++)
      {
         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
@@ -144,7 +146,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
      }
      wp1 = window;
      wp2 = window+overlap-1;
-      for(;i<N4-(overlap>>2);i++)
+      for(;i<N4-((overlap+3)>>2);i++)
      {
         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
         *yp++ = *xp2;
@@ -181,12 +183,12 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
   }
   /* N/4 complex FFT, down-scales by 4/N */
-   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in);
+   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
   /* Post-rotate */
   {
      /* Temp pointers to make it really clear to the compiler what we're doing */
-      const kiss_fft_scalar * OPUS_RESTRICT fp = in;
+      const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
      const kiss_twiddle_scalar *t = &l->trig[0];
@@ -208,35 +210,20 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
 }
 #endif
-#define S_F_BUF_SIZE (1920>>1) /* N = 1920 for static modes */
-static kiss_fft_scalar s_f2[S_F_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR;
 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
 {
   int i;
   int N, N2, N4;
-   int tstride = 1<<shift;
   kiss_twiddle_scalar sine;
-   VARDECL(kiss_fft_scalar, f);
+/*   VARDECL(kiss_fft_scalar, f2);
-   VARDECL(kiss_fft_scalar, f2);
+   SAVE_STACK; */
-   SAVE_STACK;
   N = l->n;
   N >>= shift;
   N2 = N>>1;
   N4 = N>>2;
-   kiss_fft_scalar s_f[S_F_BUF_SIZE];
+/*   ALLOC(f2, N2, kiss_fft_scalar); */
+   kiss_fft_scalar f2[N2]; /* worst case 3840b */
-   if (S_F_BUF_SIZE >= N2)
-   {
-      f  = s_f;
-      f2 = s_f2;
-   }
-   else
-   {
-      ALLOC(f , N2, kiss_fft_scalar);
-      ALLOC(f2, N2, kiss_fft_scalar);
-   }
   /* sin(x) ~= x here */
 #ifdef FIXED_POINT
   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
@@ -250,102 +237,78 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
-      const kiss_twiddle_scalar *t0 = &l->trig[0];
+      const kiss_twiddle_scalar *t = &l->trig[0];
-      const kiss_twiddle_scalar *t1 = &l->trig[N4<<shift];
      for(i=0;i<N4;i++)
      {
         kiss_fft_scalar yr, yi;
-         yr = -S_MUL(*xp2, *t0) + S_MUL(*xp1, *t1);
+         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
-         yi = -S_MUL(*xp2, *t1) - S_MUL(*xp1, *t0);
+         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
         /* works because the cos is nearly one */
         *yp++ = yr - S_MUL(yi,sine);
         *yp++ = yi + S_MUL(yr,sine);
         xp1+=2*stride;
         xp2-=2*stride;
-         t0 += tstride;
-         t1 -= tstride;
      }
   }
   /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
-   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f);
+   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
-   /* Post-rotate */
+   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+      it in-place. */
   {
-      kiss_fft_scalar * OPUS_RESTRICT fp = f;
+      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
-      const kiss_twiddle_scalar *t0 = &l->trig[0];
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
-      const kiss_twiddle_scalar *t1 = &l->trig[N4<<shift];
+      const kiss_twiddle_scalar *t = &l->trig[0];
-      for(i=0;i<N4;i++)
+      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+         middle pair will be computed twice. */
+      for(i=0;i<(N4+1)>>1;i++)
      {
         kiss_fft_scalar re, im, yr, yi;
-         re = fp[0];
+         kiss_twiddle_scalar t0, t1;
-         im = fp[1];
+         re = yp0[0];
+         im = yp0[1];
+         t0 = t[i<<shift];
+         t1 = t[(N4-i)<<shift];
         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-         yr = S_MUL(re, *t0) - S_MUL(im, *t1);
+         yr = S_MUL(re,t0) - S_MUL(im,t1);
-         yi = S_MUL(im, *t0) + S_MUL(re, *t1);
+         yi = S_MUL(im,t0) + S_MUL(re,t1);
+         re = yp1[0];
+         im = yp1[1];
         /* works because the cos is nearly one */
-         *fp++ = yr - S_MUL(yi,sine);
+         yp0[0] = -(yr - S_MUL(yi,sine));
-         *fp++ = yi + S_MUL(yr,sine);
+         yp1[1] = yi + S_MUL(yr,sine);
-         t0 += tstride;
-         t1 -= tstride;
-      }
-   }
-   /* De-shuffle the components for the middle of the window only */
-   {
-      const kiss_fft_scalar * OPUS_RESTRICT fp1 = f;
-      const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1;
-      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
-      for(i = 0; i < N4; i++)
-      {
-         *yp++ =-*fp1;
-         *yp++ = *fp2;
-         fp1 += 2;
-         fp2 -= 2;
-      }
-   }
-   out -= (N2-overlap)>>1;
-   /* Mirror on both sides for TDAC */
-   {
-      kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1;
-      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1;
-      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2;
-      const opus_val16 * OPUS_RESTRICT wp1 = window;
-      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
-      i = N4-overlap/2;
+         t0 = t[(N4-i-1)<<shift];
-      xp1 -= N4-overlap/2;
+         t1 = t[(i+1)<<shift];
-      fp1 -= N4-overlap/2;
+         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-      OPUS_COPY(xp1+1, fp1+1, N4-overlap/2);
+         yr = S_MUL(re,t0) - S_MUL(im,t1);
-      for(; i < N4; i++)
+         yi = S_MUL(im,t0) + S_MUL(re,t1);
-      {
+         /* works because the cos is nearly one */
-         kiss_fft_scalar x1;
+         yp1[0] = -(yr - S_MUL(yi,sine));
-         x1 = *fp1--;
+         yp0[1] = yi + S_MUL(yr,sine);
-         *yp1++ +=-MULT16_32_Q15(*wp1, x1);
+         yp0 += 2;
-         *xp1-- += MULT16_32_Q15(*wp2, x1);
+         yp1 -= 2;
-         wp1++;
-         wp2--;
      }
   }
+   /* Mirror on both sides for TDAC */
   {
-      kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4;
+      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
-      kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2;
+      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
-      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2);
      const opus_val16 * OPUS_RESTRICT wp1 = window;
      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
-      i = N4-overlap/2;
+      for(i = 0; i < overlap/2; i++)
-      OPUS_COPY(xp2, fp2, N4-overlap/2);
-      xp2 += N4-overlap/2;
-      fp2 += N4-overlap/2;
-      for(; i < N4; i++)
      {
-         kiss_fft_scalar x2;
+         kiss_fft_scalar x1, x2;
-         x2 = *fp2++;
+         x1 = *xp1;
-         *yp2--  = MULT16_32_Q15(*wp1, x2);
+         x2 = *yp1;
-         *xp2++  = MULT16_32_Q15(*wp2, x2);
+         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
         wp1++;
         wp2--;
      }
   }
-   RESTORE_STACK;
+/*   RESTORE_STACK; */
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.h b/lib/rbcodec/codecs/libopus/celt/mdct.h
index 933aafcda1..d72182138a 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.h
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.h
@@ -61,11 +61,6 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
      kiss_fft_scalar * OPUS_RESTRICT out,
      const opus_val16 *window, int overlap, int shift, int stride);
-#if defined(CPU_COLDFIRE)
-#define MDCT_ICODE ICODE_ATTR
-#else
-#define MDCT_ICODE
-#endif
 /** Compute a backward MDCT (no scaling) and performs weighted overlap-add
    (scales implicitly by 1/2) */
 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
diff --git a/lib/rbcodec/codecs/libopus/celt/modes.c b/lib/rbcodec/codecs/libopus/celt/modes.c
index d44cb3b9de..42e68e1cb7 100644
--- a/lib/rbcodec/codecs/libopus/celt/modes.c
+++ b/lib/rbcodec/codecs/libopus/celt/modes.c
@@ -28,7 +28,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "celt.h"
@@ -345,6 +345,14 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
   mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands);
   if (mode->eBands==NULL)
      goto failure;
+#if !defined(SMALL_FOOTPRINT)
+   /* Make sure we don't allocate a band larger than our PVQ table.
+      208 should be enough, but let's be paranoid. */
+   if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])<<LM >
+    208) {
+       goto failure;
+   }
+#endif
   mode->effEBands = mode->nbEBands;
   while (mode->eBands[mode->effEBands] > mode->shortMdctSize)
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c
index 1b7efd945d..0d8be13025 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.c
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.c
@@ -32,7 +32,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "pitch.h"
@@ -77,7 +77,7 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
 #ifndef FIXED_POINT
         /* Considering the range of xcorr16, this should avoid both underflows
            and overflows (inf) when squaring xcorr16 */
-         xcorr16 *= 1e-12;
+         xcorr16 *= 1e-12f;
 #endif
         num = MULT16_16_Q15(xcorr16,xcorr16);
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
@@ -102,13 +102,57 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
   }
 }
+static void celt_fir5(const opus_val16 *x,
+         const opus_val16 *num,
+         opus_val16 *y,
+         int N,
+         opus_val16 *mem)
+{
+   int i;
+   opus_val16 num0, num1, num2, num3, num4;
+   opus_val32 mem0, mem1, mem2, mem3, mem4;
+   num0=num[0];
+   num1=num[1];
+   num2=num[2];
+   num3=num[3];
+   num4=num[4];
+   mem0=mem[0];
+   mem1=mem[1];
+   mem2=mem[2];
+   mem3=mem[3];
+   mem4=mem[4];
+   for (i=0;i<N;i++)
+   {
+      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      sum = MAC16_16(sum,num0,mem0);
+      sum = MAC16_16(sum,num1,mem1);
+      sum = MAC16_16(sum,num2,mem2);
+      sum = MAC16_16(sum,num3,mem3);
+      sum = MAC16_16(sum,num4,mem4);
+      mem4 = mem3;
+      mem3 = mem2;
+      mem2 = mem1;
+      mem1 = mem0;
+      mem0 = x[i];
+      y[i] = ROUND16(sum, SIG_SHIFT);
+   }
+   mem[0]=mem0;
+   mem[1]=mem1;
+   mem[2]=mem2;
+   mem[3]=mem3;
+   mem[4]=mem4;
+}
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
      int len, int C)
 {
   int i;
   opus_val32 ac[5];
   opus_val16 tmp=Q15ONE;
-   opus_val16 lpc[4], mem[4]={0,0,0,0};
+   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
+   opus_val16 lpc2[5];
+   opus_val16 c1 = QCONST16(.8f,15);
 #ifdef FIXED_POINT
   int shift;
   opus_val32 maxabs = celt_maxabs32(x[0], len);
@@ -161,14 +205,89 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
   }
-   celt_fir(x_lp, lpc, x_lp, len>>1, 4, mem);
+   /* Add a zero */
+   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
+   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
+   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
+   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
+   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
+   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
+}
-   mem[0]=0;
+#if 0 /* This is a simple version of the pitch correlation that should work
-   lpc[0]=QCONST16(.8f,12);
+         well on DSPs like Blackfin and TI C5x/C6x */
-   celt_fir(x_lp, lpc, x_lp, len>>1, 1, mem);
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i, j;
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, x[j],y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
 }
+#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
+{
+   int i,j;
+#ifdef FIXED_POINT
+   opus_val32 maxcorr=1;
+#endif
+   for (i=0;i<max_pitch-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(_x, _y+i, sum, len);
+      xcorr[i]=sum[0];
+      xcorr[i+1]=sum[1];
+      xcorr[i+2]=sum[2];
+      xcorr[i+3]=sum[3];
+#ifdef FIXED_POINT
+      sum[0] = MAX32(sum[0], sum[1]);
+      sum[2] = MAX32(sum[2], sum[3]);
+      sum[0] = MAX32(sum[0], sum[2]);
+      maxcorr = MAX32(maxcorr, sum[0]);
+#endif
+   }
+   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
+   for (;i<max_pitch;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<len;j++)
+         sum = MAC16_16(sum, _x[j],_y[i+j]);
+      xcorr[i] = sum;
+#ifdef FIXED_POINT
+      maxcorr = MAX32(maxcorr, sum);
+#endif
+   }
+#ifdef FIXED_POINT
+   return maxcorr;
+#endif
+}
+#endif
 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
                  int len, int max_pitch, int *pitch)
 {
@@ -179,8 +298,8 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
   VARDECL(opus_val16, y_lp4);
   VARDECL(opus_val32, xcorr);
 #ifdef FIXED_POINT
-   opus_val32 maxcorr=1;
+   opus_val32 maxcorr;
-   opus_val16 xmax, ymax;
+   opus_val32 xmax, ymax;
   int shift=0;
 #endif
   int offset;
@@ -204,7 +323,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
 #ifdef FIXED_POINT
   xmax = celt_maxabs16(x_lp4, len>>2);
   ymax = celt_maxabs16(y_lp4, lag>>2);
-   shift = celt_ilog2(MAX16(1, MAX16(xmax, ymax)))-11;
+   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
   if (shift>0)
   {
      for (j=0;j<len>>2;j++)
@@ -220,16 +339,11 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
   /* Coarse search with 4x decimation */
-   for (i=0;i<max_pitch>>2;i++)
-   {
-      opus_val32 sum = 0;
-      for (j=0;j<len>>2;j++)
-         sum = MAC16_16(sum, x_lp4[j],y_lp4[i+j]);
-      xcorr[i] = MAX32(-1, sum);
 #ifdef FIXED_POINT
-      maxcorr = MAX32(maxcorr, sum);
+   maxcorr =
 #endif
-   }
+   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
 #ifdef FIXED_POINT
                   , 0, maxcorr
@@ -288,11 +402,13 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
   int k, i, T, T0;
   opus_val16 g, g0;
   opus_val16 pg;
-   opus_val32 xy,xx,yy;
+   opus_val32 xy,xx,yy,xy2;
   opus_val32 xcorr[3];
   opus_val32 best_xy, best_yy;
   int offset;
   int minperiod0;
+   VARDECL(opus_val32, yy_lookup);
+   SAVE_STACK;
   minperiod0 = minperiod;
   maxperiod /= 2;
@@ -305,13 +421,16 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      *T0_=maxperiod-1;
   T = T0 = *T0_;
-   xx=xy=yy=0;
+   ALLOC(yy_lookup, maxperiod+1, opus_val32);
-   for (i=0;i<N;i++)
+   dual_inner_prod(x, x, x-T0, N, &xx, &xy);
+   yy_lookup[0] = xx;
+   yy=xx;
+   for (i=1;i<=maxperiod;i++)
   {
-      xy = MAC16_16(xy, x[i], x[i-T0]);
+      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
-      xx = MAC16_16(xx, x[i], x[i]);
+      yy_lookup[i] = MAX32(0, yy);
-      yy = MAC16_16(yy, x[i-T0],x[i-T0]);
   }
+   yy = yy_lookup[T0];
   best_xy = xy;
   best_yy = yy;
 #ifdef FIXED_POINT
@@ -332,6 +451,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      int T1, T1b;
      opus_val16 g1;
      opus_val16 cont=0;
+      opus_val16 thresh;
      T1 = (2*T0+k)/(2*k);
      if (T1 < minperiod)
         break;
@@ -346,15 +466,9 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      {
         T1b = (2*second_check[k]*T0+k)/(2*k);
      }
-      xy=yy=0;
+      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
-      for (i=0;i<N;i++)
+      xy += xy2;
-      {
+      yy = yy_lookup[T1] + yy_lookup[T1b];
-         xy = MAC16_16(xy, x[i], x[i-T1]);
-         yy = MAC16_16(yy, x[i-T1], x[i-T1]);
-         xy = MAC16_16(xy, x[i], x[i-T1b]);
-         yy = MAC16_16(yy, x[i-T1b], x[i-T1b]);
-      }
 #ifdef FIXED_POINT
      {
         opus_val32 x2y2;
@@ -373,7 +487,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
         cont = HALF32(prev_gain);
      else
         cont = 0;
-      if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation */
+      if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
+      else if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
+      if (g1 > thresh)
      {
         best_xy = xy;
         best_yy = yy;
@@ -407,6 +528,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
   if (*T0_<minperiod0)
      *T0_=minperiod0;
+   RESTORE_STACK;
   return pg;
 }
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.h b/lib/rbcodec/codecs/libopus/celt/pitch.h
index 2757071a6f..caffd24bc4 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.h
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.h
@@ -36,6 +36,10 @@
 #include "modes.h"
+#if defined(__SSE__) && !defined(FIXED_POINT)
+#include "x86/pitch_sse.h"
+#endif
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
      int len, int C);
@@ -45,4 +49,97 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      int N, int *T0, int prev_period, opus_val16 prev_gain);
+/* OPT: This is the kernel you really want to optimize. It gets used a lot
+   by the prefilter and by the PLC. */
+#ifndef OVERRIDE_XCORR_KERNEL
+static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   opus_val16 y_0, y_1, y_2, y_3;
+   y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+   y_0=*y++;
+   y_1=*y++;
+   y_2=*y++;
+   for (j=0;j<len-3;j+=4)
+   {
+      opus_val16 tmp;
+      tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+      tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+      tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+      tmp=*x++;
+      y_2=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_3);
+      sum[1] = MAC16_16(sum[1],tmp,y_0);
+      sum[2] = MAC16_16(sum[2],tmp,y_1);
+      sum[3] = MAC16_16(sum[3],tmp,y_2);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp = *x++;
+      y_3=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_0);
+      sum[1] = MAC16_16(sum[1],tmp,y_1);
+      sum[2] = MAC16_16(sum[2],tmp,y_2);
+      sum[3] = MAC16_16(sum[3],tmp,y_3);
+   }
+   if (j++<len)
+   {
+      opus_val16 tmp=*x++;
+      y_0=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_1);
+      sum[1] = MAC16_16(sum[1],tmp,y_2);
+      sum[2] = MAC16_16(sum[2],tmp,y_3);
+      sum[3] = MAC16_16(sum[3],tmp,y_0);
+   }
+   if (j<len)
+   {
+      opus_val16 tmp=*x++;
+      y_1=*y++;
+      sum[0] = MAC16_16(sum[0],tmp,y_2);
+      sum[1] = MAC16_16(sum[1],tmp,y_3);
+      sum[2] = MAC16_16(sum[2],tmp,y_0);
+      sum[3] = MAC16_16(sum[3],tmp,y_1);
+   }
+}
+#endif /* OVERRIDE_XCORR_KERNEL */
+#ifndef OVERRIDE_DUAL_INNER_PROD
+static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+      int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+   int i;
+   opus_val32 xy01=0;
+   opus_val32 xy02=0;
+   for (i=0;i<N;i++)
+   {
+      xy01 = MAC16_16(xy01, x[i], y01[i]);
+      xy02 = MAC16_16(xy02, x[i], y02[i]);
+   }
+   *xy1 = xy01;
+   *xy2 = xy02;
+}
+#endif
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/quant_bands.c b/lib/rbcodec/codecs/libopus/celt/quant_bands.c
index 5ad5311f84..79685e17cb 100644
--- a/lib/rbcodec/codecs/libopus/celt/quant_bands.c
+++ b/lib/rbcodec/codecs/libopus/celt/quant_bands.c
@@ -27,7 +27,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "quant_bands.h"
@@ -40,8 +40,8 @@
 #include "rate.h"
 #ifdef FIXED_POINT
-/* Mean energy in each band quantized in Q6 */
+/* Mean energy in each band quantized in Q4 */
-static const signed char eMeans[25] = {
+const signed char eMeans[25] = {
      103,100, 92, 85, 81,
       77, 72, 70, 78, 75,
       73, 71, 78, 74, 69,
@@ -49,8 +49,8 @@ static const signed char eMeans[25] = {
       60, 60, 60, 60, 60
 };
 #else
-/* Mean energy in each band quantized in Q6 and converted back to float */
+/* Mean energy in each band quantized in Q4 and converted back to float */
-static const opus_val16 eMeans[25] = {
+const opus_val16 eMeans[25] = {
      6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f,
      4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f,
      4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f,
@@ -157,7 +157,7 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
      const opus_val16 *eBands, opus_val16 *oldEBands,
      opus_int32 budget, opus_int32 tell,
      const unsigned char *prob_model, opus_val16 *error, ec_enc *enc,
-      int C, int LM, int intra, opus_val16 max_decay)
+      int C, int LM, int intra, opus_val16 max_decay, int lfe)
 {
   int i, c;
   int badness = 0;
@@ -222,6 +222,8 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
            if (bits_left < 16)
               qi = IMAX(-1, qi);
         }
+         if (lfe && i>=2)
+            qi = IMIN(qi, 0);
         if (budget-tell >= 15)
         {
            int pi;
@@ -253,13 +255,13 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
         prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
      } while (++c < C);
   }
-   return badness;
+   return lfe ? 0 : badness;
 }
 void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
      opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes,
-      int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate)
+      int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe)
 {
   int intra;
   opus_val16 max_decay;
@@ -280,15 +282,17 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
   if (tell+3 > budget)
      two_pass = intra = 0;
-   /* Encode the global flags using a simple probability model
+   max_decay = QCONST16(16.f,DB_SHIFT);
-      (first symbols in the stream) */
+   if (end-start>10)
+   {
 #ifdef FIXED_POINT
-      max_decay = MIN32(QCONST16(16.f,DB_SHIFT), SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));
+      max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));
 #else
-   max_decay = MIN32(16.f, .125f*nbAvailableBytes);
+      max_decay = MIN32(max_decay, .125f*nbAvailableBytes);
 #endif
+   }
+   if (lfe)
+      max_decay=3;
   enc_start_state = *enc;
   ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
@@ -298,7 +302,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
   if (two_pass || intra)
   {
      badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget,
-            tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay);
+            tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe);
   }
   if (!intra)
@@ -325,7 +329,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
      *enc = enc_start_state;
      badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget,
-            tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay);
+            tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe);
      if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra)))
      {
@@ -532,25 +536,6 @@ void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *
   }
 }
-void log2Amp(const CELTMode *m, int start, int end,
-      celt_ener *eBands, const opus_val16 *oldEBands, int C)
-{
-   int c, i;
-   c=0;
-   do {
-      for (i=0;i<start;i++)
-         eBands[i+c*m->nbEBands] = 0;
-      for (;i<end;i++)
-      {
-         opus_val16 lg = ADD16(oldEBands[i+c*m->nbEBands],
-                         SHL16((opus_val16)eMeans[i],6));
-         eBands[i+c*m->nbEBands] = PSHR32(celt_exp2(lg),4);
-      }
-      for (;i<m->nbEBands;i++)
-         eBands[i+c*m->nbEBands] = 0;
-   } while (++c < C);
-}
 void amp2Log2(const CELTMode *m, int effEnd, int end,
      celt_ener *bandE, opus_val16 *bandLogE, int C)
 {
diff --git a/lib/rbcodec/codecs/libopus/celt/quant_bands.h b/lib/rbcodec/codecs/libopus/celt/quant_bands.h
index bec2855cf0..0490bca4b4 100644
--- a/lib/rbcodec/codecs/libopus/celt/quant_bands.h
+++ b/lib/rbcodec/codecs/libopus/celt/quant_bands.h
@@ -35,6 +35,12 @@
 #include "entdec.h"
 #include "mathops.h"
+#ifdef FIXED_POINT
+extern const signed char eMeans[25];
+#else
+extern const opus_val16 eMeans[25];
+#endif
 void amp2Log2(const CELTMode *m, int effEnd, int end,
      celt_ener *bandE, opus_val16 *bandLogE, int C);
@@ -45,7 +51,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
      const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
      opus_val16 *error, ec_enc *enc, int C, int LM,
      int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra,
-      int two_pass, int loss_rate);
+      int two_pass, int loss_rate, int lfe);
 void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C);
diff --git a/lib/rbcodec/codecs/libopus/celt/rate.c b/lib/rbcodec/codecs/libopus/celt/rate.c
index 3b056d8dc7..e474cf5004 100644
--- a/lib/rbcodec/codecs/libopus/celt/rate.c
+++ b/lib/rbcodec/codecs/libopus/celt/rate.c
@@ -27,7 +27,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include <math.h>
@@ -248,7 +248,7 @@ void compute_pulse_cache(CELTMode *m, int LM)
 static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start,
      const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance,
      int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits,
-      int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev)
+      int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
 {
   opus_int32 psum;
   int lo, hi;
@@ -353,7 +353,7 @@ static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int
 #ifdef FUZZING
            if ((rand()&0x1) == 0)
 #else
-            if (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4)
+            if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
 #endif
            {
               ec_enc_bit_logp(ec, 1, 1);
@@ -524,7 +524,7 @@ static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int
 }
 int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
-      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev)
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
 {
   int lo, hi, len, j;
   int codedBands;
@@ -631,7 +631,7 @@ int compute_allocation(const CELTMode *m, int start, int end, const int *offsets
   }
   codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap,
         total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv,
-         pulses, ebits, fine_priority, C, LM, ec, encode, prev);
+         pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth);
   RESTORE_STACK;
   return codedBands;
 }
diff --git a/lib/rbcodec/codecs/libopus/celt/rate.h b/lib/rbcodec/codecs/libopus/celt/rate.h
index e0d5022326..263fde9820 100644
--- a/lib/rbcodec/codecs/libopus/celt/rate.h
+++ b/lib/rbcodec/codecs/libopus/celt/rate.h
@@ -96,6 +96,6 @@ static inline int pulses2bits(const CELTMode *m, int band, int LM, int pulses)
 @return Total number of bits allocated
 */
 int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero,
-      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev);
+      opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
 #endif
diff --git a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h
index a6f06d2263..1c093a8cdc 100644
--- a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h
+++ b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h
@@ -146,4 +146,26 @@ extern char *global_stack_top;
 #endif /* VAR_ARRAYS */
+#ifdef ENABLE_VALGRIND
+#include <valgrind/memcheck.h>
+#define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr))
+#define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value)
+#define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0)
+#define OPUS_FPRINTF fprintf
+#else
+static inline int _opus_false(void) {return 0;}
+#define OPUS_CHECK_ARRAY(ptr, len) _opus_false()
+#define OPUS_CHECK_VALUE(value) _opus_false()
+#define OPUS_PRINT_INT(value) do{}while(0)
+#define OPUS_FPRINTF (void)
+#endif
 #endif /* STACK_ALLOC_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c
index f6b6e4fc64..af991bb052 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.c
+++ b/lib/rbcodec/codecs/libopus/celt/vq.c
@@ -27,7 +27,7 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "mathops.h"
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.h b/lib/rbcodec/codecs/libopus/celt/vq.h
index 1ceeeeb268..ffdc69cdc4 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.h
+++ b/lib/rbcodec/codecs/libopus/celt/vq.h
@@ -40,11 +40,9 @@
 /** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
  * the pitch and a combination of pulses such that its norm is still equal
  * to 1. This is the function that will typically require the most CPU.
- * @param x Residual signal to quantise/encode (returns quantised version)
+ * @param X Residual signal to quantise/encode (returns quantised version)
- * @param W Perceptual weight to use when optimising (currently unused)
 * @param N Number of samples to encode
 * @param K Number of pulses to use
- * @param p Pitch vector (it is assumed that p+x is a unit vector)
 * @param enc Entropy encoder state
 * @ret A mask indicating which blocks in the band received pulses
 */
@@ -56,10 +54,9 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
      );
 /** Algebraic pulse decoder
- * @param x Decoded normalised spectrum (returned)
+ * @param X Decoded normalised spectrum (returned)
 * @param N Number of samples to decode
 * @param K Number of pulses to use
- * @param p Pitch vector (automatically added to x)
 * @param dec Entropy decoder state
 * @ret A mask indicating which blocks in the band received pulses
 */
author	Nils Wallménius <nils@rockbox.org>	2013-05-20 22:25:57 +0200
committer	Nils Wallménius <nils@rockbox.org>	2013-08-31 08:30:51 +0200
commit	580b307fd791c0997a8831bc800bba87797bfb7e (patch)
tree	807846056f06fd944a750ce41217a877910ebd59 /lib/rbcodec/codecs/libopus/celt
parent	74761b70acd96cecc0d35450dd56a98ad9ee7d3d (diff)
download	rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.tar.gz rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.zip