From 9b7ec42403073ee887efc531c153e6b1b6c15bab Mon Sep 17 00:00:00 2001
From: Nils Wallménius <nils@rockbox.org>
Date: Sun, 19 Jan 2014 16:31:59 +0100
Subject: Sync to upstream libopus

Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c

This brings in a bunch of optimizations to decode speed
and memory usage. Allocations are switched from using
the pseudostack to using the real stack. Enabled hacks
to reduce stack usage.

This should fix crashes on sansa clip, although some
files will not play due to failing allocations in the
codec buffer.

Speeds up decoding of the following test files:

                 H300 (cf)   C200 (arm7tdmi)  ipod classic (arm9e)
16 kbps (silk)   14.28 MHz   4.00 MHz         2.61 MHz
64 kbps (celt)   4.09 MHz    8.08 MHz         6.24 MHz
128 kbps (celt)  1.93 MHz    8.83 MHz         6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
---
 lib/rbcodec/codecs/libopus/silk/CNG.c              | 21 ++++++---
 lib/rbcodec/codecs/libopus/silk/PLC.c              | 53 +++++++++++++++-------
 lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h      |  5 ++
 lib/rbcodec/codecs/libopus/silk/code_signs.c       |  4 +-
 lib/rbcodec/codecs/libopus/silk/dec_API.c          | 37 +++++++++++----
 lib/rbcodec/codecs/libopus/silk/decode_core.c      |  7 ++-
 lib/rbcodec/codecs/libopus/silk/decode_frame.c     | 16 +++----
 lib/rbcodec/codecs/libopus/silk/decode_pulses.c    |  6 +--
 lib/rbcodec/codecs/libopus/silk/macros.h           |  9 +++-
 lib/rbcodec/codecs/libopus/silk/main.h             |  8 ++--
 .../libopus/silk/resampler_private_IIR_FIR.c       | 11 ++---
 lib/rbcodec/codecs/libopus/silk/shell_coder.c      |  8 ++--
 lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c    |  1 +
 13 files changed, 122 insertions(+), 64 deletions(-)

(limited to 'lib/rbcodec/codecs/libopus/silk')

diff --git a/lib/rbcodec/codecs/libopus/silk/CNG.c b/lib/rbcodec/codecs/libopus/silk/CNG.c
index 8481d95dbe..bb30a7ccf2 100644
--- a/lib/rbcodec/codecs/libopus/silk/CNG.c
+++ b/lib/rbcodec/codecs/libopus/silk/CNG.c
@@ -34,7 +34,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* Generates excitation for CNG LPC synthesis */
 static OPUS_INLINE void silk_CNG_exc(
-    opus_int32                       residual_Q10[],     /* O    CNG residual signal Q10                     */
+    opus_int32                       exc_Q10[],          /* O    CNG excitation signal Q10                   */
     opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
     opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
     opus_int                         length,             /* I    Length                                      */
@@ -55,7 +55,7 @@ static OPUS_INLINE void silk_CNG_exc(
         idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
         silk_assert( idx >= 0 );
         silk_assert( idx <= CNG_BUF_MASK_MAX );
-        residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
+        exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
     }
     *rand_seed = seed;
 }
@@ -85,7 +85,7 @@ void silk_CNG(
 )
 {
     opus_int   i, subfr;
-    opus_int32 sum_Q6, max_Gain_Q16;
+    opus_int32 sum_Q6, max_Gain_Q16, gain_Q16;
     opus_int16 A_Q12[ MAX_LPC_ORDER ];
     silk_CNG_struct *psCNG = &psDec->sCNG;
     SAVE_STACK;
@@ -125,11 +125,20 @@ void silk_CNG(
     /* Add CNG when packet is lost or during DTX */
     if( psDec->lossCnt ) {
         VARDECL( opus_int32, CNG_sig_Q10 );
-
         ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
 
         /* Generate CNG excitation */
-        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
+		gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
+		if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) {
+			gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 );
+			gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
+			gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 );
+		} else {
+			gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 );
+			gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
+			gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
+		}
+        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed );
 
         /* Convert CNG NLSF to filter representation */
         silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
@@ -162,7 +171,7 @@ void silk_CNG(
             /* Update states */
             CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
 
-            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) );
+            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) );
         }
         silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
     } else {
diff --git a/lib/rbcodec/codecs/libopus/silk/PLC.c b/lib/rbcodec/codecs/libopus/silk/PLC.c
index 01f40014c4..8b0a8fe57d 100644
--- a/lib/rbcodec/codecs/libopus/silk/PLC.c
+++ b/lib/rbcodec/codecs/libopus/silk/PLC.c
@@ -165,6 +165,30 @@ static OPUS_INLINE void silk_PLC_update(
     psPLC->nb_subfr = psDec->nb_subfr;
 }
 
+static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2,
+      const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr)
+{
+    int i, k;
+    VARDECL( opus_int16, exc_buf );
+    opus_int16 *exc_buf_ptr;
+    SAVE_STACK;
+    ALLOC( exc_buf, 2*subfr_length, opus_int16 );
+    /* Find random noise component */
+    /* Scale previous excitation signal */
+    exc_buf_ptr = exc_buf;
+    for( k = 0; k < 2; k++ ) {
+        for( i = 0; i < subfr_length; i++ ) {
+            exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
+                silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) );
+        }
+        exc_buf_ptr += subfr_length;
+    }
+    /* Find the subframe with lowest energy of the last two and use that as random noise generator */
+    silk_sum_sqr_shift( energy1, shift1, exc_buf,                  subfr_length );
+    silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length );
+    RESTORE_STACK;
+}
+
 static OPUS_INLINE void silk_PLC_conceal(
     silk_decoder_state                  *psDec,             /* I/O Decoder state        */
     silk_decoder_control                *psDecCtrl,         /* I/O Decoder control      */
@@ -177,19 +201,26 @@ static OPUS_INLINE void silk_PLC_conceal(
     opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr;
     opus_int32 LPC_pred_Q10, LTP_pred_Q12;
     opus_int16 rand_scale_Q14;
-    opus_int16 *B_Q14, *exc_buf_ptr;
+    opus_int16 *B_Q14;
     opus_int32 *sLPC_Q14_ptr;
-    VARDECL( opus_int16, exc_buf );
     opus_int16 A_Q12[ MAX_LPC_ORDER ];
+#ifdef SMALL_FOOTPRINT
+    opus_int16 *sLTP;
+#else
     VARDECL( opus_int16, sLTP );
+#endif
     VARDECL( opus_int32, sLTP_Q14 );
     silk_PLC_struct *psPLC = &psDec->sPLC;
     opus_int32 prevGain_Q10[2];
     SAVE_STACK;
 
-    ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
-    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
     ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
+#ifdef SMALL_FOOTPRINT
+    /* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */
+    sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length;
+#else
+    ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
+#endif
 
     prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
     prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
@@ -198,19 +229,7 @@ static OPUS_INLINE void silk_PLC_conceal(
        silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) );
     }
 
-    /* Find random noise component */
-    /* Scale previous excitation signal */
-    exc_buf_ptr = exc_buf;
-    for( k = 0; k < 2; k++ ) {
-        for( i = 0; i < psPLC->subfr_length; i++ ) {
-            exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT(
-                silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) );
-        }
-        exc_buf_ptr += psPLC->subfr_length;
-    }
-    /* Find the subframe with lowest energy of the last two and use that as random noise generator */
-    silk_sum_sqr_shift( &energy1, &shift1, exc_buf,                         psPLC->subfr_length );
-    silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length );
+    silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr);
 
     if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) {
         /* First sub-frame has lowest energy */
diff --git a/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h b/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h
index 1b58057910..4be0985435 100644
--- a/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h
+++ b/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h
@@ -587,6 +587,11 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 #include "arm/SigProc_FIX_armv5e.h"
 #endif
 
+#if defined(MIPSr1_ASM)
+#include "mips/sigproc_fix_mipsr1.h"
+#endif
+
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/lib/rbcodec/codecs/libopus/silk/code_signs.c b/lib/rbcodec/codecs/libopus/silk/code_signs.c
index 561043c739..6ac25cb389 100644
--- a/lib/rbcodec/codecs/libopus/silk/code_signs.c
+++ b/lib/rbcodec/codecs/libopus/silk/code_signs.c
@@ -76,7 +76,7 @@ void silk_encode_signs(
 /* Decodes signs of excitation */
 void silk_decode_signs(
     ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int16                  pulses[],                           /* I/O  pulse signal                                */
     opus_int                    length,                             /* I    length of input                             */
     const opus_int              signalType,                         /* I    Signal type                                 */
     const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
@@ -85,7 +85,7 @@ void silk_decode_signs(
 {
     opus_int         i, j, p;
     opus_uint8       icdf[ 2 ];
-    opus_int         *q_ptr;
+    opus_int16       *q_ptr;
     const opus_uint8 *icdf_ptr;
 
     icdf[ 1 ] = 0;
diff --git a/lib/rbcodec/codecs/libopus/silk/dec_API.c b/lib/rbcodec/codecs/libopus/silk/dec_API.c
index 4cbcf71514..1087c6726a 100644
--- a/lib/rbcodec/codecs/libopus/silk/dec_API.c
+++ b/lib/rbcodec/codecs/libopus/silk/dec_API.c
@@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "API.h"
 #include "main.h"
 #include "stack_alloc.h"
+#include "os_support.h"
 
 /************************/
 /* Decoder Super Struct */
@@ -90,7 +91,8 @@ opus_int silk_Decode(                                   /* O    Returns error co
     opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
     opus_int32 nSamplesOutDec, LBRR_symbol;
     opus_int16 *samplesOut1_tmp[ 2 ];
-    VARDECL( opus_int16, samplesOut1_tmp_storage );
+    VARDECL( opus_int16, samplesOut1_tmp_storage1 );
+    VARDECL( opus_int16, samplesOut1_tmp_storage2 );
     VARDECL( opus_int16, samplesOut2_tmp );
     opus_int32 MS_pred_Q13[ 2 ] = { 0 };
     opus_int16 *resample_out_ptr;
@@ -98,6 +100,7 @@ opus_int silk_Decode(                                   /* O    Returns error co
     silk_decoder_state *channel_state = psDec->channel_state;
     opus_int has_side;
     opus_int stereo_to_mono;
+    int delay_stack_alloc;
     SAVE_STACK;
 
     silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@@ -196,7 +199,7 @@ opus_int silk_Decode(                                   /* O    Returns error co
             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
                     if( channel_state[ n ].LBRR_flags[ i ] ) {
-                        opus_int pulses[ MAX_FRAME_LENGTH ];
+                        opus_int16 pulses[ MAX_FRAME_LENGTH ];
                         opus_int condCoding;
 
                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
@@ -251,13 +254,22 @@ opus_int silk_Decode(                                   /* O    Returns error co
         psDec->channel_state[ 1 ].first_frame_after_reset = 1;
     }
 
-    ALLOC( samplesOut1_tmp_storage,
-           decControl->nChannelsInternal*(
-               channel_state[ 0 ].frame_length + 2 ),
+    /* Check if the temp buffer fits into the output PCM buffer. If it fits,
+       we can delay allocating the temp buffer until after the SILK peak stack
+       usage. We need to use a < and not a <= because of the two extra samples. */
+    delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
+          < decControl->API_sampleRate*decControl->nChannelsAPI;
+    ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
+           : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
            opus_int16 );
-    samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
-    samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
-                           + channel_state[ 0 ].frame_length + 2;
+    if ( delay_stack_alloc )
+    {
+       samplesOut1_tmp[ 0 ] = samplesOut;
+       samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
+    } else {
+       samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
+       samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
+    }
 
     if( lostFlag == FLAG_DECODE_NORMAL ) {
         has_side = !decode_only_middle;
@@ -312,6 +324,15 @@ opus_int silk_Decode(                                   /* O    Returns error co
         resample_out_ptr = samplesOut;
     }
 
+    ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
+           ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
+           : ALLOC_NONE,
+           opus_int16 );
+    if ( delay_stack_alloc ) {
+       OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
+       samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
+       samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
+    }
     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
 
         /* Resample decoded signal to API_sampleRate */
diff --git a/lib/rbcodec/codecs/libopus/silk/decode_core.c b/lib/rbcodec/codecs/libopus/silk/decode_core.c
index 87fbd5de9f..af68b75da9 100644
--- a/lib/rbcodec/codecs/libopus/silk/decode_core.c
+++ b/lib/rbcodec/codecs/libopus/silk/decode_core.c
@@ -39,7 +39,7 @@ void silk_decode_core(
     silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
     silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
     opus_int16                  xq[],                           /* O    Decoded speech                              */
-    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+    const opus_int16            pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
 )
 {
     opus_int   i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
@@ -49,7 +49,7 @@ void silk_decode_core(
     opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10;
     opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14;
     VARDECL( opus_int32, res_Q14 );
-/*    VARDECL( opus_int32, sLPC_Q14 ); */
+    VARDECL( opus_int32, sLPC_Q14 );
     SAVE_STACK;
 
     silk_assert( psDec->prev_gain_Q16 != 0 );
@@ -57,8 +57,7 @@ void silk_decode_core(
     ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
     ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
     ALLOC( res_Q14, psDec->subfr_length, opus_int32 );
-/*    ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); */
-    opus_int32 sLPC_Q14[psDec->subfr_length + MAX_LPC_ORDER]; /* worst case is 80 + 16 */
+    ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 );
 
     offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ];
 
diff --git a/lib/rbcodec/codecs/libopus/silk/decode_frame.c b/lib/rbcodec/codecs/libopus/silk/decode_frame.c
index abc00a3d54..6a7cffbbe0 100644
--- a/lib/rbcodec/codecs/libopus/silk/decode_frame.c
+++ b/lib/rbcodec/codecs/libopus/silk/decode_frame.c
@@ -47,13 +47,10 @@ opus_int silk_decode_frame(
 {
     VARDECL( silk_decoder_control, psDecCtrl );
     opus_int         L, mv_len, ret = 0;
-    VARDECL( opus_int, pulses );
     SAVE_STACK;
 
     L = psDec->frame_length;
     ALLOC( psDecCtrl, 1, silk_decoder_control );
-    ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
-                   ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
     psDecCtrl->LTP_scale_Q14 = 0;
 
     /* Safety checks */
@@ -62,6 +59,9 @@ opus_int silk_decode_frame(
     if(   lostFlag == FLAG_DECODE_NORMAL ||
         ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) )
     {
+        VARDECL( opus_int16, pulses );
+        ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
+                       ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 );
         /*********************************************/
         /* Decode quantization indices of side info  */
         /*********************************************/
@@ -107,16 +107,16 @@ opus_int silk_decode_frame(
     silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
     silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
 
-    /****************************************************************/
-    /* Ensure smooth connection of extrapolated and good frames     */
-    /****************************************************************/
-    silk_PLC_glue_frames( psDec, pOut, L );
-
     /************************************************/
     /* Comfort noise generation / estimation        */
     /************************************************/
     silk_CNG( psDec, psDecCtrl, pOut, L );
 
+    /****************************************************************/
+    /* Ensure smooth connection of extrapolated and good frames     */
+    /****************************************************************/
+    silk_PLC_glue_frames( psDec, pOut, L );
+
     /* Update some decoder state variables */
     psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
 
diff --git a/lib/rbcodec/codecs/libopus/silk/decode_pulses.c b/lib/rbcodec/codecs/libopus/silk/decode_pulses.c
index e8a87c2ab7..1e14bc37b4 100644
--- a/lib/rbcodec/codecs/libopus/silk/decode_pulses.c
+++ b/lib/rbcodec/codecs/libopus/silk/decode_pulses.c
@@ -36,7 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /*********************************************/
 void silk_decode_pulses(
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    opus_int16                  pulses[],                       /* O    Excitation signal                           */
     const opus_int              signalType,                     /* I    Sigtype                                     */
     const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
     const opus_int              frame_length                    /* I    Frame length                                */
@@ -44,7 +44,7 @@ void silk_decode_pulses(
 {
     opus_int   i, j, k, iter, abs_q, nLS, RateLevelIndex;
     opus_int   sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ];
-    opus_int   *pulses_ptr;
+    opus_int16 *pulses_ptr;
     const opus_uint8 *cdf_ptr;
 
     /*********************/
@@ -84,7 +84,7 @@ void silk_decode_pulses(
         if( sum_pulses[ i ] > 0 ) {
             silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] );
         } else {
-            silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) );
+            silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) );
         }
     }
 
diff --git a/lib/rbcodec/codecs/libopus/silk/macros.h b/lib/rbcodec/codecs/libopus/silk/macros.h
index 482dc3c6eb..05623b5df8 100644
--- a/lib/rbcodec/codecs/libopus/silk/macros.h
+++ b/lib/rbcodec/codecs/libopus/silk/macros.h
@@ -79,17 +79,24 @@ POSSIBILITY OF SUCH DAMAGE.
                                         (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) :    \
                                         ((((a)^0x80000000) & (b)  & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
 
-#include "ecintrin.h"
+#if defined(MIPSr1_ASM)
+#include "mips/macros_mipsr1.h"
+#endif
 
+#include "ecintrin.h"
+#ifndef OVERRIDE_silk_CLZ16
 static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
 {
     return 32 - EC_ILOG(in16<<16|0x8000);
 }
+#endif
 
+#ifndef OVERRIDE_silk_CLZ32
 static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
 {
     return in32 ? 32 - EC_ILOG(in32) : 32;
 }
+#endif
 
 /* Row based */
 #define matrix_ptr(Matrix_base_adr, row, column, N) \
diff --git a/lib/rbcodec/codecs/libopus/silk/main.h b/lib/rbcodec/codecs/libopus/silk/main.h
index 2bdf89784d..77524f5b57 100644
--- a/lib/rbcodec/codecs/libopus/silk/main.h
+++ b/lib/rbcodec/codecs/libopus/silk/main.h
@@ -116,7 +116,7 @@ void silk_encode_signs(
 /* Decodes signs of excitation */
 void silk_decode_signs(
     ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int16                  pulses[],                           /* I/O  pulse signal                                */
     opus_int                    length,                             /* I    length of input                             */
     const opus_int              signalType,                         /* I    Signal type                                 */
     const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
@@ -161,7 +161,7 @@ void silk_shell_encoder(
 
 /* Shell decoder, operates on one shell code frame of 16 pulses */
 void silk_shell_decoder(
-    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    opus_int16                  *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
     const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
 );
@@ -397,13 +397,13 @@ void silk_decode_core(
     silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
     silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
     opus_int16                  xq[],                           /* O    Decoded speech                              */
-    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+    const opus_int16            pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
 );
 
 /* Decode quantization indices of excitation (Shell coding) */
 void silk_decode_pulses(
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    opus_int16                  pulses[],                       /* O    Excitation signal                           */
     const opus_int              signalType,                     /* I    Sigtype                                     */
     const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
     const opus_int              frame_length                    /* I    Frame length                                */
diff --git a/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c b/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c
index c7b4f6ed5e..6b2b3a2e18 100644
--- a/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c
+++ b/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c
@@ -72,13 +72,10 @@ void silk_resampler_private_IIR_FIR(
     silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS;
     opus_int32 nSamplesIn;
     opus_int32 max_index_Q16, index_increment_Q16;
-/*    VARDECL( opus_int16, buf );
-    SAVE_STACK; */
+    VARDECL( opus_int16, buf );
+    SAVE_STACK;
 
-/*    ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); */
-
-    /* worst case = 2*16*10+8 = 328 * 2 = 656bytes */
-    opus_int16 buf[2 * S->batchSize + RESAMPLER_ORDER_FIR_12];
+    ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 );
 
     /* Copy buffered samples to start of buffer */
     silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
@@ -106,5 +103,5 @@ void silk_resampler_private_IIR_FIR(
 
     /* Copy last part of filtered signal to the state for the next call */
     silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) );
-/*    RESTORE_STACK; */
+    RESTORE_STACK;
 }
diff --git a/lib/rbcodec/codecs/libopus/silk/shell_coder.c b/lib/rbcodec/codecs/libopus/silk/shell_coder.c
index 9d6e1bb366..d80dd51f9e 100644
--- a/lib/rbcodec/codecs/libopus/silk/shell_coder.c
+++ b/lib/rbcodec/codecs/libopus/silk/shell_coder.c
@@ -60,8 +60,8 @@ static OPUS_INLINE void encode_split(
 #endif
 
 static OPUS_INLINE void decode_split(
-    opus_int                    *p_child1,      /* O    pulse amplitude of first child subframe     */
-    opus_int                    *p_child2,      /* O    pulse amplitude of second child subframe    */
+    opus_int16                  *p_child1,      /* O    pulse amplitude of first child subframe     */
+    opus_int16                  *p_child2,      /* O    pulse amplitude of second child subframe    */
     ec_dec                      *psRangeDec,    /* I/O  Compressor data structure                   */
     const opus_int              p,              /* I    pulse amplitude of current subframe         */
     const opus_uint8            *shell_table    /* I    table of shell cdfs                         */
@@ -121,12 +121,12 @@ void silk_shell_encoder(
 
 /* Shell decoder, operates on one shell code frame of 16 pulses */
 void silk_shell_decoder(
-    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    opus_int16                  *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
     const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
 )
 {
-    opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
+    opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
 
     /* this function operates on one shell code frame of 16 pulses */
     silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
diff --git a/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c b/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c
index 12514c9917..129df191d8 100644
--- a/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c
+++ b/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c
@@ -53,6 +53,7 @@ void silk_sum_sqr_shift(
             /* Scale down */
             nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 );
             shft = 2;
+            i+=2;
             break;
         }
     }
-- 
cgit v1.2.3