From 9b7ec42403073ee887efc531c153e6b1b6c15bab Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Sun, 19 Jan 2014 16:31:59 +0100 Subject: Sync to upstream libopus Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer. Speeds up decoding of the following test files: H300 (cf) C200 (arm7tdmi) ipod classic (arm9e) 16 kbps (silk) 14.28 MHz 4.00 MHz 2.61 MHz 64 kbps (celt) 4.09 MHz 8.08 MHz 6.24 MHz 128 kbps (celt) 1.93 MHz 8.83 MHz 6.53 MHz Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58 --- lib/rbcodec/codecs/libopus/silk/dec_API.c | 37 ++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) (limited to 'lib/rbcodec/codecs/libopus/silk/dec_API.c') diff --git a/lib/rbcodec/codecs/libopus/silk/dec_API.c b/lib/rbcodec/codecs/libopus/silk/dec_API.c index 4cbcf71514..1087c6726a 100644 --- a/lib/rbcodec/codecs/libopus/silk/dec_API.c +++ b/lib/rbcodec/codecs/libopus/silk/dec_API.c @@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "API.h" #include "main.h" #include "stack_alloc.h" +#include "os_support.h" /************************/ /* Decoder Super Struct */ @@ -90,7 +91,8 @@ opus_int silk_Decode( /* O Returns error co opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; opus_int32 nSamplesOutDec, LBRR_symbol; opus_int16 *samplesOut1_tmp[ 2 ]; - VARDECL( opus_int16, samplesOut1_tmp_storage ); + VARDECL( opus_int16, samplesOut1_tmp_storage1 ); + VARDECL( opus_int16, samplesOut1_tmp_storage2 ); VARDECL( opus_int16, samplesOut2_tmp ); opus_int32 MS_pred_Q13[ 2 ] = { 0 }; opus_int16 *resample_out_ptr; @@ -98,6 +100,7 @@ opus_int silk_Decode( /* O Returns error co silk_decoder_state *channel_state = psDec->channel_state; opus_int has_side; opus_int stereo_to_mono; + int delay_stack_alloc; SAVE_STACK; silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); @@ -196,7 +199,7 @@ opus_int silk_Decode( /* O Returns error co for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { for( n = 0; n < decControl->nChannelsInternal; n++ ) { if( channel_state[ n ].LBRR_flags[ i ] ) { - opus_int pulses[ MAX_FRAME_LENGTH ]; + opus_int16 pulses[ MAX_FRAME_LENGTH ]; opus_int condCoding; if( decControl->nChannelsInternal == 2 && n == 0 ) { @@ -251,13 +254,22 @@ opus_int silk_Decode( /* O Returns error co psDec->channel_state[ 1 ].first_frame_after_reset = 1; } - ALLOC( samplesOut1_tmp_storage, - decControl->nChannelsInternal*( - channel_state[ 0 ].frame_length + 2 ), + /* Check if the temp buffer fits into the output PCM buffer. If it fits, + we can delay allocating the temp buffer until after the SILK peak stack + usage. We need to use a < and not a <= because of the two extra samples. */ + delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal + < decControl->API_sampleRate*decControl->nChannelsAPI; + ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE + : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), opus_int16 ); - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage - + channel_state[ 0 ].frame_length + 2; + if ( delay_stack_alloc ) + { + samplesOut1_tmp[ 0 ] = samplesOut; + samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2; + } else { + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; + } if( lostFlag == FLAG_DECODE_NORMAL ) { has_side = !decode_only_middle; @@ -312,6 +324,15 @@ opus_int silk_Decode( /* O Returns error co resample_out_ptr = samplesOut; } + ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc + ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ) + : ALLOC_NONE, + opus_int16 ); + if ( delay_stack_alloc ) { + OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2)); + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2; + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2; + } for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { /* Resample decoded signal to API_sampleRate */ -- cgit v1.2.3