From 580b307fd791c0997a8831bc800bba87797bfb7e Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Mon, 20 May 2013 22:25:57 +0200 Subject: Sync opus codec to upstream git Sync opus codec to upstream commit 02fed471a4568852d6618e041c4f2af0d7730ee2 (August 30 2013) This brings in a lot of optimizations but also makes the diff between our codec and the upstream much smaller as most of our optimizations have been upstreamed or superseded. Speedups across the board for CELT mode files: 64kbps 128kbps H300 9.82MHz 15.48MHz c200 4.86MHz 9.63MHz fuze v1 10.32MHz 15.92MHz For the silk mode test file (16kbps) arm targets get a speedup of about 2MHz while the H300 is 7.8MHz slower, likely because it's now using the pseudostack more rather than the real stack which is in iram. Patches to get around that are upcoming. Change-Id: Ifecf963e461c51ac42e09dac1e91bc4bc3b12fa3 --- lib/rbcodec/codecs/libopus/opus_decoder.c | 259 ++++++++++++++++++++++-------- 1 file changed, 191 insertions(+), 68 deletions(-) (limited to 'lib/rbcodec/codecs/libopus/opus_decoder.c') diff --git a/lib/rbcodec/codecs/libopus/opus_decoder.c b/lib/rbcodec/codecs/libopus/opus_decoder.c index 3d42bb2e70..b430a4df07 100644 --- a/lib/rbcodec/codecs/libopus/opus_decoder.c +++ b/lib/rbcodec/codecs/libopus/opus_decoder.c @@ -26,11 +26,11 @@ */ #ifdef HAVE_CONFIG_H -#include "opus_config.h" +#include "config.h" #endif #ifndef OPUS_BUILD -#error "OPUS_BUILD _MUST_ be defined to build Opus and you probably want a decent config.h, see README for more details." +#error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details." 
#endif #include @@ -46,6 +46,7 @@ #include "structs.h" #include "define.h" #include "mathops.h" +#include "cpu_support.h" struct OpusDecoder { int celt_dec_offset; @@ -64,8 +65,13 @@ struct OpusDecoder { int prev_mode; int frame_size; int prev_redundancy; + int last_packet_duration; +#ifndef FIXED_POINT + opus_val16 softclip_mem[2]; +#endif opus_uint32 rangeFinal; + int arch; }; #ifdef FIXED_POINT @@ -115,6 +121,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) st->Fs = Fs; st->DecControl.API_sampleRate = st->Fs; st->DecControl.nChannelsAPI = st->channels; + st->arch = opus_select_arch(); /* Reset decoder */ ret = silk_InitDecoder( silk_dec ); @@ -131,7 +138,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) return OPUS_OK; } -#define STATIC_DECODER_SIZE 26532 /* 26486 for 32bit, 26532 for 64bit environment */ +#define STATIC_DECODER_SIZE 26540 /* 26540 for 64bit environment */ static char s_dec[STATIC_DECODER_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error) @@ -208,8 +215,14 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, int i, silk_ret=0, celt_ret=0; ec_dec dec; opus_int32 silk_frame_size; + int pcm_silk_size; VARDECL(opus_int16, pcm_silk); - VARDECL(opus_val16, pcm_transition); + int pcm_transition_silk_size; + VARDECL(opus_val16, pcm_transition_silk); + int pcm_transition_celt_size; + VARDECL(opus_val16, pcm_transition_celt); + opus_val16 *pcm_transition = NULL; /* Silence false positive "may be used uninitialized" warning */ + int redundant_audio_size; VARDECL(opus_val16, redundant_audio); int audiosize; @@ -265,33 +278,30 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } } - /* For CELT/hybrid PLC of more than 20 ms, do multiple calls */ - if (data==NULL && frame_size > F20 && mode != MODE_SILK_ONLY) - { - int nb_samples = 0; - do { - int ret = opus_decode_frame(st, NULL, 0, pcm, F20, 0); - if (ret != 
F20) - { - RESTORE_STACK; - return OPUS_INTERNAL_ERROR; - } - pcm += F20*st->channels; - nb_samples += F20; - } while (nb_samples < frame_size); - RESTORE_STACK; - return frame_size; - } - ALLOC(pcm_transition, F5*st->channels, opus_val16); + /* For CELT/hybrid PLC of more than 20 ms, opus_decode_native() will do + multiple calls */ + if (data==NULL && mode != MODE_SILK_ONLY) + frame_size = IMIN(frame_size, F20); + pcm_transition_silk_size = 0; + pcm_transition_celt_size = 0; if (data!=NULL && st->prev_mode > 0 && ( (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy) || (mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ) ) { transition = 1; + /* Decide where to allocate the stack memory for pcm_transition */ if (mode == MODE_CELT_ONLY) - opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); + pcm_transition_celt_size = F5*st->channels; + else + pcm_transition_silk_size = F5*st->channels; + } + ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16); + if (transition && mode == MODE_CELT_ONLY) + { + pcm_transition = pcm_transition_celt; + opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); } if (audiosize > frame_size) { @@ -302,8 +312,9 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, frame_size = audiosize; } - ALLOC(pcm_silk, IMAX(F10, frame_size)*st->channels, opus_int16); - ALLOC(redundant_audio, F5*st->channels, opus_val16); + /* Don't allocate any memory when in CELT-only mode */ + pcm_silk_size = (mode != MODE_CELT_ONLY) ? 
IMAX(F10, frame_size)*st->channels : 0; + ALLOC(pcm_silk, pcm_silk_size, opus_int16); /* SILK processing */ if (mode != MODE_CELT_ONLY) @@ -417,10 +428,22 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } if (redundancy) + { transition = 0; + pcm_transition_silk_size=0; + } + + ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16); if (transition && mode != MODE_CELT_ONLY) + { + pcm_transition = pcm_transition_silk; opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); + } + + /* Only allocation memory for redundancy if/when needed */ + redundant_audio_size = redundancy ? F5*st->channels : 0; + ALLOC(redundant_audio, redundant_audio_size, opus_val16); /* 5 ms redundant frame for CELT->SILK*/ if (redundancy && celt_to_silk) @@ -534,12 +557,19 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, st->prev_mode = mode; st->prev_redundancy = redundancy && !celt_to_silk; + + if (celt_ret>=0) + { + if (OPUS_CHECK_ARRAY(pcm, audiosize*st->channels)) + OPUS_PRINT_INT(audiosize); + } + RESTORE_STACK; return celt_ret < 0 ? 
celt_ret : audiosize; } -static int parse_size(const unsigned char *data, opus_int32 len, short *size) +static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *size) { if (len<1) { @@ -561,14 +591,14 @@ static int parse_size(const unsigned char *data, opus_int32 len, short *size) static int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, int self_delimited, unsigned char *out_toc, - const unsigned char *frames[48], short size[48], int *payload_offset) + const unsigned char *frames[48], opus_int16 size[48], int *payload_offset) { int i, bytes; int count; int cbr; unsigned char ch, toc; int framesize; - int last_size; + opus_int32 last_size; const unsigned char *data0 = data; if (size==NULL) @@ -594,7 +624,9 @@ static int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, { if (len&0x1) return OPUS_INVALID_PACKET; - size[0] = last_size = len/2; + last_size = len/2; + /* If last_size doesn't fit in size[0], we'll catch it later */ + size[0] = (opus_int16)last_size; } break; /* Two VBR frames */ @@ -620,16 +652,14 @@ static int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, /* Padding flag is bit 6 */ if (ch&0x40) { - int padding=0; int p; do { if (len<=0) return OPUS_INVALID_PACKET; p = *data++; len--; - padding += p==255 ? 254: p; + len -= p==255 ? 
254: p; } while (p==255); - len -= padding; } if (len<0) return OPUS_INVALID_PACKET; @@ -657,7 +687,7 @@ static int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, if (last_size*count!=len) return OPUS_INVALID_PACKET; for (i=0;i 1275) return OPUS_INVALID_PACKET; - size[count-1] = last_size; + size[count-1] = (opus_int16)last_size; } if (frames) @@ -701,14 +731,14 @@ static int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, *out_toc = toc; if (payload_offset) - *payload_offset = data-data0; + *payload_offset = (int)(data-data0); return count; } int opus_packet_parse(const unsigned char *data, opus_int32 len, unsigned char *out_toc, const unsigned char *frames[48], - short size[48], int *payload_offset) + opus_int16 size[48], int *payload_offset) { return opus_packet_parse_impl(data, len, 0, out_toc, frames, size, payload_offset); @@ -716,50 +746,120 @@ int opus_packet_parse(const unsigned char *data, opus_int32 len, int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, - int self_delimited, int *packet_offset) + int self_delimited, int *packet_offset, int soft_clip) { int i, nb_samples; int count, offset; unsigned char toc; int tot_offset; + int packet_frame_size, packet_bandwidth, packet_mode, packet_stream_channels; /* 48 x 2.5 ms = 120 ms */ - short size[48]; + opus_int16 size[48]; if (decode_fec<0 || decode_fec>1) return OPUS_BAD_ARG; + /* For FEC/PLC, frame_size has to be to have a multiple of 2.5 ms */ + if ((decode_fec || len==0 || data==NULL) && frame_size%(st->Fs/400)!=0) + return OPUS_BAD_ARG; if (len==0 || data==NULL) - return opus_decode_frame(st, NULL, 0, pcm, frame_size, 0); - else if (len<0) + { + int pcm_count=0; + do { + int ret; + ret = opus_decode_frame(st, NULL, 0, pcm+pcm_count*st->channels, frame_size-pcm_count, 0); + if (ret<0) + return ret; + pcm_count += ret; + } while (pcm_count < frame_size); + celt_assert(pcm_count == 
frame_size); + if (OPUS_CHECK_ARRAY(pcm, pcm_count*st->channels)) + OPUS_PRINT_INT(pcm_count); + st->last_packet_duration = pcm_count; + return pcm_count; + } else if (len<0) return OPUS_BAD_ARG; - tot_offset = 0; - st->mode = opus_packet_get_mode(data); - st->bandwidth = opus_packet_get_bandwidth(data); - st->frame_size = opus_packet_get_samples_per_frame(data, st->Fs); - st->stream_channels = opus_packet_get_nb_channels(data); + packet_mode = opus_packet_get_mode(data); + packet_bandwidth = opus_packet_get_bandwidth(data); + packet_frame_size = opus_packet_get_samples_per_frame(data, st->Fs); + packet_stream_channels = opus_packet_get_nb_channels(data); count = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL, size, &offset); + + data += offset; + + if (decode_fec) + { + int duration_copy; + int ret; + /* If no FEC can be present, run the PLC (recursive call) */ + if (frame_size < packet_frame_size || packet_mode == MODE_CELT_ONLY || st->mode == MODE_CELT_ONLY) + return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, soft_clip); + /* Otherwise, run the PLC on everything except the size for which we might have FEC */ + duration_copy = st->last_packet_duration; + if (frame_size-packet_frame_size!=0) + { + ret = opus_decode_native(st, NULL, 0, pcm, frame_size-packet_frame_size, 0, 0, NULL, soft_clip); + if (ret<0) + { + st->last_packet_duration = duration_copy; + return ret; + } + celt_assert(ret==frame_size-packet_frame_size); + } + /* Complete with FEC */ + st->mode = packet_mode; + st->bandwidth = packet_bandwidth; + st->frame_size = packet_frame_size; + st->stream_channels = packet_stream_channels; + ret = opus_decode_frame(st, data, size[0], pcm+st->channels*(frame_size-packet_frame_size), + packet_frame_size, 1); + if (ret<0) + return ret; + else { + if (OPUS_CHECK_ARRAY(pcm, frame_size*st->channels)) + OPUS_PRINT_INT(frame_size); + st->last_packet_duration = frame_size; + return frame_size; + } + } + tot_offset = 0; if (count < 0) 
return count; - data += offset; tot_offset += offset; - if (count*st->frame_size > frame_size) + if (count*packet_frame_size > frame_size) return OPUS_BUFFER_TOO_SMALL; + + /* Update the state as the last step to avoid updating it on an invalid packet */ + st->mode = packet_mode; + st->bandwidth = packet_bandwidth; + st->frame_size = packet_frame_size; + st->stream_channels = packet_stream_channels; + nb_samples=0; for (i=0;ichannels, frame_size-nb_samples, 0); if (ret<0) return ret; + celt_assert(ret==packet_frame_size); data += size[i]; tot_offset += size[i]; - pcm += ret*st->channels; nb_samples += ret; } if (packet_offset != NULL) *packet_offset = tot_offset; + st->last_packet_duration = nb_samples; + if (OPUS_CHECK_ARRAY(pcm, nb_samples*st->channels)) + OPUS_PRINT_INT(nb_samples); +#ifndef FIXED_POINT + if (soft_clip) + opus_pcm_soft_clip(pcm, nb_samples, st->channels, st->softclip_mem); + else + st->softclip_mem[0]=st->softclip_mem[1]=0; +#endif return nb_samples; } @@ -768,7 +868,7 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data, int opus_decode(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) { - return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL); + return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); } #ifndef DISABLE_FLOAT_API @@ -781,7 +881,7 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data, ALLOC(out, frame_size*st->channels, opus_int16); - ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL); + ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0); if (ret > 0) { for (i=0;ichannels;i++) @@ -809,7 +909,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data, ALLOC(out, frame_size*st->channels, float); - ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL); + ret = opus_decode_native(st, data, len, out, frame_size, 
decode_fec, 0, NULL, 1); if (ret > 0) { for (i=0;ichannels;i++) @@ -822,7 +922,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data, int opus_decode_float(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) { - return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL); + return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); } #endif @@ -845,12 +945,20 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) case OPUS_GET_BANDWIDTH_REQUEST: { opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } *value = st->bandwidth; } break; case OPUS_GET_FINAL_RANGE_REQUEST: { opus_uint32 *value = va_arg(ap, opus_uint32*); + if (!value) + { + goto bad_arg; + } *value = st->rangeFinal; } break; @@ -869,10 +977,9 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) case OPUS_GET_SAMPLE_RATE_REQUEST: { opus_int32 *value = va_arg(ap, opus_int32*); - if (value==NULL) + if (!value) { - ret = OPUS_BAD_ARG; - break; + goto bad_arg; } *value = st->Fs; } @@ -880,10 +987,9 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) case OPUS_GET_PITCH_REQUEST: { opus_int32 *value = va_arg(ap, opus_int32*); - if (value==NULL) + if (!value) { - ret = OPUS_BAD_ARG; - break; + goto bad_arg; } if (st->prev_mode == MODE_CELT_ONLY) celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value)); @@ -894,10 +1000,9 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) case OPUS_GET_GAIN_REQUEST: { opus_int32 *value = va_arg(ap, opus_int32*); - if (value==NULL) + if (!value) { - ret = OPUS_BAD_ARG; - break; + goto bad_arg; } *value = st->decode_gain; } @@ -907,12 +1012,21 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) 
opus_int32 value = va_arg(ap, opus_int32); if (value<-32768 || value>32767) { - ret = OPUS_BAD_ARG; - break; + goto bad_arg; } st->decode_gain = value; } break; + case OPUS_GET_LAST_PACKET_DURATION_REQUEST: + { + opus_uint32 *value = va_arg(ap, opus_uint32*); + if (!value) + { + goto bad_arg; + } + *value = st->last_packet_duration; + } + break; default: /*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/ ret = OPUS_UNIMPLEMENTED; @@ -921,6 +1035,9 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...) va_end(ap); return ret; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; } void opus_decoder_destroy(OpusDecoder *st) @@ -989,8 +1106,8 @@ int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) return packet[1]&0x3F; } -int opus_decoder_get_nb_samples(const OpusDecoder *dec, - const unsigned char packet[], opus_int32 len) +int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, + opus_int32 Fs) { int samples; int count = opus_packet_get_nb_frames(packet, len); @@ -998,10 +1115,16 @@ int opus_decoder_get_nb_samples(const OpusDecoder *dec, if (count<0) return count; - samples = count*opus_packet_get_samples_per_frame(packet, dec->Fs); + samples = count*opus_packet_get_samples_per_frame(packet, Fs); /* Can't have more than 120 ms */ - if (samples*25 > dec->Fs*3) + if (samples*25 > Fs*3) return OPUS_INVALID_PACKET; else return samples; } + +int opus_decoder_get_nb_samples(const OpusDecoder *dec, + const unsigned char packet[], opus_int32 len) +{ + return opus_packet_get_nb_samples(packet, len, dec->Fs); +} -- cgit v1.2.3