From 9b7ec42403073ee887efc531c153e6b1b6c15bab Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Sun, 19 Jan 2014 16:31:59 +0100 Subject: Sync to upstream libopus Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer. Speeds up decoding of the following test files: H300 (cf) C200 (arm7tdmi) ipod classic (arm9e) 16 kbps (silk) 14.28 MHz 4.00 MHz 2.61 MHz 64 kbps (celt) 4.09 MHz 8.08 MHz 6.24 MHz 128 kbps (celt) 1.93 MHz 8.83 MHz 6.53 MHz Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58 --- lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h | 4 - lib/rbcodec/codecs/libopus/celt/arch.h | 34 +- lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h | 4 + lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h | 35 ++ lib/rbcodec/codecs/libopus/celt/bands.c | 215 ++++---- lib/rbcodec/codecs/libopus/celt/bands.h | 11 +- lib/rbcodec/codecs/libopus/celt/celt.c | 79 ++- lib/rbcodec/codecs/libopus/celt/celt.h | 11 +- lib/rbcodec/codecs/libopus/celt/celt_decoder.c | 325 ++++++----- lib/rbcodec/codecs/libopus/celt/cwrs.c | 40 +- lib/rbcodec/codecs/libopus/celt/cwrs.h | 2 +- lib/rbcodec/codecs/libopus/celt/entcode.c | 60 +++ lib/rbcodec/codecs/libopus/celt/entcode.h | 35 ++ lib/rbcodec/codecs/libopus/celt/entdec.c | 2 +- lib/rbcodec/codecs/libopus/celt/entenc.c | 2 +- lib/rbcodec/codecs/libopus/celt/fixed_generic.h | 19 +- lib/rbcodec/codecs/libopus/celt/kiss_fft.c | 482 ++++++----------- lib/rbcodec/codecs/libopus/celt/kiss_fft.h | 15 +- lib/rbcodec/codecs/libopus/celt/mdct.c | 191 ++++--- lib/rbcodec/codecs/libopus/celt/modes.h | 8 - lib/rbcodec/codecs/libopus/celt/pitch.c | 30 +- lib/rbcodec/codecs/libopus/celt/pitch.h | 18 +- lib/rbcodec/codecs/libopus/celt/rate.c | 7 +- lib/rbcodec/codecs/libopus/celt/stack_alloc.h | 8 +- .../codecs/libopus/celt/static_modes_fixed.h | 596 +++++++++++++++------ lib/rbcodec/codecs/libopus/celt/vq.c | 69 ++- lib/rbcodec/codecs/libopus/celt/vq.h | 2 +- lib/rbcodec/codecs/libopus/config.h | 4 +- lib/rbcodec/codecs/libopus/opus.c | 21 + lib/rbcodec/codecs/libopus/opus_decoder.c | 85 +-- lib/rbcodec/codecs/libopus/opus_defines.h | 50 +- lib/rbcodec/codecs/libopus/opus_private.h | 7 +- lib/rbcodec/codecs/libopus/silk/CNG.c | 21 +- lib/rbcodec/codecs/libopus/silk/PLC.c | 53 +- lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h | 5 + lib/rbcodec/codecs/libopus/silk/code_signs.c | 4 +- lib/rbcodec/codecs/libopus/silk/dec_API.c | 37 +- lib/rbcodec/codecs/libopus/silk/decode_core.c | 7 +- lib/rbcodec/codecs/libopus/silk/decode_frame.c | 16 +- lib/rbcodec/codecs/libopus/silk/decode_pulses.c | 6 +- lib/rbcodec/codecs/libopus/silk/macros.h | 9 +- lib/rbcodec/codecs/libopus/silk/main.h | 8 +- .../libopus/silk/resampler_private_IIR_FIR.c | 11 +- lib/rbcodec/codecs/libopus/silk/shell_coder.c | 8 +- lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c | 1 + lib/rbcodec/codecs/opus.c | 2 - 46 files changed, 1608 insertions(+), 1051 deletions(-) diff --git a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h index cc52f37c36..8ddb9adc96 100644 --- a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h +++ b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h @@ -65,10 +65,6 @@ do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) -# define C_MUL4(m,a,b) \ - do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \ - (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0) - # define C_MULBYSCALAR( c, s ) \ do{ (c).r = S_MUL( (c).r , s ) ;\ (c).i = S_MUL( (c).i , s ) ; }while(0) diff --git a/lib/rbcodec/codecs/libopus/celt/arch.h b/lib/rbcodec/codecs/libopus/celt/arch.h index b2d26c4ee4..035b92ff29 100644 --- a/lib/rbcodec/codecs/libopus/celt/arch.h +++ b/lib/rbcodec/codecs/libopus/celt/arch.h @@ -69,11 +69,9 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) #define IMUL32(a,b) ((a)*(b)) -#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ -#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ +#define ABS(x) ((x) < 0 ? (-(x)) : (x)) #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ -#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ @@ -108,6 +106,13 @@ typedef opus_val32 celt_ener; #define SCALEIN(a) (a) #define SCALEOUT(a) (a) +#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) +#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) + +static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { + return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; +} + #ifdef FIXED_DEBUG #include "fixed_debug.h" #else @@ -139,6 +144,22 @@ typedef float celt_sig; typedef float celt_norm; typedef float celt_ener; +#ifdef FLOAT_APPROX +/* This code should reliably detect NaN/inf even when -ffast-math is used. + Assumes IEEE 754 format. */ +static OPUS_INLINE int celt_isnan(float x) +{ + union {float f; opus_uint32 i;} in; + in.f = x; + return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; +} +#else +#ifdef __FAST_MATH__ +#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input +#endif +#define celt_isnan(x) ((x)!=(x)) +#endif + #define Q15ONE 1.0f #define NORM_SCALING 1.f @@ -148,6 +169,10 @@ typedef float celt_ener; #define VERY_LARGE16 1e15f #define Q15_ONE ((opus_val16)1.f) +/* This appears to be the same speed as C99's fabsf() but it's more portable. */ +#define ABS16(x) ((float)fabs(x)) +#define ABS32(x) ((float)fabs(x)) + #define QCONST16(x,bits) (x) #define QCONST32(x,bits) (x) @@ -186,6 +211,7 @@ typedef float celt_ener; #define MULT32_32_Q31(a,b) ((a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) +#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b)) #define MULT16_16_Q11(a,b) ((a)*(b)) @@ -203,6 +229,8 @@ typedef float celt_ener; #define SCALEIN(a) ((a)*CELT_SIG_SCALE) #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) +#define SIG2WORD16(x) (x) + #endif /* !FIXED_POINT */ #ifndef GLOBAL_STACK_SIZE diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h index b690bc8cea..efb3b1896a 100644 --- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h +++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h @@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) #undef MAC16_32_Q15 #define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b)) /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ #undef MULT32_32_Q31 diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h index 1194a7d3ec..36a6321101 100644 --- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h +++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h @@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, } #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a, + opus_val32 b) +{ + int res; + __asm__( + "#MAC16_32_Q16\n\t" + "smlawb %0, %1, %2, %3;\n" + : "=r"(res) + : "r"(b), "r"(a), "r"(c) + ); + return res; +} +#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b)) + /** 16x16 multiply-add where the result fits in 32 bits */ #undef MAC16_16 static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, @@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) } #define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) +#ifdef OPUS_ARM_INLINE_MEDIA + +#undef SIG2WORD16 +static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x) +{ + celt_sig res; + __asm__( + "#SIG2WORD16\n\t" + "ssat %0, #16, %1, ASR #12\n\t" + : "=r"(res) + : "r"(x+2048) + ); + return EXTRACT16(res); +} +#define SIG2WORD16(x) (SIG2WORD16_armv6(x)) + +#endif /* OPUS_ARM_INLINE_MEDIA */ + #endif diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c index 1ad786d795..caa70163b4 100644 --- a/lib/rbcodec/codecs/libopus/celt/bands.c +++ b/lib/rbcodec/codecs/libopus/celt/bands.c @@ -93,11 +93,11 @@ static int bitexact_log2tan(int isin,int icos) #if 0 #ifdef FIXED_POINT /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize< 0) { - int shift = celt_ilog2(maxval)-10; - j=M*eBands[i]; do { - sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), - EXTRACT16(VSHR32(X[j+c*N],shift))); - } while (++jlogN[i]>>BITRES)+LM+1)>>1); + j=eBands[i]<0) + { + do { + sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), + EXTRACT16(SHR32(X[j+c*N],shift))); + } while (++jnbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { @@ -151,18 +156,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel #else /* FIXED_POINT */ /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize<nbEBands] = celt_sqrt(sum); /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ } @@ -192,74 +195,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel /* De-normalise the energy to produce the synthesis from the unit-energy bands */ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, + int end, int M, int downsample, int silence) { - int i, c, N; + int i, N; + int bound; + celt_sig * OPUS_RESTRICT f; + const celt_norm * OPUS_RESTRICT x; const opus_int16 *eBands = m->eBands; N = M*m->shortMdctSize; - celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); - c=0; do { - celt_sig * OPUS_RESTRICT f; - const celt_norm * OPUS_RESTRICT x; - f = freq+c*N; - x = X+c*N+M*eBands[start]; - for (i=0;inbEBands], SHL16((opus_val16)eMeans[i],6)); + j=M*eBands[i]; + band_end = M*eBands[i+1]; + lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); #ifndef FIXED_POINT - g = celt_exp2(lg); + g = celt_exp2(lg); #else - /* Handle the integer part of the log energy */ - shift = 16-(lg>>DB_SHIFT); - if (shift>31) - { - shift=0; - g=0; - } else { - /* Handle the fractional part. */ - g = celt_exp2_frac(lg&((1<>DB_SHIFT); + if (shift>31) + { + shift=0; + g=0; + } else { + /* Handle the fractional part. */ + g = celt_exp2_frac(lg&((1<eBands[i+1]-m->eBands[i]; /* depth in 1/8 bits */ - depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<=0); + depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; #ifdef FIXED_POINT thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); @@ -352,7 +362,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas } } -static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N) +static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) { int i = bandID; int j; @@ -372,25 +382,25 @@ static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, cons celt_norm r, l; l = X[j]; r = Y[j]; - X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r); + X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14)); /* Side is not encoded, no need to calculate */ } } -static void stereo_split(celt_norm *X, celt_norm *Y, int N) +static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N) { int j; for (j=0;jm->nbEBands-4) - hf_sum += 32*(tcount[1]+tcount[0])/N; + hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); sum += tmp*256; nbBands++; @@ -496,7 +505,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, if (update_hf) { if (hf_sum) - hf_sum /= C*(4-m->nbEBands+end); + hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); *hf_average = (*hf_average+hf_sum)>>1; hf_sum = *hf_average; if (*tapset_decision==2) @@ -512,7 +521,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, } /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ celt_assert(nbBands>0); /* end has to be non-zero */ - sum /= nbBands; + celt_assert(sum>=0); + sum = celt_udiv(sum, nbBands); /* Recursive averaging */ sum = (sum+*average)>>1; *average = sum; @@ -571,8 +581,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard for (j=0;j=0); + itheta = celt_udiv((opus_int32)itheta*16384, qn); if (encode && stereo) { if (itheta==0) @@ -1025,8 +1035,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, fill &= cm_mask; if (!fill) { - for (j=0;j1)) { - int j; - for (j=0;joverlap; + nbEBands = mode->nbEBands; + N = mode->shortMdctSize<shortMdctSize; + B = M; + NB = mode->shortMdctSize; shift = mode->maxLM; } else { B = 1; - N = mode->shortMdctSize<shortMdctSize<maxLM-LM; } - c=0; do { - /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ + + if (CC==2&&C==1) + { + /* Copying a mono streams to two channels */ + celt_sig *freq2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ + freq2 = out_syn[1]+overlap/2; + OPUS_COPY(freq2, freq, N); for (b=0;bmdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); - } while (++cmdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); + for (b=0;bmdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B); + } else if (CC==1&&C==2) + { + /* Downmixing a stereo stream to mono */ + celt_sig *freq2; + freq2 = out_syn[0]+overlap/2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Use the output buffer as temp array before downmixing. */ + denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, + downsample, silence); + for (i=0;imdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); + } else { + /* Normal case (mono or stereo) */ + c=0; do { + denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, + downsample, silence); + for (b=0;bmdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B); + } while (++c>1, opus_val16 ); + pitch_downsample(decode_mem, lp_pitch_buf, + DECODE_BUFFER_SIZE, C, arch); + pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, + DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, + PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); + pitch_index = PLC_PITCH_LAG_MAX-pitch_index; + RESTORE_STACK; + return pitch_index; +} + +static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) { int c; int i; @@ -343,11 +424,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R int nbEBands; int overlap; int start; - int downsample; int loss_count; int noise_based; const opus_int16 *eBands; - VARDECL(celt_sig, scratch); SAVE_STACK; mode = st->mode; @@ -367,14 +446,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R loss_count = st->loss_count; start = st->start; - downsample = st->downsample; noise_based = loss_count >= 5 || start != 0; - ALLOC(scratch, noise_based?N*C:N, celt_sig); if (noise_based) { /* Noise-based PLC/CNG */ - celt_sig *freq; +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif opus_uint32 seed; opus_val16 *plcLogE; int end; @@ -383,10 +463,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R end = st->end; effEnd = IMAX(start, IMIN(end, mode->effEBands)); - /* Share the interleaved signal MDCT coefficient buffer with the - deemphasis scratch buffer. */ - freq = scratch; +#ifdef NORM_ALIASING_HACK + /* This is an ugly hack that breaks aliasing rules and would be easily broken, + but it saves almost 4kB of stack. */ + X = (celt_norm*)(out_syn[C-1]+overlap/2); +#else ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ +#endif if (loss_count >= 5) plcLogE = backgroundLogE; @@ -421,20 +504,12 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } st->rng = seed; - denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<>1)); } while (++cdownsample, 0); } else { /* Pitch-based PLC */ const opus_val16 *window; @@ -445,15 +520,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R if (loss_count == 0) { - VARDECL( opus_val16, lp_pitch_buf ); - ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); - pitch_downsample(decode_mem, lp_pitch_buf, - DECODE_BUFFER_SIZE, C, st->arch); - pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, - DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); - pitch_index = PLC_PITCH_LAG_MAX-pitch_index; - st->last_pitch_index = pitch_index; + st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); } else { pitch_index = st->last_pitch_index; fade = QCONST16(.8f,15); @@ -644,25 +711,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } while (++cpreemph, st->preemph_memD, scratch); - st->loss_count = loss_count+1; RESTORE_STACK; } -#define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */ -static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */ -static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */ -int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) +int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) { int c, i, N; int spread_decision; opus_int32 bits; ec_dec _dec; - VARDECL(celt_sig, freq); +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif VARDECL(int, fine_quant); VARDECL(int, pulses); VARDECL(int, cap); @@ -680,6 +745,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat int intra_ener; const int CC = st->channels; int LM, M; + int start; + int end; int effEnd; int codedBands; int alloc_trim; @@ -706,11 +773,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; + start = st->start; + end = st->end; frame_size *= st->downsample; - c=0; do { - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - } while (++c_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); oldBandE = lpc+CC*LPC_ORDER; oldLogE = oldBandE + 2*nbEBands; @@ -728,7 +794,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat if (data0<0) return OPUS_INVALID_PACKET; } - st->end = IMAX(1, mode->effEBands-2*(data0>>5)); + st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); LM = (data0>>3)&0x3; C = 1 + ((data0>>2)&0x1); data++; @@ -755,14 +821,19 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat return OPUS_BAD_ARG; N = M*mode->shortMdctSize; + c=0; do { + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; + } while (++cend; + effEnd = end; if (effEnd > mode->effEBands) effEnd = mode->effEBands; if (data == NULL || len<=1) { - celt_decode_lost(st, pcm, N, LM); + celt_decode_lost(st, N, LM); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); RESTORE_STACK; return frame_size/st->downsample; } @@ -798,7 +869,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat postfilter_gain = 0; postfilter_pitch = 0; postfilter_tapset = 0; - if (st->start==0 && tell+16 <= total_bits) + if (start==0 && tell+16 <= total_bits) { if(ec_dec_bit_logp(dec, 1)) { @@ -829,11 +900,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat /* Decode the global flags (first symbols in the stream) */ intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; /* Get band energies */ - unquant_coarse_energy(mode, st->start, st->end, oldBandE, + unquant_coarse_energy(mode, start, end, oldBandE, intra_ener, dec, C, LM); ALLOC(tf_res, nbEBands, int); - tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); + tf_decode(start, end, isTransient, tf_res, LM, dec); tell = ec_tell(dec); spread_decision = SPREAD_NORMAL; @@ -849,7 +920,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat dynalloc_logp = 6; total_bits<<=BITRES; tell = ec_tell_frac(dec); - for (i=st->start;iend;i++) + for (i=start;istart, st->end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0, 0); - unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); + unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C); + + c=0; do { + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); + } while (++c= C*N) - X = s_X; - else - ALLOC(X, C*N, celt_norm); - quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, +#ifdef NORM_ALIASING_HACK + /* This is an ugly hack that breaks aliasing rules and would be easily broken, + but it saves almost 4kB of stack. */ + X = (celt_norm*)(out_syn[CC-1]+overlap/2); +#else + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ +#endif + + quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, len*(8<rng); @@ -911,58 +989,20 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat anti_collapse_on = ec_dec_bits(dec, 1); } - unquant_energy_finalise(mode, st->start, st->end, oldBandE, + unquant_energy_finalise(mode, start, end, oldBandE, fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); if (anti_collapse_on) anti_collapse(mode, X, collapse_masks, LM, C, N, - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); - - /**< Interleaved signal MDCTs */ - if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N) - freq = s_freq; - else - ALLOC(freq, IMAX(CC,C)*N, celt_sig); + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); if (silence) { for (i=0;istart, effEnd, C, M); } - c=0; do { - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); - } while (++cdownsample!=1) - bound = IMIN(bound, N/st->downsample); - for (i=bound;idownsample, silence); c=0; do { st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); @@ -989,18 +1029,14 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat st->postfilter_tapset_old = st->postfilter_tapset; } - if (C==1) { - for (i=0;istart;i++) + for (i=0;iend;irng = dec->rng; - /* We reuse freq[] as scratch space for the de-emphasis */ - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); st->loss_count = 0; RESTORE_STACK; if (ec_tell(dec) > 8*len) @@ -1039,7 +1074,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat #ifdef FIXED_POINT int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) { - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); } #ifndef DISABLE_FLOAT_API @@ -1056,7 +1091,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char N = frame_size; ALLOC(out, C*N, opus_int16); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); if (ret>0) for (j=0;j0) for (j=0;j0); celt_assert(_n>1); while(_n>2){ @@ -487,7 +489,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ } else for(p=row[_k];p>_i;p=row[_k])_k--; _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } /*Lots of dimensions case:*/ else{ @@ -507,7 +511,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ do p=CELT_PVQ_U_ROW[--_k][_n]; while(p>_i); _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } } _n--; @@ -519,14 +525,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ k0=_k; _k=(_i+1)>>1; if(_k)_i-=2*_k-1; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); /*_n==1*/ s=-(int)_i; - *_y=(_k+s)^s; + val=(_k+s)^s; + *_y=val; + yy=MAC16_16(yy,val,val); + return yy; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ - cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ + return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); } #else /* SMALL_FOOTPRINT */ @@ -591,8 +602,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ _y: Returns the vector of pulses. _u: Must contain entries [0..._k+1] of row _n of U() on input. Its contents will be destructively modified.*/ -static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ +static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ int j; + opus_int16 val; + opus_val32 yy=0; celt_assert(_n>0); j=0; do{ @@ -607,10 +620,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ while(p>_i)p=_u[--_k]; _i-=p; yj-=_k; - _y[j]=(yj+s)^s; + val=(yj+s)^s; + _y[j]=val; + yy=MAC16_16(yy,val,val); uprev(_u,_k+2,0); } while(++j<_n); + return yy; } /*Returns the index of the given combination of K elements chosen from a set @@ -685,13 +701,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ RESTORE_STACK; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ VARDECL(opus_uint32,u); + int ret; SAVE_STACK; celt_assert(_k>0); ALLOC(u,_k+2U,opus_uint32); - cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); + ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); RESTORE_STACK; + return ret; } #endif /* SMALL_FOOTPRINT */ diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.h b/lib/rbcodec/codecs/libopus/celt/cwrs.h index 7dfbd076d1..7cd4717459 100644 --- a/lib/rbcodec/codecs/libopus/celt/cwrs.h +++ b/lib/rbcodec/codecs/libopus/celt/cwrs.h @@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac); void encode_pulses(const int *_y, int N, int K, ec_enc *enc); -void decode_pulses(int *_y, int N, int K, ec_dec *dec); +opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec); #endif /* CWRS_H */ diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.c b/lib/rbcodec/codecs/libopus/celt/entcode.c index fa5d7c7c2c..461a36dd55 100644 --- a/lib/rbcodec/codecs/libopus/celt/entcode.c +++ b/lib/rbcodec/codecs/libopus/celt/entcode.c @@ -62,6 +62,27 @@ int ec_ilog(opus_uint32 _v){ } #endif +#if 1 +/* This is a faster version of ec_tell_frac() that takes advantage + of the low (1/8 bit) resolution to use just a linear function + followed by a lookup to determine the exact transition thresholds. */ +opus_uint32 ec_tell_frac(ec_ctx *_this){ + static const unsigned correction[8] = + {35733, 38967, 42495, 46340, + 50535, 55109, 60097, 65535}; + opus_uint32 nbits; + opus_uint32 r; + int l; + unsigned b; + nbits=_this->nbits_total<rng); + r=_this->rng>>(l-16); + b = (r>>12)-8; + b += r>correction[b]; + l = (l<<3)+b; + return nbits-l; +} +#else opus_uint32 ec_tell_frac(ec_ctx *_this){ opus_uint32 nbits; opus_uint32 r; @@ -91,3 +112,42 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){ } return nbits-l; } +#endif + +#ifdef USE_SMALL_DIV_TABLE +/* Result of 2^32/(2*i+1), except for i=0. */ +const opus_uint32 SMALL_DIV_TABLE[129] ICONST_ATTR = { + 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, + 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, + 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, + 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084, + 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906, + 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A, + 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A, + 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104, + 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1, + 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2, + 0x0329161F, 0x03159721, 0x03030303, 0x02F14990, + 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46, + 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597, + 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17, + 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902, + 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810, + 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC, + 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30, + 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364, + 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14, + 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F, + 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE, + 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6, + 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3, + 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E, + 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A, + 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190, + 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227, + 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4, + 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E, + 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3, + 0x01073260, 0x0105197F, 0x0103091B, 0x01010101 +}; +#endif diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.h b/lib/rbcodec/codecs/libopus/celt/entcode.h index dd13e49e50..13d6c84ef0 100644 --- a/lib/rbcodec/codecs/libopus/celt/entcode.h +++ b/lib/rbcodec/codecs/libopus/celt/entcode.h @@ -34,6 +34,12 @@ # include # include "ecintrin.h" +extern const opus_uint32 SMALL_DIV_TABLE[129]; + +#ifdef OPUS_ARM_ASM +#define USE_SMALL_DIV_TABLE +#endif + /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a larger type, you can speed up the decoder by using it here.*/ typedef opus_uint32 ec_window; @@ -114,4 +120,33 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){ rounding error is in the positive direction).*/ opus_uint32 ec_tell_frac(ec_ctx *_this); +/* Tested exhaustively for all n and for 1<=d<=256 */ +static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (d>256) + return n/d; + else { + opus_uint32 t, q; + t = EC_ILOG(d&-d); + q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32; + return q+(n-q*d >= d); + } +#else + return n/d; +#endif +} + +static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (n<0) + return -(opus_int32)celt_udiv(-n, d); + else + return celt_udiv(n, d); +#else + return n/d; +#endif +} + #endif diff --git a/lib/rbcodec/codecs/libopus/celt/entdec.c b/lib/rbcodec/codecs/libopus/celt/entdec.c index 3c264685c2..0b3433ed8b 100644 --- a/lib/rbcodec/codecs/libopus/celt/entdec.c +++ b/lib/rbcodec/codecs/libopus/celt/entdec.c @@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ unsigned ec_decode(ec_dec *_this,unsigned _ft){ unsigned s; - _this->ext=_this->rng/_ft; + _this->ext=celt_udiv(_this->rng,_ft); s=(unsigned)(_this->val/_this->ext); return _ft-EC_MINI(s+1,_ft); } diff --git a/lib/rbcodec/codecs/libopus/celt/entenc.c b/lib/rbcodec/codecs/libopus/celt/entenc.c index a7e34ecef9..271e4d30c5 100644 --- a/lib/rbcodec/codecs/libopus/celt/entenc.c +++ b/lib/rbcodec/codecs/libopus/celt/entenc.c @@ -127,7 +127,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ opus_uint32 r; - r=_this->rng/_ft; + r=celt_udiv(_this->rng,_ft); if(_fl>0){ _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); _this->rng=IMUL32(r,(_fh-_fl)); diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h index ecf018a244..ac67d37ce8 100644 --- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h +++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h @@ -113,7 +113,11 @@ /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. b must fit in 31 bits. Result fits in 32 bits. */ -#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) +#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) + +/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add. + Results fits in 32 bits */ +#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) #define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) @@ -131,4 +135,17 @@ /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) +#if defined(MIPSr1_ASM) +#include "mips/fixed_generic_mipsr1.h" +#endif + +static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) +{ + x = PSHR32(x, SIG_SHIFT); + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return EXTRACT16(x); +} +#define SIG2WORD16(x) (SIG2WORD16_generic(x)) + #endif diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c index e2b8f3b3da..833ef5a71f 100644 --- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c +++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c @@ -45,73 +45,62 @@ complex numbers. It also delares the kf_ internal functions. */ -#if 0 static void kf_bfly2( kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, int m, - int N, - int mm + int N ) { kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for(j=0;jr = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); - Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); - C_MUL (t, *Fout2 , *tw1); - tw1 += fstride; + Fout2 = Fout + 1; + t = *Fout2; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; + Fout += 2; } - } -} + } else #endif - -static void ki_bfly2( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - kiss_fft_cpx t; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for(j=0;jtwiddles; - for (j=0;jr = PSHR32(Fout->r, 2); - Fout->i = PSHR32(Fout->i, 2); - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - ++Fout; + kiss_fft_cpx scratch0, scratch1; + + C_SUB( scratch0 , *Fout, Fout[2] ); + C_ADDTO(*Fout, Fout[2]); + C_ADD( scratch1 , Fout[1] , Fout[3] ); + C_SUB( Fout[2], *Fout, scratch1 ); + C_ADDTO( *Fout , scratch1 ); + C_SUB( scratch1 , Fout[1] , Fout[3] ); + + Fout[1].r = scratch0.r + scratch1.i; + Fout[1].i = scratch0.i - scratch1.r; + Fout[3].r = scratch0.r - scratch1.i; + Fout[3].i = scratch0.i + scratch1.r; + Fout+=4; } - } -} -#endif - -static void ki_bfly4( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - const kiss_twiddle_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - const size_t m2=2*m; - const size_t m3=3*m; - int i, j; - - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for (j=0;jtwiddles; + /* m is guaranteed to be a multiple of 4. */ + for (j=0;jtwiddles[fstride*m]; +#endif for (i=0;itwiddles; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ k=m; do { - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); C_MUL(scratch[1],Fout[m] , *tw1); C_MUL(scratch[2],Fout[m2] , *tw2); @@ -259,59 +229,9 @@ static void kf_bfly3( } while(--k); } } -#endif - -static void ki_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - int i, k; - const size_t m2 = 2*m; - const kiss_twiddle_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_twiddle_cpx epi3; - - kiss_fft_cpx * Fout_beg = Fout; - epi3 = st->twiddles[fstride*m]; - for (i=0;itwiddles; - k=m; - do{ - - C_MULC(scratch[1],Fout[m] , *tw1); - C_MULC(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , -epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - ++Fout; - }while(--k); - } -} -#if 0 +#ifndef OVERRIDE_kf_bfly5 static void kf_bfly5( kiss_fft_cpx * Fout, const size_t fstride, @@ -324,13 +244,19 @@ static void kf_bfly5( kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; int i, u; kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx * twiddles = st->twiddles; const kiss_twiddle_cpx *tw; kiss_twiddle_cpx ya,yb; kiss_fft_cpx * Fout_beg = Fout; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; +#ifdef FIXED_POINT + ya.r = 10126; + ya.i = -31164; + yb.r = -26510; + yb.i = -19261; +#else + ya = st->twiddles[fstride*m]; + yb = st->twiddles[fstride*2*m]; +#endif tw=st->twiddles; for (i=0;itwiddles; - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; +#endif /* OVERRIDE_kf_bfly5 */ - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - tw=st->twiddles; - - for (i=0;ir += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} #endif @@ -496,6 +355,9 @@ static int kf_factor(int n,opus_int16 * facbuf) { int p=4; + int i; + int stages=0; + int nbak = n; /*factor out powers of 4, powers of 2, then any remaining primes */ do { @@ -517,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf) { return 0; } - *facbuf++ = p; - *facbuf++ = n; + facbuf[2*stages] = p; + if (p==2 && stages > 1) + { + facbuf[2*stages] = 4; + facbuf[2] = 2; + } + stages++; } while (n > 1); + n = nbak; + /* Reverse the order to get the radix 4 at the end, so we can use the + fast degenerate case. It turns out that reversing the order also + improves the noise behaviour. */ + for (i=0;infft=nfft; -#ifndef FIXED_POINT +#ifdef FIXED_POINT + st->scale_shift = celt_ilog2(st->nfft); + if (st->nfft == 1<scale_shift) + st->scale = Q15ONE; + else + st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); +#else st->scale = 1.f/nfft; #endif if (base != NULL) { st->twiddles = base->twiddles; st->shift = 0; - while (nfft<shift != base->nfft && st->shift < 32) + while (st->shift < 32 && nfft<shift != base->nfft) st->shift++; if (st->shift>=32) goto fail; @@ -614,8 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg) #endif /* CUSTOM_MODES */ -#if 0 -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) { int m2, m; int p; @@ -627,17 +515,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou /* st->shift can be -1 */ shift = st->shift>0 ? st->shift : 0; - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;infft;i++) - { - fout[st->bitrev[i]] = fin[i]; -#ifndef FIXED_POINT - fout[st->bitrev[i]].r *= st->scale; - fout[st->bitrev[i]].i *= st->scale; -#endif - } - fstride[0] = 1; L=0; do { @@ -656,7 +533,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou switch (st->factors[2*i]) { case 2: - kf_bfly2(fout,fstride[i]<scale_shift-1; +#endif + scale = st->scale; - /* st->shift can be -1 */ - shift = st->shift>0 ? st->shift : 0; celt_assert2 (fin != fout, "In-place FFT not supported"); /* Bit-reverse the input */ for (i=0;infft;i++) - fout[st->bitrev[i]] = fin[i]; - - fstride[0] = 1; - L=0; - do { - p = st->factors[2*L]; - m = st->factors[2*L+1]; - fstride[L+1] = fstride[L]*p; - L++; - } while(m!=1); - m = st->factors[2*L-1]; - for (i=L-1;i>=0;i--) { - if (i!=0) - m2 = st->factors[2*i-1]; - else - m2 = 1; - switch (st->factors[2*i]) - { - case 2: - ki_bfly2(fout,fstride[i]<bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); + fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); } + opus_fft_impl(st, fout); } +#endif + +#ifdef TEST_UNIT_DFT_C +void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int i; + celt_assert2 (fin != fout, "In-place FFT not supported"); + /* Bit-reverse the input */ + for (i=0;infft;i++) + fout[st->bitrev[i]] = fin[i]; + for (i=0;infft;i++) + fout[i].i = -fout[i].i; + opus_fft_impl(st, fout); + for (i=0;infft;i++) + fout[i].i = -fout[i].i; +} +#endif diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h index 66cf1f2126..390b54d948 100644 --- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h +++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h @@ -79,8 +79,9 @@ typedef struct { typedef struct kiss_fft_state{ int nfft; -#ifndef FIXED_POINT - kiss_fft_scalar scale; + opus_val16 scale; +#ifdef FIXED_POINT + int scale_shift; #endif int shift; opus_int16 factors[2*MAXFACTORS]; @@ -128,14 +129,10 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); f[k].r and f[k].i * */ void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -#if defined(CPU_COLDFIRE) -#define IFFT_ICODE ICODE_ATTR -#else -#define IFFT_ICODE -#endif - -void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) IFFT_ICODE; +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); +void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); void opus_fft_free(const kiss_fft_state *cfg); diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c index 72ea180568..7fa8eaf6bf 100644 --- a/lib/rbcodec/codecs/libopus/celt/mdct.c +++ b/lib/rbcodec/codecs/libopus/celt/mdct.c @@ -53,18 +53,20 @@ #include "mathops.h" #include "stack_alloc.h" +#if defined(MIPSr1_ASM) +#include "mips/mdct_mipsr1.h" +#endif + + #ifdef CUSTOM_MODES int clt_mdct_init(mdct_lookup *l,int N, int maxshift) { int i; - int N4; kiss_twiddle_scalar *trig; -#if defined(FIXED_POINT) + int shift; int N2=N>>1; -#endif l->n = N; - N4 = N>>2; l->maxshift = maxshift; for (i=0;i<=maxshift;i++) { @@ -77,17 +79,28 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift) return 0; #endif } - l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); + l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); if (l->trig==NULL) return 0; - /* We have enough points that sine isn't necessary */ + for (shift=0;shift<=maxshift;shift++) + { + /* We have enough points that sine isn't necessary */ #if defined(FIXED_POINT) - for (i=0;i<=N4;i++) - trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); +#if 1 + for (i=0;i>= 1; + N >>= 1; + } return 1; } @@ -103,27 +116,37 @@ void clt_mdct_clear(mdct_lookup *l) #if 0 /* Forward MDCT trashes the input array */ +#ifndef OVERRIDE_clt_mdct_forward void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window, int overlap, int shift, int stride) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_scalar, f2); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + opus_val16 scale; +#ifdef FIXED_POINT + /* Allows us to scale with MULT16_32_Q16(), which is faster than + MULT16_32_Q15() on ARM. */ + int scale_shift = st->scale_shift-1; +#endif SAVE_STACK; + scale = st->scale; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; + ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N2, kiss_fft_scalar); - /* sin(x) ~= x here */ -#ifdef FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif + ALLOC(f2, N4, kiss_fft_cpx); /* Consider the input to be composed of four blocks: [a, b, c, d] */ /* Window, shuffle, fold */ @@ -168,125 +191,131 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar /* Pre-rotation */ { kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; for(i=0;ibitrev[i]] = yc; } } - /* N/4 complex FFT, down-scales by 4/N */ - opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); + /* N/4 complex FFT, does not downscale anymore */ + opus_fft_impl(st, f2); /* Post-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT fp = f2; + const kiss_fft_cpx * OPUS_RESTRICT fp = f2; kiss_fft_scalar * OPUS_RESTRICT yp1 = out; kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; /* Temp pointers to make it really clear to the compiler what we're doing */ for(i=0;ii,t[N4+i]) - S_MUL(fp->r,t[i]); + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; yp1 += 2*stride; yp2 -= 2*stride; } } RESTORE_STACK; } +#endif /* OVERRIDE_clt_mdct_forward */ #endif +#ifndef OVERRIDE_clt_mdct_backward void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; -/* VARDECL(kiss_fft_scalar, f2); - SAVE_STACK; */ + const kiss_twiddle_scalar *trig; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; -/* ALLOC(f2, N2, kiss_fft_scalar); */ - kiss_fft_scalar f2[N2]; /* worst case 3840b */ - /* sin(x) ~= x here */ -#ifdef FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif /* Pre-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = f2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; + const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; for(i=0;ikfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); + opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); /* Post-rotate and de-shuffle from both ends of the buffer at once to make it in-place. */ { - kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * yp0 = out+(overlap>>1); + kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; + const kiss_twiddle_scalar *t = &trig[0]; /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the middle pair will be computed twice. */ for(i=0;i<(N4+1)>>1;i++) { kiss_fft_scalar re, im, yr, yi; kiss_twiddle_scalar t0, t1; - re = yp0[0]; - im = yp0[1]; - t0 = t[i<overlap) -#endif - -#ifndef FRAMESIZE -#define FRAMESIZE(mode) ((mode)->mdctSize) -#endif - typedef struct { int size; const opus_int16 *index; diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c index c28857297a..ee56a434f0 100644 --- a/lib/rbcodec/codecs/libopus/celt/pitch.c +++ b/lib/rbcodec/codecs/libopus/celt/pitch.c @@ -252,15 +252,15 @@ void #endif celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) { - int i,j; + int i; /*The EDSP version requires that max_pitch is at least 1, and that _x is 32-bit aligned. Since it's hard to put asserts in assembly, put them here.*/ - celt_assert(max_pitch>0); - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); #ifdef FIXED_POINT opus_val32 maxcorr=1; #endif + celt_assert(max_pitch>0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); for (i=0;i>1;i++) { - opus_val32 sum=0; + opus_val32 sum; xcorr[i] = 0; if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) continue; +#ifdef FIXED_POINT + sum = 0; for (j=0;j>1;j++) sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); +#else + sum = celt_inner_prod(x_lp, y+i, len>>1); +#endif xcorr[i] = MAX32(-1, sum); #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); @@ -457,7 +461,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, opus_val16 g1; opus_val16 cont=0; opus_val16 thresh; - T1 = (2*T0+k)/(2*k); + T1 = celt_udiv(2*T0+k, 2*k); if (T1 < minperiod) break; /* Look for another strong correlation at T1b */ @@ -469,7 +473,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, T1b = T0+T1; } else { - T1b = (2*second_check[k]*T0+k)/(2*k); + T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); } dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); xy += xy2; @@ -514,13 +518,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, pg = SHR32(frac_div32(best_xy,best_yy+1),16); for (k=0;k<3;k++) - { - int T1 = T+k-1; - xy = 0; - for (i=0;i MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) offset = 1; else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.h b/lib/rbcodec/codecs/libopus/celt/pitch.h index df317ecc1d..96dbc0d794 100644 --- a/lib/rbcodec/codecs/libopus/celt/pitch.h +++ b/lib/rbcodec/codecs/libopus/celt/pitch.h @@ -41,8 +41,12 @@ #include "x86/pitch_sse.h" #endif +#if defined(MIPSr1_ASM) +#include "mips/pitch_mipsr1.h" +#endif + #if defined(OPUS_ARM_ASM) && defined(FIXED_POINT) -# include "arm/pitch_arm.h" +//# include "arm/pitch_arm.h" #endif void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, @@ -141,6 +145,18 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y } #endif +#ifndef OVERRIDE_CELT_INNER_PROD +static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_val16 *y, + int N) +{ + int i; + opus_val32 xy=0; + for (i=0;ieBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); band_width = m->eBands[codedBands]-m->eBands[j]; @@ -414,7 +414,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, /* Allocate the remaining bits */ left = total-psum; - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; for (j=start;jeBands[j+1]-m->eBands[j])); @@ -465,7 +465,8 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, offset += NClogN>>3; /* Divide with rounding */ - ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<>BITRES; /* Make sure not to bust */ if (C*ebits[j] > (bits[j]>>BITRES)) diff --git a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h index 316a6ce12c..2b51c8d80c 100644 --- a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h +++ b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h @@ -116,9 +116,11 @@ #else #ifdef CELT_C +char *scratch_ptr=0; char *global_stack=0; #else extern char *global_stack; +extern char *scratch_ptr; #endif /* CELT_C */ #ifdef ENABLE_VALGRIND @@ -140,8 +142,12 @@ extern char *global_stack_top; #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) #define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) +#if 0 /* Set this to 1 to instrument pseudostack usage */ +#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack) +#else #define RESTORE_STACK (global_stack = _saved_stack) -#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack; +#endif +#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack; #endif /* ENABLE_VALGRIND */ diff --git a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h index 92e5fe5877..0396ce38df 100644 --- a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h +++ b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h @@ -341,84 +341,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] ICONST_ATTR = { #ifndef FFT_BITREV480 #define FFT_BITREV480 static const opus_int16 fft_bitrev480[480] = { -0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, -450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, -345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, -215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, -110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, -430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, -325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, -181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, -76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, -396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, -291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, -161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, -56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, -362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, -257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, -127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, -22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, -472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, -342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, -237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, -93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, -438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, -308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, -203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, -73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, -418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, -274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, -169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, -39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, -384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, -254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, -149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, +0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, +8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, +16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, +24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, +4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, +12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, +20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, +28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, +1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, +9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, +17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, +25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, +5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, +13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, +21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, +29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, +2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, +10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, +18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, +26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, +6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, +14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, +22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, +30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, +3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, +11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, +19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, +27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, +7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, +15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, +23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, +31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, }; #endif #ifndef FFT_BITREV240 #define FFT_BITREV240 static const opus_int16 fft_bitrev240[240] = { -0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, -225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, -170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, -115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, -46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, -216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, -161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, -92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, -37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, -207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, -138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, -83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, -28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, -184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, -129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, -74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, +0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, +4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, +8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, +12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, +1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, +5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, +9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, +13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, +2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, +6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, +10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, +14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, +3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, +7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, +11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, +15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, }; #endif #ifndef FFT_BITREV120 #define FFT_BITREV120 static const opus_int16 fft_bitrev120[120] = { -0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, -110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, -76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, -56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, -22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, -93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, -73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, -39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, +0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, +4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, +1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, +5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, +2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, +6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, +3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, +7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, }; #endif #ifndef FFT_BITREV60 #define FFT_BITREV60 static const opus_int16 fft_bitrev60[60] = { -0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, -46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, -37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, -28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, +0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, +1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, +2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, +3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, }; #endif @@ -426,8 +426,10 @@ static const opus_int16 fft_bitrev60[60] = { #define FFT_STATE48000_960_0 static const kiss_fft_state fft_state48000_960_0 ICONST_ATTR = { 480, /* nfft */ +17476, /* scale */ +8, /* scale_shift */ -1, /* shift */ -{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev480, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -437,8 +439,10 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_1 static const kiss_fft_state fft_state48000_960_1 ICONST_ATTR = { 240, /* nfft */ +17476, /* scale */ +7, /* scale_shift */ 1, /* shift */ -{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev240, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -448,8 +452,10 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_2 static const kiss_fft_state fft_state48000_960_2 ICONST_ATTR = { 120, /* nfft */ +17476, /* scale */ +6, /* scale_shift */ 2, /* shift */ -{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev120, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -459,8 +465,10 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_3 static const kiss_fft_state fft_state48000_960_3 ICONST_ATTR = { 60, /* nfft */ +17476, /* scale */ +5, /* scale_shift */ 3, /* shift */ -{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev60, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -470,104 +478,368 @@ fft_twiddles48000_960, /* bitrev */ #ifndef MDCT_TWIDDLES960 #define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[481] ICONST_ATTR = { -32767, 32767, 32767, 32767, 32766, -32763, 32762, 32759, 32757, 32753, -32751, 32747, 32743, 32738, 32733, -32729, 32724, 32717, 32711, 32705, -32698, 32690, 32683, 32676, 32667, -32658, 32650, 32640, 32631, 32620, -32610, 32599, 32588, 32577, 32566, -32554, 32541, 32528, 32515, 32502, -32487, 32474, 32459, 32444, 32429, -32413, 32397, 32381, 32364, 32348, -32331, 32313, 32294, 32277, 32257, -32239, 32219, 32200, 32180, 32159, -32138, 32118, 32096, 32074, 32051, -32029, 32006, 31984, 31960, 31936, -31912, 31888, 31863, 31837, 31812, -31786, 31760, 31734, 31707, 31679, -31652, 31624, 31596, 31567, 31539, -31508, 31479, 31450, 31419, 31388, -31357, 31326, 31294, 31262, 31230, -31198, 31164, 31131, 31097, 31063, -31030, 30994, 30959, 30924, 30889, -30853, 30816, 30779, 30743, 30705, -30668, 30629, 30592, 30553, 30515, -30475, 30435, 30396, 30356, 30315, -30274, 30233, 30191, 30149, 30107, -30065, 30022, 29979, 29936, 29891, -29847, 29803, 29758, 29713, 29668, -29622, 29577, 29529, 29483, 29436, -29390, 29341, 29293, 29246, 29197, -29148, 29098, 29050, 29000, 28949, -28899, 28848, 28797, 28746, 28694, -28642, 28590, 28537, 28485, 28432, -28378, 28324, 28271, 28217, 28162, -28106, 28051, 27995, 27940, 27884, -27827, 27770, 27713, 27657, 27598, -27540, 27481, 27423, 27365, 27305, -27246, 27187, 27126, 27066, 27006, -26945, 26883, 26822, 26760, 26698, -26636, 26574, 26510, 26448, 26383, -26320, 26257, 26191, 26127, 26062, -25997, 25931, 25866, 25800, 25734, -25667, 25601, 25533, 25466, 25398, -25330, 25262, 25194, 25125, 25056, -24987, 24917, 24848, 24778, 24707, -24636, 24566, 24495, 24424, 24352, -24280, 24208, 24135, 24063, 23990, -23917, 23842, 23769, 23695, 23622, -23546, 23472, 23398, 23322, 23246, -23171, 23095, 23018, 22942, 22866, -22788, 22711, 22634, 22557, 22478, -22400, 22322, 22244, 22165, 22085, -22006, 21927, 21846, 21766, 21687, -21606, 21524, 21443, 21363, 21282, -21199, 21118, 21035, 20954, 20870, -20788, 20705, 20621, 20538, 20455, -20371, 20286, 20202, 20118, 20034, -19947, 19863, 19777, 19692, 19606, -19520, 19434, 19347, 19260, 19174, -19088, 18999, 18911, 18825, 18737, -18648, 18560, 18472, 18384, 18294, -18205, 18116, 18025, 17936, 17846, -17757, 17666, 17576, 17485, 17395, -17303, 17212, 17122, 17030, 16937, -16846, 16755, 16662, 16569, 16477, -16385, 16291, 16198, 16105, 16012, -15917, 15824, 15730, 15636, 15541, -15447, 15352, 15257, 15162, 15067, -14973, 14875, 14781, 14685, 14589, -14493, 14396, 14300, 14204, 14107, -14010, 13914, 13815, 13718, 13621, -13524, 13425, 13328, 13230, 13133, -13033, 12935, 12836, 12738, 12638, -12540, 12441, 12341, 12241, 12142, -12044, 11943, 11843, 11744, 11643, -11542, 11442, 11342, 11241, 11139, -11039, 10939, 10836, 10736, 10635, -10534, 10431, 10330, 10228, 10127, -10024, 9921, 9820, 9718, 9614, -9512, 9410, 9306, 9204, 9101, -8998, 8895, 8791, 8689, 8585, -8481, 8377, 8274, 8171, 8067, -7962, 7858, 7753, 7650, 7545, -7441, 7336, 7231, 7129, 7023, -6917, 6813, 6709, 6604, 6498, -6393, 6288, 6182, 6077, 5973, -5867, 5760, 5656, 5549, 5445, -5339, 5232, 5127, 5022, 4914, -4809, 4703, 4596, 4490, 4384, -4278, 4171, 4065, 3958, 3852, -3745, 3640, 3532, 3426, 3318, -3212, 3106, 2998, 2891, 2786, -2679, 2570, 2465, 2358, 2251, -2143, 2037, 1929, 1823, 1715, -1609, 1501, 1393, 1287, 1180, -1073, 964, 858, 751, 644, -535, 429, 322, 214, 107, -0, }; +static const opus_val16 mdct_twiddles960[1800] ICONST_ATTR = { +32767, 32767, 32767, 32766, 32765, +32763, 32761, 32759, 32756, 32753, +32750, 32746, 32742, 32738, 32733, +32728, 32722, 32717, 32710, 32704, +32697, 32690, 32682, 32674, 32666, +32657, 32648, 32639, 32629, 32619, +32609, 32598, 32587, 32576, 32564, +32552, 32539, 32526, 32513, 32500, +32486, 32472, 32457, 32442, 32427, +32411, 32395, 32379, 32362, 32345, +32328, 32310, 32292, 32274, 32255, +32236, 32217, 32197, 32177, 32157, +32136, 32115, 32093, 32071, 32049, +32027, 32004, 31981, 31957, 31933, +31909, 31884, 31859, 31834, 31809, +31783, 31756, 31730, 31703, 31676, +31648, 31620, 31592, 31563, 31534, +31505, 31475, 31445, 31415, 31384, +31353, 31322, 31290, 31258, 31226, +31193, 31160, 31127, 31093, 31059, +31025, 30990, 30955, 30920, 30884, +30848, 30812, 30775, 30738, 30701, +30663, 30625, 30587, 30548, 30509, +30470, 30430, 30390, 30350, 30309, +30269, 30227, 30186, 30144, 30102, +30059, 30016, 29973, 29930, 29886, +29842, 29797, 29752, 29707, 29662, +29616, 29570, 29524, 29477, 29430, +29383, 29335, 29287, 29239, 29190, +29142, 29092, 29043, 28993, 28943, +28892, 28842, 28791, 28739, 28688, +28636, 28583, 28531, 28478, 28425, +28371, 28317, 28263, 28209, 28154, +28099, 28044, 27988, 27932, 27876, +27820, 27763, 27706, 27648, 27591, +27533, 27474, 27416, 27357, 27298, +27238, 27178, 27118, 27058, 26997, +26936, 26875, 26814, 26752, 26690, +26628, 26565, 26502, 26439, 26375, +26312, 26247, 26183, 26119, 26054, +25988, 25923, 25857, 25791, 25725, +25658, 25592, 25524, 25457, 25389, +25322, 25253, 25185, 25116, 25047, +24978, 24908, 24838, 24768, 24698, +24627, 24557, 24485, 24414, 24342, +24270, 24198, 24126, 24053, 23980, +23907, 23834, 23760, 23686, 23612, +23537, 23462, 23387, 23312, 23237, +23161, 23085, 23009, 22932, 22856, +22779, 22701, 22624, 22546, 22468, +22390, 22312, 22233, 22154, 22075, +21996, 21916, 21836, 21756, 21676, +21595, 21515, 21434, 21352, 21271, +21189, 21107, 21025, 20943, 20860, +20777, 20694, 20611, 20528, 20444, +20360, 20276, 20192, 20107, 20022, +19937, 19852, 19767, 19681, 19595, +19509, 19423, 19336, 19250, 19163, +19076, 18988, 18901, 18813, 18725, +18637, 18549, 18460, 18372, 18283, +18194, 18104, 18015, 17925, 17835, +17745, 17655, 17565, 17474, 17383, +17292, 17201, 17110, 17018, 16927, +16835, 16743, 16650, 16558, 16465, +16372, 16279, 16186, 16093, 15999, +15906, 15812, 15718, 15624, 15529, +15435, 15340, 15245, 15150, 15055, +14960, 14864, 14769, 14673, 14577, +14481, 14385, 14288, 14192, 14095, +13998, 13901, 13804, 13706, 13609, +13511, 13414, 13316, 13218, 13119, +13021, 12923, 12824, 12725, 12626, +12527, 12428, 12329, 12230, 12130, +12030, 11930, 11831, 11730, 11630, +11530, 11430, 11329, 11228, 11128, +11027, 10926, 10824, 10723, 10622, +10520, 10419, 10317, 10215, 10113, +10011, 9909, 9807, 9704, 9602, +9499, 9397, 9294, 9191, 9088, +8985, 8882, 8778, 8675, 8572, +8468, 8364, 8261, 8157, 8053, +7949, 7845, 7741, 7637, 7532, +7428, 7323, 7219, 7114, 7009, +6905, 6800, 6695, 6590, 6485, +6380, 6274, 6169, 6064, 5958, +5853, 5747, 5642, 5536, 5430, +5325, 5219, 5113, 5007, 4901, +4795, 4689, 4583, 4476, 4370, +4264, 4157, 4051, 3945, 3838, +3732, 3625, 3518, 3412, 3305, +3198, 3092, 2985, 2878, 2771, +2664, 2558, 2451, 2344, 2237, +2130, 2023, 1916, 1809, 1702, +1594, 1487, 1380, 1273, 1166, +1059, 952, 844, 737, 630, +523, 416, 308, 201, 94, +-13, -121, -228, -335, -442, +-550, -657, -764, -871, -978, +-1086, -1193, -1300, -1407, -1514, +-1621, -1728, -1835, -1942, -2049, +-2157, -2263, -2370, -2477, -2584, +-2691, -2798, -2905, -3012, -3118, +-3225, -3332, -3439, -3545, -3652, +-3758, -3865, -3971, -4078, -4184, +-4290, -4397, -4503, -4609, -4715, +-4821, -4927, -5033, -5139, -5245, +-5351, -5457, -5562, -5668, -5774, +-5879, -5985, -6090, -6195, -6301, +-6406, -6511, -6616, -6721, -6826, +-6931, -7036, -7140, -7245, -7349, +-7454, -7558, -7663, -7767, -7871, +-7975, -8079, -8183, -8287, -8390, +-8494, -8597, -8701, -8804, -8907, +-9011, -9114, -9217, -9319, -9422, +-9525, -9627, -9730, -9832, -9934, +-10037, -10139, -10241, -10342, -10444, +-10546, -10647, -10748, -10850, -10951, +-11052, -11153, -11253, -11354, -11455, +-11555, -11655, -11756, -11856, -11955, +-12055, -12155, -12254, -12354, -12453, +-12552, -12651, -12750, -12849, -12947, +-13046, -13144, -13242, -13340, -13438, +-13536, -13633, -13731, -13828, -13925, +-14022, -14119, -14216, -14312, -14409, +-14505, -14601, -14697, -14793, -14888, +-14984, -15079, -15174, -15269, -15364, +-15459, -15553, -15647, -15741, -15835, +-15929, -16023, -16116, -16210, -16303, +-16396, -16488, -16581, -16673, -16766, +-16858, -16949, -17041, -17133, -17224, +-17315, -17406, -17497, -17587, -17678, +-17768, -17858, -17948, -18037, -18127, +-18216, -18305, -18394, -18483, -18571, +-18659, -18747, -18835, -18923, -19010, +-19098, -19185, -19271, -19358, -19444, +-19531, -19617, -19702, -19788, -19873, +-19959, -20043, -20128, -20213, -20297, +-20381, -20465, -20549, -20632, -20715, +-20798, -20881, -20963, -21046, -21128, +-21210, -21291, -21373, -21454, -21535, +-21616, -21696, -21776, -21856, -21936, +-22016, -22095, -22174, -22253, -22331, +-22410, -22488, -22566, -22643, -22721, +-22798, -22875, -22951, -23028, -23104, +-23180, -23256, -23331, -23406, -23481, +-23556, -23630, -23704, -23778, -23852, +-23925, -23998, -24071, -24144, -24216, +-24288, -24360, -24432, -24503, -24574, +-24645, -24716, -24786, -24856, -24926, +-24995, -25064, -25133, -25202, -25270, +-25339, -25406, -25474, -25541, -25608, +-25675, -25742, -25808, -25874, -25939, +-26005, -26070, -26135, -26199, -26264, +-26327, -26391, -26455, -26518, -26581, +-26643, -26705, -26767, -26829, -26891, +-26952, -27013, -27073, -27133, -27193, +-27253, -27312, -27372, -27430, -27489, +-27547, -27605, -27663, -27720, -27777, +-27834, -27890, -27946, -28002, -28058, +-28113, -28168, -28223, -28277, -28331, +-28385, -28438, -28491, -28544, -28596, +-28649, -28701, -28752, -28803, -28854, +-28905, -28955, -29006, -29055, -29105, +-29154, -29203, -29251, -29299, -29347, +-29395, -29442, -29489, -29535, -29582, +-29628, -29673, -29719, -29764, -29808, +-29853, -29897, -29941, -29984, -30027, +-30070, -30112, -30154, -30196, -30238, +-30279, -30320, -30360, -30400, -30440, +-30480, -30519, -30558, -30596, -30635, +-30672, -30710, -30747, -30784, -30821, +-30857, -30893, -30929, -30964, -30999, +-31033, -31068, -31102, -31135, -31168, +-31201, -31234, -31266, -31298, -31330, +-31361, -31392, -31422, -31453, -31483, +-31512, -31541, -31570, -31599, -31627, +-31655, -31682, -31710, -31737, -31763, +-31789, -31815, -31841, -31866, -31891, +-31915, -31939, -31963, -31986, -32010, +-32032, -32055, -32077, -32099, -32120, +-32141, -32162, -32182, -32202, -32222, +-32241, -32260, -32279, -32297, -32315, +-32333, -32350, -32367, -32383, -32399, +-32415, -32431, -32446, -32461, -32475, +-32489, -32503, -32517, -32530, -32542, +-32555, -32567, -32579, -32590, -32601, +-32612, -32622, -32632, -32641, -32651, +-32659, -32668, -32676, -32684, -32692, +-32699, -32706, -32712, -32718, -32724, +-32729, -32734, -32739, -32743, -32747, +-32751, -32754, -32757, -32760, -32762, +-32764, -32765, -32767, -32767, -32767, +32767, 32767, 32765, 32761, 32756, +32750, 32742, 32732, 32722, 32710, +32696, 32681, 32665, 32647, 32628, +32608, 32586, 32562, 32538, 32512, +32484, 32455, 32425, 32393, 32360, +32326, 32290, 32253, 32214, 32174, +32133, 32090, 32046, 32001, 31954, +31906, 31856, 31805, 31753, 31700, +31645, 31588, 31530, 31471, 31411, +31349, 31286, 31222, 31156, 31089, +31020, 30951, 30880, 30807, 30733, +30658, 30582, 30504, 30425, 30345, +30263, 30181, 30096, 30011, 29924, +29836, 29747, 29656, 29564, 29471, +29377, 29281, 29184, 29086, 28987, +28886, 28784, 28681, 28577, 28471, +28365, 28257, 28147, 28037, 27925, +27812, 27698, 27583, 27467, 27349, +27231, 27111, 26990, 26868, 26744, +26620, 26494, 26367, 26239, 26110, +25980, 25849, 25717, 25583, 25449, +25313, 25176, 25038, 24900, 24760, +24619, 24477, 24333, 24189, 24044, +23898, 23751, 23602, 23453, 23303, +23152, 22999, 22846, 22692, 22537, +22380, 22223, 22065, 21906, 21746, +21585, 21423, 21261, 21097, 20933, +20767, 20601, 20434, 20265, 20096, +19927, 19756, 19584, 19412, 19239, +19065, 18890, 18714, 18538, 18361, +18183, 18004, 17824, 17644, 17463, +17281, 17098, 16915, 16731, 16546, +16361, 16175, 15988, 15800, 15612, +15423, 15234, 15043, 14852, 14661, +14469, 14276, 14083, 13889, 13694, +13499, 13303, 13107, 12910, 12713, +12515, 12317, 12118, 11918, 11718, +11517, 11316, 11115, 10913, 10710, +10508, 10304, 10100, 9896, 9691, +9486, 9281, 9075, 8869, 8662, +8455, 8248, 8040, 7832, 7623, +7415, 7206, 6996, 6787, 6577, +6366, 6156, 5945, 5734, 5523, +5311, 5100, 4888, 4675, 4463, +4251, 4038, 3825, 3612, 3399, +3185, 2972, 2758, 2544, 2330, +2116, 1902, 1688, 1474, 1260, +1045, 831, 617, 402, 188, +-27, -241, -456, -670, -885, +-1099, -1313, -1528, -1742, -1956, +-2170, -2384, -2598, -2811, -3025, +-3239, -3452, -3665, -3878, -4091, +-4304, -4516, -4728, -4941, -5153, +-5364, -5576, -5787, -5998, -6209, +-6419, -6629, -6839, -7049, -7258, +-7467, -7676, -7884, -8092, -8300, +-8507, -8714, -8920, -9127, -9332, +-9538, -9743, -9947, -10151, -10355, +-10558, -10761, -10963, -11165, -11367, +-11568, -11768, -11968, -12167, -12366, +-12565, -12762, -12960, -13156, -13352, +-13548, -13743, -13937, -14131, -14324, +-14517, -14709, -14900, -15091, -15281, +-15470, -15659, -15847, -16035, -16221, +-16407, -16593, -16777, -16961, -17144, +-17326, -17508, -17689, -17869, -18049, +-18227, -18405, -18582, -18758, -18934, +-19108, -19282, -19455, -19627, -19799, +-19969, -20139, -20308, -20475, -20642, +-20809, -20974, -21138, -21301, -21464, +-21626, -21786, -21946, -22105, -22263, +-22420, -22575, -22730, -22884, -23037, +-23189, -23340, -23490, -23640, -23788, +-23935, -24080, -24225, -24369, -24512, +-24654, -24795, -24934, -25073, -25211, +-25347, -25482, -25617, -25750, -25882, +-26013, -26143, -26272, -26399, -26526, +-26651, -26775, -26898, -27020, -27141, +-27260, -27379, -27496, -27612, -27727, +-27841, -27953, -28065, -28175, -28284, +-28391, -28498, -28603, -28707, -28810, +-28911, -29012, -29111, -29209, -29305, +-29401, -29495, -29587, -29679, -29769, +-29858, -29946, -30032, -30118, -30201, +-30284, -30365, -30445, -30524, -30601, +-30677, -30752, -30825, -30897, -30968, +-31038, -31106, -31172, -31238, -31302, +-31365, -31426, -31486, -31545, -31602, +-31658, -31713, -31766, -31818, -31869, +-31918, -31966, -32012, -32058, -32101, +-32144, -32185, -32224, -32262, -32299, +-32335, -32369, -32401, -32433, -32463, +-32491, -32518, -32544, -32568, -32591, +-32613, -32633, -32652, -32669, -32685, +-32700, -32713, -32724, -32735, -32744, +-32751, -32757, -32762, -32766, -32767, +32767, 32764, 32755, 32741, 32720, +32694, 32663, 32626, 32583, 32535, +32481, 32421, 32356, 32286, 32209, +32128, 32041, 31948, 31850, 31747, +31638, 31523, 31403, 31278, 31148, +31012, 30871, 30724, 30572, 30415, +30253, 30086, 29913, 29736, 29553, +29365, 29172, 28974, 28771, 28564, +28351, 28134, 27911, 27684, 27452, +27216, 26975, 26729, 26478, 26223, +25964, 25700, 25432, 25159, 24882, +24601, 24315, 24026, 23732, 23434, +23133, 22827, 22517, 22204, 21886, +21565, 21240, 20912, 20580, 20244, +19905, 19563, 19217, 18868, 18516, +18160, 17802, 17440, 17075, 16708, +16338, 15964, 15588, 15210, 14829, +14445, 14059, 13670, 13279, 12886, +12490, 12093, 11693, 11291, 10888, +10482, 10075, 9666, 9255, 8843, +8429, 8014, 7597, 7180, 6760, +6340, 5919, 5496, 5073, 4649, +4224, 3798, 3372, 2945, 2517, +2090, 1661, 1233, 804, 375, +-54, -483, -911, -1340, -1768, +-2197, -2624, -3052, -3479, -3905, +-4330, -4755, -5179, -5602, -6024, +-6445, -6865, -7284, -7702, -8118, +-8533, -8946, -9358, -9768, -10177, +-10584, -10989, -11392, -11793, -12192, +-12589, -12984, -13377, -13767, -14155, +-14541, -14924, -15305, -15683, -16058, +-16430, -16800, -17167, -17531, -17892, +-18249, -18604, -18956, -19304, -19649, +-19990, -20329, -20663, -20994, -21322, +-21646, -21966, -22282, -22595, -22904, +-23208, -23509, -23806, -24099, -24387, +-24672, -24952, -25228, -25499, -25766, +-26029, -26288, -26541, -26791, -27035, +-27275, -27511, -27741, -27967, -28188, +-28405, -28616, -28823, -29024, -29221, +-29412, -29599, -29780, -29957, -30128, +-30294, -30455, -30611, -30761, -30906, +-31046, -31181, -31310, -31434, -31552, +-31665, -31773, -31875, -31972, -32063, +-32149, -32229, -32304, -32373, -32437, +-32495, -32547, -32594, -32635, -32671, +-32701, -32726, -32745, -32758, -32766, +32767, 32754, 32717, 32658, 32577, +32473, 32348, 32200, 32029, 31837, +31624, 31388, 31131, 30853, 30553, +30232, 29891, 29530, 29148, 28746, +28324, 27883, 27423, 26944, 26447, +25931, 25398, 24847, 24279, 23695, +23095, 22478, 21846, 21199, 20538, +19863, 19174, 18472, 17757, 17030, +16291, 15541, 14781, 14010, 13230, +12441, 11643, 10837, 10024, 9204, +8377, 7545, 6708, 5866, 5020, +4171, 3319, 2464, 1608, 751, +-107, -965, -1822, -2678, -3532, +-4383, -5232, -6077, -6918, -7754, +-8585, -9409, -10228, -11039, -11843, +-12639, -13426, -14204, -14972, -15730, +-16477, -17213, -17937, -18648, -19347, +-20033, -20705, -21363, -22006, -22634, +-23246, -23843, -24423, -24986, -25533, +-26062, -26573, -27066, -27540, -27995, +-28431, -28848, -29245, -29622, -29979, +-30315, -30630, -30924, -31197, -31449, +-31679, -31887, -32074, -32239, -32381, +-32501, -32600, -32675, -32729, -32759, +}; #endif static const CELTMode mode48000_960_120 ICONST_ATTR = { diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c index af991bb052..b047b22774 100644 --- a/lib/rbcodec/codecs/libopus/celt/vq.c +++ b/lib/rbcodec/codecs/libopus/celt/vq.c @@ -37,19 +37,27 @@ #include "os_support.h" #include "bands.h" #include "rate.h" +#include "pitch.h" +#if defined(MIPSr1_ASM) +#include "mips/vq_mipsr1.h" +#endif + +#ifndef OVERRIDE_vq_exp_rotation1 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) { int i; + opus_val16 ms; celt_norm *Xptr; Xptr = X; + ms = NEG16(s); for (i=0;i=0;i--) @@ -57,10 +65,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ celt_norm x1, x2; x1 = Xptr[0]; x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); - *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); } } +#endif /* OVERRIDE_vq_exp_rotation1 */ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) { @@ -91,7 +100,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int } /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for extract_collapse_mask().*/ - len /= stride; + len = celt_udiv(len, stride); for (i=0;i0, "alg_unquant() needs at least one pulse"); celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); -/* ALLOC(iy, N, int); */ - decode_pulses(iy, N, K, dec); - Ryy = 0; - i=0; - do { - Ryy = MAC16_16(Ryy, iy[i], iy[i]); - } while (++i < N); + ALLOC(iy, N, int); + Ryy = decode_pulses(iy, N, K, dec); normalise_residual(iy, X, N, Ryy, gain); exp_rotation(X, N, -1, B, K, spread); collapse_mask = extract_collapse_mask(iy, N, B); -/* RESTORE_STACK; */ + RESTORE_STACK; return collapse_mask; } +#ifndef OVERRIDE_renormalise_vector void renormalise_vector(celt_norm *X, int N, opus_val16 gain) { int i; #ifdef FIXED_POINT int k; #endif - opus_val32 E = EPSILON; + opus_val32 E; opus_val16 g; opus_val32 t; - celt_norm *xptr = X; - for (i=0;i>1; #endif @@ -377,8 +375,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain) } /*return celt_sqrt(E);*/ } +#endif /* OVERRIDE_renormalise_vector */ -int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) +int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N) { int i; int itheta; @@ -397,14 +396,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) Eside = MAC16_16(Eside, s, s); } } else { - for (i=0;i 4 diff --git a/lib/rbcodec/codecs/libopus/opus.c b/lib/rbcodec/codecs/libopus/opus.c index 989c6537cb..0526f8b414 100644 --- a/lib/rbcodec/codecs/libopus/opus.c +++ b/lib/rbcodec/codecs/libopus/opus.c @@ -168,6 +168,27 @@ static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *siz } } +int opus_packet_get_samples_per_frame(const unsigned char *data, + opus_int32 Fs) +{ + int audiosize; + if (data[0]&0x80) + { + audiosize = ((data[0]>>3)&0x3); + audiosize = (Fs<>3)&0x3); + if (audiosize == 3) + audiosize = Fs*60/1000; + else + audiosize = (Fs< 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; -} -#endif - int opus_decoder_get_size(int channels) { @@ -222,7 +216,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, VARDECL(opus_val16, pcm_transition_silk); int pcm_transition_celt_size; VARDECL(opus_val16, pcm_transition_celt); - opus_val16 *pcm_transition = NULL; /* Silence false positive "may be used uninitialized" warning */ + opus_val16 *pcm_transition=NULL; int redundant_audio_size; VARDECL(opus_val16, redundant_audio); @@ -237,6 +231,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, int F2_5, F5, F10, F20; const opus_val16 *window; opus_uint32 redundant_rng = 0; + int celt_accum; ALLOC_STACK; silk_dec = (char*)st+st->silk_dec_offset; @@ -302,6 +297,14 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } } + /* In fixed-point, we can tell CELT to do the accumulation on top of the + SILK PCM buffer. This saves some stack space. */ +#ifdef FIXED_POINT + celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10); +#else + celt_accum = 0; +#endif + pcm_transition_silk_size = ALLOC_NONE; pcm_transition_celt_size = ALLOC_NONE; if (data!=NULL && st->prev_mode > 0 && ( @@ -332,14 +335,20 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } /* Don't allocate any memory when in CELT-only mode */ - pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; + pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; ALLOC(pcm_silk, pcm_silk_size, opus_int16); /* SILK processing */ if (mode != MODE_CELT_ONLY) { int lost_flag, decoded_samples; - opus_int16 *pcm_ptr = pcm_silk; + opus_int16 *pcm_ptr; +#ifdef FIXED_POINT + if (celt_accum) + pcm_ptr = pcm; + else +#endif + pcm_ptr = pcm_silk; if (st->prev_mode==MODE_CELT_ONLY) silk_InitDecoder( silk_dec ); @@ -469,7 +478,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, - redundant_audio, F5, NULL); + redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); } @@ -484,25 +493,28 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); /* Decode CELT */ celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data, - len, pcm, celt_frame_size, &dec); + len, pcm, celt_frame_size, &dec, celt_accum); } else { unsigned char silence[2] = {0xFF, 0xFF}; - for (i=0;ichannels;i++) - pcm[i] = 0; + if (!celt_accum) + { + for (i=0;ichannels;i++) + pcm[i] = 0; + } /* For hybrid -> SILK transitions, we let the CELT MDCT do a fade-out by decoding a silence frame */ if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL); + celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); } } - if (mode != MODE_CELT_ONLY) + if (mode != MODE_CELT_ONLY && !celt_accum) { #ifdef FIXED_POINT for (i=0;ichannels;i++) - pcm[i] = SAT16(pcm[i] + pcm_silk[i]); + pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i])); #else for (i=0;ichannels;i++) pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); @@ -521,7 +533,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL); + celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); @@ -717,6 +729,7 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data, { VARDECL(opus_int16, out); int ret, i; + int nb_samples; ALLOC_STACK; if(frame_size<=0) @@ -724,6 +737,14 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data, RESTORE_STACK; return OPUS_BAD_ARG; } + if (data != NULL && len > 0 && !decode_fec) + { + nb_samples = opus_decoder_get_nb_samples(st, data, len); + if (nb_samples>0) + frame_size = IMIN(frame_size, nb_samples); + else + return OPUS_INVALID_PACKET; + } ALLOC(out, frame_size*st->channels, opus_int16); ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0); @@ -744,6 +765,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data, { VARDECL(float, out); int ret, i; + int nb_samples; ALLOC_STACK; if(frame_size<=0) @@ -752,6 +774,14 @@ int opus_decode(OpusDecoder *st, const unsigned char *data, return OPUS_BAD_ARG; } + if (data != NULL && len > 0 && !decode_fec) + { + nb_samples = opus_decoder_get_nb_samples(st, data, len); + if (nb_samples>0) + frame_size = IMIN(frame_size, nb_samples); + else + return OPUS_INVALID_PACKET; + } ALLOC(out, frame_size*st->channels, float); ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); @@ -911,27 +941,6 @@ int opus_packet_get_bandwidth(const unsigned char *data) return bandwidth; } -int opus_packet_get_samples_per_frame(const unsigned char *data, - opus_int32 Fs) -{ - int audiosize; - if (data[0]&0x80) - { - audiosize = ((data[0]>>3)&0x3); - audiosize = (Fs<>3)&0x3); - if (audiosize == 3) - audiosize = Fs*60/1000; - else - audiosize = (Fs<Fs value passed to opus_encoder_init() - * or opus_decoder_init(). - * @param[out] x opus_int32 *: Sampling rate of encoder or decoder. - * @hideinitializer - */ -#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) - /** Gets the total samples of delay added by the entire codec. * This can be queried by the encoder and then the provided number of samples can be * skipped on from the start of the decoder's output to provide time aligned input @@ -545,11 +537,6 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x) -/** Gets the duration (in samples) of the last packet successfully decoded or concealed. - * @param[out] x opus_int32 *: Number of samples (at current sampling rate). - * @hideinitializer */ -#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) - /** Configures the encoder's use of variable duration frames. * When variable duration is enabled, the encoder is free to use a shorter frame * size than the one requested in the opus_encode*() call. @@ -649,18 +636,6 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x) -/** Gets the pitch of the last decoded frame, if available. - * This can be used for any post-processing algorithm requiring the use of pitch, - * e.g. time stretching/shortening. If the last frame was not voiced, or if the - * pitch was not coded in the frame, then zero is returned. - * - * This CTL is only implemented for decoder instances. - * - * @param[out] x opus_int32 *: pitch period at 48 kHz (or 0 if not available) - * - * @hideinitializer */ -#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) - /** Gets the encoder's configured bandpass or the decoder's last bandpass. * @see OPUS_SET_BANDWIDTH * @param[out] x opus_int32 *: Returns one of the following values: @@ -675,6 +650,14 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) +/** Gets the sampling rate the encoder or decoder was initialized with. + * This simply returns the Fs value passed to opus_encoder_init() + * or opus_decoder_init(). + * @param[out] x opus_int32 *: Sampling rate of encoder or decoder. + * @hideinitializer + */ +#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) + /**@}*/ /** @defgroup opus_decoderctls Decoder related CTLs @@ -699,6 +682,23 @@ extern "C" { * @hideinitializer */ #define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x) +/** Gets the duration (in samples) of the last packet successfully decoded or concealed. + * @param[out] x opus_int32 *: Number of samples (at current sampling rate). + * @hideinitializer */ +#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) + +/** Gets the pitch of the last decoded frame, if available. + * This can be used for any post-processing algorithm requiring the use of pitch, + * e.g. time stretching/shortening. If the last frame was not voiced, or if the + * pitch was not coded in the frame, then zero is returned. + * + * This CTL is only implemented for decoder instances. + * + * @param[out] x opus_int32 *: pitch period at 48 kHz (or 0 if not available) + * + * @hideinitializer */ +#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) + /**@}*/ /** @defgroup opus_libinfo Opus library information functions diff --git a/lib/rbcodec/codecs/libopus/opus_private.h b/lib/rbcodec/codecs/libopus/opus_private.h index 83225f2b6c..3177f52442 100644 --- a/lib/rbcodec/codecs/libopus/opus_private.h +++ b/lib/rbcodec/codecs/libopus/opus_private.h @@ -86,10 +86,6 @@ typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); -int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, - int bitrate, opus_val16 tonality, float *mem, int buffering, - downmix_func downmix); - int encode_size(int size, unsigned char *data); opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); @@ -104,7 +100,8 @@ opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, - const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix); + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, + int analysis_channels, downmix_func downmix, int float_api); int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited, diff --git a/lib/rbcodec/codecs/libopus/silk/CNG.c b/lib/rbcodec/codecs/libopus/silk/CNG.c index 8481d95dbe..bb30a7ccf2 100644 --- a/lib/rbcodec/codecs/libopus/silk/CNG.c +++ b/lib/rbcodec/codecs/libopus/silk/CNG.c @@ -34,7 +34,7 @@ POSSIBILITY OF SUCH DAMAGE. /* Generates excitation for CNG LPC synthesis */ static OPUS_INLINE void silk_CNG_exc( - opus_int32 residual_Q10[], /* O CNG residual signal Q10 */ + opus_int32 exc_Q10[], /* O CNG excitation signal Q10 */ opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */ opus_int32 Gain_Q16, /* I Gain to apply */ opus_int length, /* I Length */ @@ -55,7 +55,7 @@ static OPUS_INLINE void silk_CNG_exc( idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); silk_assert( idx >= 0 ); silk_assert( idx <= CNG_BUF_MASK_MAX ); - residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); + exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); } *rand_seed = seed; } @@ -85,7 +85,7 @@ void silk_CNG( ) { opus_int i, subfr; - opus_int32 sum_Q6, max_Gain_Q16; + opus_int32 sum_Q6, max_Gain_Q16, gain_Q16; opus_int16 A_Q12[ MAX_LPC_ORDER ]; silk_CNG_struct *psCNG = &psDec->sCNG; SAVE_STACK; @@ -125,11 +125,20 @@ void silk_CNG( /* Add CNG when packet is lost or during DTX */ if( psDec->lossCnt ) { VARDECL( opus_int32, CNG_sig_Q10 ); - ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 ); /* Generate CNG excitation */ - silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed ); + gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] ); + if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) { + gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 ); + gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); + gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 ); + } else { + gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 ); + gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); + gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 ); + } + silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed ); /* Convert CNG NLSF to filter representation */ silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); @@ -162,7 +171,7 @@ void silk_CNG( /* Update states */ CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 ); - frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) ); + frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) ); } silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); } else { diff --git a/lib/rbcodec/codecs/libopus/silk/PLC.c b/lib/rbcodec/codecs/libopus/silk/PLC.c index 01f40014c4..8b0a8fe57d 100644 --- a/lib/rbcodec/codecs/libopus/silk/PLC.c +++ b/lib/rbcodec/codecs/libopus/silk/PLC.c @@ -165,6 +165,30 @@ static OPUS_INLINE void silk_PLC_update( psPLC->nb_subfr = psDec->nb_subfr; } +static OPUS_INLINE void silk_PLC_energy(opus_int32 *energy1, opus_int *shift1, opus_int32 *energy2, opus_int *shift2, + const opus_int32 *exc_Q14, const opus_int32 *prevGain_Q10, int subfr_length, int nb_subfr) +{ + int i, k; + VARDECL( opus_int16, exc_buf ); + opus_int16 *exc_buf_ptr; + SAVE_STACK; + ALLOC( exc_buf, 2*subfr_length, opus_int16 ); + /* Find random noise component */ + /* Scale previous excitation signal */ + exc_buf_ptr = exc_buf; + for( k = 0; k < 2; k++ ) { + for( i = 0; i < subfr_length; i++ ) { + exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( + silk_SMULWW( exc_Q14[ i + ( k + nb_subfr - 2 ) * subfr_length ], prevGain_Q10[ k ] ), 8 ) ); + } + exc_buf_ptr += subfr_length; + } + /* Find the subframe with lowest energy of the last two and use that as random noise generator */ + silk_sum_sqr_shift( energy1, shift1, exc_buf, subfr_length ); + silk_sum_sqr_shift( energy2, shift2, &exc_buf[ subfr_length ], subfr_length ); + RESTORE_STACK; +} + static OPUS_INLINE void silk_PLC_conceal( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I/O Decoder control */ @@ -177,19 +201,26 @@ static OPUS_INLINE void silk_PLC_conceal( opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr; opus_int32 LPC_pred_Q10, LTP_pred_Q12; opus_int16 rand_scale_Q14; - opus_int16 *B_Q14, *exc_buf_ptr; + opus_int16 *B_Q14; opus_int32 *sLPC_Q14_ptr; - VARDECL( opus_int16, exc_buf ); opus_int16 A_Q12[ MAX_LPC_ORDER ]; +#ifdef SMALL_FOOTPRINT + opus_int16 *sLTP; +#else VARDECL( opus_int16, sLTP ); +#endif VARDECL( opus_int32, sLTP_Q14 ); silk_PLC_struct *psPLC = &psDec->sPLC; opus_int32 prevGain_Q10[2]; SAVE_STACK; - ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 ); - ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); +#ifdef SMALL_FOOTPRINT + /* Ugly hack that breaks aliasing rules to save stack: put sLTP at the very end of sLTP_Q14. */ + sLTP = ((opus_int16*)&sLTP_Q14[psDec->ltp_mem_length + psDec->frame_length])-psDec->ltp_mem_length; +#else + ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); +#endif prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6); prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6); @@ -198,19 +229,7 @@ static OPUS_INLINE void silk_PLC_conceal( silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) ); } - /* Find random noise component */ - /* Scale previous excitation signal */ - exc_buf_ptr = exc_buf; - for( k = 0; k < 2; k++ ) { - for( i = 0; i < psPLC->subfr_length; i++ ) { - exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( - silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) ); - } - exc_buf_ptr += psPLC->subfr_length; - } - /* Find the subframe with lowest energy of the last two and use that as random noise generator */ - silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length ); - silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length ); + silk_PLC_energy(&energy1, &shift1, &energy2, &shift2, psDec->exc_Q14, prevGain_Q10, psDec->subfr_length, psDec->nb_subfr); if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) { /* First sub-frame has lowest energy */ diff --git a/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h b/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h index 1b58057910..4be0985435 100644 --- a/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h +++ b/lib/rbcodec/codecs/libopus/silk/SigProc_FIX.h @@ -587,6 +587,11 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) #include "arm/SigProc_FIX_armv5e.h" #endif +#if defined(MIPSr1_ASM) +#include "mips/sigproc_fix_mipsr1.h" +#endif + + #ifdef __cplusplus } #endif diff --git a/lib/rbcodec/codecs/libopus/silk/code_signs.c b/lib/rbcodec/codecs/libopus/silk/code_signs.c index 561043c739..6ac25cb389 100644 --- a/lib/rbcodec/codecs/libopus/silk/code_signs.c +++ b/lib/rbcodec/codecs/libopus/silk/code_signs.c @@ -76,7 +76,7 @@ void silk_encode_signs( /* Decodes signs of excitation */ void silk_decode_signs( ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int pulses[], /* I/O pulse signal */ + opus_int16 pulses[], /* I/O pulse signal */ opus_int length, /* I length of input */ const opus_int signalType, /* I Signal type */ const opus_int quantOffsetType, /* I Quantization offset type */ @@ -85,7 +85,7 @@ void silk_decode_signs( { opus_int i, j, p; opus_uint8 icdf[ 2 ]; - opus_int *q_ptr; + opus_int16 *q_ptr; const opus_uint8 *icdf_ptr; icdf[ 1 ] = 0; diff --git a/lib/rbcodec/codecs/libopus/silk/dec_API.c b/lib/rbcodec/codecs/libopus/silk/dec_API.c index 4cbcf71514..1087c6726a 100644 --- a/lib/rbcodec/codecs/libopus/silk/dec_API.c +++ b/lib/rbcodec/codecs/libopus/silk/dec_API.c @@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "API.h" #include "main.h" #include "stack_alloc.h" +#include "os_support.h" /************************/ /* Decoder Super Struct */ @@ -90,7 +91,8 @@ opus_int silk_Decode( /* O Returns error co opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; opus_int32 nSamplesOutDec, LBRR_symbol; opus_int16 *samplesOut1_tmp[ 2 ]; - VARDECL( opus_int16, samplesOut1_tmp_storage ); + VARDECL( opus_int16, samplesOut1_tmp_storage1 ); + VARDECL( opus_int16, samplesOut1_tmp_storage2 ); VARDECL( opus_int16, samplesOut2_tmp ); opus_int32 MS_pred_Q13[ 2 ] = { 0 }; opus_int16 *resample_out_ptr; @@ -98,6 +100,7 @@ opus_int silk_Decode( /* O Returns error co silk_decoder_state *channel_state = psDec->channel_state; opus_int has_side; opus_int stereo_to_mono; + int delay_stack_alloc; SAVE_STACK; silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); @@ -196,7 +199,7 @@ opus_int silk_Decode( /* O Returns error co for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { for( n = 0; n < decControl->nChannelsInternal; n++ ) { if( channel_state[ n ].LBRR_flags[ i ] ) { - opus_int pulses[ MAX_FRAME_LENGTH ]; + opus_int16 pulses[ MAX_FRAME_LENGTH ]; opus_int condCoding; if( decControl->nChannelsInternal == 2 && n == 0 ) { @@ -251,13 +254,22 @@ opus_int silk_Decode( /* O Returns error co psDec->channel_state[ 1 ].first_frame_after_reset = 1; } - ALLOC( samplesOut1_tmp_storage, - decControl->nChannelsInternal*( - channel_state[ 0 ].frame_length + 2 ), + /* Check if the temp buffer fits into the output PCM buffer. If it fits, + we can delay allocating the temp buffer until after the SILK peak stack + usage. We need to use a < and not a <= because of the two extra samples. */ + delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal + < decControl->API_sampleRate*decControl->nChannelsAPI; + ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE + : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), opus_int16 ); - samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; - samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage - + channel_state[ 0 ].frame_length + 2; + if ( delay_stack_alloc ) + { + samplesOut1_tmp[ 0 ] = samplesOut; + samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2; + } else { + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; + } if( lostFlag == FLAG_DECODE_NORMAL ) { has_side = !decode_only_middle; @@ -312,6 +324,15 @@ opus_int silk_Decode( /* O Returns error co resample_out_ptr = samplesOut; } + ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc + ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ) + : ALLOC_NONE, + opus_int16 ); + if ( delay_stack_alloc ) { + OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2)); + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2; + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2; + } for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { /* Resample decoded signal to API_sampleRate */ diff --git a/lib/rbcodec/codecs/libopus/silk/decode_core.c b/lib/rbcodec/codecs/libopus/silk/decode_core.c index 87fbd5de9f..af68b75da9 100644 --- a/lib/rbcodec/codecs/libopus/silk/decode_core.c +++ b/lib/rbcodec/codecs/libopus/silk/decode_core.c @@ -39,7 +39,7 @@ void silk_decode_core( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I Decoder control */ opus_int16 xq[], /* O Decoded speech */ - const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ + const opus_int16 pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ ) { opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType; @@ -49,7 +49,7 @@ void silk_decode_core( opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10; opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14; VARDECL( opus_int32, res_Q14 ); -/* VARDECL( opus_int32, sLPC_Q14 ); */ + VARDECL( opus_int32, sLPC_Q14 ); SAVE_STACK; silk_assert( psDec->prev_gain_Q16 != 0 ); @@ -57,8 +57,7 @@ void silk_decode_core( ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); ALLOC( res_Q14, psDec->subfr_length, opus_int32 ); -/* ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); */ - opus_int32 sLPC_Q14[psDec->subfr_length + MAX_LPC_ORDER]; /* worst case is 80 + 16 */ + ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ]; diff --git a/lib/rbcodec/codecs/libopus/silk/decode_frame.c b/lib/rbcodec/codecs/libopus/silk/decode_frame.c index abc00a3d54..6a7cffbbe0 100644 --- a/lib/rbcodec/codecs/libopus/silk/decode_frame.c +++ b/lib/rbcodec/codecs/libopus/silk/decode_frame.c @@ -47,13 +47,10 @@ opus_int silk_decode_frame( { VARDECL( silk_decoder_control, psDecCtrl ); opus_int L, mv_len, ret = 0; - VARDECL( opus_int, pulses ); SAVE_STACK; L = psDec->frame_length; ALLOC( psDecCtrl, 1, silk_decoder_control ); - ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & - ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int ); psDecCtrl->LTP_scale_Q14 = 0; /* Safety checks */ @@ -62,6 +59,9 @@ opus_int silk_decode_frame( if( lostFlag == FLAG_DECODE_NORMAL || ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) ) { + VARDECL( opus_int16, pulses ); + ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & + ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 ); /*********************************************/ /* Decode quantization indices of side info */ /*********************************************/ @@ -107,16 +107,16 @@ opus_int silk_decode_frame( silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); - /****************************************************************/ - /* Ensure smooth connection of extrapolated and good frames */ - /****************************************************************/ - silk_PLC_glue_frames( psDec, pOut, L ); - /************************************************/ /* Comfort noise generation / estimation */ /************************************************/ silk_CNG( psDec, psDecCtrl, pOut, L ); + /****************************************************************/ + /* Ensure smooth connection of extrapolated and good frames */ + /****************************************************************/ + silk_PLC_glue_frames( psDec, pOut, L ); + /* Update some decoder state variables */ psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; diff --git a/lib/rbcodec/codecs/libopus/silk/decode_pulses.c b/lib/rbcodec/codecs/libopus/silk/decode_pulses.c index e8a87c2ab7..1e14bc37b4 100644 --- a/lib/rbcodec/codecs/libopus/silk/decode_pulses.c +++ b/lib/rbcodec/codecs/libopus/silk/decode_pulses.c @@ -36,7 +36,7 @@ POSSIBILITY OF SUCH DAMAGE. /*********************************************/ void silk_decode_pulses( ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int pulses[], /* O Excitation signal */ + opus_int16 pulses[], /* O Excitation signal */ const opus_int signalType, /* I Sigtype */ const opus_int quantOffsetType, /* I quantOffsetType */ const opus_int frame_length /* I Frame length */ @@ -44,7 +44,7 @@ void silk_decode_pulses( { opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex; opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ]; - opus_int *pulses_ptr; + opus_int16 *pulses_ptr; const opus_uint8 *cdf_ptr; /*********************/ @@ -84,7 +84,7 @@ void silk_decode_pulses( if( sum_pulses[ i ] > 0 ) { silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] ); } else { - silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) ); + silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) ); } } diff --git a/lib/rbcodec/codecs/libopus/silk/macros.h b/lib/rbcodec/codecs/libopus/silk/macros.h index 482dc3c6eb..05623b5df8 100644 --- a/lib/rbcodec/codecs/libopus/silk/macros.h +++ b/lib/rbcodec/codecs/libopus/silk/macros.h @@ -79,17 +79,24 @@ POSSIBILITY OF SUCH DAMAGE. (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) ) -#include "ecintrin.h" +#if defined(MIPSr1_ASM) +#include "mips/macros_mipsr1.h" +#endif +#include "ecintrin.h" +#ifndef OVERRIDE_silk_CLZ16 static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16) { return 32 - EC_ILOG(in16<<16|0x8000); } +#endif +#ifndef OVERRIDE_silk_CLZ32 static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) { return in32 ? 32 - EC_ILOG(in32) : 32; } +#endif /* Row based */ #define matrix_ptr(Matrix_base_adr, row, column, N) \ diff --git a/lib/rbcodec/codecs/libopus/silk/main.h b/lib/rbcodec/codecs/libopus/silk/main.h index 2bdf89784d..77524f5b57 100644 --- a/lib/rbcodec/codecs/libopus/silk/main.h +++ b/lib/rbcodec/codecs/libopus/silk/main.h @@ -116,7 +116,7 @@ void silk_encode_signs( /* Decodes signs of excitation */ void silk_decode_signs( ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int pulses[], /* I/O pulse signal */ + opus_int16 pulses[], /* I/O pulse signal */ opus_int length, /* I length of input */ const opus_int signalType, /* I Signal type */ const opus_int quantOffsetType, /* I Quantization offset type */ @@ -161,7 +161,7 @@ void silk_shell_encoder( /* Shell decoder, operates on one shell code frame of 16 pulses */ void silk_shell_decoder( - opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ + opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ ec_dec *psRangeDec, /* I/O Compressor data structure */ const opus_int pulses4 /* I number of pulses per pulse-subframe */ ); @@ -397,13 +397,13 @@ void silk_decode_core( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I Decoder control */ opus_int16 xq[], /* O Decoded speech */ - const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ + const opus_int16 pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ ); /* Decode quantization indices of excitation (Shell coding) */ void silk_decode_pulses( ec_dec *psRangeDec, /* I/O Compressor data structure */ - opus_int pulses[], /* O Excitation signal */ + opus_int16 pulses[], /* O Excitation signal */ const opus_int signalType, /* I Sigtype */ const opus_int quantOffsetType, /* I quantOffsetType */ const opus_int frame_length /* I Frame length */ diff --git a/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c b/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c index c7b4f6ed5e..6b2b3a2e18 100644 --- a/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c +++ b/lib/rbcodec/codecs/libopus/silk/resampler_private_IIR_FIR.c @@ -72,13 +72,10 @@ void silk_resampler_private_IIR_FIR( silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; opus_int32 nSamplesIn; opus_int32 max_index_Q16, index_increment_Q16; -/* VARDECL( opus_int16, buf ); - SAVE_STACK; */ + VARDECL( opus_int16, buf ); + SAVE_STACK; -/* ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); */ - - /* worst case = 2*16*10+8 = 328 * 2 = 656bytes */ - opus_int16 buf[2 * S->batchSize + RESAMPLER_ORDER_FIR_12]; + ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); /* Copy buffered samples to start of buffer */ silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); @@ -106,5 +103,5 @@ void silk_resampler_private_IIR_FIR( /* Copy last part of filtered signal to the state for the next call */ silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); -/* RESTORE_STACK; */ + RESTORE_STACK; } diff --git a/lib/rbcodec/codecs/libopus/silk/shell_coder.c b/lib/rbcodec/codecs/libopus/silk/shell_coder.c index 9d6e1bb366..d80dd51f9e 100644 --- a/lib/rbcodec/codecs/libopus/silk/shell_coder.c +++ b/lib/rbcodec/codecs/libopus/silk/shell_coder.c @@ -60,8 +60,8 @@ static OPUS_INLINE void encode_split( #endif static OPUS_INLINE void decode_split( - opus_int *p_child1, /* O pulse amplitude of first child subframe */ - opus_int *p_child2, /* O pulse amplitude of second child subframe */ + opus_int16 *p_child1, /* O pulse amplitude of first child subframe */ + opus_int16 *p_child2, /* O pulse amplitude of second child subframe */ ec_dec *psRangeDec, /* I/O Compressor data structure */ const opus_int p, /* I pulse amplitude of current subframe */ const opus_uint8 *shell_table /* I table of shell cdfs */ @@ -121,12 +121,12 @@ void silk_shell_encoder( /* Shell decoder, operates on one shell code frame of 16 pulses */ void silk_shell_decoder( - opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ + opus_int16 *pulses0, /* O data: nonnegative pulse amplitudes */ ec_dec *psRangeDec, /* I/O Compressor data structure */ const opus_int pulses4 /* I number of pulses per pulse-subframe */ ) { - opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; + opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; /* this function operates on one shell code frame of 16 pulses */ silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 ); diff --git a/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c b/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c index 12514c9917..129df191d8 100644 --- a/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c +++ b/lib/rbcodec/codecs/libopus/silk/sum_sqr_shift.c @@ -53,6 +53,7 @@ void silk_sum_sqr_shift( /* Scale down */ nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); shft = 2; + i+=2; break; } } diff --git a/lib/rbcodec/codecs/opus.c b/lib/rbcodec/codecs/opus.c index 2c495aa8d0..842803aa4f 100644 --- a/lib/rbcodec/codecs/opus.c +++ b/lib/rbcodec/codecs/opus.c @@ -337,8 +337,6 @@ enum codec_status codec_run(void) param = ci->id3->elapsed; strtoffset = ci->id3->offset; - global_stack = 0; - #if defined(CPU_COLDFIRE) /* EMAC rounding is disabled because of MULT16_32_Q15, which will be inaccurate with rounding in its current incarnation */ -- cgit v1.2.3