From 9b7ec42403073ee887efc531c153e6b1b6c15bab Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Sun, 19 Jan 2014 16:31:59 +0100 Subject: Sync to upstream libopus Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer. Speeds up decoding of the following test files: H300 (cf) C200 (arm7tdmi) ipod classic (arm9e) 16 kbps (silk) 14.28 MHz 4.00 MHz 2.61 MHz 64 kbps (celt) 4.09 MHz 8.08 MHz 6.24 MHz 128 kbps (celt) 1.93 MHz 8.83 MHz 6.53 MHz Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58 --- lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h | 4 - lib/rbcodec/codecs/libopus/celt/arch.h | 34 +- lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h | 4 + lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h | 35 ++ lib/rbcodec/codecs/libopus/celt/bands.c | 215 ++++---- lib/rbcodec/codecs/libopus/celt/bands.h | 11 +- lib/rbcodec/codecs/libopus/celt/celt.c | 79 ++- lib/rbcodec/codecs/libopus/celt/celt.h | 11 +- lib/rbcodec/codecs/libopus/celt/celt_decoder.c | 325 ++++++----- lib/rbcodec/codecs/libopus/celt/cwrs.c | 40 +- lib/rbcodec/codecs/libopus/celt/cwrs.h | 2 +- lib/rbcodec/codecs/libopus/celt/entcode.c | 60 +++ lib/rbcodec/codecs/libopus/celt/entcode.h | 35 ++ lib/rbcodec/codecs/libopus/celt/entdec.c | 2 +- lib/rbcodec/codecs/libopus/celt/entenc.c | 2 +- lib/rbcodec/codecs/libopus/celt/fixed_generic.h | 19 +- lib/rbcodec/codecs/libopus/celt/kiss_fft.c | 482 ++++++----------- lib/rbcodec/codecs/libopus/celt/kiss_fft.h | 15 +- lib/rbcodec/codecs/libopus/celt/mdct.c | 191 ++++--- lib/rbcodec/codecs/libopus/celt/modes.h | 8 - lib/rbcodec/codecs/libopus/celt/pitch.c | 30 +- lib/rbcodec/codecs/libopus/celt/pitch.h | 18 +- lib/rbcodec/codecs/libopus/celt/rate.c | 7 +- lib/rbcodec/codecs/libopus/celt/stack_alloc.h | 8 +- .../codecs/libopus/celt/static_modes_fixed.h | 596 +++++++++++++++------ lib/rbcodec/codecs/libopus/celt/vq.c | 69 ++- lib/rbcodec/codecs/libopus/celt/vq.h | 2 +- 27 files changed, 1388 insertions(+), 916 deletions(-) (limited to 'lib/rbcodec/codecs/libopus/celt') diff --git a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h index cc52f37c36..8ddb9adc96 100644 --- a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h +++ b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h @@ -65,10 +65,6 @@ do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) -# define C_MUL4(m,a,b) \ - do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \ - (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0) - # define C_MULBYSCALAR( c, s ) \ do{ (c).r = S_MUL( (c).r , s ) ;\ (c).i = S_MUL( (c).i , s ) ; }while(0) diff --git a/lib/rbcodec/codecs/libopus/celt/arch.h b/lib/rbcodec/codecs/libopus/celt/arch.h index b2d26c4ee4..035b92ff29 100644 --- a/lib/rbcodec/codecs/libopus/celt/arch.h +++ b/lib/rbcodec/codecs/libopus/celt/arch.h @@ -69,11 +69,9 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) #define IMUL32(a,b) ((a)*(b)) -#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ -#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ +#define ABS(x) ((x) < 0 ? (-(x)) : (x)) #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ -#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ @@ -108,6 +106,13 @@ typedef opus_val32 celt_ener; #define SCALEIN(a) (a) #define SCALEOUT(a) (a) +#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) +#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) + +static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { + return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; +} + #ifdef FIXED_DEBUG #include "fixed_debug.h" #else @@ -139,6 +144,22 @@ typedef float celt_sig; typedef float celt_norm; typedef float celt_ener; +#ifdef FLOAT_APPROX +/* This code should reliably detect NaN/inf even when -ffast-math is used. + Assumes IEEE 754 format. */ +static OPUS_INLINE int celt_isnan(float x) +{ + union {float f; opus_uint32 i;} in; + in.f = x; + return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; +} +#else +#ifdef __FAST_MATH__ +#error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input +#endif +#define celt_isnan(x) ((x)!=(x)) +#endif + #define Q15ONE 1.0f #define NORM_SCALING 1.f @@ -148,6 +169,10 @@ typedef float celt_ener; #define VERY_LARGE16 1e15f #define Q15_ONE ((opus_val16)1.f) +/* This appears to be the same speed as C99's fabsf() but it's more portable. */ +#define ABS16(x) ((float)fabs(x)) +#define ABS32(x) ((float)fabs(x)) + #define QCONST16(x,bits) (x) #define QCONST32(x,bits) (x) @@ -186,6 +211,7 @@ typedef float celt_ener; #define MULT32_32_Q31(a,b) ((a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) +#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b)) #define MULT16_16_Q11(a,b) ((a)*(b)) @@ -203,6 +229,8 @@ typedef float celt_ener; #define SCALEIN(a) ((a)*CELT_SIG_SCALE) #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) +#define SIG2WORD16(x) (x) + #endif /* !FIXED_POINT */ #ifndef GLOBAL_STACK_SIZE diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h index b690bc8cea..efb3b1896a 100644 --- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h +++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h @@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) #undef MAC16_32_Q15 #define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b)) /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ #undef MULT32_32_Q31 diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h index 1194a7d3ec..36a6321101 100644 --- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h +++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h @@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, } #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) +/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add. + Result fits in 32 bits. */ +#undef MAC16_32_Q16 +static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a, + opus_val32 b) +{ + int res; + __asm__( + "#MAC16_32_Q16\n\t" + "smlawb %0, %1, %2, %3;\n" + : "=r"(res) + : "r"(b), "r"(a), "r"(c) + ); + return res; +} +#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b)) + /** 16x16 multiply-add where the result fits in 32 bits */ #undef MAC16_16 static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, @@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) } #define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) +#ifdef OPUS_ARM_INLINE_MEDIA + +#undef SIG2WORD16 +static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x) +{ + celt_sig res; + __asm__( + "#SIG2WORD16\n\t" + "ssat %0, #16, %1, ASR #12\n\t" + : "=r"(res) + : "r"(x+2048) + ); + return EXTRACT16(res); +} +#define SIG2WORD16(x) (SIG2WORD16_armv6(x)) + +#endif /* OPUS_ARM_INLINE_MEDIA */ + #endif diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c index 1ad786d795..caa70163b4 100644 --- a/lib/rbcodec/codecs/libopus/celt/bands.c +++ b/lib/rbcodec/codecs/libopus/celt/bands.c @@ -93,11 +93,11 @@ static int bitexact_log2tan(int isin,int icos) #if 0 #ifdef FIXED_POINT /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize< 0) { - int shift = celt_ilog2(maxval)-10; - j=M*eBands[i]; do { - sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), - EXTRACT16(VSHR32(X[j+c*N],shift))); - } while (++jlogN[i]>>BITRES)+LM+1)>>1); + j=eBands[i]<0) + { + do { + sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), + EXTRACT16(SHR32(X[j+c*N],shift))); + } while (++jnbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); } else { @@ -151,18 +156,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel #else /* FIXED_POINT */ /* Compute the amplitude (sqrt energy) in each of the bands */ -void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) { int i, c, N; const opus_int16 *eBands = m->eBands; - N = M*m->shortMdctSize; + N = m->shortMdctSize<nbEBands] = celt_sqrt(sum); /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ } @@ -192,74 +195,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel /* De-normalise the energy to produce the synthesis from the unit-energy bands */ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, - celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, + int end, int M, int downsample, int silence) { - int i, c, N; + int i, N; + int bound; + celt_sig * OPUS_RESTRICT f; + const celt_norm * OPUS_RESTRICT x; const opus_int16 *eBands = m->eBands; N = M*m->shortMdctSize; - celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); - c=0; do { - celt_sig * OPUS_RESTRICT f; - const celt_norm * OPUS_RESTRICT x; - f = freq+c*N; - x = X+c*N+M*eBands[start]; - for (i=0;inbEBands], SHL16((opus_val16)eMeans[i],6)); + j=M*eBands[i]; + band_end = M*eBands[i+1]; + lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); #ifndef FIXED_POINT - g = celt_exp2(lg); + g = celt_exp2(lg); #else - /* Handle the integer part of the log energy */ - shift = 16-(lg>>DB_SHIFT); - if (shift>31) - { - shift=0; - g=0; - } else { - /* Handle the fractional part. */ - g = celt_exp2_frac(lg&((1<>DB_SHIFT); + if (shift>31) + { + shift=0; + g=0; + } else { + /* Handle the fractional part. */ + g = celt_exp2_frac(lg&((1<eBands[i+1]-m->eBands[i]; /* depth in 1/8 bits */ - depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<=0); + depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; #ifdef FIXED_POINT thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); @@ -352,7 +362,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas } } -static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N) +static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) { int i = bandID; int j; @@ -372,25 +382,25 @@ static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, cons celt_norm r, l; l = X[j]; r = Y[j]; - X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r); + X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14)); /* Side is not encoded, no need to calculate */ } } -static void stereo_split(celt_norm *X, celt_norm *Y, int N) +static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N) { int j; for (j=0;jm->nbEBands-4) - hf_sum += 32*(tcount[1]+tcount[0])/N; + hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); sum += tmp*256; nbBands++; @@ -496,7 +505,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, if (update_hf) { if (hf_sum) - hf_sum /= C*(4-m->nbEBands+end); + hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); *hf_average = (*hf_average+hf_sum)>>1; hf_sum = *hf_average; if (*tapset_decision==2) @@ -512,7 +521,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, } /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ celt_assert(nbBands>0); /* end has to be non-zero */ - sum /= nbBands; + celt_assert(sum>=0); + sum = celt_udiv(sum, nbBands); /* Recursive averaging */ sum = (sum+*average)>>1; *average = sum; @@ -571,8 +581,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard for (j=0;j=0); + itheta = celt_udiv((opus_int32)itheta*16384, qn); if (encode && stereo) { if (itheta==0) @@ -1025,8 +1035,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, fill &= cm_mask; if (!fill) { - for (j=0;j1)) { - int j; - for (j=0;joverlap; + nbEBands = mode->nbEBands; + N = mode->shortMdctSize<shortMdctSize; + B = M; + NB = mode->shortMdctSize; shift = mode->maxLM; } else { B = 1; - N = mode->shortMdctSize<shortMdctSize<maxLM-LM; } - c=0; do { - /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ + + if (CC==2&&C==1) + { + /* Copying a mono streams to two channels */ + celt_sig *freq2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ + freq2 = out_syn[1]+overlap/2; + OPUS_COPY(freq2, freq, N); for (b=0;bmdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); - } while (++cmdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); + for (b=0;bmdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B); + } else if (CC==1&&C==2) + { + /* Downmixing a stereo stream to mono */ + celt_sig *freq2; + freq2 = out_syn[0]+overlap/2; + denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, + downsample, silence); + /* Use the output buffer as temp array before downmixing. */ + denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, + downsample, silence); + for (i=0;imdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); + } else { + /* Normal case (mono or stereo) */ + c=0; do { + denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, + downsample, silence); + for (b=0;bmdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B); + } while (++c>1, opus_val16 ); + pitch_downsample(decode_mem, lp_pitch_buf, + DECODE_BUFFER_SIZE, C, arch); + pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, + DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, + PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); + pitch_index = PLC_PITCH_LAG_MAX-pitch_index; + RESTORE_STACK; + return pitch_index; +} + +static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) { int c; int i; @@ -343,11 +424,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R int nbEBands; int overlap; int start; - int downsample; int loss_count; int noise_based; const opus_int16 *eBands; - VARDECL(celt_sig, scratch); SAVE_STACK; mode = st->mode; @@ -367,14 +446,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R loss_count = st->loss_count; start = st->start; - downsample = st->downsample; noise_based = loss_count >= 5 || start != 0; - ALLOC(scratch, noise_based?N*C:N, celt_sig); if (noise_based) { /* Noise-based PLC/CNG */ - celt_sig *freq; +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif opus_uint32 seed; opus_val16 *plcLogE; int end; @@ -383,10 +463,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R end = st->end; effEnd = IMAX(start, IMIN(end, mode->effEBands)); - /* Share the interleaved signal MDCT coefficient buffer with the - deemphasis scratch buffer. */ - freq = scratch; +#ifdef NORM_ALIASING_HACK + /* This is an ugly hack that breaks aliasing rules and would be easily broken, + but it saves almost 4kB of stack. */ + X = (celt_norm*)(out_syn[C-1]+overlap/2); +#else ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ +#endif if (loss_count >= 5) plcLogE = backgroundLogE; @@ -421,20 +504,12 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } st->rng = seed; - denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<>1)); } while (++cdownsample, 0); } else { /* Pitch-based PLC */ const opus_val16 *window; @@ -445,15 +520,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R if (loss_count == 0) { - VARDECL( opus_val16, lp_pitch_buf ); - ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); - pitch_downsample(decode_mem, lp_pitch_buf, - DECODE_BUFFER_SIZE, C, st->arch); - pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, - DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); - pitch_index = PLC_PITCH_LAG_MAX-pitch_index; - st->last_pitch_index = pitch_index; + st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); } else { pitch_index = st->last_pitch_index; fade = QCONST16(.8f,15); @@ -644,25 +711,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } while (++cpreemph, st->preemph_memD, scratch); - st->loss_count = loss_count+1; RESTORE_STACK; } -#define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */ -static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */ -static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */ -int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) +int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) { int c, i, N; int spread_decision; opus_int32 bits; ec_dec _dec; - VARDECL(celt_sig, freq); +#ifdef NORM_ALIASING_HACK + celt_norm *X; +#else VARDECL(celt_norm, X); +#endif VARDECL(int, fine_quant); VARDECL(int, pulses); VARDECL(int, cap); @@ -680,6 +745,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat int intra_ener; const int CC = st->channels; int LM, M; + int start; + int end; int effEnd; int codedBands; int alloc_trim; @@ -706,11 +773,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; + start = st->start; + end = st->end; frame_size *= st->downsample; - c=0; do { - decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - } while (++c_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); oldBandE = lpc+CC*LPC_ORDER; oldLogE = oldBandE + 2*nbEBands; @@ -728,7 +794,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat if (data0<0) return OPUS_INVALID_PACKET; } - st->end = IMAX(1, mode->effEBands-2*(data0>>5)); + st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); LM = (data0>>3)&0x3; C = 1 + ((data0>>2)&0x1); data++; @@ -755,14 +821,19 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat return OPUS_BAD_ARG; N = M*mode->shortMdctSize; + c=0; do { + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; + } while (++cend; + effEnd = end; if (effEnd > mode->effEBands) effEnd = mode->effEBands; if (data == NULL || len<=1) { - celt_decode_lost(st, pcm, N, LM); + celt_decode_lost(st, N, LM); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); RESTORE_STACK; return frame_size/st->downsample; } @@ -798,7 +869,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat postfilter_gain = 0; postfilter_pitch = 0; postfilter_tapset = 0; - if (st->start==0 && tell+16 <= total_bits) + if (start==0 && tell+16 <= total_bits) { if(ec_dec_bit_logp(dec, 1)) { @@ -829,11 +900,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat /* Decode the global flags (first symbols in the stream) */ intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; /* Get band energies */ - unquant_coarse_energy(mode, st->start, st->end, oldBandE, + unquant_coarse_energy(mode, start, end, oldBandE, intra_ener, dec, C, LM); ALLOC(tf_res, nbEBands, int); - tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); + tf_decode(start, end, isTransient, tf_res, LM, dec); tell = ec_tell(dec); spread_decision = SPREAD_NORMAL; @@ -849,7 +920,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat dynalloc_logp = 6; total_bits<<=BITRES; tell = ec_tell_frac(dec); - for (i=st->start;iend;i++) + for (i=start;istart, st->end, offsets, cap, + codedBands = compute_allocation(mode, start, end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0, 0); - unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); + unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C); + + c=0; do { + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); + } while (++c= C*N) - X = s_X; - else - ALLOC(X, C*N, celt_norm); - quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, +#ifdef NORM_ALIASING_HACK + /* This is an ugly hack that breaks aliasing rules and would be easily broken, + but it saves almost 4kB of stack. */ + X = (celt_norm*)(out_syn[CC-1]+overlap/2); +#else + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ +#endif + + quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, len*(8<rng); @@ -911,58 +989,20 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat anti_collapse_on = ec_dec_bits(dec, 1); } - unquant_energy_finalise(mode, st->start, st->end, oldBandE, + unquant_energy_finalise(mode, start, end, oldBandE, fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); if (anti_collapse_on) anti_collapse(mode, X, collapse_masks, LM, C, N, - st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); - - /**< Interleaved signal MDCTs */ - if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N) - freq = s_freq; - else - ALLOC(freq, IMAX(CC,C)*N, celt_sig); + start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); if (silence) { for (i=0;istart, effEnd, C, M); } - c=0; do { - OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); - } while (++cdownsample!=1) - bound = IMIN(bound, N/st->downsample); - for (i=bound;idownsample, silence); c=0; do { st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); @@ -989,18 +1029,14 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat st->postfilter_tapset_old = st->postfilter_tapset; } - if (C==1) { - for (i=0;istart;i++) + for (i=0;iend;irng = dec->rng; - /* We reuse freq[] as scratch space for the de-emphasis */ - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); st->loss_count = 0; RESTORE_STACK; if (ec_tell(dec) > 8*len) @@ -1039,7 +1074,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat #ifdef FIXED_POINT int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) { - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); } #ifndef DISABLE_FLOAT_API @@ -1056,7 +1091,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char N = frame_size; ALLOC(out, C*N, opus_int16); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); if (ret>0) for (j=0;j0) for (j=0;j0); celt_assert(_n>1); while(_n>2){ @@ -487,7 +489,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ } else for(p=row[_k];p>_i;p=row[_k])_k--; _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } /*Lots of dimensions case:*/ else{ @@ -507,7 +511,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ do p=CELT_PVQ_U_ROW[--_k][_n]; while(p>_i); _i-=p; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); } } _n--; @@ -519,14 +525,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ k0=_k; _k=(_i+1)>>1; if(_k)_i-=2*_k-1; - *_y++=(k0-_k+s)^s; + val=(k0-_k+s)^s; + *_y++=val; + yy=MAC16_16(yy,val,val); /*_n==1*/ s=-(int)_i; - *_y=(_k+s)^s; + val=(_k+s)^s; + *_y=val; + yy=MAC16_16(yy,val,val); + return yy; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ - cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ + return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); } #else /* SMALL_FOOTPRINT */ @@ -591,8 +602,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ _y: Returns the vector of pulses. _u: Must contain entries [0..._k+1] of row _n of U() on input. Its contents will be destructively modified.*/ -static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ +static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ int j; + opus_int16 val; + opus_val32 yy=0; celt_assert(_n>0); j=0; do{ @@ -607,10 +620,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ while(p>_i)p=_u[--_k]; _i-=p; yj-=_k; - _y[j]=(yj+s)^s; + val=(yj+s)^s; + _y[j]=val; + yy=MAC16_16(yy,val,val); uprev(_u,_k+2,0); } while(++j<_n); + return yy; } /*Returns the index of the given combination of K elements chosen from a set @@ -685,13 +701,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ RESTORE_STACK; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ +opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ VARDECL(opus_uint32,u); + int ret; SAVE_STACK; celt_assert(_k>0); ALLOC(u,_k+2U,opus_uint32); - cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); + ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); RESTORE_STACK; + return ret; } #endif /* SMALL_FOOTPRINT */ diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.h b/lib/rbcodec/codecs/libopus/celt/cwrs.h index 7dfbd076d1..7cd4717459 100644 --- a/lib/rbcodec/codecs/libopus/celt/cwrs.h +++ b/lib/rbcodec/codecs/libopus/celt/cwrs.h @@ -43,6 +43,6 @@ void get_required_bits(opus_int16 *bits, int N, int K, int frac); void encode_pulses(const int *_y, int N, int K, ec_enc *enc); -void decode_pulses(int *_y, int N, int K, ec_dec *dec); +opus_val32 decode_pulses(int *_y, int N, int K, ec_dec *dec); #endif /* CWRS_H */ diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.c b/lib/rbcodec/codecs/libopus/celt/entcode.c index fa5d7c7c2c..461a36dd55 100644 --- a/lib/rbcodec/codecs/libopus/celt/entcode.c +++ b/lib/rbcodec/codecs/libopus/celt/entcode.c @@ -62,6 +62,27 @@ int ec_ilog(opus_uint32 _v){ } #endif +#if 1 +/* This is a faster version of ec_tell_frac() that takes advantage + of the low (1/8 bit) resolution to use just a linear function + followed by a lookup to determine the exact transition thresholds. */ +opus_uint32 ec_tell_frac(ec_ctx *_this){ + static const unsigned correction[8] = + {35733, 38967, 42495, 46340, + 50535, 55109, 60097, 65535}; + opus_uint32 nbits; + opus_uint32 r; + int l; + unsigned b; + nbits=_this->nbits_total<rng); + r=_this->rng>>(l-16); + b = (r>>12)-8; + b += r>correction[b]; + l = (l<<3)+b; + return nbits-l; +} +#else opus_uint32 ec_tell_frac(ec_ctx *_this){ opus_uint32 nbits; opus_uint32 r; @@ -91,3 +112,42 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){ } return nbits-l; } +#endif + +#ifdef USE_SMALL_DIV_TABLE +/* Result of 2^32/(2*i+1), except for i=0. */ +const opus_uint32 SMALL_DIV_TABLE[129] ICONST_ATTR = { + 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, + 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, + 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, + 0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084, + 0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906, + 0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A, + 0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A, + 0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104, + 0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1, + 0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2, + 0x0329161F, 0x03159721, 0x03030303, 0x02F14990, + 0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46, + 0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597, + 0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17, + 0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902, + 0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810, + 0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC, + 0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30, + 0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364, + 0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14, + 0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F, + 0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE, + 0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6, + 0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3, + 0x01539094, 0x01501501, 0x014CAB88, 0x0149539E, + 0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A, + 0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190, + 0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227, + 0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4, + 0x01194538, 0x0116E068, 0x011485F0, 0x0112358E, + 0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3, + 0x01073260, 0x0105197F, 0x0103091B, 0x01010101 +}; +#endif diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.h b/lib/rbcodec/codecs/libopus/celt/entcode.h index dd13e49e50..13d6c84ef0 100644 --- a/lib/rbcodec/codecs/libopus/celt/entcode.h +++ b/lib/rbcodec/codecs/libopus/celt/entcode.h @@ -34,6 +34,12 @@ # include # include "ecintrin.h" +extern const opus_uint32 SMALL_DIV_TABLE[129]; + +#ifdef OPUS_ARM_ASM +#define USE_SMALL_DIV_TABLE +#endif + /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a larger type, you can speed up the decoder by using it here.*/ typedef opus_uint32 ec_window; @@ -114,4 +120,33 @@ static OPUS_INLINE int ec_tell(ec_ctx *_this){ rounding error is in the positive direction).*/ opus_uint32 ec_tell_frac(ec_ctx *_this); +/* Tested exhaustively for all n and for 1<=d<=256 */ +static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (d>256) + return n/d; + else { + opus_uint32 t, q; + t = EC_ILOG(d&-d); + q = (opus_uint64)SMALL_DIV_TABLE[d>>t]*(n>>(t-1))>>32; + return q+(n-q*d >= d); + } +#else + return n/d; +#endif +} + +static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { + celt_assert(d>0); +#ifdef USE_SMALL_DIV_TABLE + if (n<0) + return -(opus_int32)celt_udiv(-n, d); + else + return celt_udiv(n, d); +#else + return n/d; +#endif +} + #endif diff --git a/lib/rbcodec/codecs/libopus/celt/entdec.c b/lib/rbcodec/codecs/libopus/celt/entdec.c index 3c264685c2..0b3433ed8b 100644 --- a/lib/rbcodec/codecs/libopus/celt/entdec.c +++ b/lib/rbcodec/codecs/libopus/celt/entdec.c @@ -138,7 +138,7 @@ void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ unsigned ec_decode(ec_dec *_this,unsigned _ft){ unsigned s; - _this->ext=_this->rng/_ft; + _this->ext=celt_udiv(_this->rng,_ft); s=(unsigned)(_this->val/_this->ext); return _ft-EC_MINI(s+1,_ft); } diff --git a/lib/rbcodec/codecs/libopus/celt/entenc.c b/lib/rbcodec/codecs/libopus/celt/entenc.c index a7e34ecef9..271e4d30c5 100644 --- a/lib/rbcodec/codecs/libopus/celt/entenc.c +++ b/lib/rbcodec/codecs/libopus/celt/entenc.c @@ -127,7 +127,7 @@ void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ opus_uint32 r; - r=_this->rng/_ft; + r=celt_udiv(_this->rng,_ft); if(_fl>0){ _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); _this->rng=IMUL32(r,(_fh-_fl)); diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h index ecf018a244..ac67d37ce8 100644 --- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h +++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h @@ -113,7 +113,11 @@ /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. b must fit in 31 bits. Result fits in 32 bits. */ -#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) +#define MAC16_32_Q15(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) + +/** 16x32 multiplication, followed by a 16-bit shift right and 32-bit add. + Results fits in 32 bits */ +#define MAC16_32_Q16(c,a,b) ADD32((c),ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) #define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) @@ -131,4 +135,17 @@ /** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ #define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) +#if defined(MIPSr1_ASM) +#include "mips/fixed_generic_mipsr1.h" +#endif + +static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x) +{ + x = PSHR32(x, SIG_SHIFT); + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return EXTRACT16(x); +} +#define SIG2WORD16(x) (SIG2WORD16_generic(x)) + #endif diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c index e2b8f3b3da..833ef5a71f 100644 --- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c +++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c @@ -45,73 +45,62 @@ complex numbers. It also delares the kf_ internal functions. */ -#if 0 static void kf_bfly2( kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, int m, - int N, - int mm + int N ) { kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for(j=0;jr = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); - Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); - C_MUL (t, *Fout2 , *tw1); - tw1 += fstride; + Fout2 = Fout + 1; + t = *Fout2; C_SUB( *Fout2 , *Fout , t ); C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; + Fout += 2; } - } -} + } else #endif - -static void ki_bfly2( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - kiss_fft_cpx t; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for(j=0;jtwiddles; - for (j=0;jr = PSHR32(Fout->r, 2); - Fout->i = PSHR32(Fout->i, 2); - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - ++Fout; + kiss_fft_cpx scratch0, scratch1; + + C_SUB( scratch0 , *Fout, Fout[2] ); + C_ADDTO(*Fout, Fout[2]); + C_ADD( scratch1 , Fout[1] , Fout[3] ); + C_SUB( Fout[2], *Fout, scratch1 ); + C_ADDTO( *Fout , scratch1 ); + C_SUB( scratch1 , Fout[1] , Fout[3] ); + + Fout[1].r = scratch0.r + scratch1.i; + Fout[1].i = scratch0.i - scratch1.r; + Fout[3].r = scratch0.r - scratch1.i; + Fout[3].i = scratch0.i + scratch1.r; + Fout+=4; } - } -} -#endif - -static void ki_bfly4( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - const kiss_twiddle_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - const size_t m2=2*m; - const size_t m3=3*m; - int i, j; - - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;itwiddles; - for (j=0;jtwiddles; + /* m is guaranteed to be a multiple of 4. */ + for (j=0;jtwiddles[fstride*m]; +#endif for (i=0;itwiddles; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ k=m; do { - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); C_MUL(scratch[1],Fout[m] , *tw1); C_MUL(scratch[2],Fout[m2] , *tw2); @@ -259,59 +229,9 @@ static void kf_bfly3( } while(--k); } } -#endif - -static void ki_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - int i, k; - const size_t m2 = 2*m; - const kiss_twiddle_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_twiddle_cpx epi3; - - kiss_fft_cpx * Fout_beg = Fout; - epi3 = st->twiddles[fstride*m]; - for (i=0;itwiddles; - k=m; - do{ - - C_MULC(scratch[1],Fout[m] , *tw1); - C_MULC(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , -epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - ++Fout; - }while(--k); - } -} -#if 0 +#ifndef OVERRIDE_kf_bfly5 static void kf_bfly5( kiss_fft_cpx * Fout, const size_t fstride, @@ -324,13 +244,19 @@ static void kf_bfly5( kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; int i, u; kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx * twiddles = st->twiddles; const kiss_twiddle_cpx *tw; kiss_twiddle_cpx ya,yb; kiss_fft_cpx * Fout_beg = Fout; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; +#ifdef FIXED_POINT + ya.r = 10126; + ya.i = -31164; + yb.r = -26510; + yb.i = -19261; +#else + ya = st->twiddles[fstride*m]; + yb = st->twiddles[fstride*2*m]; +#endif tw=st->twiddles; for (i=0;itwiddles; - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; +#endif /* OVERRIDE_kf_bfly5 */ - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - tw=st->twiddles; - - for (i=0;ir += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} #endif @@ -496,6 +355,9 @@ static int kf_factor(int n,opus_int16 * facbuf) { int p=4; + int i; + int stages=0; + int nbak = n; /*factor out powers of 4, powers of 2, then any remaining primes */ do { @@ -517,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf) { return 0; } - *facbuf++ = p; - *facbuf++ = n; + facbuf[2*stages] = p; + if (p==2 && stages > 1) + { + facbuf[2*stages] = 4; + facbuf[2] = 2; + } + stages++; } while (n > 1); + n = nbak; + /* Reverse the order to get the radix 4 at the end, so we can use the + fast degenerate case. It turns out that reversing the order also + improves the noise behaviour. */ + for (i=0;infft=nfft; -#ifndef FIXED_POINT +#ifdef FIXED_POINT + st->scale_shift = celt_ilog2(st->nfft); + if (st->nfft == 1<scale_shift) + st->scale = Q15ONE; + else + st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); +#else st->scale = 1.f/nfft; #endif if (base != NULL) { st->twiddles = base->twiddles; st->shift = 0; - while (nfft<shift != base->nfft && st->shift < 32) + while (st->shift < 32 && nfft<shift != base->nfft) st->shift++; if (st->shift>=32) goto fail; @@ -614,8 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg) #endif /* CUSTOM_MODES */ -#if 0 -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) { int m2, m; int p; @@ -627,17 +515,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou /* st->shift can be -1 */ shift = st->shift>0 ? st->shift : 0; - celt_assert2 (fin != fout, "In-place FFT not supported"); - /* Bit-reverse the input */ - for (i=0;infft;i++) - { - fout[st->bitrev[i]] = fin[i]; -#ifndef FIXED_POINT - fout[st->bitrev[i]].r *= st->scale; - fout[st->bitrev[i]].i *= st->scale; -#endif - } - fstride[0] = 1; L=0; do { @@ -656,7 +533,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou switch (st->factors[2*i]) { case 2: - kf_bfly2(fout,fstride[i]<scale_shift-1; +#endif + scale = st->scale; - /* st->shift can be -1 */ - shift = st->shift>0 ? st->shift : 0; celt_assert2 (fin != fout, "In-place FFT not supported"); /* Bit-reverse the input */ for (i=0;infft;i++) - fout[st->bitrev[i]] = fin[i]; - - fstride[0] = 1; - L=0; - do { - p = st->factors[2*L]; - m = st->factors[2*L+1]; - fstride[L+1] = fstride[L]*p; - L++; - } while(m!=1); - m = st->factors[2*L-1]; - for (i=L-1;i>=0;i--) { - if (i!=0) - m2 = st->factors[2*i-1]; - else - m2 = 1; - switch (st->factors[2*i]) - { - case 2: - ki_bfly2(fout,fstride[i]<bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); + fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); } + opus_fft_impl(st, fout); } +#endif + +#ifdef TEST_UNIT_DFT_C +void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int i; + celt_assert2 (fin != fout, "In-place FFT not supported"); + /* Bit-reverse the input */ + for (i=0;infft;i++) + fout[st->bitrev[i]] = fin[i]; + for (i=0;infft;i++) + fout[i].i = -fout[i].i; + opus_fft_impl(st, fout); + for (i=0;infft;i++) + fout[i].i = -fout[i].i; +} +#endif diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h index 66cf1f2126..390b54d948 100644 --- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h +++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h @@ -79,8 +79,9 @@ typedef struct { typedef struct kiss_fft_state{ int nfft; -#ifndef FIXED_POINT - kiss_fft_scalar scale; + opus_val16 scale; +#ifdef FIXED_POINT + int scale_shift; #endif int shift; opus_int16 factors[2*MAXFACTORS]; @@ -128,14 +129,10 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); f[k].r and f[k].i * */ void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); -#if defined(CPU_COLDFIRE) -#define IFFT_ICODE ICODE_ATTR -#else -#define IFFT_ICODE -#endif - -void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) IFFT_ICODE; +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); +void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); void opus_fft_free(const kiss_fft_state *cfg); diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c index 72ea180568..7fa8eaf6bf 100644 --- a/lib/rbcodec/codecs/libopus/celt/mdct.c +++ b/lib/rbcodec/codecs/libopus/celt/mdct.c @@ -53,18 +53,20 @@ #include "mathops.h" #include "stack_alloc.h" +#if defined(MIPSr1_ASM) +#include "mips/mdct_mipsr1.h" +#endif + + #ifdef CUSTOM_MODES int clt_mdct_init(mdct_lookup *l,int N, int maxshift) { int i; - int N4; kiss_twiddle_scalar *trig; -#if defined(FIXED_POINT) + int shift; int N2=N>>1; -#endif l->n = N; - N4 = N>>2; l->maxshift = maxshift; for (i=0;i<=maxshift;i++) { @@ -77,17 +79,28 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift) return 0; #endif } - l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); + l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); if (l->trig==NULL) return 0; - /* We have enough points that sine isn't necessary */ + for (shift=0;shift<=maxshift;shift++) + { + /* We have enough points that sine isn't necessary */ #if defined(FIXED_POINT) - for (i=0;i<=N4;i++) - trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); +#if 1 + for (i=0;i>= 1; + N >>= 1; + } return 1; } @@ -103,27 +116,37 @@ void clt_mdct_clear(mdct_lookup *l) #if 0 /* Forward MDCT trashes the input array */ +#ifndef OVERRIDE_clt_mdct_forward void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window, int overlap, int shift, int stride) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; VARDECL(kiss_fft_scalar, f); - VARDECL(kiss_fft_scalar, f2); + VARDECL(kiss_fft_cpx, f2); + const kiss_fft_state *st = l->kfft[shift]; + const kiss_twiddle_scalar *trig; + opus_val16 scale; +#ifdef FIXED_POINT + /* Allows us to scale with MULT16_32_Q16(), which is faster than + MULT16_32_Q15() on ARM. */ + int scale_shift = st->scale_shift-1; +#endif SAVE_STACK; + scale = st->scale; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; + ALLOC(f, N2, kiss_fft_scalar); - ALLOC(f2, N2, kiss_fft_scalar); - /* sin(x) ~= x here */ -#ifdef FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif + ALLOC(f2, N4, kiss_fft_cpx); /* Consider the input to be composed of four blocks: [a, b, c, d] */ /* Window, shuffle, fold */ @@ -168,125 +191,131 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar /* Pre-rotation */ { kiss_fft_scalar * OPUS_RESTRICT yp = f; - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; for(i=0;ibitrev[i]] = yc; } } - /* N/4 complex FFT, down-scales by 4/N */ - opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); + /* N/4 complex FFT, does not downscale anymore */ + opus_fft_impl(st, f2); /* Post-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT fp = f2; + const kiss_fft_cpx * OPUS_RESTRICT fp = f2; kiss_fft_scalar * OPUS_RESTRICT yp1 = out; kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); - const kiss_twiddle_scalar *t = &l->trig[0]; + const kiss_twiddle_scalar *t = &trig[0]; /* Temp pointers to make it really clear to the compiler what we're doing */ for(i=0;ii,t[N4+i]) - S_MUL(fp->r,t[i]); + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); + *yp1 = yr; + *yp2 = yi; + fp++; yp1 += 2*stride; yp2 -= 2*stride; } } RESTORE_STACK; } +#endif /* OVERRIDE_clt_mdct_forward */ #endif +#ifndef OVERRIDE_clt_mdct_backward void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) { int i; int N, N2, N4; - kiss_twiddle_scalar sine; -/* VARDECL(kiss_fft_scalar, f2); - SAVE_STACK; */ + const kiss_twiddle_scalar *trig; + N = l->n; - N >>= shift; + trig = l->trig; + for (i=0;i>= 1; + trig += N; + } N2 = N>>1; N4 = N>>2; -/* ALLOC(f2, N2, kiss_fft_scalar); */ - kiss_fft_scalar f2[N2]; /* worst case 3840b */ - /* sin(x) ~= x here */ -#ifdef FIXED_POINT - sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; -#else - sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; -#endif /* Pre-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); - kiss_fft_scalar * OPUS_RESTRICT yp = f2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); + const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; + const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; for(i=0;ikfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); + opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); /* Post-rotate and de-shuffle from both ends of the buffer at once to make it in-place. */ { - kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; - const kiss_twiddle_scalar *t = &l->trig[0]; + kiss_fft_scalar * yp0 = out+(overlap>>1); + kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; + const kiss_twiddle_scalar *t = &trig[0]; /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the middle pair will be computed twice. */ for(i=0;i<(N4+1)>>1;i++) { kiss_fft_scalar re, im, yr, yi; kiss_twiddle_scalar t0, t1; - re = yp0[0]; - im = yp0[1]; - t0 = t[i<overlap) -#endif - -#ifndef FRAMESIZE -#define FRAMESIZE(mode) ((mode)->mdctSize) -#endif - typedef struct { int size; const opus_int16 *index; diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c index c28857297a..ee56a434f0 100644 --- a/lib/rbcodec/codecs/libopus/celt/pitch.c +++ b/lib/rbcodec/codecs/libopus/celt/pitch.c @@ -252,15 +252,15 @@ void #endif celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) { - int i,j; + int i; /*The EDSP version requires that max_pitch is at least 1, and that _x is 32-bit aligned. Since it's hard to put asserts in assembly, put them here.*/ - celt_assert(max_pitch>0); - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); #ifdef FIXED_POINT opus_val32 maxcorr=1; #endif + celt_assert(max_pitch>0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); for (i=0;i>1;i++) { - opus_val32 sum=0; + opus_val32 sum; xcorr[i] = 0; if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) continue; +#ifdef FIXED_POINT + sum = 0; for (j=0;j>1;j++) sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); +#else + sum = celt_inner_prod(x_lp, y+i, len>>1); +#endif xcorr[i] = MAX32(-1, sum); #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); @@ -457,7 +461,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, opus_val16 g1; opus_val16 cont=0; opus_val16 thresh; - T1 = (2*T0+k)/(2*k); + T1 = celt_udiv(2*T0+k, 2*k); if (T1 < minperiod) break; /* Look for another strong correlation at T1b */ @@ -469,7 +473,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, T1b = T0+T1; } else { - T1b = (2*second_check[k]*T0+k)/(2*k); + T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); } dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); xy += xy2; @@ -514,13 +518,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, pg = SHR32(frac_div32(best_xy,best_yy+1),16); for (k=0;k<3;k++) - { - int T1 = T+k-1; - xy = 0; - for (i=0;i MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) offset = 1; else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.h b/lib/rbcodec/codecs/libopus/celt/pitch.h index df317ecc1d..96dbc0d794 100644 --- a/lib/rbcodec/codecs/libopus/celt/pitch.h +++ b/lib/rbcodec/codecs/libopus/celt/pitch.h @@ -41,8 +41,12 @@ #include "x86/pitch_sse.h" #endif +#if defined(MIPSr1_ASM) +#include "mips/pitch_mipsr1.h" +#endif + #if defined(OPUS_ARM_ASM) && defined(FIXED_POINT) -# include "arm/pitch_arm.h" +//# include "arm/pitch_arm.h" #endif void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, @@ -141,6 +145,18 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y } #endif +#ifndef OVERRIDE_CELT_INNER_PROD +static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_val16 *y, + int N) +{ + int i; + opus_val32 xy=0; + for (i=0;ieBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); band_width = m->eBands[codedBands]-m->eBands[j]; @@ -414,7 +414,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, /* Allocate the remaining bits */ left = total-psum; - percoeff = left/(m->eBands[codedBands]-m->eBands[start]); + percoeff = celt_udiv(left, m->eBands[codedBands]-m->eBands[start]); left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; for (j=start;jeBands[j+1]-m->eBands[j])); @@ -465,7 +465,8 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, offset += NClogN>>3; /* Divide with rounding */ - ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<>BITRES; /* Make sure not to bust */ if (C*ebits[j] > (bits[j]>>BITRES)) diff --git a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h index 316a6ce12c..2b51c8d80c 100644 --- a/lib/rbcodec/codecs/libopus/celt/stack_alloc.h +++ b/lib/rbcodec/codecs/libopus/celt/stack_alloc.h @@ -116,9 +116,11 @@ #else #ifdef CELT_C +char *scratch_ptr=0; char *global_stack=0; #else extern char *global_stack; +extern char *scratch_ptr; #endif /* CELT_C */ #ifdef ENABLE_VALGRIND @@ -140,8 +142,12 @@ extern char *global_stack_top; #define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) #define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) +#if 0 /* Set this to 1 to instrument pseudostack usage */ +#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack) +#else #define RESTORE_STACK (global_stack = _saved_stack) -#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack; +#endif +#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? (scratch_ptr=opus_alloc_scratch(GLOBAL_STACK_SIZE)) : global_stack); _saved_stack = global_stack; #endif /* ENABLE_VALGRIND */ diff --git a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h index 92e5fe5877..0396ce38df 100644 --- a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h +++ b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h @@ -341,84 +341,84 @@ static const kiss_twiddle_cpx fft_twiddles48000_960[480] ICONST_ATTR = { #ifndef FFT_BITREV480 #define FFT_BITREV480 static const opus_int16 fft_bitrev480[480] = { -0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, -450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, -345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, -215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, -110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, -430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, -325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, -181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, -76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, -396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, -291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, -161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, -56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, -362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, -257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, -127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, -22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, -472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, -342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, -237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, -93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, -438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, -308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, -203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, -73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, -418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, -274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, -169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, -39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, -384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, -254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, -149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, +0, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448, +8, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456, +16, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464, +24, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472, +4, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452, +12, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460, +20, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468, +28, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476, +1, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449, +9, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457, +17, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465, +25, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473, +5, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453, +13, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461, +21, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469, +29, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477, +2, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450, +10, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458, +18, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466, +26, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474, +6, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454, +14, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462, +22, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470, +30, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478, +3, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451, +11, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459, +19, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467, +27, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475, +7, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455, +15, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463, +23, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471, +31, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479, }; #endif #ifndef FFT_BITREV240 #define FFT_BITREV240 static const opus_int16 fft_bitrev240[240] = { -0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, -225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, -170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, -115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, -46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, -216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, -161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, -92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, -37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, -207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, -138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, -83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, -28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, -184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, -129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, -74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, +0, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224, +4, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228, +8, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232, +12, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236, +1, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225, +5, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229, +9, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233, +13, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237, +2, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226, +6, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230, +10, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234, +14, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238, +3, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227, +7, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231, +11, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235, +15, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239, }; #endif #ifndef FFT_BITREV120 #define FFT_BITREV120 static const opus_int16 fft_bitrev120[120] = { -0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, -110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, -76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, -56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, -22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, -93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, -73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, -39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, +0, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112, +4, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116, +1, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113, +5, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117, +2, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114, +6, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118, +3, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115, +7, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119, }; #endif #ifndef FFT_BITREV60 #define FFT_BITREV60 static const opus_int16 fft_bitrev60[60] = { -0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, -46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, -37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, -28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, +0, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56, +1, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57, +2, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58, +3, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59, }; #endif @@ -426,8 +426,10 @@ static const opus_int16 fft_bitrev60[60] = { #define FFT_STATE48000_960_0 static const kiss_fft_state fft_state48000_960_0 ICONST_ATTR = { 480, /* nfft */ +17476, /* scale */ +8, /* scale_shift */ -1, /* shift */ -{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev480, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -437,8 +439,10 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_1 static const kiss_fft_state fft_state48000_960_1 ICONST_ATTR = { 240, /* nfft */ +17476, /* scale */ +7, /* scale_shift */ 1, /* shift */ -{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev240, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -448,8 +452,10 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_2 static const kiss_fft_state fft_state48000_960_2 ICONST_ATTR = { 120, /* nfft */ +17476, /* scale */ +6, /* scale_shift */ 2, /* shift */ -{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev120, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -459,8 +465,10 @@ fft_twiddles48000_960, /* bitrev */ #define FFT_STATE48000_960_3 static const kiss_fft_state fft_state48000_960_3 ICONST_ATTR = { 60, /* nfft */ +17476, /* scale */ +5, /* scale_shift */ 3, /* shift */ -{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ fft_bitrev60, /* bitrev */ fft_twiddles48000_960, /* bitrev */ }; @@ -470,104 +478,368 @@ fft_twiddles48000_960, /* bitrev */ #ifndef MDCT_TWIDDLES960 #define MDCT_TWIDDLES960 -static const opus_val16 mdct_twiddles960[481] ICONST_ATTR = { -32767, 32767, 32767, 32767, 32766, -32763, 32762, 32759, 32757, 32753, -32751, 32747, 32743, 32738, 32733, -32729, 32724, 32717, 32711, 32705, -32698, 32690, 32683, 32676, 32667, -32658, 32650, 32640, 32631, 32620, -32610, 32599, 32588, 32577, 32566, -32554, 32541, 32528, 32515, 32502, -32487, 32474, 32459, 32444, 32429, -32413, 32397, 32381, 32364, 32348, -32331, 32313, 32294, 32277, 32257, -32239, 32219, 32200, 32180, 32159, -32138, 32118, 32096, 32074, 32051, -32029, 32006, 31984, 31960, 31936, -31912, 31888, 31863, 31837, 31812, -31786, 31760, 31734, 31707, 31679, -31652, 31624, 31596, 31567, 31539, -31508, 31479, 31450, 31419, 31388, -31357, 31326, 31294, 31262, 31230, -31198, 31164, 31131, 31097, 31063, -31030, 30994, 30959, 30924, 30889, -30853, 30816, 30779, 30743, 30705, -30668, 30629, 30592, 30553, 30515, -30475, 30435, 30396, 30356, 30315, -30274, 30233, 30191, 30149, 30107, -30065, 30022, 29979, 29936, 29891, -29847, 29803, 29758, 29713, 29668, -29622, 29577, 29529, 29483, 29436, -29390, 29341, 29293, 29246, 29197, -29148, 29098, 29050, 29000, 28949, -28899, 28848, 28797, 28746, 28694, -28642, 28590, 28537, 28485, 28432, -28378, 28324, 28271, 28217, 28162, -28106, 28051, 27995, 27940, 27884, -27827, 27770, 27713, 27657, 27598, -27540, 27481, 27423, 27365, 27305, -27246, 27187, 27126, 27066, 27006, -26945, 26883, 26822, 26760, 26698, -26636, 26574, 26510, 26448, 26383, -26320, 26257, 26191, 26127, 26062, -25997, 25931, 25866, 25800, 25734, -25667, 25601, 25533, 25466, 25398, -25330, 25262, 25194, 25125, 25056, -24987, 24917, 24848, 24778, 24707, -24636, 24566, 24495, 24424, 24352, -24280, 24208, 24135, 24063, 23990, -23917, 23842, 23769, 23695, 23622, -23546, 23472, 23398, 23322, 23246, -23171, 23095, 23018, 22942, 22866, -22788, 22711, 22634, 22557, 22478, -22400, 22322, 22244, 22165, 22085, -22006, 21927, 21846, 21766, 21687, -21606, 21524, 21443, 21363, 21282, -21199, 21118, 21035, 20954, 20870, -20788, 20705, 20621, 20538, 20455, -20371, 20286, 20202, 20118, 20034, -19947, 19863, 19777, 19692, 19606, -19520, 19434, 19347, 19260, 19174, -19088, 18999, 18911, 18825, 18737, -18648, 18560, 18472, 18384, 18294, -18205, 18116, 18025, 17936, 17846, -17757, 17666, 17576, 17485, 17395, -17303, 17212, 17122, 17030, 16937, -16846, 16755, 16662, 16569, 16477, -16385, 16291, 16198, 16105, 16012, -15917, 15824, 15730, 15636, 15541, -15447, 15352, 15257, 15162, 15067, -14973, 14875, 14781, 14685, 14589, -14493, 14396, 14300, 14204, 14107, -14010, 13914, 13815, 13718, 13621, -13524, 13425, 13328, 13230, 13133, -13033, 12935, 12836, 12738, 12638, -12540, 12441, 12341, 12241, 12142, -12044, 11943, 11843, 11744, 11643, -11542, 11442, 11342, 11241, 11139, -11039, 10939, 10836, 10736, 10635, -10534, 10431, 10330, 10228, 10127, -10024, 9921, 9820, 9718, 9614, -9512, 9410, 9306, 9204, 9101, -8998, 8895, 8791, 8689, 8585, -8481, 8377, 8274, 8171, 8067, -7962, 7858, 7753, 7650, 7545, -7441, 7336, 7231, 7129, 7023, -6917, 6813, 6709, 6604, 6498, -6393, 6288, 6182, 6077, 5973, -5867, 5760, 5656, 5549, 5445, -5339, 5232, 5127, 5022, 4914, -4809, 4703, 4596, 4490, 4384, -4278, 4171, 4065, 3958, 3852, -3745, 3640, 3532, 3426, 3318, -3212, 3106, 2998, 2891, 2786, -2679, 2570, 2465, 2358, 2251, -2143, 2037, 1929, 1823, 1715, -1609, 1501, 1393, 1287, 1180, -1073, 964, 858, 751, 644, -535, 429, 322, 214, 107, -0, }; +static const opus_val16 mdct_twiddles960[1800] ICONST_ATTR = { +32767, 32767, 32767, 32766, 32765, +32763, 32761, 32759, 32756, 32753, +32750, 32746, 32742, 32738, 32733, +32728, 32722, 32717, 32710, 32704, +32697, 32690, 32682, 32674, 32666, +32657, 32648, 32639, 32629, 32619, +32609, 32598, 32587, 32576, 32564, +32552, 32539, 32526, 32513, 32500, +32486, 32472, 32457, 32442, 32427, +32411, 32395, 32379, 32362, 32345, +32328, 32310, 32292, 32274, 32255, +32236, 32217, 32197, 32177, 32157, +32136, 32115, 32093, 32071, 32049, +32027, 32004, 31981, 31957, 31933, +31909, 31884, 31859, 31834, 31809, +31783, 31756, 31730, 31703, 31676, +31648, 31620, 31592, 31563, 31534, +31505, 31475, 31445, 31415, 31384, +31353, 31322, 31290, 31258, 31226, +31193, 31160, 31127, 31093, 31059, +31025, 30990, 30955, 30920, 30884, +30848, 30812, 30775, 30738, 30701, +30663, 30625, 30587, 30548, 30509, +30470, 30430, 30390, 30350, 30309, +30269, 30227, 30186, 30144, 30102, +30059, 30016, 29973, 29930, 29886, +29842, 29797, 29752, 29707, 29662, +29616, 29570, 29524, 29477, 29430, +29383, 29335, 29287, 29239, 29190, +29142, 29092, 29043, 28993, 28943, +28892, 28842, 28791, 28739, 28688, +28636, 28583, 28531, 28478, 28425, +28371, 28317, 28263, 28209, 28154, +28099, 28044, 27988, 27932, 27876, +27820, 27763, 27706, 27648, 27591, +27533, 27474, 27416, 27357, 27298, +27238, 27178, 27118, 27058, 26997, +26936, 26875, 26814, 26752, 26690, +26628, 26565, 26502, 26439, 26375, +26312, 26247, 26183, 26119, 26054, +25988, 25923, 25857, 25791, 25725, +25658, 25592, 25524, 25457, 25389, +25322, 25253, 25185, 25116, 25047, +24978, 24908, 24838, 24768, 24698, +24627, 24557, 24485, 24414, 24342, +24270, 24198, 24126, 24053, 23980, +23907, 23834, 23760, 23686, 23612, +23537, 23462, 23387, 23312, 23237, +23161, 23085, 23009, 22932, 22856, +22779, 22701, 22624, 22546, 22468, +22390, 22312, 22233, 22154, 22075, +21996, 21916, 21836, 21756, 21676, +21595, 21515, 21434, 21352, 21271, +21189, 21107, 21025, 20943, 20860, +20777, 20694, 20611, 20528, 20444, +20360, 20276, 20192, 20107, 20022, +19937, 19852, 19767, 19681, 19595, +19509, 19423, 19336, 19250, 19163, +19076, 18988, 18901, 18813, 18725, +18637, 18549, 18460, 18372, 18283, +18194, 18104, 18015, 17925, 17835, +17745, 17655, 17565, 17474, 17383, +17292, 17201, 17110, 17018, 16927, +16835, 16743, 16650, 16558, 16465, +16372, 16279, 16186, 16093, 15999, +15906, 15812, 15718, 15624, 15529, +15435, 15340, 15245, 15150, 15055, +14960, 14864, 14769, 14673, 14577, +14481, 14385, 14288, 14192, 14095, +13998, 13901, 13804, 13706, 13609, +13511, 13414, 13316, 13218, 13119, +13021, 12923, 12824, 12725, 12626, +12527, 12428, 12329, 12230, 12130, +12030, 11930, 11831, 11730, 11630, +11530, 11430, 11329, 11228, 11128, +11027, 10926, 10824, 10723, 10622, +10520, 10419, 10317, 10215, 10113, +10011, 9909, 9807, 9704, 9602, +9499, 9397, 9294, 9191, 9088, +8985, 8882, 8778, 8675, 8572, +8468, 8364, 8261, 8157, 8053, +7949, 7845, 7741, 7637, 7532, +7428, 7323, 7219, 7114, 7009, +6905, 6800, 6695, 6590, 6485, +6380, 6274, 6169, 6064, 5958, +5853, 5747, 5642, 5536, 5430, +5325, 5219, 5113, 5007, 4901, +4795, 4689, 4583, 4476, 4370, +4264, 4157, 4051, 3945, 3838, +3732, 3625, 3518, 3412, 3305, +3198, 3092, 2985, 2878, 2771, +2664, 2558, 2451, 2344, 2237, +2130, 2023, 1916, 1809, 1702, +1594, 1487, 1380, 1273, 1166, +1059, 952, 844, 737, 630, +523, 416, 308, 201, 94, +-13, -121, -228, -335, -442, +-550, -657, -764, -871, -978, +-1086, -1193, -1300, -1407, -1514, +-1621, -1728, -1835, -1942, -2049, +-2157, -2263, -2370, -2477, -2584, +-2691, -2798, -2905, -3012, -3118, +-3225, -3332, -3439, -3545, -3652, +-3758, -3865, -3971, -4078, -4184, +-4290, -4397, -4503, -4609, -4715, +-4821, -4927, -5033, -5139, -5245, +-5351, -5457, -5562, -5668, -5774, +-5879, -5985, -6090, -6195, -6301, +-6406, -6511, -6616, -6721, -6826, +-6931, -7036, -7140, -7245, -7349, +-7454, -7558, -7663, -7767, -7871, +-7975, -8079, -8183, -8287, -8390, +-8494, -8597, -8701, -8804, -8907, +-9011, -9114, -9217, -9319, -9422, +-9525, -9627, -9730, -9832, -9934, +-10037, -10139, -10241, -10342, -10444, +-10546, -10647, -10748, -10850, -10951, +-11052, -11153, -11253, -11354, -11455, +-11555, -11655, -11756, -11856, -11955, +-12055, -12155, -12254, -12354, -12453, +-12552, -12651, -12750, -12849, -12947, +-13046, -13144, -13242, -13340, -13438, +-13536, -13633, -13731, -13828, -13925, +-14022, -14119, -14216, -14312, -14409, +-14505, -14601, -14697, -14793, -14888, +-14984, -15079, -15174, -15269, -15364, +-15459, -15553, -15647, -15741, -15835, +-15929, -16023, -16116, -16210, -16303, +-16396, -16488, -16581, -16673, -16766, +-16858, -16949, -17041, -17133, -17224, +-17315, -17406, -17497, -17587, -17678, +-17768, -17858, -17948, -18037, -18127, +-18216, -18305, -18394, -18483, -18571, +-18659, -18747, -18835, -18923, -19010, +-19098, -19185, -19271, -19358, -19444, +-19531, -19617, -19702, -19788, -19873, +-19959, -20043, -20128, -20213, -20297, +-20381, -20465, -20549, -20632, -20715, +-20798, -20881, -20963, -21046, -21128, +-21210, -21291, -21373, -21454, -21535, +-21616, -21696, -21776, -21856, -21936, +-22016, -22095, -22174, -22253, -22331, +-22410, -22488, -22566, -22643, -22721, +-22798, -22875, -22951, -23028, -23104, +-23180, -23256, -23331, -23406, -23481, +-23556, -23630, -23704, -23778, -23852, +-23925, -23998, -24071, -24144, -24216, +-24288, -24360, -24432, -24503, -24574, +-24645, -24716, -24786, -24856, -24926, +-24995, -25064, -25133, -25202, -25270, +-25339, -25406, -25474, -25541, -25608, +-25675, -25742, -25808, -25874, -25939, +-26005, -26070, -26135, -26199, -26264, +-26327, -26391, -26455, -26518, -26581, +-26643, -26705, -26767, -26829, -26891, +-26952, -27013, -27073, -27133, -27193, +-27253, -27312, -27372, -27430, -27489, +-27547, -27605, -27663, -27720, -27777, +-27834, -27890, -27946, -28002, -28058, +-28113, -28168, -28223, -28277, -28331, +-28385, -28438, -28491, -28544, -28596, +-28649, -28701, -28752, -28803, -28854, +-28905, -28955, -29006, -29055, -29105, +-29154, -29203, -29251, -29299, -29347, +-29395, -29442, -29489, -29535, -29582, +-29628, -29673, -29719, -29764, -29808, +-29853, -29897, -29941, -29984, -30027, +-30070, -30112, -30154, -30196, -30238, +-30279, -30320, -30360, -30400, -30440, +-30480, -30519, -30558, -30596, -30635, +-30672, -30710, -30747, -30784, -30821, +-30857, -30893, -30929, -30964, -30999, +-31033, -31068, -31102, -31135, -31168, +-31201, -31234, -31266, -31298, -31330, +-31361, -31392, -31422, -31453, -31483, +-31512, -31541, -31570, -31599, -31627, +-31655, -31682, -31710, -31737, -31763, +-31789, -31815, -31841, -31866, -31891, +-31915, -31939, -31963, -31986, -32010, +-32032, -32055, -32077, -32099, -32120, +-32141, -32162, -32182, -32202, -32222, +-32241, -32260, -32279, -32297, -32315, +-32333, -32350, -32367, -32383, -32399, +-32415, -32431, -32446, -32461, -32475, +-32489, -32503, -32517, -32530, -32542, +-32555, -32567, -32579, -32590, -32601, +-32612, -32622, -32632, -32641, -32651, +-32659, -32668, -32676, -32684, -32692, +-32699, -32706, -32712, -32718, -32724, +-32729, -32734, -32739, -32743, -32747, +-32751, -32754, -32757, -32760, -32762, +-32764, -32765, -32767, -32767, -32767, +32767, 32767, 32765, 32761, 32756, +32750, 32742, 32732, 32722, 32710, +32696, 32681, 32665, 32647, 32628, +32608, 32586, 32562, 32538, 32512, +32484, 32455, 32425, 32393, 32360, +32326, 32290, 32253, 32214, 32174, +32133, 32090, 32046, 32001, 31954, +31906, 31856, 31805, 31753, 31700, +31645, 31588, 31530, 31471, 31411, +31349, 31286, 31222, 31156, 31089, +31020, 30951, 30880, 30807, 30733, +30658, 30582, 30504, 30425, 30345, +30263, 30181, 30096, 30011, 29924, +29836, 29747, 29656, 29564, 29471, +29377, 29281, 29184, 29086, 28987, +28886, 28784, 28681, 28577, 28471, +28365, 28257, 28147, 28037, 27925, +27812, 27698, 27583, 27467, 27349, +27231, 27111, 26990, 26868, 26744, +26620, 26494, 26367, 26239, 26110, +25980, 25849, 25717, 25583, 25449, +25313, 25176, 25038, 24900, 24760, +24619, 24477, 24333, 24189, 24044, +23898, 23751, 23602, 23453, 23303, +23152, 22999, 22846, 22692, 22537, +22380, 22223, 22065, 21906, 21746, +21585, 21423, 21261, 21097, 20933, +20767, 20601, 20434, 20265, 20096, +19927, 19756, 19584, 19412, 19239, +19065, 18890, 18714, 18538, 18361, +18183, 18004, 17824, 17644, 17463, +17281, 17098, 16915, 16731, 16546, +16361, 16175, 15988, 15800, 15612, +15423, 15234, 15043, 14852, 14661, +14469, 14276, 14083, 13889, 13694, +13499, 13303, 13107, 12910, 12713, +12515, 12317, 12118, 11918, 11718, +11517, 11316, 11115, 10913, 10710, +10508, 10304, 10100, 9896, 9691, +9486, 9281, 9075, 8869, 8662, +8455, 8248, 8040, 7832, 7623, +7415, 7206, 6996, 6787, 6577, +6366, 6156, 5945, 5734, 5523, +5311, 5100, 4888, 4675, 4463, +4251, 4038, 3825, 3612, 3399, +3185, 2972, 2758, 2544, 2330, +2116, 1902, 1688, 1474, 1260, +1045, 831, 617, 402, 188, +-27, -241, -456, -670, -885, +-1099, -1313, -1528, -1742, -1956, +-2170, -2384, -2598, -2811, -3025, +-3239, -3452, -3665, -3878, -4091, +-4304, -4516, -4728, -4941, -5153, +-5364, -5576, -5787, -5998, -6209, +-6419, -6629, -6839, -7049, -7258, +-7467, -7676, -7884, -8092, -8300, +-8507, -8714, -8920, -9127, -9332, +-9538, -9743, -9947, -10151, -10355, +-10558, -10761, -10963, -11165, -11367, +-11568, -11768, -11968, -12167, -12366, +-12565, -12762, -12960, -13156, -13352, +-13548, -13743, -13937, -14131, -14324, +-14517, -14709, -14900, -15091, -15281, +-15470, -15659, -15847, -16035, -16221, +-16407, -16593, -16777, -16961, -17144, +-17326, -17508, -17689, -17869, -18049, +-18227, -18405, -18582, -18758, -18934, +-19108, -19282, -19455, -19627, -19799, +-19969, -20139, -20308, -20475, -20642, +-20809, -20974, -21138, -21301, -21464, +-21626, -21786, -21946, -22105, -22263, +-22420, -22575, -22730, -22884, -23037, +-23189, -23340, -23490, -23640, -23788, +-23935, -24080, -24225, -24369, -24512, +-24654, -24795, -24934, -25073, -25211, +-25347, -25482, -25617, -25750, -25882, +-26013, -26143, -26272, -26399, -26526, +-26651, -26775, -26898, -27020, -27141, +-27260, -27379, -27496, -27612, -27727, +-27841, -27953, -28065, -28175, -28284, +-28391, -28498, -28603, -28707, -28810, +-28911, -29012, -29111, -29209, -29305, +-29401, -29495, -29587, -29679, -29769, +-29858, -29946, -30032, -30118, -30201, +-30284, -30365, -30445, -30524, -30601, +-30677, -30752, -30825, -30897, -30968, +-31038, -31106, -31172, -31238, -31302, +-31365, -31426, -31486, -31545, -31602, +-31658, -31713, -31766, -31818, -31869, +-31918, -31966, -32012, -32058, -32101, +-32144, -32185, -32224, -32262, -32299, +-32335, -32369, -32401, -32433, -32463, +-32491, -32518, -32544, -32568, -32591, +-32613, -32633, -32652, -32669, -32685, +-32700, -32713, -32724, -32735, -32744, +-32751, -32757, -32762, -32766, -32767, +32767, 32764, 32755, 32741, 32720, +32694, 32663, 32626, 32583, 32535, +32481, 32421, 32356, 32286, 32209, +32128, 32041, 31948, 31850, 31747, +31638, 31523, 31403, 31278, 31148, +31012, 30871, 30724, 30572, 30415, +30253, 30086, 29913, 29736, 29553, +29365, 29172, 28974, 28771, 28564, +28351, 28134, 27911, 27684, 27452, +27216, 26975, 26729, 26478, 26223, +25964, 25700, 25432, 25159, 24882, +24601, 24315, 24026, 23732, 23434, +23133, 22827, 22517, 22204, 21886, +21565, 21240, 20912, 20580, 20244, +19905, 19563, 19217, 18868, 18516, +18160, 17802, 17440, 17075, 16708, +16338, 15964, 15588, 15210, 14829, +14445, 14059, 13670, 13279, 12886, +12490, 12093, 11693, 11291, 10888, +10482, 10075, 9666, 9255, 8843, +8429, 8014, 7597, 7180, 6760, +6340, 5919, 5496, 5073, 4649, +4224, 3798, 3372, 2945, 2517, +2090, 1661, 1233, 804, 375, +-54, -483, -911, -1340, -1768, +-2197, -2624, -3052, -3479, -3905, +-4330, -4755, -5179, -5602, -6024, +-6445, -6865, -7284, -7702, -8118, +-8533, -8946, -9358, -9768, -10177, +-10584, -10989, -11392, -11793, -12192, +-12589, -12984, -13377, -13767, -14155, +-14541, -14924, -15305, -15683, -16058, +-16430, -16800, -17167, -17531, -17892, +-18249, -18604, -18956, -19304, -19649, +-19990, -20329, -20663, -20994, -21322, +-21646, -21966, -22282, -22595, -22904, +-23208, -23509, -23806, -24099, -24387, +-24672, -24952, -25228, -25499, -25766, +-26029, -26288, -26541, -26791, -27035, +-27275, -27511, -27741, -27967, -28188, +-28405, -28616, -28823, -29024, -29221, +-29412, -29599, -29780, -29957, -30128, +-30294, -30455, -30611, -30761, -30906, +-31046, -31181, -31310, -31434, -31552, +-31665, -31773, -31875, -31972, -32063, +-32149, -32229, -32304, -32373, -32437, +-32495, -32547, -32594, -32635, -32671, +-32701, -32726, -32745, -32758, -32766, +32767, 32754, 32717, 32658, 32577, +32473, 32348, 32200, 32029, 31837, +31624, 31388, 31131, 30853, 30553, +30232, 29891, 29530, 29148, 28746, +28324, 27883, 27423, 26944, 26447, +25931, 25398, 24847, 24279, 23695, +23095, 22478, 21846, 21199, 20538, +19863, 19174, 18472, 17757, 17030, +16291, 15541, 14781, 14010, 13230, +12441, 11643, 10837, 10024, 9204, +8377, 7545, 6708, 5866, 5020, +4171, 3319, 2464, 1608, 751, +-107, -965, -1822, -2678, -3532, +-4383, -5232, -6077, -6918, -7754, +-8585, -9409, -10228, -11039, -11843, +-12639, -13426, -14204, -14972, -15730, +-16477, -17213, -17937, -18648, -19347, +-20033, -20705, -21363, -22006, -22634, +-23246, -23843, -24423, -24986, -25533, +-26062, -26573, -27066, -27540, -27995, +-28431, -28848, -29245, -29622, -29979, +-30315, -30630, -30924, -31197, -31449, +-31679, -31887, -32074, -32239, -32381, +-32501, -32600, -32675, -32729, -32759, +}; #endif static const CELTMode mode48000_960_120 ICONST_ATTR = { diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c index af991bb052..b047b22774 100644 --- a/lib/rbcodec/codecs/libopus/celt/vq.c +++ b/lib/rbcodec/codecs/libopus/celt/vq.c @@ -37,19 +37,27 @@ #include "os_support.h" #include "bands.h" #include "rate.h" +#include "pitch.h" +#if defined(MIPSr1_ASM) +#include "mips/vq_mipsr1.h" +#endif + +#ifndef OVERRIDE_vq_exp_rotation1 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) { int i; + opus_val16 ms; celt_norm *Xptr; Xptr = X; + ms = NEG16(s); for (i=0;i=0;i--) @@ -57,10 +65,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ celt_norm x1, x2; x1 = Xptr[0]; x2 = Xptr[stride]; - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); - *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); } } +#endif /* OVERRIDE_vq_exp_rotation1 */ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) { @@ -91,7 +100,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int } /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for extract_collapse_mask().*/ - len /= stride; + len = celt_udiv(len, stride); for (i=0;i0, "alg_unquant() needs at least one pulse"); celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); -/* ALLOC(iy, N, int); */ - decode_pulses(iy, N, K, dec); - Ryy = 0; - i=0; - do { - Ryy = MAC16_16(Ryy, iy[i], iy[i]); - } while (++i < N); + ALLOC(iy, N, int); + Ryy = decode_pulses(iy, N, K, dec); normalise_residual(iy, X, N, Ryy, gain); exp_rotation(X, N, -1, B, K, spread); collapse_mask = extract_collapse_mask(iy, N, B); -/* RESTORE_STACK; */ + RESTORE_STACK; return collapse_mask; } +#ifndef OVERRIDE_renormalise_vector void renormalise_vector(celt_norm *X, int N, opus_val16 gain) { int i; #ifdef FIXED_POINT int k; #endif - opus_val32 E = EPSILON; + opus_val32 E; opus_val16 g; opus_val32 t; - celt_norm *xptr = X; - for (i=0;i>1; #endif @@ -377,8 +375,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain) } /*return celt_sqrt(E);*/ } +#endif /* OVERRIDE_renormalise_vector */ -int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) +int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N) { int i; int itheta; @@ -397,14 +396,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) Eside = MAC16_16(Eside, s, s); } } else { - for (i=0;i