From 580b307fd791c0997a8831bc800bba87797bfb7e Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Mon, 20 May 2013 22:25:57 +0200 Subject: Sync opus codec to upstream git Sync opus codec to upstream commit 02fed471a4568852d6618e041c4f2af0d7730ee2 (August 30 2013) This brings in a lot of optimizations but also makes the diff between our codec and the upstream much smaller as most of our optimizations have been upstreamed or superseded. Speedups across the board for CELT mode files: 64kbps 128kbps H300 9.82MHz 15.48MHz c200 4.86MHz 9.63MHz fuze v1 10.32MHz 15.92MHz For the silk mode test file (16kbps) arm targets get a speedup of about 2MHz while the H300 is 7.8MHz slower, likely because it's now using the pseudostack more rather than the real stack which is in iram. Patches to get around that are upcoming. Change-Id: Ifecf963e461c51ac42e09dac1e91bc4bc3b12fa3 --- lib/rbcodec/codecs/libopus/celt/bands.c | 1190 ++++++++++++++++++------------- 1 file changed, 697 insertions(+), 493 deletions(-) (limited to 'lib/rbcodec/codecs/libopus/celt/bands.c') diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c index c7cb0d5500..5c715aff53 100644 --- a/lib/rbcodec/codecs/libopus/celt/bands.c +++ b/lib/rbcodec/codecs/libopus/celt/bands.c @@ -28,7 +28,7 @@ */ #ifdef HAVE_CONFIG_H -#include "opus_config.h" +#include "config.h" #endif #include @@ -40,6 +40,23 @@ #include "os_support.h" #include "mathops.h" #include "rate.h" +#include "quant_bands.h" +#include "pitch.h" + +int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev) +{ + int i; + for (i=0;iprev && val < thresholds[prev]+hysteresis[prev]) + i=prev; + if (i thresholds[prev-1]-hysteresis[prev-1]) + i=prev; + return i; +} opus_uint32 celt_lcg_rand(opus_uint32 seed) { @@ -174,7 +191,8 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel #endif /* De-normalise the energy to produce the synthesis 
from the unit-energy bands */ -void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, celt_sig * OPUS_RESTRICT freq, const celt_ener *bandE, int end, int C, int M) +void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) { int i, c, N; const opus_int16 *eBands = m->eBands; @@ -184,18 +202,39 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, cel celt_sig * OPUS_RESTRICT f; const celt_norm * OPUS_RESTRICT x; f = freq+c*N; - x = X+c*N; - for (i=0;inbEBands],1); + opus_val16 g; + opus_val16 lg; +#ifdef FIXED_POINT + int shift; +#endif j=M*eBands[i]; band_end = M*eBands[i+1]; + lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6)); +#ifdef FIXED_POINT + /* Handle the integer part of the log energy */ + shift = 16-(lg>>DB_SHIFT); + if (shift>31) + { + shift=0; + g=0; + } else { + /* Handle the fractional part. */ + g = celt_exp2_frac(lg&((1<nbEBands;i++) - { - int j; - int c; - float g; - if (bandE0[i]<10 || (C==2 && bandE0[i+m->nbEBands]<1)) - continue; - c=0; do { - g = bandE[i+c*m->nbEBands]/(1e-15+bandE0[i+c*m->nbEBands]); - for (j=M*m->eBands[i];jeBands[i+1];j++) - MSE[i] += (g*X[j+c*N]-X0[j+c*N])*(g*X[j+c*N]-X0[j+c*N]); - } while (++cnbEBands; -} - -#endif - /* Indexing table for converting from natural Hadamard to ordery Hadamard This is essentially a bit-reversed Gray, on top of which we've added an inversion of the order because we want the DC at the end rather than @@ -633,289 +624,304 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo) return qn; } -/* This function is responsible for encoding and decoding a band for both - the mono and stereo case. Even in the mono case, it can split the band - in two and transmit the energy difference with the two half-bands. It - can be called recursively so bands can end up being split in 8 parts. 
*/ -static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_norm *Y, - int N, int b, int spread, int B, int intensity, int tf_change, celt_norm *lowband, ec_ctx *ec, - opus_int32 *remaining_bits, int LM, celt_norm *lowband_out, const celt_ener *bandE, int level, - opus_uint32 *seed, opus_val16 gain, celt_norm *lowband_scratch, int fill) +struct band_ctx { + int encode; + const CELTMode *m; + int i; + int intensity; + int spread; + int tf_change; + ec_ctx *ec; + opus_int32 remaining_bits; + const celt_ener *bandE; + opus_uint32 seed; +}; + +struct split_ctx { + int inv; + int imid; + int iside; + int delta; + int itheta; + int qalloc; +}; + +static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, + celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0, + int LM, + int stereo, int *fill) { - const unsigned char *cache; - int q; - int curr_bits; - int stereo, split; - int imid=0, iside=0; - int N0=N; - int N_B=N; - int N_B0; - int B0=B; - int time_divide=0; - int recombine=0; - int inv = 0; - opus_val16 mid=0, side=0; - int longBlocks; - unsigned cm=0; -#ifdef RESYNTH - int resynth = 1; -#else - int resynth = !encode; -#endif + int qn; + int itheta=0; + int delta; + int imid, iside; + int qalloc; + int pulse_cap; + int offset; + opus_int32 tell; + int inv=0; + int encode; + const CELTMode *m; + int i; + int intensity; + ec_ctx *ec; + const celt_ener *bandE; + + encode = ctx->encode; + m = ctx->m; + i = ctx->i; + intensity = ctx->intensity; + ec = ctx->ec; + bandE = ctx->bandE; + + /* Decide on the resolution to give to the split parameter theta */ + pulse_cap = m->logN[i]+LM*(1<>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET); + qn = compute_qn(N, *b, offset, pulse_cap, stereo); + if (stereo && i>=intensity) + qn = 1; + if (encode) + { + /* theta is the atan() of the ratio between the (normalized) + side and mid. 
With just that parameter, we can re-scale both + mid and side because we know that 1) they have unit norm and + 2) they are orthogonal. */ + itheta = stereo_itheta(X, Y, stereo, N); + } + tell = ec_tell_frac(ec); + if (qn!=1) + { + if (encode) + itheta = (itheta*qn+8192)>>14; - longBlocks = B0==1; + /* Entropy coding of the angle. We use a uniform pdf for the + time split, a step for stereo, and a triangular one for the rest. */ + if (stereo && N>2) + { + int p0 = 3; + int x = itheta; + int x0 = qn/2; + int ft = p0*(x0+1) + x0; + /* Use a probability of p0 up to itheta=8192 and then use 1 after */ + if (encode) + { + ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); + } else { + int fs; + fs=ec_decode(ec,ft); + if (fs<(x0+1)*p0) + x=fs/p0; + else + x=x0+1+(fs-(x0+1)*p0); + ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); + itheta = x; + } + } else if (B0>1 || stereo) { + /* Uniform pdf */ + if (encode) + ec_enc_uint(ec, itheta, qn+1); + else + itheta = ec_dec_uint(ec, qn+1); + } else { + int fs=1, ft; + ft = ((qn>>1)+1)*((qn>>1)+1); + if (encode) + { + int fl; - N_B /= B; - N_B0 = N_B; + fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta; + fl = itheta <= (qn>>1) ? 
itheta*(itheta + 1)>>1 : + ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); - split = stereo = Y != NULL; + ec_encode(ec, fl, fl+fs, ft); + } else { + /* Triangular pdf */ + int fl=0; + int fm; + fm = ec_decode(ec, ft); - /* Special case for one sample */ - if (N==1) - { - int c; - celt_norm *x = X; - c=0; do { - int sign=0; - if (*remaining_bits>=1<>1)*((qn>>1) + 1)>>1)) { - sign = x[0]<0; - ec_enc_bits(ec, sign, 1); - } else { - sign = ec_dec_bits(ec, 1); + itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1; + fs = itheta + 1; + fl = itheta*(itheta + 1)>>1; + } + else + { + itheta = (2*(qn + 1) + - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1; + fs = qn + 1 - itheta; + fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); } - *remaining_bits -= 1<0) - recombine = tf_change; - /* Band recombining to increase frequency resolution */ - if (lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) - { - int j; - for (j=0;j>k, 1<>k, 1<>4]<<2; + if (itheta==0) + intensity_stereo(m, X, Y, bandE, i, N); + else + stereo_split(X, Y, N); } - B>>=recombine; - N_B<<=recombine; - - /* Increasing the time resolution */ - while ((N_B&1) == 0 && tf_change<0) + /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate. 
+ Let's do that at higher complexity */ + } else if (stereo) { + if (encode) { - if (encode) - haar1(X, N_B, B); - if (lowband) - haar1(lowband, N_B, B); - fill |= fill<>= 1; - time_divide++; - tf_change++; + inv = itheta > 8192; + if (inv) + { + int j; + for (j=0;j1) + if (*b>2<remaining_bits > 2<>recombine, B0<>recombine, B0<cache.bits + m->cache.index[(LM+1)*m->nbEBands+i]; - if (!stereo && LM != -1 && b > cache[cache[0]]+12 && N>2) + if (itheta == 0) { - N >>= 1; - Y = X+N; - split = 1; - LM -= 1; - if (B==1) - fill = (fill&1)|(fill<<1); - B = (B+1)>>1; + imid = 32767; + iside = 0; + *fill &= (1<inv = inv; + sctx->imid = imid; + sctx->iside = iside; + sctx->delta = delta; + sctx->itheta = itheta; + sctx->qalloc = qalloc; +} +static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b, + celt_norm *lowband_out) +{ +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif + int c; + int stereo; + celt_norm *x = X; + int encode; + ec_ctx *ec; - /* Decide on the resolution to give to the split parameter theta */ - pulse_cap = m->logN[i]+LM*(1<>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET); - qn = compute_qn(N, b, offset, pulse_cap, stereo); - if (stereo && i>=intensity) - qn = 1; - if (encode) - { - /* theta is the atan() of the ratio between the (normalized) - side and mid. With just that parameter, we can re-scale both - mid and side because we know that 1) they have unit norm and - 2) they are orthogonal. */ - itheta = stereo_itheta(X, Y, stereo, N); - } - tell = ec_tell_frac(ec); - if (qn!=1) + encode = ctx->encode; + ec = ctx->ec; + + stereo = Y != NULL; + c=0; do { + int sign=0; + if (ctx->remaining_bits>=1<>14; - - /* Entropy coding of the angle. We use a uniform pdf for the - time split, a step for stereo, and a triangular one for the rest. 
*/ - if (stereo && N>2) { - int p0 = 3; - int x = itheta; - int x0 = qn/2; - int ft = p0*(x0+1) + x0; - /* Use a probability of p0 up to itheta=8192 and then use 1 after */ - if (encode) - { - ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); - } else { - int fs; - fs=ec_decode(ec,ft); - if (fs<(x0+1)*p0) - x=fs/p0; - else - x=x0+1+(fs-(x0+1)*p0); - ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); - itheta = x; - } - } else if (B0>1 || stereo) { - /* Uniform pdf */ - if (encode) - ec_enc_uint(ec, itheta, qn+1); - else - itheta = ec_dec_uint(ec, qn+1); + sign = x[0]<0; + ec_enc_bits(ec, sign, 1); } else { - int fs=1, ft; - ft = ((qn>>1)+1)*((qn>>1)+1); - if (encode) - { - int fl; + sign = ec_dec_bits(ec, 1); + } + ctx->remaining_bits -= 1<>1) ? itheta + 1 : qn + 1 - itheta; - fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 : - ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); +/* This function is responsible for encoding and decoding a mono partition. + It can split the band in two and transmit the energy difference with + the two half-bands. It can be called recursively so bands can end up being + split in 8 parts. 
*/ +static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, + int N, int b, int B, celt_norm *lowband, + int LM, + opus_val16 gain, int fill) +{ + const unsigned char *cache; + int q; + int curr_bits; + int imid=0, iside=0; + int N_B=N; + int B0=B; + opus_val16 mid=0, side=0; + unsigned cm=0; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif + celt_norm *Y=NULL; + int encode; + const CELTMode *m; + int i; + int spread; + ec_ctx *ec; - ec_encode(ec, fl, fl+fs, ft); - } else { - /* Triangular pdf */ - int fl=0; - int fm; - fm = ec_decode(ec, ft); + encode = ctx->encode; + m = ctx->m; + i = ctx->i; + spread = ctx->spread; + ec = ctx->ec; - if (fm < ((qn>>1)*((qn>>1) + 1)>>1)) - { - itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1; - fs = itheta + 1; - fl = itheta*(itheta + 1)>>1; - } - else - { - itheta = (2*(qn + 1) - - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1; - fs = qn + 1 - itheta; - fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); - } + N_B /= B; - ec_dec_update(ec, fl, fl+fs, ft); - } - } - itheta = (opus_int32)itheta*16384/qn; - if (encode && stereo) - { - if (itheta==0) - intensity_stereo(m, X, Y, bandE, i, N); - else - stereo_split(X, Y, N); - } - /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate. 
- Let's do that at higher complexity */ - } else if (stereo) { - if (encode) - { - inv = itheta > 8192; - if (inv) - { - int j; - for (j=0;j2< 2<cache.bits + m->cache.index[(LM+1)*m->nbEBands+i]; + if (LM != -1 && b > cache[cache[0]]+12 && N>2) + { + int mbits, sbits, delta; + int itheta; + int qalloc; + struct split_ctx sctx; + celt_norm *next_lowband2=NULL; + opus_int32 rebalance; - orig_fill = fill; - if (itheta == 0) - { - imid = 32767; - iside = 0; - fill &= (1<>= 1; + Y = X+N; + LM -= 1; + if (B==1) + fill = (fill&1)|(fill<<1); + B = (B+1)>>1; + compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, + LM, 0, &fill); + imid = sctx.imid; + iside = sctx.iside; + delta = sctx.delta; + itheta = sctx.itheta; + qalloc = sctx.qalloc; #ifdef FIXED_POINT mid = imid; side = iside; @@ -924,136 +930,59 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c side = (1.f/32768)*iside; #endif - /* This is a special case for N=2 that only works for stereo and takes - advantage of the fact that mid and side are orthogonal to encode - the side with just one bit. */ - if (N==2 && stereo) + /* Give more bits to low-energy MDCTs than they would otherwise deserve */ + if (B0>1 && (itheta&0x3fff)) { - int c; - int sign=0; - celt_norm *x2, *y2; - mbits = b; - sbits = 0; - /* Only need one bit for the side */ - if (itheta != 0 && itheta != 16384) - sbits = 1< 8192; - *remaining_bits -= qalloc+sbits; - - x2 = c ? Y : X; - y2 = c ? X : Y; - if (sbits) - { - if (encode) - { - /* Here we only need to encode a sign for the side */ - sign = x2[0]*y2[1] - x2[1]*y2[0] < 0; - ec_enc_bits(ec, sign, 1); - } else { - sign = ec_dec_bits(ec, 1); - } - } - sign = 1-2*sign; - /* We use orig_fill here because we want to fold the side, but if - itheta==16384, we'll have cleared the low bits of fill. 
*/ - cm = quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, intensity, tf_change, lowband, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch, orig_fill); - /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), - and there's no need to worry about mixing with the other channel. */ - y2[0] = -sign*x2[1]; - y2[1] = sign*x2[0]; - if (resynth) - { - celt_norm tmp; - X[0] = MULT16_16_Q15(mid, X[0]); - X[1] = MULT16_16_Q15(mid, X[1]); - Y[0] = MULT16_16_Q15(side, Y[0]); - Y[1] = MULT16_16_Q15(side, Y[1]); - tmp = X[0]; - X[0] = SUB16(tmp,Y[0]); - Y[0] = ADD16(tmp,Y[0]); - tmp = X[1]; - X[1] = SUB16(tmp,Y[1]); - Y[1] = ADD16(tmp,Y[1]); - } - } else { - /* "Normal" split code */ - celt_norm *next_lowband2=NULL; - celt_norm *next_lowband_out1=NULL; - int next_level=0; - opus_int32 rebalance; - - /* Give more bits to low-energy MDCTs than they would otherwise deserve */ - if (B0>1 && !stereo && (itheta&0x3fff)) - { - if (itheta > 8192) - /* Rough approximation for pre-echo masking */ - delta -= delta>>(4-LM); - else - /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */ - delta = IMIN(0, delta + (N<>(5-LM))); - } - mbits = IMAX(0, IMIN(b, (b-delta)/2)); - sbits = b-mbits; - *remaining_bits -= qalloc; - - if (lowband && !stereo) - next_lowband2 = lowband+N; /* >32-bit split case */ - - /* Only stereo needs to pass on lowband_out. Otherwise, it's - handled at the end */ - if (stereo) - next_lowband_out1 = lowband_out; + if (itheta > 8192) + /* Rough approximation for pre-echo masking */ + delta -= delta>>(4-LM); else - next_level = level+1; - - rebalance = *remaining_bits; - if (mbits >= sbits) - { - /* In stereo mode, we do not apply a scaling to the mid because we need the normalized - mid for folding later */ - cm = quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change, - lowband, ec, remaining_bits, LM, next_lowband_out1, - NULL, next_level, seed, stereo ? 
Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill); - rebalance = mbits - (rebalance-*remaining_bits); - if (rebalance > 3<>B)<<((B0>>1)&(stereo-1)); - } else { - /* For a stereo split, the high bits of fill are always zero, so no - folding will be done to the side. */ - cm = quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change, - next_lowband2, ec, remaining_bits, LM, NULL, - NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<((B0>>1)&(stereo-1)); - rebalance = sbits - (rebalance-*remaining_bits); - if (rebalance > 3<>(5-LM))); } + mbits = IMAX(0, IMIN(b, (b-delta)/2)); + sbits = b-mbits; + ctx->remaining_bits -= qalloc; + + if (lowband) + next_lowband2 = lowband+N; /* >32-bit split case */ + rebalance = ctx->remaining_bits; + if (mbits >= sbits) + { + cm = quant_partition(ctx, X, N, mbits, B, + lowband, LM, + MULT16_16_P15(gain,mid), fill); + rebalance = mbits - (rebalance-ctx->remaining_bits); + if (rebalance > 3<>B)<<(B0>>1); + } else { + cm = quant_partition(ctx, Y, N, sbits, B, + next_lowband2, LM, + MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); + rebalance = sbits - (rebalance-ctx->remaining_bits); + if (rebalance > 3<remaining_bits -= curr_bits; /* Ensures we can never bust the budget */ - while (*remaining_bits < 0 && q > 0) + while (ctx->remaining_bits < 0 && q > 0) { - *remaining_bits += curr_bits; + ctx->remaining_bits += curr_bits; q--; curr_bits = pulses2bits(m, i, LM, q); - *remaining_bits -= curr_bits; + ctx->remaining_bits -= curr_bits; } if (q!=0) @@ -1077,7 +1006,7 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c if (resynth) { unsigned cm_mask; - /*B can be as large as 16, so this shift might overflow an int on a + /* B can be as large as 16, so this shift might overflow an int on a 16-bit platform; use a long to get defined behavior.*/ cm_mask = (unsigned)(1UL<>20); + ctx->seed = celt_lcg_rand(ctx->seed); + X[j] = (celt_norm)((opus_int32)ctx->seed>>20); } cm = 
cm_mask; } else { @@ -1100,10 +1029,10 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c for (j=0;jseed = celt_lcg_rand(ctx->seed); /* About 48 dB below the "normal" folding level */ tmp = QCONST16(1.0f/256, 10); - tmp = (*seed)&0x8000 ? tmp : -tmp; + tmp = (ctx->seed)&0x8000 ? tmp : -tmp; X[j] = lowband[j]+tmp; } cm = fill; @@ -1114,64 +1043,307 @@ static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, c } } + return cm; +} + + +/* This function is responsible for encoding and decoding a band for the mono case. */ +static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, + int N, int b, int B, celt_norm *lowband, + int LM, celt_norm *lowband_out, + opus_val16 gain, celt_norm *lowband_scratch, int fill) +{ + int N0=N; + int N_B=N; + int N_B0; + int B0=B; + int time_divide=0; + int recombine=0; + int longBlocks; + unsigned cm=0; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif + int k; + int encode; + int tf_change; + + encode = ctx->encode; + tf_change = ctx->tf_change; + + longBlocks = B0==1; + + N_B /= B; + N_B0 = N_B; + + /* Special case for one sample */ + if (N==1) + { + return quant_band_n1(ctx, X, NULL, b, lowband_out); + } + + if (tf_change>0) + recombine = tf_change; + /* Band recombining to increase frequency resolution */ + + if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) + { + int j; + for (j=0;j>k, 1<>k, 1<>4]<<2; + } + B>>=recombine; + N_B<<=recombine; + + /* Increasing the time resolution */ + while ((N_B&1) == 0 && tf_change<0) + { + if (encode) + haar1(X, N_B, B); + if (lowband) + haar1(lowband, N_B, B); + fill |= fill<>= 1; + time_divide++; + tf_change++; + } + B0=B; + N_B0 = N_B; + + /* Reorganize the samples in time order instead of frequency order */ + if (B0>1) + { + if (encode) + deinterleave_hadamard(X, N_B>>recombine, B0<>recombine, B0<1) + interleave_hadamard(X, N_B>>recombine, B0<>= 1; + N_B <<= 1; + cm 
|= cm>>B; + haar1(X, N_B, B); + } + + for (k=0;k>k, 1<1) - interleave_hadamard(X, N_B>>recombine, B0<>= 1; - N_B <<= 1; - cm |= cm>>B; - haar1(X, N_B, B); - } - for (k=0;k>k, 1<encode; +#endif + int mbits, sbits, delta; + int itheta; + int qalloc; + struct split_ctx sctx; + int orig_fill; + int encode; + ec_ctx *ec; + + encode = ctx->encode; + ec = ctx->ec; + + /* Special case for one sample */ + if (N==1) + { + return quant_band_n1(ctx, X, Y, b, lowband_out); + } + + orig_fill = fill; + + compute_theta(ctx, &sctx, X, Y, N, &b, B, B, + LM, 1, &fill); + inv = sctx.inv; + imid = sctx.imid; + iside = sctx.iside; + delta = sctx.delta; + itheta = sctx.itheta; + qalloc = sctx.qalloc; +#ifdef FIXED_POINT + mid = imid; + side = iside; +#else + mid = (1.f/32768)*imid; + side = (1.f/32768)*iside; +#endif - /* Scale output for later folding */ - if (lowband_out) + /* This is a special case for N=2 that only works for stereo and takes + advantage of the fact that mid and side are orthogonal to encode + the side with just one bit. */ + if (N==2) + { + int c; + int sign=0; + celt_norm *x2, *y2; + mbits = b; + sbits = 0; + /* Only need one bit for the side. */ + if (itheta != 0 && itheta != 16384) + sbits = 1< 8192; + ctx->remaining_bits -= qalloc+sbits; + + x2 = c ? Y : X; + y2 = c ? X : Y; + if (sbits) + { + if (encode) { - int j; - opus_val16 n; - n = celt_sqrt(SHL32(EXTEND32(N0),22)); - for (j=0;jremaining_bits -= qalloc; + + rebalance = ctx->remaining_bits; + if (mbits >= sbits) + { + /* In stereo mode, we do not apply a scaling to the mid because we need the normalized + mid for folding later. */ + cm = quant_band(ctx, X, N, mbits, B, + lowband, LM, lowband_out, + Q15ONE, lowband_scratch, fill); + rebalance = mbits - (rebalance-ctx->remaining_bits); + if (rebalance > 3<>B); + } else { + /* For a stereo split, the high bits of fill are always zero, so no + folding will be done to the side. 
*/ + cm = quant_band(ctx, Y, N, sbits, B, + NULL, LM, NULL, + side, NULL, fill>>B); + rebalance = sbits - (rebalance-ctx->remaining_bits); + if (rebalance > 3<eBands; celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2; VARDECL(celt_norm, _norm); - VARDECL(celt_norm, lowband_scratch); + celt_norm *lowband_scratch; int B; int M; int lowband_offset; int update_lowband = 1; int C = Y_ != NULL ? 2 : 1; + int norm_offset; #ifdef RESYNTH int resynth = 1; #else int resynth = !encode; #endif + struct band_ctx ctx; SAVE_STACK; M = 1<nbEBands], celt_norm); - ALLOC(lowband_scratch, M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]), celt_norm); + norm_offset = M*eBands[start]; + /* No need to allocate norm for the last band because we don't need an + output in that band. */ + ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm); norm = _norm; - norm2 = norm + M*eBands[m->nbEBands]; + norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset; + /* We can use the last band as scratch space because we don't need that + scratch space for the last band. */ + lowband_scratch = X_+M*eBands[m->nbEBands-1]; lowband_offset = 0; + ctx.bandE = bandE; + ctx.ec = ec; + ctx.encode = encode; + ctx.intensity = intensity; + ctx.m = m; + ctx.seed = *seed; + ctx.spread = spread; for (i=start;i=m->effEBands) { X=norm; if (Y_!=NULL) Y = norm; + lowband_scratch = NULL; } + if (i==end-1) + lowband_scratch = NULL; /* Get a conservative estimate of the collapse_mask's for the bands we're - going to be folding from. */ + going to be folding from. 
*/ if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0)) { int fold_start; int fold_end; int fold_i; /* This ensures we never repeat spectral content within one band */ - effective_lowband = IMAX(M*eBands[start], M*eBands[lowband_offset]-N); + effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N); fold_start = lowband_offset; - while(M*eBands[--fold_start] > effective_lowband); + while(M*eBands[--fold_start] > effective_lowband+norm_offset); fold_end = lowband_offset-1; - while(M*eBands[++fold_end] < effective_lowband+N); + while(M*eBands[++fold_end] < effective_lowband+norm_offset+N); x_cm = y_cm = 0; fold_i = fold_start; do { x_cm |= collapse_masks[fold_i*C+0]; @@ -1266,7 +1461,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, } while (++fold_i(N<