summary refs log tree commit diff
path: root/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
diff options
context:
space:
mode:
author Nils Wallménius <nils@rockbox.org> 2014-01-19 16:31:59 +0100
committer Nils Wallménius <nils@rockbox.org> 2014-07-13 11:12:40 +0200
commit 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/celt_decoder.c
parent e557951c94c1efa769900257e466900f0ffeb53b (diff)
download rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c

This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer.

Speeds up decoding of the following test files:

                   H300 (cf)    C200 (arm7tdmi)    ipod classic (arm9e)
16 kbps (silk)     14.28 MHz    4.00 MHz           2.61 MHz
64 kbps (celt)     4.09 MHz     8.08 MHz           6.24 MHz
128 kbps (celt)    1.93 MHz     8.83 MHz           6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/celt_decoder.c')
-rw-r--r-- lib/rbcodec/codecs/libopus/celt/celt_decoder.c 325
1 files changed, 180 insertions, 145 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
index 77fa2d01f7..8af96b7931 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
@@ -51,6 +51,9 @@
51#include "celt_lpc.h" 51#include "celt_lpc.h"
52#include "vq.h" 52#include "vq.h"
53 53
54#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
55#define NORM_ALIASING_HACK
56#endif
54/**********************************************************************/ 57/**********************************************************************/
55/* */ 58/* */
56/* DECODER */ 59/* DECODER */
@@ -175,28 +178,24 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
175} 178}
176#endif /* CUSTOM_MODES */ 179#endif /* CUSTOM_MODES */
177 180
178static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x)
179{
180#ifdef FIXED_POINT
181 x = PSHR32(x, SIG_SHIFT);
182 x = MAX32(x, -32768);
183 x = MIN32(x, 32767);
184 return EXTRACT16(x);
185#else
186 return (opus_val16)x;
187#endif
188}
189 181
190#ifndef RESYNTH 182#ifndef RESYNTH
191static 183static
192#endif 184#endif
193void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch) 185void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
186 celt_sig *mem, int accum)
194{ 187{
195 int c; 188 int c;
196 int Nd; 189 int Nd;
197 int apply_downsampling=0; 190 int apply_downsampling=0;
198 opus_val16 coef0; 191 opus_val16 coef0;
199 192 VARDECL(celt_sig, scratch);
193 SAVE_STACK;
194#ifndef FIXED_POINT
195 (void)accum;
196 celt_assert(accum==0);
197#endif
198 ALLOC(scratch, N, celt_sig);
200 coef0 = coef[0]; 199 coef0 = coef[0];
201 Nd = N/downsample; 200 Nd = N/downsample;
202 c=0; do { 201 c=0; do {
@@ -234,11 +233,24 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
234 apply_downsampling=1; 233 apply_downsampling=1;
235 } else { 234 } else {
236 /* Shortcut for the standard (non-custom modes) case */ 235 /* Shortcut for the standard (non-custom modes) case */
237 for (j=0;j<N;j++) 236#ifdef FIXED_POINT
237 if (accum)
238 { 238 {
239 celt_sig tmp = x[j] + m + VERY_SMALL; 239 for (j=0;j<N;j++)
240 m = MULT16_32_Q15(coef0, tmp); 240 {
241 y[j*C] = SCALEOUT(SIG2WORD16(tmp)); 241 celt_sig tmp = x[j] + m + VERY_SMALL;
242 m = MULT16_32_Q15(coef0, tmp);
243 y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
244 }
245 } else
246#endif
247 {
248 for (j=0;j<N;j++)
249 {
250 celt_sig tmp = x[j] + m + VERY_SMALL;
251 m = MULT16_32_Q15(coef0, tmp);
252 y[j*C] = SCALEOUT(SIG2WORD16(tmp));
253 }
242 } 254 }
243 } 255 }
244 mem[c] = m; 256 mem[c] = m;
@@ -246,41 +258,94 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
246 if (apply_downsampling) 258 if (apply_downsampling)
247 { 259 {
248 /* Perform down-sampling */ 260 /* Perform down-sampling */
249 for (j=0;j<Nd;j++) 261#ifdef FIXED_POINT
250 y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); 262 if (accum)
263 {
264 for (j=0;j<Nd;j++)
265 y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
266 } else
267#endif
268 {
269 for (j=0;j<Nd;j++)
270 y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
271 }
251 } 272 }
252 } while (++c<C); 273 } while (++c<C);
274 RESTORE_STACK;
253} 275}
254 276
255/** Compute the IMDCT and apply window for all sub-frames and
256 all channels in a frame */
257#ifndef RESYNTH 277#ifndef RESYNTH
258static 278static
259#endif 279#endif
260void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, 280void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
261 celt_sig * OPUS_RESTRICT out_mem[], int C, int LM) 281 opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
282 int LM, int downsample, int silence)
262{ 283{
263 int b, c; 284 int c, i;
285 int M;
286 int b;
264 int B; 287 int B;
265 int N; 288 int N, NB;
266 int shift; 289 int shift;
267 const int overlap = OVERLAP(mode); 290 int nbEBands;
291 int overlap;
292 VARDECL(celt_sig, freq);
293 SAVE_STACK;
268 294
269 if (shortBlocks) 295 overlap = mode->overlap;
296 nbEBands = mode->nbEBands;
297 N = mode->shortMdctSize<<LM;
298 ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */
299 M = 1<<LM;
300
301 if (isTransient)
270 { 302 {
271 B = shortBlocks; 303 B = M;
272 N = mode->shortMdctSize; 304 NB = mode->shortMdctSize;
273 shift = mode->maxLM; 305 shift = mode->maxLM;
274 } else { 306 } else {
275 B = 1; 307 B = 1;
276 N = mode->shortMdctSize<<LM; 308 NB = mode->shortMdctSize<<LM;
277 shift = mode->maxLM-LM; 309 shift = mode->maxLM-LM;
278 } 310 }
279 c=0; do { 311
280 /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ 312 if (CC==2&&C==1)
313 {
314 /* Copying a mono streams to two channels */
315 celt_sig *freq2;
316 denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
317 downsample, silence);
318 /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */
319 freq2 = out_syn[1]+overlap/2;
320 OPUS_COPY(freq2, freq, N);
281 for (b=0;b<B;b++) 321 for (b=0;b<B;b++)
282 clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); 322 clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B);
283 } while (++c<C); 323 for (b=0;b<B;b++)
324 clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B);
325 } else if (CC==1&&C==2)
326 {
327 /* Downmixing a stereo stream to mono */
328 celt_sig *freq2;
329 freq2 = out_syn[0]+overlap/2;
330 denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M,
331 downsample, silence);
332 /* Use the output buffer as temp array before downmixing. */
333 denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
334 downsample, silence);
335 for (i=0;i<N;i++)
336 freq[i] = HALF32(ADD32(freq[i],freq2[i]));
337 for (b=0;b<B;b++)
338 clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B);
339 } else {
340 /* Normal case (mono or stereo) */
341 c=0; do {
342 denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M,
343 downsample, silence);
344 for (b=0;b<B;b++)
345 clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B);
346 } while (++c<CC);
347 }
348 RESTORE_STACK;
284} 349}
285 350
286static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) 351static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
@@ -330,7 +395,23 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM,
330 pitch of 480 Hz. */ 395 pitch of 480 Hz. */
331#define PLC_PITCH_LAG_MIN (100) 396#define PLC_PITCH_LAG_MIN (100)
332 397
333static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM) 398static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
399{
400 int pitch_index;
401 VARDECL( opus_val16, lp_pitch_buf );
402 SAVE_STACK;
403 ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
404 pitch_downsample(decode_mem, lp_pitch_buf,
405 DECODE_BUFFER_SIZE, C, arch);
406 pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
407 DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
408 PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch);
409 pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
410 RESTORE_STACK;
411 return pitch_index;
412}
413
414static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
334{ 415{
335 int c; 416 int c;
336 int i; 417 int i;
@@ -343,11 +424,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
343 int nbEBands; 424 int nbEBands;
344 int overlap; 425 int overlap;
345 int start; 426 int start;
346 int downsample;
347 int loss_count; 427 int loss_count;
348 int noise_based; 428 int noise_based;
349 const opus_int16 *eBands; 429 const opus_int16 *eBands;
350 VARDECL(celt_sig, scratch);
351 SAVE_STACK; 430 SAVE_STACK;
352 431
353 mode = st->mode; 432 mode = st->mode;
@@ -367,14 +446,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
367 446
368 loss_count = st->loss_count; 447 loss_count = st->loss_count;
369 start = st->start; 448 start = st->start;
370 downsample = st->downsample;
371 noise_based = loss_count >= 5 || start != 0; 449 noise_based = loss_count >= 5 || start != 0;
372 ALLOC(scratch, noise_based?N*C:N, celt_sig);
373 if (noise_based) 450 if (noise_based)
374 { 451 {
375 /* Noise-based PLC/CNG */ 452 /* Noise-based PLC/CNG */
376 celt_sig *freq; 453#ifdef NORM_ALIASING_HACK
454 celt_norm *X;
455#else
377 VARDECL(celt_norm, X); 456 VARDECL(celt_norm, X);
457#endif
378 opus_uint32 seed; 458 opus_uint32 seed;
379 opus_val16 *plcLogE; 459 opus_val16 *plcLogE;
380 int end; 460 int end;
@@ -383,10 +463,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
383 end = st->end; 463 end = st->end;
384 effEnd = IMAX(start, IMIN(end, mode->effEBands)); 464 effEnd = IMAX(start, IMIN(end, mode->effEBands));
385 465
386 /* Share the interleaved signal MDCT coefficient buffer with the 466#ifdef NORM_ALIASING_HACK
387 deemphasis scratch buffer. */ 467 /* This is an ugly hack that breaks aliasing rules and would be easily broken,
388 freq = scratch; 468 but it saves almost 4kB of stack. */
469 X = (celt_norm*)(out_syn[C-1]+overlap/2);
470#else
389 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ 471 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
472#endif
390 473
391 if (loss_count >= 5) 474 if (loss_count >= 5)
392 plcLogE = backgroundLogE; 475 plcLogE = backgroundLogE;
@@ -421,20 +504,12 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
421 } 504 }
422 st->rng = seed; 505 st->rng = seed;
423 506
424 denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM);
425
426 c=0; do {
427 int bound = eBands[effEnd]<<LM;
428 if (downsample!=1)
429 bound = IMIN(bound, N/downsample);
430 for (i=bound;i<N;i++)
431 freq[c*N+i] = 0;
432 } while (++c<C);
433 c=0; do { 507 c=0; do {
434 OPUS_MOVE(decode_mem[c], decode_mem[c]+N, 508 OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
435 DECODE_BUFFER_SIZE-N+(overlap>>1)); 509 DECODE_BUFFER_SIZE-N+(overlap>>1));
436 } while (++c<C); 510 } while (++c<C);
437 compute_inv_mdcts(mode, 0, freq, out_syn, C, LM); 511
512 celt_synthesis(mode, X, out_syn, plcLogE, start, effEnd, C, C, 0, LM, st->downsample, 0);
438 } else { 513 } else {
439 /* Pitch-based PLC */ 514 /* Pitch-based PLC */
440 const opus_val16 *window; 515 const opus_val16 *window;
@@ -445,15 +520,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
445 520
446 if (loss_count == 0) 521 if (loss_count == 0)
447 { 522 {
448 VARDECL( opus_val16, lp_pitch_buf ); 523 st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
449 ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
450 pitch_downsample(decode_mem, lp_pitch_buf,
451 DECODE_BUFFER_SIZE, C, st->arch);
452 pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
453 DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
454 PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch);
455 pitch_index = PLC_PITCH_LAG_MAX-pitch_index;
456 st->last_pitch_index = pitch_index;
457 } else { 524 } else {
458 pitch_index = st->last_pitch_index; 525 pitch_index = st->last_pitch_index;
459 fade = QCONST16(.8f,15); 526 fade = QCONST16(.8f,15);
@@ -644,25 +711,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
644 } while (++c<C); 711 } while (++c<C);
645 } 712 }
646 713
647 deemphasis(out_syn, pcm, N, C, downsample,
648 mode->preemph, st->preemph_memD, scratch);
649
650 st->loss_count = loss_count+1; 714 st->loss_count = loss_count+1;
651 715
652 RESTORE_STACK; 716 RESTORE_STACK;
653} 717}
654 718
655#define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */ 719int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
656static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */ 720 int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
657static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */
658int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec)
659{ 721{
660 int c, i, N; 722 int c, i, N;
661 int spread_decision; 723 int spread_decision;
662 opus_int32 bits; 724 opus_int32 bits;
663 ec_dec _dec; 725 ec_dec _dec;
664 VARDECL(celt_sig, freq); 726#ifdef NORM_ALIASING_HACK
727 celt_norm *X;
728#else
665 VARDECL(celt_norm, X); 729 VARDECL(celt_norm, X);
730#endif
666 VARDECL(int, fine_quant); 731 VARDECL(int, fine_quant);
667 VARDECL(int, pulses); 732 VARDECL(int, pulses);
668 VARDECL(int, cap); 733 VARDECL(int, cap);
@@ -680,6 +745,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
680 int intra_ener; 745 int intra_ener;
681 const int CC = st->channels; 746 const int CC = st->channels;
682 int LM, M; 747 int LM, M;
748 int start;
749 int end;
683 int effEnd; 750 int effEnd;
684 int codedBands; 751 int codedBands;
685 int alloc_trim; 752 int alloc_trim;
@@ -706,11 +773,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
706 nbEBands = mode->nbEBands; 773 nbEBands = mode->nbEBands;
707 overlap = mode->overlap; 774 overlap = mode->overlap;
708 eBands = mode->eBands; 775 eBands = mode->eBands;
776 start = st->start;
777 end = st->end;
709 frame_size *= st->downsample; 778 frame_size *= st->downsample;
710 779
711 c=0; do {
712 decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
713 } while (++c<CC);
714 lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); 780 lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
715 oldBandE = lpc+CC*LPC_ORDER; 781 oldBandE = lpc+CC*LPC_ORDER;
716 oldLogE = oldBandE + 2*nbEBands; 782 oldLogE = oldBandE + 2*nbEBands;
@@ -728,7 +794,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
728 if (data0<0) 794 if (data0<0)
729 return OPUS_INVALID_PACKET; 795 return OPUS_INVALID_PACKET;
730 } 796 }
731 st->end = IMAX(1, mode->effEBands-2*(data0>>5)); 797 st->end = end = IMAX(1, mode->effEBands-2*(data0>>5));
732 LM = (data0>>3)&0x3; 798 LM = (data0>>3)&0x3;
733 C = 1 + ((data0>>2)&0x1); 799 C = 1 + ((data0>>2)&0x1);
734 data++; 800 data++;
@@ -755,14 +821,19 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
755 return OPUS_BAD_ARG; 821 return OPUS_BAD_ARG;
756 822
757 N = M*mode->shortMdctSize; 823 N = M*mode->shortMdctSize;
824 c=0; do {
825 decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
826 out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
827 } while (++c<CC);
758 828
759 effEnd = st->end; 829 effEnd = end;
760 if (effEnd > mode->effEBands) 830 if (effEnd > mode->effEBands)
761 effEnd = mode->effEBands; 831 effEnd = mode->effEBands;
762 832
763 if (data == NULL || len<=1) 833 if (data == NULL || len<=1)
764 { 834 {
765 celt_decode_lost(st, pcm, N, LM); 835 celt_decode_lost(st, N, LM);
836 deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
766 RESTORE_STACK; 837 RESTORE_STACK;
767 return frame_size/st->downsample; 838 return frame_size/st->downsample;
768 } 839 }
@@ -798,7 +869,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
798 postfilter_gain = 0; 869 postfilter_gain = 0;
799 postfilter_pitch = 0; 870 postfilter_pitch = 0;
800 postfilter_tapset = 0; 871 postfilter_tapset = 0;
801 if (st->start==0 && tell+16 <= total_bits) 872 if (start==0 && tell+16 <= total_bits)
802 { 873 {
803 if(ec_dec_bit_logp(dec, 1)) 874 if(ec_dec_bit_logp(dec, 1))
804 { 875 {
@@ -829,11 +900,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
829 /* Decode the global flags (first symbols in the stream) */ 900 /* Decode the global flags (first symbols in the stream) */
830 intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; 901 intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
831 /* Get band energies */ 902 /* Get band energies */
832 unquant_coarse_energy(mode, st->start, st->end, oldBandE, 903 unquant_coarse_energy(mode, start, end, oldBandE,
833 intra_ener, dec, C, LM); 904 intra_ener, dec, C, LM);
834 905
835 ALLOC(tf_res, nbEBands, int); 906 ALLOC(tf_res, nbEBands, int);
836 tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); 907 tf_decode(start, end, isTransient, tf_res, LM, dec);
837 908
838 tell = ec_tell(dec); 909 tell = ec_tell(dec);
839 spread_decision = SPREAD_NORMAL; 910 spread_decision = SPREAD_NORMAL;
@@ -849,7 +920,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
849 dynalloc_logp = 6; 920 dynalloc_logp = 6;
850 total_bits<<=BITRES; 921 total_bits<<=BITRES;
851 tell = ec_tell_frac(dec); 922 tell = ec_tell_frac(dec);
852 for (i=st->start;i<st->end;i++) 923 for (i=start;i<end;i++)
853 { 924 {
854 int width, quanta; 925 int width, quanta;
855 int dynalloc_loop_logp; 926 int dynalloc_loop_logp;
@@ -888,21 +959,28 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
888 ALLOC(pulses, nbEBands, int); 959 ALLOC(pulses, nbEBands, int);
889 ALLOC(fine_priority, nbEBands, int); 960 ALLOC(fine_priority, nbEBands, int);
890 961
891 codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, 962 codedBands = compute_allocation(mode, start, end, offsets, cap,
892 alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, 963 alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
893 fine_quant, fine_priority, C, LM, dec, 0, 0, 0); 964 fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
894 965
895 unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); 966 unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
967
968 c=0; do {
969 OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
970 } while (++c<CC);
896 971
897 /* Decode fixed codebook */ 972 /* Decode fixed codebook */
898 ALLOC(collapse_masks, C*nbEBands, unsigned char); 973 ALLOC(collapse_masks, C*nbEBands, unsigned char);
899 /**< Interleaved normalised MDCTs */
900 if (FREQ_X_BUF_SIZE >= C*N)
901 X = s_X;
902 else
903 ALLOC(X, C*N, celt_norm);
904 974
905 quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, 975#ifdef NORM_ALIASING_HACK
976 /* This is an ugly hack that breaks aliasing rules and would be easily broken,
977 but it saves almost 4kB of stack. */
978 X = (celt_norm*)(out_syn[CC-1]+overlap/2);
979#else
980 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
981#endif
982
983 quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
906 NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, 984 NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
907 len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng); 985 len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng);
908 986
@@ -911,58 +989,20 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
911 anti_collapse_on = ec_dec_bits(dec, 1); 989 anti_collapse_on = ec_dec_bits(dec, 1);
912 } 990 }
913 991
914 unquant_energy_finalise(mode, st->start, st->end, oldBandE, 992 unquant_energy_finalise(mode, start, end, oldBandE,
915 fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); 993 fine_quant, fine_priority, len*8-ec_tell(dec), dec, C);
916 994
917 if (anti_collapse_on) 995 if (anti_collapse_on)
918 anti_collapse(mode, X, collapse_masks, LM, C, N, 996 anti_collapse(mode, X, collapse_masks, LM, C, N,
919 st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); 997 start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
920
921 /**< Interleaved signal MDCTs */
922 if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N)
923 freq = s_freq;
924 else
925 ALLOC(freq, IMAX(CC,C)*N, celt_sig);
926 998
927 if (silence) 999 if (silence)
928 { 1000 {
929 for (i=0;i<C*nbEBands;i++) 1001 for (i=0;i<C*nbEBands;i++)
930 oldBandE[i] = -QCONST16(28.f,DB_SHIFT); 1002 oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
931 for (i=0;i<C*N;i++)
932 freq[i] = 0;
933 } else {
934 /* Synthesis */
935 denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M);
936 } 1003 }
937 c=0; do {
938 OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
939 } while (++c<CC);
940 1004
941 c=0; do { 1005 celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, CC, isTransient, LM, st->downsample, silence);
942 int bound = M*eBands[effEnd];
943 if (st->downsample!=1)
944 bound = IMIN(bound, N/st->downsample);
945 for (i=bound;i<N;i++)
946 freq[c*N+i] = 0;
947 } while (++c<C);
948
949 c=0; do {
950 out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
951 } while (++c<CC);
952
953 if (CC==2&&C==1)
954 {
955 for (i=0;i<N;i++)
956 freq[N+i] = freq[i];
957 }
958 if (CC==1&&C==2)
959 {
960 for (i=0;i<N;i++)
961 freq[i] = HALF32(ADD32(freq[i],freq[N+i]));
962 }
963
964 /* Compute inverse MDCTs */
965 compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM);
966 1006
967 c=0; do { 1007 c=0; do {
968 st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); 1008 st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
@@ -989,18 +1029,14 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
989 st->postfilter_tapset_old = st->postfilter_tapset; 1029 st->postfilter_tapset_old = st->postfilter_tapset;
990 } 1030 }
991 1031
992 if (C==1) { 1032 if (C==1)
993 for (i=0;i<nbEBands;i++) 1033 OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
994 oldBandE[nbEBands+i]=oldBandE[i];
995 }
996 1034
997 /* In case start or end were to change */ 1035 /* In case start or end were to change */
998 if (!isTransient) 1036 if (!isTransient)
999 { 1037 {
1000 for (i=0;i<2*nbEBands;i++) 1038 OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands);
1001 oldLogE2[i] = oldLogE[i]; 1039 OPUS_COPY(oldLogE, oldBandE, 2*nbEBands);
1002 for (i=0;i<2*nbEBands;i++)
1003 oldLogE[i] = oldBandE[i];
1004 for (i=0;i<2*nbEBands;i++) 1040 for (i=0;i<2*nbEBands;i++)
1005 backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); 1041 backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]);
1006 } else { 1042 } else {
@@ -1009,12 +1045,12 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
1009 } 1045 }
1010 c=0; do 1046 c=0; do
1011 { 1047 {
1012 for (i=0;i<st->start;i++) 1048 for (i=0;i<start;i++)
1013 { 1049 {
1014 oldBandE[c*nbEBands+i]=0; 1050 oldBandE[c*nbEBands+i]=0;
1015 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); 1051 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
1016 } 1052 }
1017 for (i=st->end;i<nbEBands;i++) 1053 for (i=end;i<nbEBands;i++)
1018 { 1054 {
1019 oldBandE[c*nbEBands+i]=0; 1055 oldBandE[c*nbEBands+i]=0;
1020 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); 1056 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
@@ -1022,8 +1058,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
1022 } while (++c<2); 1058 } while (++c<2);
1023 st->rng = dec->rng; 1059 st->rng = dec->rng;
1024 1060
1025 /* We reuse freq[] as scratch space for the de-emphasis */ 1061 deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
1026 deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq);
1027 st->loss_count = 0; 1062 st->loss_count = 0;
1028 RESTORE_STACK; 1063 RESTORE_STACK;
1029 if (ec_tell(dec) > 8*len) 1064 if (ec_tell(dec) > 8*len)
@@ -1039,7 +1074,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
1039#ifdef FIXED_POINT 1074#ifdef FIXED_POINT
1040int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) 1075int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
1041{ 1076{
1042 return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); 1077 return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
1043} 1078}
1044 1079
1045#ifndef DISABLE_FLOAT_API 1080#ifndef DISABLE_FLOAT_API
@@ -1056,7 +1091,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char
1056 N = frame_size; 1091 N = frame_size;
1057 1092
1058 ALLOC(out, C*N, opus_int16); 1093 ALLOC(out, C*N, opus_int16);
1059 ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); 1094 ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
1060 if (ret>0) 1095 if (ret>0)
1061 for (j=0;j<C*ret;j++) 1096 for (j=0;j<C*ret;j++)
1062 pcm[j]=out[j]*(1.f/32768.f); 1097 pcm[j]=out[j]*(1.f/32768.f);
@@ -1070,7 +1105,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char
1070 1105
1071int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) 1106int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
1072{ 1107{
1073 return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); 1108 return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
1074} 1109}
1075 1110
1076int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) 1111int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
@@ -1086,7 +1121,7 @@ int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data
1086 N = frame_size; 1121 N = frame_size;
1087 ALLOC(out, C*N, celt_sig); 1122 ALLOC(out, C*N, celt_sig);
1088 1123
1089 ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); 1124 ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
1090 1125
1091 if (ret>0) 1126 if (ret>0)
1092 for (j=0;j<C*ret;j++) 1127 for (j=0;j<C*ret;j++)