diff options
author | Nils Wallménius <nils@rockbox.org> | 2014-01-19 16:31:59 +0100 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2014-07-13 11:12:40 +0200 |
commit | 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch) | |
tree | 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/celt_decoder.c | |
parent | e557951c94c1efa769900257e466900f0ffeb53b (diff) | |
download | rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip |
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c
This brings in a bunch of optimizations to decode speed
and memory usage. Allocations are switched from using
the pseudostack to using the real stack. Enabled hacks
to reduce stack usage.
This should fix crashes on sansa clip, although some
files will not play due to failing allocations in the
codec buffer.
Speeds up decoding of the following test files:
Test file          H300 (cf)    C200 (arm7tdmi)    ipod classic (arm9e)
16 kbps (silk)     14.28 MHz    4.00 MHz           2.61 MHz
64 kbps (celt)     4.09 MHz     8.08 MHz           6.24 MHz
128 kbps (celt)    1.93 MHz     8.83 MHz           6.53 MHz
Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/celt_decoder.c')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/celt_decoder.c | 325 |
1 files changed, 180 insertions, 145 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c index 77fa2d01f7..8af96b7931 100644 --- a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c +++ b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c | |||
@@ -51,6 +51,9 @@ | |||
51 | #include "celt_lpc.h" | 51 | #include "celt_lpc.h" |
52 | #include "vq.h" | 52 | #include "vq.h" |
53 | 53 | ||
54 | #if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT) | ||
55 | #define NORM_ALIASING_HACK | ||
56 | #endif | ||
54 | /**********************************************************************/ | 57 | /**********************************************************************/ |
55 | /* */ | 58 | /* */ |
56 | /* DECODER */ | 59 | /* DECODER */ |
@@ -175,28 +178,24 @@ void opus_custom_decoder_destroy(CELTDecoder *st) | |||
175 | } | 178 | } |
176 | #endif /* CUSTOM_MODES */ | 179 | #endif /* CUSTOM_MODES */ |
177 | 180 | ||
178 | static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x) | ||
179 | { | ||
180 | #ifdef FIXED_POINT | ||
181 | x = PSHR32(x, SIG_SHIFT); | ||
182 | x = MAX32(x, -32768); | ||
183 | x = MIN32(x, 32767); | ||
184 | return EXTRACT16(x); | ||
185 | #else | ||
186 | return (opus_val16)x; | ||
187 | #endif | ||
188 | } | ||
189 | 181 | ||
190 | #ifndef RESYNTH | 182 | #ifndef RESYNTH |
191 | static | 183 | static |
192 | #endif | 184 | #endif |
193 | void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch) | 185 | void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, |
186 | celt_sig *mem, int accum) | ||
194 | { | 187 | { |
195 | int c; | 188 | int c; |
196 | int Nd; | 189 | int Nd; |
197 | int apply_downsampling=0; | 190 | int apply_downsampling=0; |
198 | opus_val16 coef0; | 191 | opus_val16 coef0; |
199 | 192 | VARDECL(celt_sig, scratch); | |
193 | SAVE_STACK; | ||
194 | #ifndef FIXED_POINT | ||
195 | (void)accum; | ||
196 | celt_assert(accum==0); | ||
197 | #endif | ||
198 | ALLOC(scratch, N, celt_sig); | ||
200 | coef0 = coef[0]; | 199 | coef0 = coef[0]; |
201 | Nd = N/downsample; | 200 | Nd = N/downsample; |
202 | c=0; do { | 201 | c=0; do { |
@@ -234,11 +233,24 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c | |||
234 | apply_downsampling=1; | 233 | apply_downsampling=1; |
235 | } else { | 234 | } else { |
236 | /* Shortcut for the standard (non-custom modes) case */ | 235 | /* Shortcut for the standard (non-custom modes) case */ |
237 | for (j=0;j<N;j++) | 236 | #ifdef FIXED_POINT |
237 | if (accum) | ||
238 | { | 238 | { |
239 | celt_sig tmp = x[j] + m + VERY_SMALL; | 239 | for (j=0;j<N;j++) |
240 | m = MULT16_32_Q15(coef0, tmp); | 240 | { |
241 | y[j*C] = SCALEOUT(SIG2WORD16(tmp)); | 241 | celt_sig tmp = x[j] + m + VERY_SMALL; |
242 | m = MULT16_32_Q15(coef0, tmp); | ||
243 | y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp)))); | ||
244 | } | ||
245 | } else | ||
246 | #endif | ||
247 | { | ||
248 | for (j=0;j<N;j++) | ||
249 | { | ||
250 | celt_sig tmp = x[j] + m + VERY_SMALL; | ||
251 | m = MULT16_32_Q15(coef0, tmp); | ||
252 | y[j*C] = SCALEOUT(SIG2WORD16(tmp)); | ||
253 | } | ||
242 | } | 254 | } |
243 | } | 255 | } |
244 | mem[c] = m; | 256 | mem[c] = m; |
@@ -246,41 +258,94 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c | |||
246 | if (apply_downsampling) | 258 | if (apply_downsampling) |
247 | { | 259 | { |
248 | /* Perform down-sampling */ | 260 | /* Perform down-sampling */ |
249 | for (j=0;j<Nd;j++) | 261 | #ifdef FIXED_POINT |
250 | y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); | 262 | if (accum) |
263 | { | ||
264 | for (j=0;j<Nd;j++) | ||
265 | y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample])))); | ||
266 | } else | ||
267 | #endif | ||
268 | { | ||
269 | for (j=0;j<Nd;j++) | ||
270 | y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); | ||
271 | } | ||
251 | } | 272 | } |
252 | } while (++c<C); | 273 | } while (++c<C); |
274 | RESTORE_STACK; | ||
253 | } | 275 | } |
254 | 276 | ||
255 | /** Compute the IMDCT and apply window for all sub-frames and | ||
256 | all channels in a frame */ | ||
257 | #ifndef RESYNTH | 277 | #ifndef RESYNTH |
258 | static | 278 | static |
259 | #endif | 279 | #endif |
260 | void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, | 280 | void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], |
261 | celt_sig * OPUS_RESTRICT out_mem[], int C, int LM) | 281 | opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, |
282 | int LM, int downsample, int silence) | ||
262 | { | 283 | { |
263 | int b, c; | 284 | int c, i; |
285 | int M; | ||
286 | int b; | ||
264 | int B; | 287 | int B; |
265 | int N; | 288 | int N, NB; |
266 | int shift; | 289 | int shift; |
267 | const int overlap = OVERLAP(mode); | 290 | int nbEBands; |
291 | int overlap; | ||
292 | VARDECL(celt_sig, freq); | ||
293 | SAVE_STACK; | ||
268 | 294 | ||
269 | if (shortBlocks) | 295 | overlap = mode->overlap; |
296 | nbEBands = mode->nbEBands; | ||
297 | N = mode->shortMdctSize<<LM; | ||
298 | ALLOC(freq, N, celt_sig); /**< Interleaved signal MDCTs */ | ||
299 | M = 1<<LM; | ||
300 | |||
301 | if (isTransient) | ||
270 | { | 302 | { |
271 | B = shortBlocks; | 303 | B = M; |
272 | N = mode->shortMdctSize; | 304 | NB = mode->shortMdctSize; |
273 | shift = mode->maxLM; | 305 | shift = mode->maxLM; |
274 | } else { | 306 | } else { |
275 | B = 1; | 307 | B = 1; |
276 | N = mode->shortMdctSize<<LM; | 308 | NB = mode->shortMdctSize<<LM; |
277 | shift = mode->maxLM-LM; | 309 | shift = mode->maxLM-LM; |
278 | } | 310 | } |
279 | c=0; do { | 311 | |
280 | /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ | 312 | if (CC==2&&C==1) |
313 | { | ||
314 | /* Copying a mono streams to two channels */ | ||
315 | celt_sig *freq2; | ||
316 | denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, | ||
317 | downsample, silence); | ||
318 | /* Store a temporary copy in the output buffer because the IMDCT destroys its input. */ | ||
319 | freq2 = out_syn[1]+overlap/2; | ||
320 | OPUS_COPY(freq2, freq, N); | ||
281 | for (b=0;b<B;b++) | 321 | for (b=0;b<B;b++) |
282 | clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); | 322 | clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); |
283 | } while (++c<C); | 323 | for (b=0;b<B;b++) |
324 | clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B); | ||
325 | } else if (CC==1&&C==2) | ||
326 | { | ||
327 | /* Downmixing a stereo stream to mono */ | ||
328 | celt_sig *freq2; | ||
329 | freq2 = out_syn[0]+overlap/2; | ||
330 | denormalise_bands(mode, X, freq, oldBandE, start, effEnd, M, | ||
331 | downsample, silence); | ||
332 | /* Use the output buffer as temp array before downmixing. */ | ||
333 | denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, | ||
334 | downsample, silence); | ||
335 | for (i=0;i<N;i++) | ||
336 | freq[i] = HALF32(ADD32(freq[i],freq2[i])); | ||
337 | for (b=0;b<B;b++) | ||
338 | clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); | ||
339 | } else { | ||
340 | /* Normal case (mono or stereo) */ | ||
341 | c=0; do { | ||
342 | denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, | ||
343 | downsample, silence); | ||
344 | for (b=0;b<B;b++) | ||
345 | clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B); | ||
346 | } while (++c<CC); | ||
347 | } | ||
348 | RESTORE_STACK; | ||
284 | } | 349 | } |
285 | 350 | ||
286 | static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) | 351 | static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) |
@@ -330,7 +395,23 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, | |||
330 | pitch of 480 Hz. */ | 395 | pitch of 480 Hz. */ |
331 | #define PLC_PITCH_LAG_MIN (100) | 396 | #define PLC_PITCH_LAG_MIN (100) |
332 | 397 | ||
333 | static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM) | 398 | static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch) |
399 | { | ||
400 | int pitch_index; | ||
401 | VARDECL( opus_val16, lp_pitch_buf ); | ||
402 | SAVE_STACK; | ||
403 | ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); | ||
404 | pitch_downsample(decode_mem, lp_pitch_buf, | ||
405 | DECODE_BUFFER_SIZE, C, arch); | ||
406 | pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, | ||
407 | DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, | ||
408 | PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, arch); | ||
409 | pitch_index = PLC_PITCH_LAG_MAX-pitch_index; | ||
410 | RESTORE_STACK; | ||
411 | return pitch_index; | ||
412 | } | ||
413 | |||
414 | static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM) | ||
334 | { | 415 | { |
335 | int c; | 416 | int c; |
336 | int i; | 417 | int i; |
@@ -343,11 +424,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R | |||
343 | int nbEBands; | 424 | int nbEBands; |
344 | int overlap; | 425 | int overlap; |
345 | int start; | 426 | int start; |
346 | int downsample; | ||
347 | int loss_count; | 427 | int loss_count; |
348 | int noise_based; | 428 | int noise_based; |
349 | const opus_int16 *eBands; | 429 | const opus_int16 *eBands; |
350 | VARDECL(celt_sig, scratch); | ||
351 | SAVE_STACK; | 430 | SAVE_STACK; |
352 | 431 | ||
353 | mode = st->mode; | 432 | mode = st->mode; |
@@ -367,14 +446,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R | |||
367 | 446 | ||
368 | loss_count = st->loss_count; | 447 | loss_count = st->loss_count; |
369 | start = st->start; | 448 | start = st->start; |
370 | downsample = st->downsample; | ||
371 | noise_based = loss_count >= 5 || start != 0; | 449 | noise_based = loss_count >= 5 || start != 0; |
372 | ALLOC(scratch, noise_based?N*C:N, celt_sig); | ||
373 | if (noise_based) | 450 | if (noise_based) |
374 | { | 451 | { |
375 | /* Noise-based PLC/CNG */ | 452 | /* Noise-based PLC/CNG */ |
376 | celt_sig *freq; | 453 | #ifdef NORM_ALIASING_HACK |
454 | celt_norm *X; | ||
455 | #else | ||
377 | VARDECL(celt_norm, X); | 456 | VARDECL(celt_norm, X); |
457 | #endif | ||
378 | opus_uint32 seed; | 458 | opus_uint32 seed; |
379 | opus_val16 *plcLogE; | 459 | opus_val16 *plcLogE; |
380 | int end; | 460 | int end; |
@@ -383,10 +463,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R | |||
383 | end = st->end; | 463 | end = st->end; |
384 | effEnd = IMAX(start, IMIN(end, mode->effEBands)); | 464 | effEnd = IMAX(start, IMIN(end, mode->effEBands)); |
385 | 465 | ||
386 | /* Share the interleaved signal MDCT coefficient buffer with the | 466 | #ifdef NORM_ALIASING_HACK |
387 | deemphasis scratch buffer. */ | 467 | /* This is an ugly hack that breaks aliasing rules and would be easily broken, |
388 | freq = scratch; | 468 | but it saves almost 4kB of stack. */ |
469 | X = (celt_norm*)(out_syn[C-1]+overlap/2); | ||
470 | #else | ||
389 | ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ | 471 | ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ |
472 | #endif | ||
390 | 473 | ||
391 | if (loss_count >= 5) | 474 | if (loss_count >= 5) |
392 | plcLogE = backgroundLogE; | 475 | plcLogE = backgroundLogE; |
@@ -421,20 +504,12 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R | |||
421 | } | 504 | } |
422 | st->rng = seed; | 505 | st->rng = seed; |
423 | 506 | ||
424 | denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM); | ||
425 | |||
426 | c=0; do { | ||
427 | int bound = eBands[effEnd]<<LM; | ||
428 | if (downsample!=1) | ||
429 | bound = IMIN(bound, N/downsample); | ||
430 | for (i=bound;i<N;i++) | ||
431 | freq[c*N+i] = 0; | ||
432 | } while (++c<C); | ||
433 | c=0; do { | 507 | c=0; do { |
434 | OPUS_MOVE(decode_mem[c], decode_mem[c]+N, | 508 | OPUS_MOVE(decode_mem[c], decode_mem[c]+N, |
435 | DECODE_BUFFER_SIZE-N+(overlap>>1)); | 509 | DECODE_BUFFER_SIZE-N+(overlap>>1)); |
436 | } while (++c<C); | 510 | } while (++c<C); |
437 | compute_inv_mdcts(mode, 0, freq, out_syn, C, LM); | 511 | |
512 | celt_synthesis(mode, X, out_syn, plcLogE, start, effEnd, C, C, 0, LM, st->downsample, 0); | ||
438 | } else { | 513 | } else { |
439 | /* Pitch-based PLC */ | 514 | /* Pitch-based PLC */ |
440 | const opus_val16 *window; | 515 | const opus_val16 *window; |
@@ -445,15 +520,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R | |||
445 | 520 | ||
446 | if (loss_count == 0) | 521 | if (loss_count == 0) |
447 | { | 522 | { |
448 | VARDECL( opus_val16, lp_pitch_buf ); | 523 | st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch); |
449 | ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); | ||
450 | pitch_downsample(decode_mem, lp_pitch_buf, | ||
451 | DECODE_BUFFER_SIZE, C, st->arch); | ||
452 | pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, | ||
453 | DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, | ||
454 | PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); | ||
455 | pitch_index = PLC_PITCH_LAG_MAX-pitch_index; | ||
456 | st->last_pitch_index = pitch_index; | ||
457 | } else { | 524 | } else { |
458 | pitch_index = st->last_pitch_index; | 525 | pitch_index = st->last_pitch_index; |
459 | fade = QCONST16(.8f,15); | 526 | fade = QCONST16(.8f,15); |
@@ -644,25 +711,23 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R | |||
644 | } while (++c<C); | 711 | } while (++c<C); |
645 | } | 712 | } |
646 | 713 | ||
647 | deemphasis(out_syn, pcm, N, C, downsample, | ||
648 | mode->preemph, st->preemph_memD, scratch); | ||
649 | |||
650 | st->loss_count = loss_count+1; | 714 | st->loss_count = loss_count+1; |
651 | 715 | ||
652 | RESTORE_STACK; | 716 | RESTORE_STACK; |
653 | } | 717 | } |
654 | 718 | ||
655 | #define FREQ_X_BUF_SIZE (2*8*120) /* stereo * nbShortMdcts * shortMdctSize */ | 719 | int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, |
656 | static celt_sig s_freq[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 7680 byte */ | 720 | int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) |
657 | static celt_norm s_X[FREQ_X_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; /* 3840 byte */ | ||
658 | int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) | ||
659 | { | 721 | { |
660 | int c, i, N; | 722 | int c, i, N; |
661 | int spread_decision; | 723 | int spread_decision; |
662 | opus_int32 bits; | 724 | opus_int32 bits; |
663 | ec_dec _dec; | 725 | ec_dec _dec; |
664 | VARDECL(celt_sig, freq); | 726 | #ifdef NORM_ALIASING_HACK |
727 | celt_norm *X; | ||
728 | #else | ||
665 | VARDECL(celt_norm, X); | 729 | VARDECL(celt_norm, X); |
730 | #endif | ||
666 | VARDECL(int, fine_quant); | 731 | VARDECL(int, fine_quant); |
667 | VARDECL(int, pulses); | 732 | VARDECL(int, pulses); |
668 | VARDECL(int, cap); | 733 | VARDECL(int, cap); |
@@ -680,6 +745,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
680 | int intra_ener; | 745 | int intra_ener; |
681 | const int CC = st->channels; | 746 | const int CC = st->channels; |
682 | int LM, M; | 747 | int LM, M; |
748 | int start; | ||
749 | int end; | ||
683 | int effEnd; | 750 | int effEnd; |
684 | int codedBands; | 751 | int codedBands; |
685 | int alloc_trim; | 752 | int alloc_trim; |
@@ -706,11 +773,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
706 | nbEBands = mode->nbEBands; | 773 | nbEBands = mode->nbEBands; |
707 | overlap = mode->overlap; | 774 | overlap = mode->overlap; |
708 | eBands = mode->eBands; | 775 | eBands = mode->eBands; |
776 | start = st->start; | ||
777 | end = st->end; | ||
709 | frame_size *= st->downsample; | 778 | frame_size *= st->downsample; |
710 | 779 | ||
711 | c=0; do { | ||
712 | decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); | ||
713 | } while (++c<CC); | ||
714 | lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); | 780 | lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); |
715 | oldBandE = lpc+CC*LPC_ORDER; | 781 | oldBandE = lpc+CC*LPC_ORDER; |
716 | oldLogE = oldBandE + 2*nbEBands; | 782 | oldLogE = oldBandE + 2*nbEBands; |
@@ -728,7 +794,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
728 | if (data0<0) | 794 | if (data0<0) |
729 | return OPUS_INVALID_PACKET; | 795 | return OPUS_INVALID_PACKET; |
730 | } | 796 | } |
731 | st->end = IMAX(1, mode->effEBands-2*(data0>>5)); | 797 | st->end = end = IMAX(1, mode->effEBands-2*(data0>>5)); |
732 | LM = (data0>>3)&0x3; | 798 | LM = (data0>>3)&0x3; |
733 | C = 1 + ((data0>>2)&0x1); | 799 | C = 1 + ((data0>>2)&0x1); |
734 | data++; | 800 | data++; |
@@ -755,14 +821,19 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
755 | return OPUS_BAD_ARG; | 821 | return OPUS_BAD_ARG; |
756 | 822 | ||
757 | N = M*mode->shortMdctSize; | 823 | N = M*mode->shortMdctSize; |
824 | c=0; do { | ||
825 | decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); | ||
826 | out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; | ||
827 | } while (++c<CC); | ||
758 | 828 | ||
759 | effEnd = st->end; | 829 | effEnd = end; |
760 | if (effEnd > mode->effEBands) | 830 | if (effEnd > mode->effEBands) |
761 | effEnd = mode->effEBands; | 831 | effEnd = mode->effEBands; |
762 | 832 | ||
763 | if (data == NULL || len<=1) | 833 | if (data == NULL || len<=1) |
764 | { | 834 | { |
765 | celt_decode_lost(st, pcm, N, LM); | 835 | celt_decode_lost(st, N, LM); |
836 | deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); | ||
766 | RESTORE_STACK; | 837 | RESTORE_STACK; |
767 | return frame_size/st->downsample; | 838 | return frame_size/st->downsample; |
768 | } | 839 | } |
@@ -798,7 +869,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
798 | postfilter_gain = 0; | 869 | postfilter_gain = 0; |
799 | postfilter_pitch = 0; | 870 | postfilter_pitch = 0; |
800 | postfilter_tapset = 0; | 871 | postfilter_tapset = 0; |
801 | if (st->start==0 && tell+16 <= total_bits) | 872 | if (start==0 && tell+16 <= total_bits) |
802 | { | 873 | { |
803 | if(ec_dec_bit_logp(dec, 1)) | 874 | if(ec_dec_bit_logp(dec, 1)) |
804 | { | 875 | { |
@@ -829,11 +900,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
829 | /* Decode the global flags (first symbols in the stream) */ | 900 | /* Decode the global flags (first symbols in the stream) */ |
830 | intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; | 901 | intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; |
831 | /* Get band energies */ | 902 | /* Get band energies */ |
832 | unquant_coarse_energy(mode, st->start, st->end, oldBandE, | 903 | unquant_coarse_energy(mode, start, end, oldBandE, |
833 | intra_ener, dec, C, LM); | 904 | intra_ener, dec, C, LM); |
834 | 905 | ||
835 | ALLOC(tf_res, nbEBands, int); | 906 | ALLOC(tf_res, nbEBands, int); |
836 | tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); | 907 | tf_decode(start, end, isTransient, tf_res, LM, dec); |
837 | 908 | ||
838 | tell = ec_tell(dec); | 909 | tell = ec_tell(dec); |
839 | spread_decision = SPREAD_NORMAL; | 910 | spread_decision = SPREAD_NORMAL; |
@@ -849,7 +920,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
849 | dynalloc_logp = 6; | 920 | dynalloc_logp = 6; |
850 | total_bits<<=BITRES; | 921 | total_bits<<=BITRES; |
851 | tell = ec_tell_frac(dec); | 922 | tell = ec_tell_frac(dec); |
852 | for (i=st->start;i<st->end;i++) | 923 | for (i=start;i<end;i++) |
853 | { | 924 | { |
854 | int width, quanta; | 925 | int width, quanta; |
855 | int dynalloc_loop_logp; | 926 | int dynalloc_loop_logp; |
@@ -888,21 +959,28 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
888 | ALLOC(pulses, nbEBands, int); | 959 | ALLOC(pulses, nbEBands, int); |
889 | ALLOC(fine_priority, nbEBands, int); | 960 | ALLOC(fine_priority, nbEBands, int); |
890 | 961 | ||
891 | codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, | 962 | codedBands = compute_allocation(mode, start, end, offsets, cap, |
892 | alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, | 963 | alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, |
893 | fine_quant, fine_priority, C, LM, dec, 0, 0, 0); | 964 | fine_quant, fine_priority, C, LM, dec, 0, 0, 0); |
894 | 965 | ||
895 | unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); | 966 | unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C); |
967 | |||
968 | c=0; do { | ||
969 | OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); | ||
970 | } while (++c<CC); | ||
896 | 971 | ||
897 | /* Decode fixed codebook */ | 972 | /* Decode fixed codebook */ |
898 | ALLOC(collapse_masks, C*nbEBands, unsigned char); | 973 | ALLOC(collapse_masks, C*nbEBands, unsigned char); |
899 | /**< Interleaved normalised MDCTs */ | ||
900 | if (FREQ_X_BUF_SIZE >= C*N) | ||
901 | X = s_X; | ||
902 | else | ||
903 | ALLOC(X, C*N, celt_norm); | ||
904 | 974 | ||
905 | quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, | 975 | #ifdef NORM_ALIASING_HACK |
976 | /* This is an ugly hack that breaks aliasing rules and would be easily broken, | ||
977 | but it saves almost 4kB of stack. */ | ||
978 | X = (celt_norm*)(out_syn[CC-1]+overlap/2); | ||
979 | #else | ||
980 | ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ | ||
981 | #endif | ||
982 | |||
983 | quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, | ||
906 | NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, | 984 | NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, |
907 | len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng); | 985 | len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng); |
908 | 986 | ||
@@ -911,58 +989,20 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
911 | anti_collapse_on = ec_dec_bits(dec, 1); | 989 | anti_collapse_on = ec_dec_bits(dec, 1); |
912 | } | 990 | } |
913 | 991 | ||
914 | unquant_energy_finalise(mode, st->start, st->end, oldBandE, | 992 | unquant_energy_finalise(mode, start, end, oldBandE, |
915 | fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); | 993 | fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); |
916 | 994 | ||
917 | if (anti_collapse_on) | 995 | if (anti_collapse_on) |
918 | anti_collapse(mode, X, collapse_masks, LM, C, N, | 996 | anti_collapse(mode, X, collapse_masks, LM, C, N, |
919 | st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); | 997 | start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); |
920 | |||
921 | /**< Interleaved signal MDCTs */ | ||
922 | if (FREQ_X_BUF_SIZE >= IMAX(CC,C)*N) | ||
923 | freq = s_freq; | ||
924 | else | ||
925 | ALLOC(freq, IMAX(CC,C)*N, celt_sig); | ||
926 | 998 | ||
927 | if (silence) | 999 | if (silence) |
928 | { | 1000 | { |
929 | for (i=0;i<C*nbEBands;i++) | 1001 | for (i=0;i<C*nbEBands;i++) |
930 | oldBandE[i] = -QCONST16(28.f,DB_SHIFT); | 1002 | oldBandE[i] = -QCONST16(28.f,DB_SHIFT); |
931 | for (i=0;i<C*N;i++) | ||
932 | freq[i] = 0; | ||
933 | } else { | ||
934 | /* Synthesis */ | ||
935 | denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M); | ||
936 | } | 1003 | } |
937 | c=0; do { | ||
938 | OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); | ||
939 | } while (++c<CC); | ||
940 | 1004 | ||
941 | c=0; do { | 1005 | celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, CC, isTransient, LM, st->downsample, silence); |
942 | int bound = M*eBands[effEnd]; | ||
943 | if (st->downsample!=1) | ||
944 | bound = IMIN(bound, N/st->downsample); | ||
945 | for (i=bound;i<N;i++) | ||
946 | freq[c*N+i] = 0; | ||
947 | } while (++c<C); | ||
948 | |||
949 | c=0; do { | ||
950 | out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; | ||
951 | } while (++c<CC); | ||
952 | |||
953 | if (CC==2&&C==1) | ||
954 | { | ||
955 | for (i=0;i<N;i++) | ||
956 | freq[N+i] = freq[i]; | ||
957 | } | ||
958 | if (CC==1&&C==2) | ||
959 | { | ||
960 | for (i=0;i<N;i++) | ||
961 | freq[i] = HALF32(ADD32(freq[i],freq[N+i])); | ||
962 | } | ||
963 | |||
964 | /* Compute inverse MDCTs */ | ||
965 | compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM); | ||
966 | 1006 | ||
967 | c=0; do { | 1007 | c=0; do { |
968 | st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); | 1008 | st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); |
@@ -989,18 +1029,14 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
989 | st->postfilter_tapset_old = st->postfilter_tapset; | 1029 | st->postfilter_tapset_old = st->postfilter_tapset; |
990 | } | 1030 | } |
991 | 1031 | ||
992 | if (C==1) { | 1032 | if (C==1) |
993 | for (i=0;i<nbEBands;i++) | 1033 | OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); |
994 | oldBandE[nbEBands+i]=oldBandE[i]; | ||
995 | } | ||
996 | 1034 | ||
997 | /* In case start or end were to change */ | 1035 | /* In case start or end were to change */ |
998 | if (!isTransient) | 1036 | if (!isTransient) |
999 | { | 1037 | { |
1000 | for (i=0;i<2*nbEBands;i++) | 1038 | OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands); |
1001 | oldLogE2[i] = oldLogE[i]; | 1039 | OPUS_COPY(oldLogE, oldBandE, 2*nbEBands); |
1002 | for (i=0;i<2*nbEBands;i++) | ||
1003 | oldLogE[i] = oldBandE[i]; | ||
1004 | for (i=0;i<2*nbEBands;i++) | 1040 | for (i=0;i<2*nbEBands;i++) |
1005 | backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); | 1041 | backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); |
1006 | } else { | 1042 | } else { |
@@ -1009,12 +1045,12 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
1009 | } | 1045 | } |
1010 | c=0; do | 1046 | c=0; do |
1011 | { | 1047 | { |
1012 | for (i=0;i<st->start;i++) | 1048 | for (i=0;i<start;i++) |
1013 | { | 1049 | { |
1014 | oldBandE[c*nbEBands+i]=0; | 1050 | oldBandE[c*nbEBands+i]=0; |
1015 | oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); | 1051 | oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); |
1016 | } | 1052 | } |
1017 | for (i=st->end;i<nbEBands;i++) | 1053 | for (i=end;i<nbEBands;i++) |
1018 | { | 1054 | { |
1019 | oldBandE[c*nbEBands+i]=0; | 1055 | oldBandE[c*nbEBands+i]=0; |
1020 | oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); | 1056 | oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); |
@@ -1022,8 +1058,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
1022 | } while (++c<2); | 1058 | } while (++c<2); |
1023 | st->rng = dec->rng; | 1059 | st->rng = dec->rng; |
1024 | 1060 | ||
1025 | /* We reuse freq[] as scratch space for the de-emphasis */ | 1061 | deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); |
1026 | deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq); | ||
1027 | st->loss_count = 0; | 1062 | st->loss_count = 0; |
1028 | RESTORE_STACK; | 1063 | RESTORE_STACK; |
1029 | if (ec_tell(dec) > 8*len) | 1064 | if (ec_tell(dec) > 8*len) |
@@ -1039,7 +1074,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat | |||
1039 | #ifdef FIXED_POINT | 1074 | #ifdef FIXED_POINT |
1040 | int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) | 1075 | int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) |
1041 | { | 1076 | { |
1042 | return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); | 1077 | return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); |
1043 | } | 1078 | } |
1044 | 1079 | ||
1045 | #ifndef DISABLE_FLOAT_API | 1080 | #ifndef DISABLE_FLOAT_API |
@@ -1056,7 +1091,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char | |||
1056 | N = frame_size; | 1091 | N = frame_size; |
1057 | 1092 | ||
1058 | ALLOC(out, C*N, opus_int16); | 1093 | ALLOC(out, C*N, opus_int16); |
1059 | ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); | 1094 | ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); |
1060 | if (ret>0) | 1095 | if (ret>0) |
1061 | for (j=0;j<C*ret;j++) | 1096 | for (j=0;j<C*ret;j++) |
1062 | pcm[j]=out[j]*(1.f/32768.f); | 1097 | pcm[j]=out[j]*(1.f/32768.f); |
@@ -1070,7 +1105,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char | |||
1070 | 1105 | ||
1071 | int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) | 1106 | int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) |
1072 | { | 1107 | { |
1073 | return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); | 1108 | return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); |
1074 | } | 1109 | } |
1075 | 1110 | ||
1076 | int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) | 1111 | int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) |
@@ -1086,7 +1121,7 @@ int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data | |||
1086 | N = frame_size; | 1121 | N = frame_size; |
1087 | ALLOC(out, C*N, celt_sig); | 1122 | ALLOC(out, C*N, celt_sig); |
1088 | 1123 | ||
1089 | ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); | 1124 | ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); |
1090 | 1125 | ||
1091 | if (ret>0) | 1126 | if (ret>0) |
1092 | for (j=0;j<C*ret;j++) | 1127 | for (j=0;j<C*ret;j++) |