diff options
author | Nils Wallménius <nils@rockbox.org> | 2014-01-19 16:31:59 +0100 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2014-07-13 11:12:40 +0200 |
commit | 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch) | |
tree | 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/pitch.c | |
parent | e557951c94c1efa769900257e466900f0ffeb53b (diff) | |
download | rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip |
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c
This brings in a bunch of optimizations to decode speed
and memory usage. Allocations are switched from the
pseudostack to the real stack, and the hacks that
reduce stack usage are enabled.
This should fix crashes on the Sansa Clip, although some
files will not play because allocations in the codec
buffer fail.
Speeds up decoding of the following test files:
                   H300 (cf)    C200 (arm7tdmi)    iPod Classic (arm9e)
16 kbps (silk)     14.28 MHz    4.00 MHz           2.61 MHz
64 kbps (celt)      4.09 MHz    8.08 MHz           6.24 MHz
128 kbps (celt)     1.93 MHz    8.83 MHz           6.53 MHz
Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/pitch.c')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/pitch.c | 30 |
1 files changed, 14 insertions, 16 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c index c28857297a..ee56a434f0 100644 --- a/lib/rbcodec/codecs/libopus/celt/pitch.c +++ b/lib/rbcodec/codecs/libopus/celt/pitch.c | |||
@@ -252,15 +252,15 @@ void | |||
252 | #endif | 252 | #endif |
253 | celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) | 253 | celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) |
254 | { | 254 | { |
255 | int i,j; | 255 | int i; |
256 | /*The EDSP version requires that max_pitch is at least 1, and that _x is | 256 | /*The EDSP version requires that max_pitch is at least 1, and that _x is |
257 | 32-bit aligned. | 257 | 32-bit aligned. |
258 | Since it's hard to put asserts in assembly, put them here.*/ | 258 | Since it's hard to put asserts in assembly, put them here.*/ |
259 | celt_assert(max_pitch>0); | ||
260 | celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); | ||
261 | #ifdef FIXED_POINT | 259 | #ifdef FIXED_POINT |
262 | opus_val32 maxcorr=1; | 260 | opus_val32 maxcorr=1; |
263 | #endif | 261 | #endif |
262 | celt_assert(max_pitch>0); | ||
263 | celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); | ||
264 | for (i=0;i<max_pitch-3;i+=4) | 264 | for (i=0;i<max_pitch-3;i+=4) |
265 | { | 265 | { |
266 | opus_val32 sum[4]={0,0,0,0}; | 266 | opus_val32 sum[4]={0,0,0,0}; |
@@ -279,9 +279,8 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr | |||
279 | /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ | 279 | /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ |
280 | for (;i<max_pitch;i++) | 280 | for (;i<max_pitch;i++) |
281 | { | 281 | { |
282 | opus_val32 sum = 0; | 282 | opus_val32 sum; |
283 | for (j=0;j<len;j++) | 283 | sum = celt_inner_prod(_x, _y+i, len); |
284 | sum = MAC16_16(sum, _x[j],_y[i+j]); | ||
285 | xcorr[i] = sum; | 284 | xcorr[i] = sum; |
286 | #ifdef FIXED_POINT | 285 | #ifdef FIXED_POINT |
287 | maxcorr = MAX32(maxcorr, sum); | 286 | maxcorr = MAX32(maxcorr, sum); |
@@ -361,12 +360,17 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR | |||
361 | #endif | 360 | #endif |
362 | for (i=0;i<max_pitch>>1;i++) | 361 | for (i=0;i<max_pitch>>1;i++) |
363 | { | 362 | { |
364 | opus_val32 sum=0; | 363 | opus_val32 sum; |
365 | xcorr[i] = 0; | 364 | xcorr[i] = 0; |
366 | if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) | 365 | if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) |
367 | continue; | 366 | continue; |
367 | #ifdef FIXED_POINT | ||
368 | sum = 0; | ||
368 | for (j=0;j<len>>1;j++) | 369 | for (j=0;j<len>>1;j++) |
369 | sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); | 370 | sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); |
371 | #else | ||
372 | sum = celt_inner_prod(x_lp, y+i, len>>1); | ||
373 | #endif | ||
370 | xcorr[i] = MAX32(-1, sum); | 374 | xcorr[i] = MAX32(-1, sum); |
371 | #ifdef FIXED_POINT | 375 | #ifdef FIXED_POINT |
372 | maxcorr = MAX32(maxcorr, sum); | 376 | maxcorr = MAX32(maxcorr, sum); |
@@ -457,7 +461,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, | |||
457 | opus_val16 g1; | 461 | opus_val16 g1; |
458 | opus_val16 cont=0; | 462 | opus_val16 cont=0; |
459 | opus_val16 thresh; | 463 | opus_val16 thresh; |
460 | T1 = (2*T0+k)/(2*k); | 464 | T1 = celt_udiv(2*T0+k, 2*k); |
461 | if (T1 < minperiod) | 465 | if (T1 < minperiod) |
462 | break; | 466 | break; |
463 | /* Look for another strong correlation at T1b */ | 467 | /* Look for another strong correlation at T1b */ |
@@ -469,7 +473,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, | |||
469 | T1b = T0+T1; | 473 | T1b = T0+T1; |
470 | } else | 474 | } else |
471 | { | 475 | { |
472 | T1b = (2*second_check[k]*T0+k)/(2*k); | 476 | T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); |
473 | } | 477 | } |
474 | dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); | 478 | dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); |
475 | xy += xy2; | 479 | xy += xy2; |
@@ -514,13 +518,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, | |||
514 | pg = SHR32(frac_div32(best_xy,best_yy+1),16); | 518 | pg = SHR32(frac_div32(best_xy,best_yy+1),16); |
515 | 519 | ||
516 | for (k=0;k<3;k++) | 520 | for (k=0;k<3;k++) |
517 | { | 521 | xcorr[k] = celt_inner_prod(x, x-(T+k-1), N); |
518 | int T1 = T+k-1; | ||
519 | xy = 0; | ||
520 | for (i=0;i<N;i++) | ||
521 | xy = MAC16_16(xy, x[i], x[i-T1]); | ||
522 | xcorr[k] = xy; | ||
523 | } | ||
524 | if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) | 522 | if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) |
525 | offset = 1; | 523 | offset = 1; |
526 | else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) | 524 | else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) |