summary | refs | log | tree | commit | diff
path: root/lib/rbcodec/codecs/libopus/celt/pitch.c
diff options
context:
space:
mode:
author: Nils Wallménius <nils@rockbox.org> 2014-01-19 16:31:59 +0100
committer: Nils Wallménius <nils@rockbox.org> 2014-07-13 11:12:40 +0200
commit: 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree: 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/pitch.c
parent: e557951c94c1efa769900257e466900f0ffeb53b (diff)
download: rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
download: rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c

This brings in a bunch of optimizations to decode speed and memory usage.
Allocations are switched from using the pseudostack to using the real stack.
Enabled hacks to reduce stack usage.

This should fix crashes on sansa clip, although some files will not play due
to failing allocations in the codec buffer.

Speeds up decoding of the following test files:

                  H300 (cf)    C200 (arm7tdmi)   ipod classic (arm9e)
16 kbps (silk)    14.28 MHz    4.00 MHz          2.61 MHz
64 kbps (celt)     4.09 MHz    8.08 MHz          6.24 MHz
128 kbps (celt)    1.93 MHz    8.83 MHz          6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/pitch.c')
-rw-r--r-- lib/rbcodec/codecs/libopus/celt/pitch.c | 30
1 file changed, 14 insertions, 16 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c
index c28857297a..ee56a434f0 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.c
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.c
@@ -252,15 +252,15 @@ void
252#endif 252#endif
253celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) 253celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
254{ 254{
255 int i,j; 255 int i;
256 /*The EDSP version requires that max_pitch is at least 1, and that _x is 256 /*The EDSP version requires that max_pitch is at least 1, and that _x is
257 32-bit aligned. 257 32-bit aligned.
258 Since it's hard to put asserts in assembly, put them here.*/ 258 Since it's hard to put asserts in assembly, put them here.*/
259 celt_assert(max_pitch>0);
260 celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
261#ifdef FIXED_POINT 259#ifdef FIXED_POINT
262 opus_val32 maxcorr=1; 260 opus_val32 maxcorr=1;
263#endif 261#endif
262 celt_assert(max_pitch>0);
263 celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
264 for (i=0;i<max_pitch-3;i+=4) 264 for (i=0;i<max_pitch-3;i+=4)
265 { 265 {
266 opus_val32 sum[4]={0,0,0,0}; 266 opus_val32 sum[4]={0,0,0,0};
@@ -279,9 +279,8 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
279 /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ 279 /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
280 for (;i<max_pitch;i++) 280 for (;i<max_pitch;i++)
281 { 281 {
282 opus_val32 sum = 0; 282 opus_val32 sum;
283 for (j=0;j<len;j++) 283 sum = celt_inner_prod(_x, _y+i, len);
284 sum = MAC16_16(sum, _x[j],_y[i+j]);
285 xcorr[i] = sum; 284 xcorr[i] = sum;
286#ifdef FIXED_POINT 285#ifdef FIXED_POINT
287 maxcorr = MAX32(maxcorr, sum); 286 maxcorr = MAX32(maxcorr, sum);
@@ -361,12 +360,17 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
361#endif 360#endif
362 for (i=0;i<max_pitch>>1;i++) 361 for (i=0;i<max_pitch>>1;i++)
363 { 362 {
364 opus_val32 sum=0; 363 opus_val32 sum;
365 xcorr[i] = 0; 364 xcorr[i] = 0;
366 if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) 365 if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
367 continue; 366 continue;
367#ifdef FIXED_POINT
368 sum = 0;
368 for (j=0;j<len>>1;j++) 369 for (j=0;j<len>>1;j++)
369 sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); 370 sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
371#else
372 sum = celt_inner_prod(x_lp, y+i, len>>1);
373#endif
370 xcorr[i] = MAX32(-1, sum); 374 xcorr[i] = MAX32(-1, sum);
371#ifdef FIXED_POINT 375#ifdef FIXED_POINT
372 maxcorr = MAX32(maxcorr, sum); 376 maxcorr = MAX32(maxcorr, sum);
@@ -457,7 +461,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
457 opus_val16 g1; 461 opus_val16 g1;
458 opus_val16 cont=0; 462 opus_val16 cont=0;
459 opus_val16 thresh; 463 opus_val16 thresh;
460 T1 = (2*T0+k)/(2*k); 464 T1 = celt_udiv(2*T0+k, 2*k);
461 if (T1 < minperiod) 465 if (T1 < minperiod)
462 break; 466 break;
463 /* Look for another strong correlation at T1b */ 467 /* Look for another strong correlation at T1b */
@@ -469,7 +473,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
469 T1b = T0+T1; 473 T1b = T0+T1;
470 } else 474 } else
471 { 475 {
472 T1b = (2*second_check[k]*T0+k)/(2*k); 476 T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
473 } 477 }
474 dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); 478 dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
475 xy += xy2; 479 xy += xy2;
@@ -514,13 +518,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
514 pg = SHR32(frac_div32(best_xy,best_yy+1),16); 518 pg = SHR32(frac_div32(best_xy,best_yy+1),16);
515 519
516 for (k=0;k<3;k++) 520 for (k=0;k<3;k++)
517 { 521 xcorr[k] = celt_inner_prod(x, x-(T+k-1), N);
518 int T1 = T+k-1;
519 xy = 0;
520 for (i=0;i<N;i++)
521 xy = MAC16_16(xy, x[i], x[i-T1]);
522 xcorr[k] = xy;
523 }
524 if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) 522 if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
525 offset = 1; 523 offset = 1;
526 else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) 524 else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))