summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libopus/celt/cwrs.c
diff options
context:
space:
mode:
author: Nils Wallménius <nils@rockbox.org> 2014-01-19 16:31:59 +0100
committer: Nils Wallménius <nils@rockbox.org> 2014-07-13 11:12:40 +0200
commit: 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree: 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/cwrs.c
parent: e557951c94c1efa769900257e466900f0ffeb53b (diff)
download: rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
          rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c.

This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Hacks to reduce stack usage are enabled. This should fix crashes on the Sansa Clip, although some files will not play due to failing allocations in the codec buffer.

Speeds up decoding of the following test files:

                   H300 (cf)    C200 (arm7tdmi)   ipod classic (arm9e)
16 kbps (silk)     14.28 MHz    4.00 MHz          2.61 MHz
64 kbps (celt)      4.09 MHz    8.08 MHz          6.24 MHz
128 kbps (celt)     1.93 MHz    8.83 MHz          6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/cwrs.c')
-rw-r--r-- lib/rbcodec/codecs/libopus/celt/cwrs.c 40
1 file changed, 29 insertions, 11 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.c b/lib/rbcodec/codecs/libopus/celt/cwrs.c
index 03b86985d2..921100fdc2 100644
--- a/lib/rbcodec/codecs/libopus/celt/cwrs.c
+++ b/lib/rbcodec/codecs/libopus/celt/cwrs.c
@@ -460,10 +460,12 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
460 ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); 460 ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
461} 461}
462 462
463static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ 463static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
464 opus_uint32 p; 464 opus_uint32 p;
465 int s; 465 int s;
466 int k0; 466 int k0;
467 opus_int16 val;
468 opus_val32 yy=0;
467 celt_assert(_k>0); 469 celt_assert(_k>0);
468 celt_assert(_n>1); 470 celt_assert(_n>1);
469 while(_n>2){ 471 while(_n>2){
@@ -487,7 +489,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
487 } 489 }
488 else for(p=row[_k];p>_i;p=row[_k])_k--; 490 else for(p=row[_k];p>_i;p=row[_k])_k--;
489 _i-=p; 491 _i-=p;
490 *_y++=(k0-_k+s)^s; 492 val=(k0-_k+s)^s;
493 *_y++=val;
494 yy=MAC16_16(yy,val,val);
491 } 495 }
492 /*Lots of dimensions case:*/ 496 /*Lots of dimensions case:*/
493 else{ 497 else{
@@ -507,7 +511,9 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
507 do p=CELT_PVQ_U_ROW[--_k][_n]; 511 do p=CELT_PVQ_U_ROW[--_k][_n];
508 while(p>_i); 512 while(p>_i);
509 _i-=p; 513 _i-=p;
510 *_y++=(k0-_k+s)^s; 514 val=(k0-_k+s)^s;
515 *_y++=val;
516 yy=MAC16_16(yy,val,val);
511 } 517 }
512 } 518 }
513 _n--; 519 _n--;
@@ -519,14 +525,19 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
519 k0=_k; 525 k0=_k;
520 _k=(_i+1)>>1; 526 _k=(_i+1)>>1;
521 if(_k)_i-=2*_k-1; 527 if(_k)_i-=2*_k-1;
522 *_y++=(k0-_k+s)^s; 528 val=(k0-_k+s)^s;
529 *_y++=val;
530 yy=MAC16_16(yy,val,val);
523 /*_n==1*/ 531 /*_n==1*/
524 s=-(int)_i; 532 s=-(int)_i;
525 *_y=(_k+s)^s; 533 val=(_k+s)^s;
534 *_y=val;
535 yy=MAC16_16(yy,val,val);
536 return yy;
526} 537}
527 538
528void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 539opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
529 cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); 540 return cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
530} 541}
531 542
532#else /* SMALL_FOOTPRINT */ 543#else /* SMALL_FOOTPRINT */
@@ -591,8 +602,10 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
591 _y: Returns the vector of pulses. 602 _y: Returns the vector of pulses.
592 _u: Must contain entries [0..._k+1] of row _n of U() on input. 603 _u: Must contain entries [0..._k+1] of row _n of U() on input.
593 Its contents will be destructively modified.*/ 604 Its contents will be destructively modified.*/
594static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ 605static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
595 int j; 606 int j;
607 opus_int16 val;
608 opus_val32 yy=0;
596 celt_assert(_n>0); 609 celt_assert(_n>0);
597 j=0; 610 j=0;
598 do{ 611 do{
@@ -607,10 +620,13 @@ static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
607 while(p>_i)p=_u[--_k]; 620 while(p>_i)p=_u[--_k];
608 _i-=p; 621 _i-=p;
609 yj-=_k; 622 yj-=_k;
610 _y[j]=(yj+s)^s; 623 val=(yj+s)^s;
624 _y[j]=val;
625 yy=MAC16_16(yy,val,val);
611 uprev(_u,_k+2,0); 626 uprev(_u,_k+2,0);
612 } 627 }
613 while(++j<_n); 628 while(++j<_n);
629 return yy;
614} 630}
615 631
616/*Returns the index of the given combination of K elements chosen from a set 632/*Returns the index of the given combination of K elements chosen from a set
@@ -685,13 +701,15 @@ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
685 RESTORE_STACK; 701 RESTORE_STACK;
686} 702}
687 703
688void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ 704opus_val32 decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
689 VARDECL(opus_uint32,u); 705 VARDECL(opus_uint32,u);
706 int ret;
690 SAVE_STACK; 707 SAVE_STACK;
691 celt_assert(_k>0); 708 celt_assert(_k>0);
692 ALLOC(u,_k+2U,opus_uint32); 709 ALLOC(u,_k+2U,opus_uint32);
693 cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); 710 ret = cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
694 RESTORE_STACK; 711 RESTORE_STACK;
712 return ret;
695} 713}
696 714
697#endif /* SMALL_FOOTPRINT */ 715#endif /* SMALL_FOOTPRINT */