summaryrefslogtreecommitdiff
path: root/lib/rbcodec
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2012-10-06 14:17:30 +0200
committerNils Wallménius <nils@rockbox.org>2012-10-06 14:25:20 +0200
commitdceec0909295b56c140b83cd6f8d019fddb2b689 (patch)
treebdd49c4367ec988caecd0fe17e5a54284dbb8cef /lib/rbcodec
parent78ca74a56f1b7535468e77e1af99ca7ea3097b6e (diff)
downloadrockbox-dceec0909295b56c140b83cd6f8d019fddb2b689.tar.gz
rockbox-dceec0909295b56c140b83cd6f8d019fddb2b689.zip
opus: speed up comb_filter
Skip the expensive multiply-accumulate loop when the gains are 0 and just copy using memcpy if source and destination are not the same. Speeds up decoding of a 64kbps test file by 6MHz on h300 (cf), 7MHz on c200 (pp) and 6MHz on fuzev1 (amsv1). Change-Id: Ibbc9ddfd45a9ac661467b1327b8c67761924fb8b Signed-off-by: Nils Wallménius <nils@rockbox.org>
Diffstat (limited to 'lib/rbcodec')
-rw-r--r--lib/rbcodec/codecs/libopus/celt/celt.c90
1 files changed, 58 insertions, 32 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c
index 74ebee91b4..a4e5131a04 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt.c
@@ -497,43 +497,69 @@ static void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
497 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, 497 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
498 const opus_val16 *window, int overlap) 498 const opus_val16 *window, int overlap)
499{ 499{
500 int i; 500 /* Multiply-adds are only needed if g0 or g1 are non-zero. In all other cases a simple
501 /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ 501 * copy of vector x to y is possible. */
502 opus_val16 g00, g01, g02, g10, g11, g12; 502 if (g0!=0 || g1!=0)
503 static const opus_val16 gains[3][3] = { 503 {
504 int i;
505 opus_val16 g00, g01, g02, g10, g11, g12, idx0, idx1;
506 static const opus_val16 gains[3][3] = {
504 {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, 507 {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
505 {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, 508 {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
506 {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; 509 {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
507 g00 = MULT16_16_Q15(g0, gains[tapset0][0]); 510 g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
508 g01 = MULT16_16_Q15(g0, gains[tapset0][1]); 511 g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
509 g02 = MULT16_16_Q15(g0, gains[tapset0][2]); 512 g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
510 g10 = MULT16_16_Q15(g1, gains[tapset1][0]); 513 g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
511 g11 = MULT16_16_Q15(g1, gains[tapset1][1]); 514 g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
512 g12 = MULT16_16_Q15(g1, gains[tapset1][2]); 515 g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
513 for (i=0;i<overlap;i++) 516 /* printf("g0 %d g1 %d\n", g0,g1); */
517 idx0 = -T0;
518 idx1 = -T1;
519 for (i=0;i<overlap;i++,idx0++,idx1++)
520 {
521 opus_val16 f0, f1;
522 f1 = MULT16_16_Q15(window[i],window[i]);
523 f0 = Q15ONE - f1;
524 y[i] = x[i]
525 + MULT16_32_Q15(MULT16_16_Q15(f0,g02), x[idx0-2])
526 + MULT16_32_Q15(MULT16_16_Q15(f0,g01), x[idx0-1])
527 + MULT16_32_Q15(MULT16_16_Q15(f0,g00), x[idx0 ])
528 + MULT16_32_Q15(MULT16_16_Q15(f0,g01), x[idx0+1])
529 + MULT16_32_Q15(MULT16_16_Q15(f0,g02), x[idx0+2])
530 + MULT16_32_Q15(MULT16_16_Q15(f1,g12), x[idx1-2])
531 + MULT16_32_Q15(MULT16_16_Q15(f1,g11), x[idx1-1])
532 + MULT16_32_Q15(MULT16_16_Q15(f1,g10), x[idx1 ])
533 + MULT16_32_Q15(MULT16_16_Q15(f1,g11), x[idx1+1])
534 + MULT16_32_Q15(MULT16_16_Q15(f1,g12), x[idx1+2]);
535 }
536 /* No multiply-add required if g1=0 as all multiplicands are =0. */
537 if (g1!=0)
538 {
539 idx1 = overlap-T1;
540 for (i=overlap;i<N;i++,idx1++)
541 {
542 y[i] = x[i]
543 + MULT16_32_Q15(g12, x[idx1-2])
544 + MULT16_32_Q15(g11, x[idx1-1])
545 + MULT16_32_Q15(g10, x[idx1 ])
546 + MULT16_32_Q15(g11, x[idx1+1])
547 + MULT16_32_Q15(g12, x[idx1+2]);
548 }
549 }
550 /* Only perform vector copy if source and destination are not same. */
551 else if (x != y)
552 {
553 /* Copy part of vector from x[overlap..N] to y[overlap..N] */
554 OPUS_COPY(y+overlap, x+overlap, N-overlap);
555 }
556 }
557 /* Only perform vector copy if source and destination are not same. */
558 else if (x != y)
514 { 559 {
515 opus_val16 f; 560 /* Copy full vector from x[0..N] to y[0..N] */
516 f = MULT16_16_Q15(window[i],window[i]); 561 OPUS_COPY(y, x, N);
517 y[i] = x[i]
518 + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
519 + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0-1])
520 + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0+1])
521 + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0-2])
522 + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0+2])
523 + MULT16_32_Q15(MULT16_16_Q15(f,g10),x[i-T1])
524 + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1-1])
525 + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1+1])
526 + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1-2])
527 + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1+2]);
528
529 } 562 }
530 for (i=overlap;i<N;i++)
531 y[i] = x[i]
532 + MULT16_32_Q15(g10,x[i-T1])
533 + MULT16_32_Q15(g11,x[i-T1-1])
534 + MULT16_32_Q15(g11,x[i-T1+1])
535 + MULT16_32_Q15(g12,x[i-T1-2])
536 + MULT16_32_Q15(g12,x[i-T1+2]);
537} 563}
538 564
539static const signed char tf_select_table[4][8] = { 565static const signed char tf_select_table[4][8] = {