From dceec0909295b56c140b83cd6f8d019fddb2b689 Mon Sep 17 00:00:00 2001 From: Andree Buschmann Date: Sat, 6 Oct 2012 14:17:30 +0200 Subject: opus: speed up comb_filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip expensive multiply-accumulate loop when gains are 0 and just copy using memcpy if soure and destination are not the same Speeds up decoding of a 64kbps test file by 6MHz on h300 (cf) 7MHz on c200 (pp) and 6MHz on fuzev1 (amsv1) Change-Id: Ibbc9ddfd45a9ac661467b1327b8c67761924fb8b Signed-off-by: Nils Wallménius --- lib/rbcodec/codecs/libopus/celt/celt.c | 90 ++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c index 74ebee91b4..a4e5131a04 100644 --- a/lib/rbcodec/codecs/libopus/celt/celt.c +++ b/lib/rbcodec/codecs/libopus/celt/celt.c @@ -497,43 +497,69 @@ static void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, const opus_val16 *window, int overlap) { - int i; - /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ - opus_val16 g00, g01, g02, g10, g11, g12; - static const opus_val16 gains[3][3] = { + /* Multiply-adds are only needed if g0 or g1 are non-zero. In all other cases a simple + * copy of vector x to y is possible. */ + if (g0!=0 || g1!=0) + { + int i; + opus_val16 g00, g01, g02, g10, g11, g12, idx0, idx1; + static const opus_val16 gains[3][3] = { {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; - g00 = MULT16_16_Q15(g0, gains[tapset0][0]); - g01 = MULT16_16_Q15(g0, gains[tapset0][1]); - g02 = MULT16_16_Q15(g0, gains[tapset0][2]); - g10 = MULT16_16_Q15(g1, gains[tapset1][0]); - g11 = MULT16_16_Q15(g1, gains[tapset1][1]); - g12 = MULT16_16_Q15(g1, gains[tapset1][2]); - for (i=0;i