From 2119f75af3e94a959451cd0dccde9ce38b19bcf0 Mon Sep 17 00:00:00 2001
From: Andree Buschmann
Date: Sat, 6 Oct 2012 23:35:19 +0200
Subject: opus: full precision MULT32_32_Q31 (32*32=64>>31) multiplication
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace complicated macro doing three 16*16 muls and add an inline asm
implementation for arm, speeds up decoding a 64kbps test file by 0.5MHz
on c200 (pp) and gives slightly better precision.

Change-Id: I6fc5b83c210f01bffdc38aec54cc5a8b646d8169
Signed-off-by: Nils Wallménius
---
 lib/rbcodec/codecs/libopus/celt/fixed_generic.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
index 5682a6793d..53f513b67b 100644
--- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
@@ -71,9 +71,23 @@ static inline int32_t MULT16_32_Q15(int32_t a, int32_t b)
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
 #endif
 
-/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
-#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#if defined(CPU_ARM)
+static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
+{
+    int32_t lo, hi;
+    asm volatile("smull %[lo], %[hi], %[a], %[b] \n\t"
+                 "mov %[lo], %[lo], lsr #31 \n\t"
+                 "orr %[hi], %[lo], %[hi], lsl #1 \n\t"
+                 : [lo] "=&r" (lo), [hi] "=&r" (hi)
+                 : [a] "r" (a), [b] "r" (b) );
+    return(hi);
+}
+#else
+/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+//#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#define MULT32_32_Q31(a,b) (opus_val32)((((int64_t)(a)) * ((int64_t)(b)))>>31)
+#endif
 
 /** Compile-time conversion of float constant to 16-bit value */
 #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
 
-- 
cgit v1.2.3