diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2012-10-06 23:35:19 +0200 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2012-10-06 23:43:05 +0200 |
commit | 2119f75af3e94a959451cd0dccde9ce38b19bcf0 (patch) | |
tree | ae48902b742982dd15807880b5fb80254d86d159 /lib/rbcodec/codecs/libopus | |
parent | d2875fc77370509248b638b54969f5ed625d8cac (diff) | |
download | rockbox-2119f75af3e94a959451cd0dccde9ce38b19bcf0.tar.gz rockbox-2119f75af3e94a959451cd0dccde9ce38b19bcf0.zip |
opus: full precision MULT32_32_Q31 (32*32=64>>31) multiplication
Replace the complicated macro that did three 16*16 multiplications with a
full 64-bit multiply, and add an inline asm implementation for ARM. This speeds up
decoding of a 64kbps test file by 0.5MHz on c200 (pp) and gives slightly better precision.
Change-Id: I6fc5b83c210f01bffdc38aec54cc5a8b646d8169
Signed-off-by: Nils Wallménius <nils@rockbox.org>
Diffstat (limited to 'lib/rbcodec/codecs/libopus')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/fixed_generic.h | 18 |
1 files changed, 16 insertions, 2 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h index 5682a6793d..53f513b67b 100644 --- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h +++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h | |||
@@ -71,9 +71,23 @@ static inline int32_t MULT16_32_Q15(int32_t a, int32_t b) | |||
71 | #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) | 71 | #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) |
72 | #endif | 72 | #endif |
73 | 73 | ||
74 | /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ | 74 | #if defined(CPU_ARM) |
75 | #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) | 75 | static inline int32_t MULT32_32_Q31(int32_t a, int32_t b) |
76 | { | ||
77 | int32_t lo, hi; | ||
78 | asm volatile("smull %[lo], %[hi], %[a], %[b] \n\t" | ||
79 | "mov %[lo], %[lo], lsr #31 \n\t" | ||
80 | "orr %[hi], %[lo], %[hi], lsl #1 \n\t" | ||
81 | : [lo] "=&r" (lo), [hi] "=&r" (hi) | ||
82 | : [a] "r" (a), [b] "r" (b) ); | ||
83 | return(hi); | ||
84 | } | ||
76 | 85 | ||
86 | #else | ||
87 | /** 32x32 multiplication, followed by a 31-bit shift right. Result fits in 32 bits */ | |
88 | //#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) | ||
89 | #define MULT32_32_Q31(a,b) (opus_val32)((((int64_t)(a)) * ((int64_t)(b)))>>31) | ||
90 | #endif | ||
77 | /** Compile-time conversion of float constant to 16-bit value */ | 91 | /** Compile-time conversion of float constant to 16-bit value */ |
78 | #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) | 92 | #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) |
79 | 93 | ||