diff options
author | Nils Wallménius <nils@rockbox.org> | 2012-09-24 23:14:58 +0200 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2012-09-25 11:40:59 +0200 |
commit | afc6b3f0215037821382c957d975dfc8f727b2a9 (patch) | |
tree | c6f5a46eecf88ca3ee2965ba425fafac156fd955 /lib/rbcodec/codecs/libopus/celt | |
parent | 06fc6fdd0a6e0758043fd7aebb98f5098a8344e4 (diff) | |
download | rockbox-afc6b3f0215037821382c957d975dfc8f727b2a9.tar.gz rockbox-afc6b3f0215037821382c957d975dfc8f727b2a9.zip |
opus: asm MULT16_32_Q15 for arm and cf
Reduces the CPU clock needed to decode a 64 kbps opus test file by 34 MHz on h300 (cf),
24 MHz on c200 (pp) and 13 MHz on fuzev1 (amsv1).
Change-Id: I0dce6b3bfe6c81d0a722dfebb13891b9a428c6ba
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/fixed_generic.h | 27 |
1 file changed, 27 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h index 71e28d62a8..d2271e7972 100644 --- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h +++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h | |||
@@ -42,8 +42,35 @@ | |||
42 | /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ | 42 | /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ |
43 | #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16((a),((b)&0x0000ffff)),16)) | 43 | #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16((a),((b)&0x0000ffff)),16)) |
44 | 44 | ||
45 | #if defined(CPU_COLDFIRE) | ||
46 | static inline int32_t MULT16_32_Q15(int32_t a, int32_t b) | ||
47 | { | ||
48 | asm volatile ("lsl.l #8, %[a];" | ||
49 | "lsl.l #8, %[a];" | ||
50 | "mac.l %[a], %[b], %%acc0;" | ||
51 | "movclr.l %%acc0, %[a];" | ||
52 | : [a] "+d" (a) | ||
53 | : [b] "d" (b) | ||
54 | : "cc"); | ||
55 | return a; | ||
56 | } | ||
57 | |||
58 | #elif defined(CPU_ARM) | ||
59 | static inline int32_t MULT16_32_Q15(int32_t a, int32_t b) | ||
60 | { | ||
61 | int32_t lo, hi; | ||
62 | asm volatile("smull %[lo], %[hi], %[a], %[b] \n\t" | ||
63 | "mov %[lo], %[lo], lsr #15 \n\t" | ||
64 | "orr %[hi], %[lo], %[hi], lsl #17 \n\t" | ||
65 | : [lo] "=&r" (lo), [hi] "=&r" (hi) | ||
66 | : [a] "r" (a), [b] "r" (b) ); | ||
67 | return(hi); | ||
68 | } | ||
69 | |||
70 | #else | ||
45 | /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ | 71 | /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ |
46 | #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) | 72 | #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) |
73 | #endif | ||
47 | 74 | ||
48 | /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ | 75 | /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ |
49 | #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) | 76 | #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) |