From afc6b3f0215037821382c957d975dfc8f727b2a9 Mon Sep 17 00:00:00 2001
From: Nils Wallménius
Date: Mon, 24 Sep 2012 23:14:58 +0200
Subject: opus: asm MULT16_32_Q15 for arm and cf

Speeds up decoding of a 64kbps opus test file by 34MHz on h300 (cf), 24MHz on c200 (pp) and 13MHz on fuzev1 (amsv1)

Change-Id: I0dce6b3bfe6c81d0a722dfebb13891b9a428c6ba
---
 lib/rbcodec/codecs/libopus/celt/fixed_generic.h | 27 +++++++++++++++++++++++++
 lib/rbcodec/codecs/opus.c                       |  6 ++++++
 2 files changed, 33 insertions(+)

diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
index 71e28d62a8..d2271e7972 100644
--- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
@@ -42,8 +42,35 @@
 /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
 #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16((a),((b)&0x0000ffff)),16))
 
+#if defined(CPU_COLDFIRE)
+static inline int32_t MULT16_32_Q15(int32_t a, int32_t b)
+{
+  asm volatile ("lsl.l #8, %[a];"
+                "lsl.l #8, %[a];"
+                "mac.l %[a], %[b], %%acc0;"
+                "movclr.l %%acc0, %[a];"
+                : [a] "+d" (a)
+                : [b] "d" (b)
+                : "cc");
+  return a;
+}
+
+#elif defined(CPU_ARM)
+static inline int32_t MULT16_32_Q15(int32_t a, int32_t b)
+{
+  int32_t lo, hi;
+  asm volatile("smull %[lo], %[hi], %[a], %[b] \n\t"
+               "mov %[lo], %[lo], lsr #15 \n\t"
+               "orr %[hi], %[lo], %[hi], lsl #17 \n\t"
+               : [lo] "=&r" (lo), [hi] "=&r" (hi)
+               : [a] "r" (a), [b] "r" (b) );
+  return(hi);
+}
+
+#else
 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+#endif
 
 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
 #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
diff --git a/lib/rbcodec/codecs/opus.c b/lib/rbcodec/codecs/opus.c
index 19bdb8daae..cf2d69738f 100644
--- a/lib/rbcodec/codecs/opus.c
+++ b/lib/rbcodec/codecs/opus.c
@@ -332,6 +332,12 @@ enum codec_status codec_run(void)
     }
     global_stack = 0;
 
+#if defined(CPU_COLDFIRE)
+    /* EMAC rounding is disabled because of MULT16_32_Q15, which will be
+       inaccurate with rounding in its current incarnation */
+    coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
+#endif
+
     /* pre-init the ogg_sync_state buffer, so it won't need many reallocs */
     ogg_sync_init(&oy);
     oy.storage = 64*1024;
-- 
cgit v1.2.3