summary | refs | log | tree | commit | diff
path: root/lib/rbcodec/codecs/libopus/celt/arm
diff options
context:
space:
mode:
author Nils Wallménius <nils@rockbox.org> 2014-01-19 16:31:59 +0100
committer Nils Wallménius <nils@rockbox.org> 2014-07-13 11:12:40 +0200
commit 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/arm
parent e557951c94c1efa769900257e466900f0ffeb53b (diff)
download rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c

This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer.

Speeds up decoding of the following test files:

                   H300 (cf)    C200 (arm7tdmi)    ipod classic (arm9e)
16 kbps (silk)     14.28 MHz    4.00 MHz           2.61 MHz
64 kbps (celt)      4.09 MHz    8.08 MHz           6.24 MHz
128 kbps (celt)     1.93 MHz    8.83 MHz           6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/arm')
-rw-r--r-- lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h | 4
-rw-r--r-- lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h | 35
2 files changed, 39 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
index b690bc8cea..efb3b1896a 100644
--- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
@@ -68,6 +68,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
68#undef MAC16_32_Q15 68#undef MAC16_32_Q15
69#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) 69#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
70 70
71/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
72 Result fits in 32 bits. */
73#undef MAC16_32_Q16
74#define MAC16_32_Q16(c, a, b) ADD32(c, MULT16_32_Q16(a, b))
71 75
72/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ 76/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
73#undef MULT32_32_Q31 77#undef MULT32_32_Q31
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
index 1194a7d3ec..36a6321101 100644
--- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
@@ -82,6 +82,23 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
82} 82}
83#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) 83#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
84 84
85/** 16x32 multiply, followed by a 16-bit shift right and 32-bit add.
86 Result fits in 32 bits. */
87#undef MAC16_32_Q16
88static OPUS_INLINE opus_val32 MAC16_32_Q16_armv5e(opus_val32 c, opus_val16 a,
89 opus_val32 b)
90{
91 int res;
92 __asm__(
93 "#MAC16_32_Q16\n\t"
94 "smlawb %0, %1, %2, %3;\n"
95 : "=r"(res)
96 : "r"(b), "r"(a), "r"(c)
97 );
98 return res;
99}
100#define MAC16_32_Q16(c, a, b) (MAC16_32_Q16_armv5e(c, a, b))
101
85/** 16x16 multiply-add where the result fits in 32 bits */ 102/** 16x16 multiply-add where the result fits in 32 bits */
86#undef MAC16_16 103#undef MAC16_16
87static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, 104static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
@@ -113,4 +130,22 @@ static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
113} 130}
114#define MULT16_16(a, b) (MULT16_16_armv5e(a, b)) 131#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
115 132
133#ifdef OPUS_ARM_INLINE_MEDIA
134
135#undef SIG2WORD16
136static OPUS_INLINE opus_val16 SIG2WORD16_armv6(opus_val32 x)
137{
138 celt_sig res;
139 __asm__(
140 "#SIG2WORD16\n\t"
141 "ssat %0, #16, %1, ASR #12\n\t"
142 : "=r"(res)
143 : "r"(x+2048)
144 );
145 return EXTRACT16(res);
146}
147#define SIG2WORD16(x) (SIG2WORD16_armv6(x))
148
149#endif /* OPUS_ARM_INLINE_MEDIA */
150
116#endif 151#endif