summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libopus/celt/celt.c
diff options
context:
space:
mode:
author: Nils Wallménius <nils@rockbox.org> 2014-01-19 16:31:59 +0100
committer: Nils Wallménius <nils@rockbox.org> 2014-07-13 11:12:40 +0200
commit: 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree: 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/celt.c
parent: e557951c94c1efa769900257e466900f0ffeb53b (diff)
download: rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
          rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c.

This brings in a bunch of optimizations to decode speed and memory usage. Allocations are switched from using the pseudostack to using the real stack. Enabled hacks to reduce stack usage. This should fix crashes on sansa clip, although some files will not play due to failing allocations in the codec buffer.

Speeds up decoding of the following test files:

                   H300 (cf)   C200 (arm7tdmi)   ipod classic (arm9e)
16 kbps (silk)     14.28 MHz      4.00 MHz            2.61 MHz
64 kbps (celt)      4.09 MHz      8.08 MHz            6.24 MHz
128 kbps (celt)     1.93 MHz      8.83 MHz            6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/celt.c')
-rw-r--r-- lib/rbcodec/codecs/libopus/celt/celt.c | 79
1 files changed, 73 insertions, 6 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c
index 3e0ce6e6a5..c0a1e0dab9 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt.c
@@ -54,6 +54,10 @@
54#define PACKAGE_VERSION "unknown" 54#define PACKAGE_VERSION "unknown"
55#endif 55#endif
56 56
57#if defined(MIPSr1_ASM)
58#include "mips/celt_mipsr1.h"
59#endif
60
57 61
58int resampling_factor(opus_int32 rate) 62int resampling_factor(opus_int32 rate)
59{ 63{
@@ -86,6 +90,63 @@ int resampling_factor(opus_int32 rate)
86} 90}
87 91
88#ifndef OVERRIDE_COMB_FILTER_CONST 92#ifndef OVERRIDE_COMB_FILTER_CONST
93/* This version should be faster on ARM */
94#ifdef OPUS_ARM_ASM
/* Constant-coefficient 5-tap comb filter, variant selected when OPUS_ARM_ASM
 * is defined.
 *
 * For each output index i the code starts from x[i] and folds in, via
 * MAC16_32_Q16, three terms built from the delayed input:
 *     g10 * x[i-T]
 *     g11 * (x[i-T+1] + x[i-T-1])
 *     g12 * (x[i-T+2] + x[i-T-2])
 * and stores the result in y[i]. Every delayed tap is passed through
 * SHL32(., 1) before use.
 * NOTE(review): the 1-bit pre-shift presumably compensates for pairing the
 * gains with a Q16 multiply-accumulate -- confirm against the macro
 * definitions.
 *
 * y             output buffer, written at indices 0..N-1 (see tail note below)
 * x             input buffer; samples starting at x[-T-2] are read, so the
 *               caller must provide valid history before x[0]
 * T             delay, in samples, of the centre tap
 * N             number of samples to process
 * g10, g11, g12 gains for the centre tap, the +/-1 taps and the +/-2 taps
 */
95static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
96 opus_val16 g10, opus_val16 g11, opus_val16 g12)
97{
98 opus_val32 x0, x1, x2, x3, x4;
99 int i;
/* Prime the four oldest taps for output 0; the newest tap (x0) is loaded
 * inside the loop. */
100 x4 = SHL32(x[-T-2], 1);
101 x3 = SHL32(x[-T-1], 1);
102 x2 = SHL32(x[-T], 1);
103 x1 = SHL32(x[-T+1], 1);
/* Main loop: five outputs per iteration. Each step loads exactly one new tap
 * into the variable holding the oldest one and uses the variables in rotated
 * roles, so no tap is copied between variables. After five steps x4..x1 are
 * back in their initial roles for the next iteration. The order of the
 * statements below is therefore significant. */
104 for (i=0;i<N-4;i+=5)
105 {
106 opus_val32 t;
/* Output i: centre x2, +/-1 = x1,x3, +/-2 = x0,x4. */
107 x0=SHL32(x[i-T+2],1);
108 t = MAC16_32_Q16(x[i], g10, x2);
109 t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
110 t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
111 y[i] = t;
/* Output i+1: centre x1, +/-1 = x0,x2, +/-2 = x4,x3. */
112 x4=SHL32(x[i-T+3],1);
113 t = MAC16_32_Q16(x[i+1], g10, x1);
114 t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
115 t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
116 y[i+1] = t;
/* Output i+2: centre x0, +/-1 = x4,x1, +/-2 = x3,x2. */
117 x3=SHL32(x[i-T+4],1);
118 t = MAC16_32_Q16(x[i+2], g10, x0);
119 t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
120 t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
121 y[i+2] = t;
/* Output i+3: centre x4, +/-1 = x3,x0, +/-2 = x2,x1. */
122 x2=SHL32(x[i-T+5],1);
123 t = MAC16_32_Q16(x[i+3], g10, x4);
124 t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
125 t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
126 y[i+3] = t;
/* Output i+4: centre x3, +/-1 = x2,x4, +/-2 = x1,x0. */
127 x1=SHL32(x[i-T+6],1);
128 t = MAC16_32_Q16(x[i+4], g10, x3);
129 t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
130 t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
131 y[i+4] = t;
132 }
/* Tail: handles the remaining N % 5 samples one at a time, shifting the taps
 * explicitly. It is compiled only when CUSTOM_MODES is defined; without it,
 * any samples past the last full group of five are left unwritten in y.
 * NOTE(review): presumably N is always a multiple of 5 in non-custom builds
 * -- confirm against the callers. */
133#ifdef CUSTOM_MODES
134 for (;i<N;i++)
135 {
136 opus_val32 t;
137 x0=SHL32(x[i-T+2],1);
138 t = MAC16_32_Q16(x[i], g10, x2);
139 t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
140 t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
141 y[i] = t;
142 x4=x3;
143 x3=x2;
144 x2=x1;
145 x1=x0;
146 }
147#endif
148}
149#else
89static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, 150static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
90 opus_val16 g10, opus_val16 g11, opus_val16 g12) 151 opus_val16 g10, opus_val16 g11, opus_val16 g12)
91{ 152{
@@ -110,7 +171,9 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
110 171
111} 172}
112#endif 173#endif
174#endif
113 175
176#ifndef OVERRIDE_comb_filter
114void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, 177void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
115 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, 178 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
116 const opus_val16 *window, int overlap) 179 const opus_val16 *window, int overlap)
@@ -131,16 +194,19 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
131 OPUS_MOVE(y, x, N); 194 OPUS_MOVE(y, x, N);
132 return; 195 return;
133 } 196 }
134 g00 = MULT16_16_Q15(g0, gains[tapset0][0]); 197 g00 = MULT16_16_P15(g0, gains[tapset0][0]);
135 g01 = MULT16_16_Q15(g0, gains[tapset0][1]); 198 g01 = MULT16_16_P15(g0, gains[tapset0][1]);
136 g02 = MULT16_16_Q15(g0, gains[tapset0][2]); 199 g02 = MULT16_16_P15(g0, gains[tapset0][2]);
137 g10 = MULT16_16_Q15(g1, gains[tapset1][0]); 200 g10 = MULT16_16_P15(g1, gains[tapset1][0]);
138 g11 = MULT16_16_Q15(g1, gains[tapset1][1]); 201 g11 = MULT16_16_P15(g1, gains[tapset1][1]);
139 g12 = MULT16_16_Q15(g1, gains[tapset1][2]); 202 g12 = MULT16_16_P15(g1, gains[tapset1][2]);
140 x1 = x[-T1+1]; 203 x1 = x[-T1+1];
141 x2 = x[-T1 ]; 204 x2 = x[-T1 ];
142 x3 = x[-T1-1]; 205 x3 = x[-T1-1];
143 x4 = x[-T1-2]; 206 x4 = x[-T1-2];
207 /* If the filter didn't change, we don't need the overlap */
208 if (g0==g1 && T0==T1 && tapset0==tapset1)
209 overlap=0;
144 for (i=0;i<overlap;i++) 210 for (i=0;i<overlap;i++)
145 { 211 {
146 opus_val16 f; 212 opus_val16 f;
@@ -170,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
170 /* Compute the part with the constant filter. */ 236 /* Compute the part with the constant filter. */
171 comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12); 237 comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
172} 238}
239#endif /* OVERRIDE_comb_filter */
173 240
174const signed char tf_select_table[4][8] = { 241const signed char tf_select_table[4][8] = {
175 {0, -1, 0, -1, 0,-1, 0,-1}, 242 {0, -1, 0, -1, 0,-1, 0,-1},