summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libopus/celt/vq.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/vq.c')
-rw-r--r--lib/rbcodec/codecs/libopus/celt/vq.c130
1 files changed, 78 insertions, 52 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c
index b047b22774..a6b5552d69 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.c
+++ b/lib/rbcodec/codecs/libopus/celt/vq.c
@@ -39,10 +39,6 @@
39#include "rate.h" 39#include "rate.h"
40#include "pitch.h" 40#include "pitch.h"
41 41
42#if defined(MIPSr1_ASM)
43#include "mips/vq_mipsr1.h"
44#endif
45
46#ifndef OVERRIDE_vq_exp_rotation1 42#ifndef OVERRIDE_vq_exp_rotation1
47static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) 43static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
48{ 44{
@@ -71,7 +67,7 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
71} 67}
72#endif /* OVERRIDE_vq_exp_rotation1 */ 68#endif /* OVERRIDE_vq_exp_rotation1 */
73 69
74static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) 70void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
75{ 71{
76 static const int SPREAD_FACTOR[3]={15,10,5}; 72 static const int SPREAD_FACTOR[3]={15,10,5};
77 int i; 73 int i;
@@ -162,42 +158,27 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
162 return collapse_mask; 158 return collapse_mask;
163} 159}
164 160
165unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc 161opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
166#ifdef RESYNTH
167 , opus_val16 gain
168#endif
169 )
170{ 162{
171 VARDECL(celt_norm, y); 163 VARDECL(celt_norm, y);
172 VARDECL(int, iy); 164 VARDECL(int, signx);
173 VARDECL(opus_val16, signx);
174 int i, j; 165 int i, j;
175 opus_val16 s;
176 int pulsesLeft; 166 int pulsesLeft;
177 opus_val32 sum; 167 opus_val32 sum;
178 opus_val32 xy; 168 opus_val32 xy;
179 opus_val16 yy; 169 opus_val16 yy;
180 unsigned collapse_mask;
181 SAVE_STACK; 170 SAVE_STACK;
182 171
183 celt_assert2(K>0, "alg_quant() needs at least one pulse"); 172 (void)arch;
184 celt_assert2(N>1, "alg_quant() needs at least two dimensions");
185
186 ALLOC(y, N, celt_norm); 173 ALLOC(y, N, celt_norm);
187 ALLOC(iy, N, int); 174 ALLOC(signx, N, int);
188 ALLOC(signx, N, opus_val16);
189
190 exp_rotation(X, N, 1, B, K, spread);
191 175
192 /* Get rid of the sign */ 176 /* Get rid of the sign */
193 sum = 0; 177 sum = 0;
194 j=0; do { 178 j=0; do {
195 if (X[j]>0) 179 signx[j] = X[j]<0;
196 signx[j]=1; 180 /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
197 else { 181 X[j] = ABS16(X[j]);
198 signx[j]=-1;
199 X[j]=-X[j];
200 }
201 iy[j] = 0; 182 iy[j] = 0;
202 y[j] = 0; 183 y[j] = 0;
203 } while (++j<N); 184 } while (++j<N);
@@ -229,7 +210,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
229 while (++j<N); 210 while (++j<N);
230 sum = QCONST16(1.f,14); 211 sum = QCONST16(1.f,14);
231 } 212 }
232 rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); 213#ifdef FIXED_POINT
214 rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
215#else
216 /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
217 rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
218#endif
233 j=0; do { 219 j=0; do {
234#ifdef FIXED_POINT 220#ifdef FIXED_POINT
235 /* It's really important to round *towards zero* here */ 221 /* It's really important to round *towards zero* here */
@@ -244,12 +230,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
244 pulsesLeft -= iy[j]; 230 pulsesLeft -= iy[j];
245 } while (++j<N); 231 } while (++j<N);
246 } 232 }
247 celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); 233 celt_sig_assert(pulsesLeft>=0);
248 234
249 /* This should never happen, but just in case it does (e.g. on silence) 235 /* This should never happen, but just in case it does (e.g. on silence)
250 we fill the first bin with pulses. */ 236 we fill the first bin with pulses. */
251#ifdef FIXED_POINT_DEBUG 237#ifdef FIXED_POINT_DEBUG
252 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); 238 celt_sig_assert(pulsesLeft<=N+3);
253#endif 239#endif
254 if (pulsesLeft > N+3) 240 if (pulsesLeft > N+3)
255 { 241 {
@@ -260,12 +246,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
260 pulsesLeft=0; 246 pulsesLeft=0;
261 } 247 }
262 248
263 s = 1;
264 for (i=0;i<pulsesLeft;i++) 249 for (i=0;i<pulsesLeft;i++)
265 { 250 {
251 opus_val16 Rxy, Ryy;
266 int best_id; 252 int best_id;
267 opus_val32 best_num = -VERY_LARGE16; 253 opus_val32 best_num;
268 opus_val16 best_den = 0; 254 opus_val16 best_den;
269#ifdef FIXED_POINT 255#ifdef FIXED_POINT
270 int rshift; 256 int rshift;
271#endif 257#endif
@@ -275,10 +261,23 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
275 best_id = 0; 261 best_id = 0;
276 /* The squared magnitude term gets added anyway, so we might as well 262 /* The squared magnitude term gets added anyway, so we might as well
277 add it outside the loop */ 263 add it outside the loop */
278 yy = ADD32(yy, 1); 264 yy = ADD16(yy, 1);
279 j=0; 265
266 /* Calculations for position 0 are out of the loop, in part to reduce
267 mispredicted branches (since the if condition is usually false)
268 in the loop. */
269 /* Temporary sums of the new pulse(s) */
270 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
271 /* We're multiplying y[j] by two so we don't have to do it here */
272 Ryy = ADD16(yy, y[0]);
273
274 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
275 Rxy is positive because the sign is pre-computed) */
276 Rxy = MULT16_16_Q15(Rxy,Rxy);
277 best_den = Ryy;
278 best_num = Rxy;
279 j=1;
280 do { 280 do {
281 opus_val16 Rxy, Ryy;
282 /* Temporary sums of the new pulse(s) */ 281 /* Temporary sums of the new pulse(s) */
283 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); 282 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
284 /* We're multiplying y[j] by two so we don't have to do it here */ 283 /* We're multiplying y[j] by two so we don't have to do it here */
@@ -289,8 +288,11 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
289 Rxy = MULT16_16_Q15(Rxy,Rxy); 288 Rxy = MULT16_16_Q15(Rxy,Rxy);
290 /* The idea is to check for num/den >= best_num/best_den, but that way 289 /* The idea is to check for num/den >= best_num/best_den, but that way
291 we can do it without any division */ 290 we can do it without any division */
292 /* OPT: Make sure to use conditional moves here */ 291 /* OPT: It's not clear whether a cmov is faster than a branch here
293 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) 292 since the condition is more often false than true and using
293 a cmov introduces data dependencies across iterations. The optimal
294 choice may be architecture-dependent. */
295 if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
294 { 296 {
295 best_den = Ryy; 297 best_den = Ryy;
296 best_num = Rxy; 298 best_num = Rxy;
@@ -305,23 +307,47 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
305 307
306 /* Only now that we've made the final choice, update y/iy */ 308 /* Only now that we've made the final choice, update y/iy */
307 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ 309 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
308 y[best_id] += 2*s; 310 y[best_id] += 2;
309 iy[best_id]++; 311 iy[best_id]++;
310 } 312 }
311 313
312 /* Put the original sign back */ 314 /* Put the original sign back */
313 j=0; 315 j=0;
314 do { 316 do {
315 X[j] = MULT16_16(signx[j],X[j]); 317 /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
316 if (signx[j] < 0) 318 /* OPT: This is more likely to be compiled without a branch than the code above
317 iy[j] = -iy[j]; 319 but has the same performance otherwise. */
320 iy[j] = (iy[j]^-signx[j]) + signx[j];
318 } while (++j<N); 321 } while (++j<N);
322 RESTORE_STACK;
323 return yy;
324}
325
326unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
327 opus_val16 gain, int resynth, int arch)
328{
329 VARDECL(int, iy);
330 opus_val16 yy;
331 unsigned collapse_mask;
332 SAVE_STACK;
333
334 celt_assert2(K>0, "alg_quant() needs at least one pulse");
335 celt_assert2(N>1, "alg_quant() needs at least two dimensions");
336
337 /* Covers vectorization by up to 4. */
338 ALLOC(iy, N+3, int);
339
340 exp_rotation(X, N, 1, B, K, spread);
341
342 yy = op_pvq_search(X, iy, K, N, arch);
343
319 encode_pulses(iy, N, K, enc); 344 encode_pulses(iy, N, K, enc);
320 345
321#ifdef RESYNTH 346 if (resynth)
322 normalise_residual(iy, X, N, yy, gain); 347 {
323 exp_rotation(X, N, -1, B, K, spread); 348 normalise_residual(iy, X, N, yy, gain);
324#endif 349 exp_rotation(X, N, -1, B, K, spread);
350 }
325 351
326 collapse_mask = extract_collapse_mask(iy, N, B); 352 collapse_mask = extract_collapse_mask(iy, N, B);
327 RESTORE_STACK; 353 RESTORE_STACK;
@@ -350,7 +376,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
350} 376}
351 377
352#ifndef OVERRIDE_renormalise_vector 378#ifndef OVERRIDE_renormalise_vector
353void renormalise_vector(celt_norm *X, int N, opus_val16 gain) 379void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
354{ 380{
355 int i; 381 int i;
356#ifdef FIXED_POINT 382#ifdef FIXED_POINT
@@ -360,7 +386,7 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
360 opus_val16 g; 386 opus_val16 g;
361 opus_val32 t; 387 opus_val32 t;
362 celt_norm *xptr; 388 celt_norm *xptr;
363 E = EPSILON + celt_inner_prod(X, X, N); 389 E = EPSILON + celt_inner_prod(X, X, N, arch);
364#ifdef FIXED_POINT 390#ifdef FIXED_POINT
365 k = celt_ilog2(E)>>1; 391 k = celt_ilog2(E)>>1;
366#endif 392#endif
@@ -377,7 +403,7 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
377} 403}
378#endif /* OVERRIDE_renormalise_vector */ 404#endif /* OVERRIDE_renormalise_vector */
379 405
380int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N) 406int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
381{ 407{
382 int i; 408 int i;
383 int itheta; 409 int itheta;
@@ -396,8 +422,8 @@ int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
396 Eside = MAC16_16(Eside, s, s); 422 Eside = MAC16_16(Eside, s, s);
397 } 423 }
398 } else { 424 } else {
399 Emid += celt_inner_prod(X, X, N); 425 Emid += celt_inner_prod(X, X, N, arch);
400 Eside += celt_inner_prod(Y, Y, N); 426 Eside += celt_inner_prod(Y, Y, N, arch);
401 } 427 }
402 mid = celt_sqrt(Emid); 428 mid = celt_sqrt(Emid);
403 side = celt_sqrt(Eside); 429 side = celt_sqrt(Eside);
@@ -405,7 +431,7 @@ int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
405 /* 0.63662 = 2/pi */ 431 /* 0.63662 = 2/pi */
406 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); 432 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
407#else 433#else
408 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); 434 itheta = (int)floor(.5f+16384*0.63662f*fast_atan2f(side,mid));
409#endif 435#endif
410 436
411 return itheta; 437 return itheta;