summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libopus/celt/vq.c
diff options
context:
space:
mode:
authorNils Wallménius <nils@rockbox.org>2014-01-19 16:31:59 +0100
committerNils Wallménius <nils@rockbox.org>2014-07-13 11:12:40 +0200
commit9b7ec42403073ee887efc531c153e6b1b6c15bab (patch)
tree07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/vq.c
parente557951c94c1efa769900257e466900f0ffeb53b (diff)
downloadrockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz
rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c

This brings in a bunch of optimizations to decode speed and memory usage.
Allocations are switched from using the pseudostack to using the real stack.
Enabled hacks to reduce stack usage. This should fix crashes on sansa clip,
although some files will not play due to failing allocations in the codec
buffer.

Speeds up decoding of the following test files:

                    H300 (cf)    C200 (arm7tdmi)    ipod classic (arm9e)
16 kbps (silk)      14.28 MHz    4.00 MHz           2.61 MHz
64 kbps (celt)      4.09 MHz     8.08 MHz           6.24 MHz
128 kbps (celt)     1.93 MHz     8.83 MHz           6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/vq.c')
-rw-r--r--lib/rbcodec/codecs/libopus/celt/vq.c69
1 files changed, 31 insertions, 38 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c
index af991bb052..b047b22774 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.c
+++ b/lib/rbcodec/codecs/libopus/celt/vq.c
@@ -37,19 +37,27 @@
37#include "os_support.h" 37#include "os_support.h"
38#include "bands.h" 38#include "bands.h"
39#include "rate.h" 39#include "rate.h"
40#include "pitch.h"
40 41
42#if defined(MIPSr1_ASM)
43#include "mips/vq_mipsr1.h"
44#endif
45
46#ifndef OVERRIDE_vq_exp_rotation1
41static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) 47static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
42{ 48{
43 int i; 49 int i;
50 opus_val16 ms;
44 celt_norm *Xptr; 51 celt_norm *Xptr;
45 Xptr = X; 52 Xptr = X;
53 ms = NEG16(s);
46 for (i=0;i<len-stride;i++) 54 for (i=0;i<len-stride;i++)
47 { 55 {
48 celt_norm x1, x2; 56 celt_norm x1, x2;
49 x1 = Xptr[0]; 57 x1 = Xptr[0];
50 x2 = Xptr[stride]; 58 x2 = Xptr[stride];
51 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); 59 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
52 *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); 60 *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
53 } 61 }
54 Xptr = &X[len-2*stride-1]; 62 Xptr = &X[len-2*stride-1];
55 for (i=len-2*stride-1;i>=0;i--) 63 for (i=len-2*stride-1;i>=0;i--)
@@ -57,10 +65,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
57 celt_norm x1, x2; 65 celt_norm x1, x2;
58 x1 = Xptr[0]; 66 x1 = Xptr[0];
59 x2 = Xptr[stride]; 67 x2 = Xptr[stride];
60 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); 68 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
61 *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); 69 *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
62 } 70 }
63} 71}
72#endif /* OVERRIDE_vq_exp_rotation1 */
64 73
65static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) 74static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
66{ 75{
@@ -91,7 +100,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int
91 } 100 }
92 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 101 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
93 extract_collapse_mask().*/ 102 extract_collapse_mask().*/
94 len /= stride; 103 len = celt_udiv(len, stride);
95 for (i=0;i<stride;i++) 104 for (i=0;i<stride;i++)
96 { 105 {
97 if (dir < 0) 106 if (dir < 0)
@@ -140,13 +149,15 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
140 return 1; 149 return 1;
141 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 150 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
142 exp_rotation().*/ 151 exp_rotation().*/
143 N0 = N/B; 152 N0 = celt_udiv(N, B);
144 collapse_mask = 0; 153 collapse_mask = 0;
145 i=0; do { 154 i=0; do {
146 int j; 155 int j;
156 unsigned tmp=0;
147 j=0; do { 157 j=0; do {
148 collapse_mask |= (iy[i*N0+j]!=0)<<i; 158 tmp |= iy[i*N0+j];
149 } while (++j<N0); 159 } while (++j<N0);
160 collapse_mask |= (tmp!=0)<<i;
150 } while (++i<B); 161 } while (++i<B);
151 return collapse_mask; 162 return collapse_mask;
152} 163}
@@ -322,47 +333,34 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
322unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, 333unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
323 ec_dec *dec, opus_val16 gain) 334 ec_dec *dec, opus_val16 gain)
324{ 335{
325 int i;
326 opus_val32 Ryy; 336 opus_val32 Ryy;
327 unsigned collapse_mask; 337 unsigned collapse_mask;
328/* VARDECL(int, iy); 338 VARDECL(int, iy);
329 SAVE_STACK; */ 339 SAVE_STACK;
330
331 /* the difference between the last two values of eband5ms shifted by maxLM
332 which is 22 << 3 with the static mode */
333 int iy[176];
334 340
335 celt_assert2(K>0, "alg_unquant() needs at least one pulse"); 341 celt_assert2(K>0, "alg_unquant() needs at least one pulse");
336 celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); 342 celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
337/* ALLOC(iy, N, int); */ 343 ALLOC(iy, N, int);
338 decode_pulses(iy, N, K, dec); 344 Ryy = decode_pulses(iy, N, K, dec);
339 Ryy = 0;
340 i=0;
341 do {
342 Ryy = MAC16_16(Ryy, iy[i], iy[i]);
343 } while (++i < N);
344 normalise_residual(iy, X, N, Ryy, gain); 345 normalise_residual(iy, X, N, Ryy, gain);
345 exp_rotation(X, N, -1, B, K, spread); 346 exp_rotation(X, N, -1, B, K, spread);
346 collapse_mask = extract_collapse_mask(iy, N, B); 347 collapse_mask = extract_collapse_mask(iy, N, B);
347/* RESTORE_STACK; */ 348 RESTORE_STACK;
348 return collapse_mask; 349 return collapse_mask;
349} 350}
350 351
352#ifndef OVERRIDE_renormalise_vector
351void renormalise_vector(celt_norm *X, int N, opus_val16 gain) 353void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
352{ 354{
353 int i; 355 int i;
354#ifdef FIXED_POINT 356#ifdef FIXED_POINT
355 int k; 357 int k;
356#endif 358#endif
357 opus_val32 E = EPSILON; 359 opus_val32 E;
358 opus_val16 g; 360 opus_val16 g;
359 opus_val32 t; 361 opus_val32 t;
360 celt_norm *xptr = X; 362 celt_norm *xptr;
361 for (i=0;i<N;i++) 363 E = EPSILON + celt_inner_prod(X, X, N);
362 {
363 E = MAC16_16(E, *xptr, *xptr);
364 xptr++;
365 }
366#ifdef FIXED_POINT 364#ifdef FIXED_POINT
367 k = celt_ilog2(E)>>1; 365 k = celt_ilog2(E)>>1;
368#endif 366#endif
@@ -377,8 +375,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
377 } 375 }
378 /*return celt_sqrt(E);*/ 376 /*return celt_sqrt(E);*/
379} 377}
378#endif /* OVERRIDE_renormalise_vector */
380 379
381int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) 380int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
382{ 381{
383 int i; 382 int i;
384 int itheta; 383 int itheta;
@@ -397,14 +396,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
397 Eside = MAC16_16(Eside, s, s); 396 Eside = MAC16_16(Eside, s, s);
398 } 397 }
399 } else { 398 } else {
400 for (i=0;i<N;i++) 399 Emid += celt_inner_prod(X, X, N);
401 { 400 Eside += celt_inner_prod(Y, Y, N);
402 celt_norm m, s;
403 m = X[i];
404 s = Y[i];
405 Emid = MAC16_16(Emid, m, m);
406 Eside = MAC16_16(Eside, s, s);
407 }
408 } 401 }
409 mid = celt_sqrt(Emid); 402 mid = celt_sqrt(Emid);
410 side = celt_sqrt(Eside); 403 side = celt_sqrt(Eside);