diff options
author | Nils Wallménius <nils@rockbox.org> | 2014-01-19 16:31:59 +0100 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2014-07-13 11:12:40 +0200 |
commit | 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch) | |
tree | 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/vq.c | |
parent | e557951c94c1efa769900257e466900f0ffeb53b (diff) | |
download | rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip |
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c
This brings in a number of optimizations to decoding speed
and memory usage. Allocations are switched from using
the pseudostack to using the real stack, and hacks
to reduce stack usage are enabled.
This should fix crashes on the Sansa Clip, although some
files will not play because allocations in the codec
buffer fail.
Speeds up decoding of the following test files:
                   H300 (cf)    C200 (arm7tdmi)    iPod Classic (arm9e)
16 kbps (silk)     14.28 MHz    4.00 MHz           2.61 MHz
64 kbps (celt)      4.09 MHz    8.08 MHz           6.24 MHz
128 kbps (celt)     1.93 MHz    8.83 MHz           6.53 MHz
Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/vq.c')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/vq.c | 69 |
1 files changed, 31 insertions, 38 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c index af991bb052..b047b22774 100644 --- a/lib/rbcodec/codecs/libopus/celt/vq.c +++ b/lib/rbcodec/codecs/libopus/celt/vq.c | |||
@@ -37,19 +37,27 @@ | |||
37 | #include "os_support.h" | 37 | #include "os_support.h" |
38 | #include "bands.h" | 38 | #include "bands.h" |
39 | #include "rate.h" | 39 | #include "rate.h" |
40 | #include "pitch.h" | ||
40 | 41 | ||
42 | #if defined(MIPSr1_ASM) | ||
43 | #include "mips/vq_mipsr1.h" | ||
44 | #endif | ||
45 | |||
46 | #ifndef OVERRIDE_vq_exp_rotation1 | ||
41 | static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) | 47 | static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) |
42 | { | 48 | { |
43 | int i; | 49 | int i; |
50 | opus_val16 ms; | ||
44 | celt_norm *Xptr; | 51 | celt_norm *Xptr; |
45 | Xptr = X; | 52 | Xptr = X; |
53 | ms = NEG16(s); | ||
46 | for (i=0;i<len-stride;i++) | 54 | for (i=0;i<len-stride;i++) |
47 | { | 55 | { |
48 | celt_norm x1, x2; | 56 | celt_norm x1, x2; |
49 | x1 = Xptr[0]; | 57 | x1 = Xptr[0]; |
50 | x2 = Xptr[stride]; | 58 | x2 = Xptr[stride]; |
51 | Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); | 59 | Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); |
52 | *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); | 60 | *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); |
53 | } | 61 | } |
54 | Xptr = &X[len-2*stride-1]; | 62 | Xptr = &X[len-2*stride-1]; |
55 | for (i=len-2*stride-1;i>=0;i--) | 63 | for (i=len-2*stride-1;i>=0;i--) |
@@ -57,10 +65,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ | |||
57 | celt_norm x1, x2; | 65 | celt_norm x1, x2; |
58 | x1 = Xptr[0]; | 66 | x1 = Xptr[0]; |
59 | x2 = Xptr[stride]; | 67 | x2 = Xptr[stride]; |
60 | Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); | 68 | Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); |
61 | *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); | 69 | *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); |
62 | } | 70 | } |
63 | } | 71 | } |
72 | #endif /* OVERRIDE_vq_exp_rotation1 */ | ||
64 | 73 | ||
65 | static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) | 74 | static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) |
66 | { | 75 | { |
@@ -91,7 +100,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int | |||
91 | } | 100 | } |
92 | /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for | 101 | /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for |
93 | extract_collapse_mask().*/ | 102 | extract_collapse_mask().*/ |
94 | len /= stride; | 103 | len = celt_udiv(len, stride); |
95 | for (i=0;i<stride;i++) | 104 | for (i=0;i<stride;i++) |
96 | { | 105 | { |
97 | if (dir < 0) | 106 | if (dir < 0) |
@@ -140,13 +149,15 @@ static unsigned extract_collapse_mask(int *iy, int N, int B) | |||
140 | return 1; | 149 | return 1; |
141 | /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for | 150 | /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for |
142 | exp_rotation().*/ | 151 | exp_rotation().*/ |
143 | N0 = N/B; | 152 | N0 = celt_udiv(N, B); |
144 | collapse_mask = 0; | 153 | collapse_mask = 0; |
145 | i=0; do { | 154 | i=0; do { |
146 | int j; | 155 | int j; |
156 | unsigned tmp=0; | ||
147 | j=0; do { | 157 | j=0; do { |
148 | collapse_mask |= (iy[i*N0+j]!=0)<<i; | 158 | tmp |= iy[i*N0+j]; |
149 | } while (++j<N0); | 159 | } while (++j<N0); |
160 | collapse_mask |= (tmp!=0)<<i; | ||
150 | } while (++i<B); | 161 | } while (++i<B); |
151 | return collapse_mask; | 162 | return collapse_mask; |
152 | } | 163 | } |
@@ -322,47 +333,34 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc | |||
322 | unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, | 333 | unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, |
323 | ec_dec *dec, opus_val16 gain) | 334 | ec_dec *dec, opus_val16 gain) |
324 | { | 335 | { |
325 | int i; | ||
326 | opus_val32 Ryy; | 336 | opus_val32 Ryy; |
327 | unsigned collapse_mask; | 337 | unsigned collapse_mask; |
328 | /* VARDECL(int, iy); | 338 | VARDECL(int, iy); |
329 | SAVE_STACK; */ | 339 | SAVE_STACK; |
330 | |||
331 | /* the difference between the last two values of eband5ms shifted by maxLM | ||
332 | which is 22 << 3 with the static mode */ | ||
333 | int iy[176]; | ||
334 | 340 | ||
335 | celt_assert2(K>0, "alg_unquant() needs at least one pulse"); | 341 | celt_assert2(K>0, "alg_unquant() needs at least one pulse"); |
336 | celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); | 342 | celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); |
337 | /* ALLOC(iy, N, int); */ | 343 | ALLOC(iy, N, int); |
338 | decode_pulses(iy, N, K, dec); | 344 | Ryy = decode_pulses(iy, N, K, dec); |
339 | Ryy = 0; | ||
340 | i=0; | ||
341 | do { | ||
342 | Ryy = MAC16_16(Ryy, iy[i], iy[i]); | ||
343 | } while (++i < N); | ||
344 | normalise_residual(iy, X, N, Ryy, gain); | 345 | normalise_residual(iy, X, N, Ryy, gain); |
345 | exp_rotation(X, N, -1, B, K, spread); | 346 | exp_rotation(X, N, -1, B, K, spread); |
346 | collapse_mask = extract_collapse_mask(iy, N, B); | 347 | collapse_mask = extract_collapse_mask(iy, N, B); |
347 | /* RESTORE_STACK; */ | 348 | RESTORE_STACK; |
348 | return collapse_mask; | 349 | return collapse_mask; |
349 | } | 350 | } |
350 | 351 | ||
352 | #ifndef OVERRIDE_renormalise_vector | ||
351 | void renormalise_vector(celt_norm *X, int N, opus_val16 gain) | 353 | void renormalise_vector(celt_norm *X, int N, opus_val16 gain) |
352 | { | 354 | { |
353 | int i; | 355 | int i; |
354 | #ifdef FIXED_POINT | 356 | #ifdef FIXED_POINT |
355 | int k; | 357 | int k; |
356 | #endif | 358 | #endif |
357 | opus_val32 E = EPSILON; | 359 | opus_val32 E; |
358 | opus_val16 g; | 360 | opus_val16 g; |
359 | opus_val32 t; | 361 | opus_val32 t; |
360 | celt_norm *xptr = X; | 362 | celt_norm *xptr; |
361 | for (i=0;i<N;i++) | 363 | E = EPSILON + celt_inner_prod(X, X, N); |
362 | { | ||
363 | E = MAC16_16(E, *xptr, *xptr); | ||
364 | xptr++; | ||
365 | } | ||
366 | #ifdef FIXED_POINT | 364 | #ifdef FIXED_POINT |
367 | k = celt_ilog2(E)>>1; | 365 | k = celt_ilog2(E)>>1; |
368 | #endif | 366 | #endif |
@@ -377,8 +375,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain) | |||
377 | } | 375 | } |
378 | /*return celt_sqrt(E);*/ | 376 | /*return celt_sqrt(E);*/ |
379 | } | 377 | } |
378 | #endif /* OVERRIDE_renormalise_vector */ | ||
380 | 379 | ||
381 | int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) | 380 | int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N) |
382 | { | 381 | { |
383 | int i; | 382 | int i; |
384 | int itheta; | 383 | int itheta; |
@@ -397,14 +396,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) | |||
397 | Eside = MAC16_16(Eside, s, s); | 396 | Eside = MAC16_16(Eside, s, s); |
398 | } | 397 | } |
399 | } else { | 398 | } else { |
400 | for (i=0;i<N;i++) | 399 | Emid += celt_inner_prod(X, X, N); |
401 | { | 400 | Eside += celt_inner_prod(Y, Y, N); |
402 | celt_norm m, s; | ||
403 | m = X[i]; | ||
404 | s = Y[i]; | ||
405 | Emid = MAC16_16(Emid, m, m); | ||
406 | Eside = MAC16_16(Eside, s, s); | ||
407 | } | ||
408 | } | 401 | } |
409 | mid = celt_sqrt(Emid); | 402 | mid = celt_sqrt(Emid); |
410 | side = celt_sqrt(Eside); | 403 | side = celt_sqrt(Eside); |