diff options
author | Nils Wallménius <nils@rockbox.org> | 2014-01-19 16:31:59 +0100 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2014-07-13 11:12:40 +0200 |
commit | 9b7ec42403073ee887efc531c153e6b1b6c15bab (patch) | |
tree | 07e72fe9d817c65a6fede22955344a870842d5e6 /lib/rbcodec/codecs/libopus/celt/bands.c | |
parent | e557951c94c1efa769900257e466900f0ffeb53b (diff) | |
download | rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.tar.gz rockbox-9b7ec42403073ee887efc531c153e6b1b6c15bab.zip |
Sync to upstream libopus
Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c
This brings in a bunch of optimizations to decode speed
and memory usage. Allocations are switched from using
the pseudostack to using the real stack. Enabled hacks
to reduce stack usage.
This should fix crashes on sansa clip, although some
files will not play due to failing allocations in the
codec buffer.
Speeds up decoding of the following test files:
| Test file | H300 (cf) | C200 (arm7tdmi) | ipod classic (arm9e) |
|---|---|---|---|
| 16 kbps (silk) | 14.28 MHz | 4.00 MHz | 2.61 MHz |
| 64 kbps (celt) | 4.09 MHz | 8.08 MHz | 6.24 MHz |
| 128 kbps (celt) | 1.93 MHz | 8.83 MHz | 6.53 MHz |
Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/bands.c')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/bands.c | 215 |
1 files changed, 111 insertions, 104 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c index 1ad786d795..caa70163b4 100644 --- a/lib/rbcodec/codecs/libopus/celt/bands.c +++ b/lib/rbcodec/codecs/libopus/celt/bands.c | |||
@@ -93,11 +93,11 @@ static int bitexact_log2tan(int isin,int icos) | |||
93 | #if 0 | 93 | #if 0 |
94 | #ifdef FIXED_POINT | 94 | #ifdef FIXED_POINT |
95 | /* Compute the amplitude (sqrt energy) in each of the bands */ | 95 | /* Compute the amplitude (sqrt energy) in each of the bands */ |
96 | void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) | 96 | void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) |
97 | { | 97 | { |
98 | int i, c, N; | 98 | int i, c, N; |
99 | const opus_int16 *eBands = m->eBands; | 99 | const opus_int16 *eBands = m->eBands; |
100 | N = M*m->shortMdctSize; | 100 | N = m->shortMdctSize<<LM; |
101 | c=0; do { | 101 | c=0; do { |
102 | for (i=0;i<end;i++) | 102 | for (i=0;i<end;i++) |
103 | { | 103 | { |
@@ -105,18 +105,23 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band | |||
105 | opus_val32 maxval=0; | 105 | opus_val32 maxval=0; |
106 | opus_val32 sum = 0; | 106 | opus_val32 sum = 0; |
107 | 107 | ||
108 | j=M*eBands[i]; do { | 108 | maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); |
109 | maxval = MAX32(maxval, X[j+c*N]); | ||
110 | maxval = MAX32(maxval, -X[j+c*N]); | ||
111 | } while (++j<M*eBands[i+1]); | ||
112 | |||
113 | if (maxval > 0) | 109 | if (maxval > 0) |
114 | { | 110 | { |
115 | int shift = celt_ilog2(maxval)-10; | 111 | int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1); |
116 | j=M*eBands[i]; do { | 112 | j=eBands[i]<<LM; |
117 | sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), | 113 | if (shift>0) |
118 | EXTRACT16(VSHR32(X[j+c*N],shift))); | 114 | { |
119 | } while (++j<M*eBands[i+1]); | 115 | do { |
116 | sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), | ||
117 | EXTRACT16(SHR32(X[j+c*N],shift))); | ||
118 | } while (++j<eBands[i+1]<<LM); | ||
119 | } else { | ||
120 | do { | ||
121 | sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)), | ||
122 | EXTRACT16(SHL32(X[j+c*N],-shift))); | ||
123 | } while (++j<eBands[i+1]<<LM); | ||
124 | } | ||
120 | /* We're adding one here to ensure the normalized band isn't larger than unity norm */ | 125 | /* We're adding one here to ensure the normalized band isn't larger than unity norm */ |
121 | bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); | 126 | bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); |
122 | } else { | 127 | } else { |
@@ -151,18 +156,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel | |||
151 | 156 | ||
152 | #else /* FIXED_POINT */ | 157 | #else /* FIXED_POINT */ |
153 | /* Compute the amplitude (sqrt energy) in each of the bands */ | 158 | /* Compute the amplitude (sqrt energy) in each of the bands */ |
154 | void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) | 159 | void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) |
155 | { | 160 | { |
156 | int i, c, N; | 161 | int i, c, N; |
157 | const opus_int16 *eBands = m->eBands; | 162 | const opus_int16 *eBands = m->eBands; |
158 | N = M*m->shortMdctSize; | 163 | N = m->shortMdctSize<<LM; |
159 | c=0; do { | 164 | c=0; do { |
160 | for (i=0;i<end;i++) | 165 | for (i=0;i<end;i++) |
161 | { | 166 | { |
162 | int j; | 167 | opus_val32 sum; |
163 | opus_val32 sum = 1e-27f; | 168 | sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); |
164 | for (j=M*eBands[i];j<M*eBands[i+1];j++) | ||
165 | sum += X[j+c*N]*X[j+c*N]; | ||
166 | bandE[i+c*m->nbEBands] = celt_sqrt(sum); | 169 | bandE[i+c*m->nbEBands] = celt_sqrt(sum); |
167 | /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ | 170 | /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ |
168 | } | 171 | } |
@@ -192,74 +195,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel | |||
192 | 195 | ||
193 | /* De-normalise the energy to produce the synthesis from the unit-energy bands */ | 196 | /* De-normalise the energy to produce the synthesis from the unit-energy bands */ |
194 | void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, | 197 | void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, |
195 | celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) | 198 | celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, |
199 | int end, int M, int downsample, int silence) | ||
196 | { | 200 | { |
197 | int i, c, N; | 201 | int i, N; |
202 | int bound; | ||
203 | celt_sig * OPUS_RESTRICT f; | ||
204 | const celt_norm * OPUS_RESTRICT x; | ||
198 | const opus_int16 *eBands = m->eBands; | 205 | const opus_int16 *eBands = m->eBands; |
199 | N = M*m->shortMdctSize; | 206 | N = M*m->shortMdctSize; |
200 | celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); | 207 | bound = M*eBands[end]; |
201 | c=0; do { | 208 | if (downsample!=1) |
202 | celt_sig * OPUS_RESTRICT f; | 209 | bound = IMIN(bound, N/downsample); |
203 | const celt_norm * OPUS_RESTRICT x; | 210 | if (silence) |
204 | f = freq+c*N; | 211 | { |
205 | x = X+c*N+M*eBands[start]; | 212 | bound = 0; |
206 | for (i=0;i<M*eBands[start];i++) | 213 | start = end = 0; |
207 | *f++ = 0; | 214 | } |
208 | for (i=start;i<end;i++) | 215 | f = freq; |
209 | { | 216 | x = X+M*eBands[start]; |
210 | int j, band_end; | 217 | for (i=0;i<M*eBands[start];i++) |
211 | opus_val16 g; | 218 | *f++ = 0; |
212 | opus_val16 lg; | 219 | for (i=start;i<end;i++) |
220 | { | ||
221 | int j, band_end; | ||
222 | opus_val16 g; | ||
223 | opus_val16 lg; | ||
213 | #ifdef FIXED_POINT | 224 | #ifdef FIXED_POINT |
214 | int shift; | 225 | int shift; |
215 | #endif | 226 | #endif |
216 | j=M*eBands[i]; | 227 | j=M*eBands[i]; |
217 | band_end = M*eBands[i+1]; | 228 | band_end = M*eBands[i+1]; |
218 | lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6)); | 229 | lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); |
219 | #ifndef FIXED_POINT | 230 | #ifndef FIXED_POINT |
220 | g = celt_exp2(lg); | 231 | g = celt_exp2(lg); |
221 | #else | 232 | #else |
222 | /* Handle the integer part of the log energy */ | 233 | /* Handle the integer part of the log energy */ |
223 | shift = 16-(lg>>DB_SHIFT); | 234 | shift = 16-(lg>>DB_SHIFT); |
224 | if (shift>31) | 235 | if (shift>31) |
225 | { | 236 | { |
226 | shift=0; | 237 | shift=0; |
227 | g=0; | 238 | g=0; |
228 | } else { | 239 | } else { |
229 | /* Handle the fractional part. */ | 240 | /* Handle the fractional part. */ |
230 | g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); | 241 | g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); |
231 | } | 242 | } |
232 | /* Handle extreme gains with negative shift. */ | 243 | /* Handle extreme gains with negative shift. */ |
233 | if (shift<0) | 244 | if (shift<0) |
234 | { | 245 | { |
235 | /* For shift < -2 we'd be likely to overflow, so we're capping | 246 | /* For shift < -2 we'd be likely to overflow, so we're capping |
236 | the gain here. This shouldn't happen unless the bitstream is | 247 | the gain here. This shouldn't happen unless the bitstream is |
237 | already corrupted. */ | 248 | already corrupted. */ |
238 | if (shift < -2) | 249 | if (shift < -2) |
239 | { | 250 | { |
240 | g = 32767; | 251 | g = 32767; |
241 | shift = -2; | 252 | shift = -2; |
242 | } | 253 | } |
243 | do { | 254 | do { |
244 | *f++ = SHL32(MULT16_16(*x++, g), -shift); | 255 | *f++ = SHL32(MULT16_16(*x++, g), -shift); |
245 | } while (++j<band_end); | 256 | } while (++j<band_end); |
246 | } else | 257 | } else |
247 | #endif | 258 | #endif |
248 | /* Be careful of the fixed-point "else" just above when changing this code */ | 259 | /* Be careful of the fixed-point "else" just above when changing this code */ |
249 | do { | 260 | do { |
250 | *f++ = SHR32(MULT16_16(*x++, g), shift); | 261 | *f++ = SHR32(MULT16_16(*x++, g), shift); |
251 | } while (++j<band_end); | 262 | } while (++j<band_end); |
252 | } | 263 | } |
253 | celt_assert(start <= end); | 264 | celt_assert(start <= end); |
254 | for (i=M*eBands[end];i<N;i++) | 265 | OPUS_CLEAR(&freq[bound], N-bound); |
255 | *f++ = 0; | ||
256 | } while (++c<C); | ||
257 | } | 266 | } |
258 | 267 | ||
259 | /* This prevents energy collapse for transients with multiple short MDCTs */ | 268 | /* This prevents energy collapse for transients with multiple short MDCTs */ |
260 | void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, | 269 | void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, |
261 | int start, int end, opus_val16 *logE, opus_val16 *prev1logE, | 270 | int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, |
262 | opus_val16 *prev2logE, int *pulses, opus_uint32 seed) | 271 | const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed) |
263 | { | 272 | { |
264 | int c, i, j, k; | 273 | int c, i, j, k; |
265 | for (i=start;i<end;i++) | 274 | for (i=start;i<end;i++) |
@@ -274,7 +283,8 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas | |||
274 | 283 | ||
275 | N0 = m->eBands[i+1]-m->eBands[i]; | 284 | N0 = m->eBands[i+1]-m->eBands[i]; |
276 | /* depth in 1/8 bits */ | 285 | /* depth in 1/8 bits */ |
277 | depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM); | 286 | celt_assert(pulses[i]>=0); |
287 | depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; | ||
278 | 288 | ||
279 | #ifdef FIXED_POINT | 289 | #ifdef FIXED_POINT |
280 | thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); | 290 | thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); |
@@ -352,7 +362,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas | |||
352 | } | 362 | } |
353 | } | 363 | } |
354 | 364 | ||
355 | static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N) | 365 | static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) |
356 | { | 366 | { |
357 | int i = bandID; | 367 | int i = bandID; |
358 | int j; | 368 | int j; |
@@ -372,25 +382,25 @@ static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, cons | |||
372 | celt_norm r, l; | 382 | celt_norm r, l; |
373 | l = X[j]; | 383 | l = X[j]; |
374 | r = Y[j]; | 384 | r = Y[j]; |
375 | X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r); | 385 | X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14)); |
376 | /* Side is not encoded, no need to calculate */ | 386 | /* Side is not encoded, no need to calculate */ |
377 | } | 387 | } |
378 | } | 388 | } |
379 | 389 | ||
380 | static void stereo_split(celt_norm *X, celt_norm *Y, int N) | 390 | static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N) |
381 | { | 391 | { |
382 | int j; | 392 | int j; |
383 | for (j=0;j<N;j++) | 393 | for (j=0;j<N;j++) |
384 | { | 394 | { |
385 | celt_norm r, l; | 395 | opus_val32 r, l; |
386 | l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]); | 396 | l = MULT16_16(QCONST16(.70710678f, 15), X[j]); |
387 | r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]); | 397 | r = MULT16_16(QCONST16(.70710678f, 15), Y[j]); |
388 | X[j] = l+r; | 398 | X[j] = EXTRACT16(SHR32(ADD32(l, r), 15)); |
389 | Y[j] = r-l; | 399 | Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15)); |
390 | } | 400 | } |
391 | } | 401 | } |
392 | 402 | ||
393 | static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) | 403 | static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N) |
394 | { | 404 | { |
395 | int j; | 405 | int j; |
396 | opus_val32 xp=0, side=0; | 406 | opus_val32 xp=0, side=0; |
@@ -411,8 +421,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) | |||
411 | Er = MULT16_16(mid2, mid2) + side + 2*xp; | 421 | Er = MULT16_16(mid2, mid2) + side + 2*xp; |
412 | if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) | 422 | if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) |
413 | { | 423 | { |
414 | for (j=0;j<N;j++) | 424 | OPUS_COPY(Y, X, N); |
415 | Y[j] = X[j]; | ||
416 | return; | 425 | return; |
417 | } | 426 | } |
418 | 427 | ||
@@ -436,7 +445,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) | |||
436 | { | 445 | { |
437 | celt_norm r, l; | 446 | celt_norm r, l; |
438 | /* Apply mid scaling (side is already scaled) */ | 447 | /* Apply mid scaling (side is already scaled) */ |
439 | l = MULT16_16_Q15(mid, X[j]); | 448 | l = MULT16_16_P15(mid, X[j]); |
440 | r = Y[j]; | 449 | r = Y[j]; |
441 | X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1)); | 450 | X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1)); |
442 | Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1)); | 451 | Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1)); |
@@ -445,7 +454,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) | |||
445 | 454 | ||
446 | #if 0 | 455 | #if 0 |
447 | /* Decide whether we should spread the pulses in the current frame */ | 456 | /* Decide whether we should spread the pulses in the current frame */ |
448 | int spreading_decision(const CELTMode *m, celt_norm *X, int *average, | 457 | int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, |
449 | int last_decision, int *hf_average, int *tapset_decision, int update_hf, | 458 | int last_decision, int *hf_average, int *tapset_decision, int update_hf, |
450 | int end, int C, int M) | 459 | int end, int C, int M) |
451 | { | 460 | { |
@@ -466,7 +475,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, | |||
466 | { | 475 | { |
467 | int j, N, tmp=0; | 476 | int j, N, tmp=0; |
468 | int tcount[3] = {0,0,0}; | 477 | int tcount[3] = {0,0,0}; |
469 | celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; | 478 | const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; |
470 | N = M*(eBands[i+1]-eBands[i]); | 479 | N = M*(eBands[i+1]-eBands[i]); |
471 | if (N<=8) | 480 | if (N<=8) |
472 | continue; | 481 | continue; |
@@ -486,7 +495,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, | |||
486 | 495 | ||
487 | /* Only include four last bands (8 kHz and up) */ | 496 | /* Only include four last bands (8 kHz and up) */ |
488 | if (i>m->nbEBands-4) | 497 | if (i>m->nbEBands-4) |
489 | hf_sum += 32*(tcount[1]+tcount[0])/N; | 498 | hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); |
490 | tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); | 499 | tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); |
491 | sum += tmp*256; | 500 | sum += tmp*256; |
492 | nbBands++; | 501 | nbBands++; |
@@ -496,7 +505,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, | |||
496 | if (update_hf) | 505 | if (update_hf) |
497 | { | 506 | { |
498 | if (hf_sum) | 507 | if (hf_sum) |
499 | hf_sum /= C*(4-m->nbEBands+end); | 508 | hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); |
500 | *hf_average = (*hf_average+hf_sum)>>1; | 509 | *hf_average = (*hf_average+hf_sum)>>1; |
501 | hf_sum = *hf_average; | 510 | hf_sum = *hf_average; |
502 | if (*tapset_decision==2) | 511 | if (*tapset_decision==2) |
@@ -512,7 +521,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, | |||
512 | } | 521 | } |
513 | /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ | 522 | /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ |
514 | celt_assert(nbBands>0); /* end has to be non-zero */ | 523 | celt_assert(nbBands>0); /* end has to be non-zero */ |
515 | sum /= nbBands; | 524 | celt_assert(sum>=0); |
525 | sum = celt_udiv(sum, nbBands); | ||
516 | /* Recursive averaging */ | 526 | /* Recursive averaging */ |
517 | sum = (sum+*average)>>1; | 527 | sum = (sum+*average)>>1; |
518 | *average = sum; | 528 | *average = sum; |
@@ -571,8 +581,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard | |||
571 | for (j=0;j<N0;j++) | 581 | for (j=0;j<N0;j++) |
572 | tmp[i*N0+j] = X[j*stride+i]; | 582 | tmp[i*N0+j] = X[j*stride+i]; |
573 | } | 583 | } |
574 | for (j=0;j<N;j++) | 584 | OPUS_COPY(X, tmp, N); |
575 | X[j] = tmp[j]; | ||
576 | RESTORE_STACK; | 585 | RESTORE_STACK; |
577 | } | 586 | } |
578 | 587 | ||
@@ -595,8 +604,7 @@ static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) | |||
595 | for (j=0;j<N0;j++) | 604 | for (j=0;j<N0;j++) |
596 | tmp[j*stride+i] = X[i*N0+j]; | 605 | tmp[j*stride+i] = X[i*N0+j]; |
597 | } | 606 | } |
598 | for (j=0;j<N;j++) | 607 | OPUS_COPY(X, tmp, N); |
599 | X[j] = tmp[j]; | ||
600 | RESTORE_STACK; | 608 | RESTORE_STACK; |
601 | } | 609 | } |
602 | 610 | ||
@@ -607,11 +615,11 @@ void haar1(celt_norm *X, int N0, int stride) | |||
607 | for (i=0;i<stride;i++) | 615 | for (i=0;i<stride;i++) |
608 | for (j=0;j<N0;j++) | 616 | for (j=0;j<N0;j++) |
609 | { | 617 | { |
610 | celt_norm tmp1, tmp2; | 618 | opus_val32 tmp1, tmp2; |
611 | tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]); | 619 | tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]); |
612 | tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); | 620 | tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); |
613 | X[stride*2*j+i] = tmp1 + tmp2; | 621 | X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15)); |
614 | X[stride*(2*j+1)+i] = tmp1 - tmp2; | 622 | X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15)); |
615 | } | 623 | } |
616 | } | 624 | } |
617 | 625 | ||
@@ -626,7 +634,8 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo) | |||
626 | /* The upper limit ensures that in a stereo split with itheta==16384, we'll | 634 | /* The upper limit ensures that in a stereo split with itheta==16384, we'll |
627 | always have enough bits left over to code at least one pulse in the | 635 | always have enough bits left over to code at least one pulse in the |
628 | side; otherwise it would collapse, since it doesn't get folded. */ | 636 | side; otherwise it would collapse, since it doesn't get folded. */ |
629 | qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2); | 637 | qb = celt_sudiv(b+N2*offset, N2); |
638 | qb = IMIN(b-pulse_cap-(4<<BITRES), qb); | ||
630 | 639 | ||
631 | qb = IMIN(8<<BITRES, qb); | 640 | qb = IMIN(8<<BITRES, qb); |
632 | 641 | ||
@@ -773,7 +782,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, | |||
773 | ec_dec_update(ec, fl, fl+fs, ft); | 782 | ec_dec_update(ec, fl, fl+fs, ft); |
774 | } | 783 | } |
775 | } | 784 | } |
776 | itheta = (opus_int32)itheta*16384/qn; | 785 | celt_assert(itheta>=0); |
786 | itheta = celt_udiv((opus_int32)itheta*16384, qn); | ||
777 | if (encode && stereo) | 787 | if (encode && stereo) |
778 | { | 788 | { |
779 | if (itheta==0) | 789 | if (itheta==0) |
@@ -1025,8 +1035,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, | |||
1025 | fill &= cm_mask; | 1035 | fill &= cm_mask; |
1026 | if (!fill) | 1036 | if (!fill) |
1027 | { | 1037 | { |
1028 | for (j=0;j<N;j++) | 1038 | OPUS_CLEAR(X, N); |
1029 | X[j] = 0; | ||
1030 | } else { | 1039 | } else { |
1031 | if (lowband == NULL) | 1040 | if (lowband == NULL) |
1032 | { | 1041 | { |
@@ -1088,7 +1097,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, | |||
1088 | 1097 | ||
1089 | longBlocks = B0==1; | 1098 | longBlocks = B0==1; |
1090 | 1099 | ||
1091 | N_B /= B; | 1100 | N_B = celt_udiv(N_B, B); |
1092 | 1101 | ||
1093 | /* Special case for one sample */ | 1102 | /* Special case for one sample */ |
1094 | if (N==1) | 1103 | if (N==1) |
@@ -1102,9 +1111,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, | |||
1102 | 1111 | ||
1103 | if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) | 1112 | if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) |
1104 | { | 1113 | { |
1105 | int j; | 1114 | OPUS_COPY(lowband_scratch, lowband, N); |
1106 | for (j=0;j<N;j++) | ||
1107 | lowband_scratch[j] = lowband[j]; | ||
1108 | lowband = lowband_scratch; | 1115 | lowband = lowband_scratch; |
1109 | } | 1116 | } |
1110 | 1117 | ||
@@ -1432,7 +1439,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, | |||
1432 | ctx.remaining_bits = remaining_bits; | 1439 | ctx.remaining_bits = remaining_bits; |
1433 | if (i <= codedBands-1) | 1440 | if (i <= codedBands-1) |
1434 | { | 1441 | { |
1435 | curr_balance = balance / IMIN(3, codedBands-i); | 1442 | curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i)); |
1436 | b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance))); | 1443 | b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance))); |
1437 | } else { | 1444 | } else { |
1438 | b = 0; | 1445 | b = 0; |