diff options
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/vq.c')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/vq.c | 415 |
1 files changed, 415 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c new file mode 100644 index 0000000000..6a00edf9cd --- /dev/null +++ b/lib/rbcodec/codecs/libopus/celt/vq.c | |||
@@ -0,0 +1,415 @@ | |||
1 | /* Copyright (c) 2007-2008 CSIRO | ||
2 | Copyright (c) 2007-2009 Xiph.Org Foundation | ||
3 | Written by Jean-Marc Valin */ | ||
4 | /* | ||
5 | Redistribution and use in source and binary forms, with or without | ||
6 | modification, are permitted provided that the following conditions | ||
7 | are met: | ||
8 | |||
9 | - Redistributions of source code must retain the above copyright | ||
10 | notice, this list of conditions and the following disclaimer. | ||
11 | |||
12 | - Redistributions in binary form must reproduce the above copyright | ||
13 | notice, this list of conditions and the following disclaimer in the | ||
14 | documentation and/or other materials provided with the distribution. | ||
15 | |||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
17 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
20 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
21 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
22 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
23 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
27 | */ | ||
28 | |||
29 | #ifdef HAVE_CONFIG_H | ||
30 | #include "opus_config.h" | ||
31 | #endif | ||
32 | |||
33 | #include "mathops.h" | ||
34 | #include "cwrs.h" | ||
35 | #include "vq.h" | ||
36 | #include "arch.h" | ||
37 | #include "os_support.h" | ||
38 | #include "bands.h" | ||
39 | #include "rate.h" | ||
40 | |||
41 | static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) | ||
42 | { | ||
43 | int i; | ||
44 | celt_norm *Xptr; | ||
45 | Xptr = X; | ||
46 | for (i=0;i<len-stride;i++) | ||
47 | { | ||
48 | celt_norm x1, x2; | ||
49 | x1 = Xptr[0]; | ||
50 | x2 = Xptr[stride]; | ||
51 | Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); | ||
52 | *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); | ||
53 | } | ||
54 | Xptr = &X[len-2*stride-1]; | ||
55 | for (i=len-2*stride-1;i>=0;i--) | ||
56 | { | ||
57 | celt_norm x1, x2; | ||
58 | x1 = Xptr[0]; | ||
59 | x2 = Xptr[stride]; | ||
60 | Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); | ||
61 | *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); | ||
62 | } | ||
63 | } | ||
64 | |||
65 | static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) | ||
66 | { | ||
67 | static const int SPREAD_FACTOR[3]={15,10,5}; | ||
68 | int i; | ||
69 | opus_val16 c, s; | ||
70 | opus_val16 gain, theta; | ||
71 | int stride2=0; | ||
72 | int factor; | ||
73 | |||
74 | if (2*K>=len || spread==SPREAD_NONE) | ||
75 | return; | ||
76 | factor = SPREAD_FACTOR[spread-1]; | ||
77 | |||
78 | gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); | ||
79 | theta = HALF16(MULT16_16_Q15(gain,gain)); | ||
80 | |||
81 | c = celt_cos_norm(EXTEND32(theta)); | ||
82 | s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ | ||
83 | |||
84 | if (len>=8*stride) | ||
85 | { | ||
86 | stride2 = 1; | ||
87 | /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. | ||
88 | It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ | ||
89 | while ((stride2*stride2+stride2)*stride + (stride>>2) < len) | ||
90 | stride2++; | ||
91 | } | ||
92 | /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for | ||
93 | extract_collapse_mask().*/ | ||
94 | len /= stride; | ||
95 | for (i=0;i<stride;i++) | ||
96 | { | ||
97 | if (dir < 0) | ||
98 | { | ||
99 | if (stride2) | ||
100 | exp_rotation1(X+i*len, len, stride2, s, c); | ||
101 | exp_rotation1(X+i*len, len, 1, c, s); | ||
102 | } else { | ||
103 | exp_rotation1(X+i*len, len, 1, c, -s); | ||
104 | if (stride2) | ||
105 | exp_rotation1(X+i*len, len, stride2, s, -c); | ||
106 | } | ||
107 | } | ||
108 | } | ||
109 | |||
110 | /** Takes the pitch vector and the decoded residual vector, computes the gain | ||
111 | that will give ||p+g*y||=1 and mixes the residual with the pitch. */ | ||
112 | static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, | ||
113 | int N, opus_val32 Ryy, opus_val16 gain) | ||
114 | { | ||
115 | int i; | ||
116 | #ifdef FIXED_POINT | ||
117 | int k; | ||
118 | #endif | ||
119 | opus_val32 t; | ||
120 | opus_val16 g; | ||
121 | |||
122 | #ifdef FIXED_POINT | ||
123 | k = celt_ilog2(Ryy)>>1; | ||
124 | #endif | ||
125 | t = VSHR32(Ryy, 2*(k-7)); | ||
126 | g = MULT16_16_P15(celt_rsqrt_norm(t),gain); | ||
127 | |||
128 | i=0; | ||
129 | do | ||
130 | X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1)); | ||
131 | while (++i < N); | ||
132 | } | ||
133 | |||
134 | static unsigned extract_collapse_mask(int *iy, int N, int B) | ||
135 | { | ||
136 | unsigned collapse_mask; | ||
137 | int N0; | ||
138 | int i; | ||
139 | if (B<=1) | ||
140 | return 1; | ||
141 | /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for | ||
142 | exp_rotation().*/ | ||
143 | N0 = N/B; | ||
144 | collapse_mask = 0; | ||
145 | i=0; do { | ||
146 | int j; | ||
147 | j=0; do { | ||
148 | collapse_mask |= (iy[i*N0+j]!=0)<<i; | ||
149 | } while (++j<N0); | ||
150 | } while (++i<B); | ||
151 | return collapse_mask; | ||
152 | } | ||
153 | |||
154 | unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc | ||
155 | #ifdef RESYNTH | ||
156 | , opus_val16 gain | ||
157 | #endif | ||
158 | ) | ||
159 | { | ||
160 | VARDECL(celt_norm, y); | ||
161 | VARDECL(int, iy); | ||
162 | VARDECL(opus_val16, signx); | ||
163 | int i, j; | ||
164 | opus_val16 s; | ||
165 | int pulsesLeft; | ||
166 | opus_val32 sum; | ||
167 | opus_val32 xy; | ||
168 | opus_val16 yy; | ||
169 | unsigned collapse_mask; | ||
170 | SAVE_STACK; | ||
171 | |||
172 | celt_assert2(K>0, "alg_quant() needs at least one pulse"); | ||
173 | celt_assert2(N>1, "alg_quant() needs at least two dimensions"); | ||
174 | |||
175 | ALLOC(y, N, celt_norm); | ||
176 | ALLOC(iy, N, int); | ||
177 | ALLOC(signx, N, opus_val16); | ||
178 | |||
179 | exp_rotation(X, N, 1, B, K, spread); | ||
180 | |||
181 | /* Get rid of the sign */ | ||
182 | sum = 0; | ||
183 | j=0; do { | ||
184 | if (X[j]>0) | ||
185 | signx[j]=1; | ||
186 | else { | ||
187 | signx[j]=-1; | ||
188 | X[j]=-X[j]; | ||
189 | } | ||
190 | iy[j] = 0; | ||
191 | y[j] = 0; | ||
192 | } while (++j<N); | ||
193 | |||
194 | xy = yy = 0; | ||
195 | |||
196 | pulsesLeft = K; | ||
197 | |||
198 | /* Do a pre-search by projecting on the pyramid */ | ||
199 | if (K > (N>>1)) | ||
200 | { | ||
201 | opus_val16 rcp; | ||
202 | j=0; do { | ||
203 | sum += X[j]; | ||
204 | } while (++j<N); | ||
205 | |||
206 | /* If X is too small, just replace it with a pulse at 0 */ | ||
207 | #ifdef FIXED_POINT | ||
208 | if (sum <= K) | ||
209 | #else | ||
210 | /* Prevents infinities and NaNs from causing too many pulses | ||
211 | to be allocated. 64 is an approximation of infinity here. */ | ||
212 | if (!(sum > EPSILON && sum < 64)) | ||
213 | #endif | ||
214 | { | ||
215 | X[0] = QCONST16(1.f,14); | ||
216 | j=1; do | ||
217 | X[j]=0; | ||
218 | while (++j<N); | ||
219 | sum = QCONST16(1.f,14); | ||
220 | } | ||
221 | rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); | ||
222 | j=0; do { | ||
223 | #ifdef FIXED_POINT | ||
224 | /* It's really important to round *towards zero* here */ | ||
225 | iy[j] = MULT16_16_Q15(X[j],rcp); | ||
226 | #else | ||
227 | iy[j] = (int)floor(rcp*X[j]); | ||
228 | #endif | ||
229 | y[j] = (celt_norm)iy[j]; | ||
230 | yy = MAC16_16(yy, y[j],y[j]); | ||
231 | xy = MAC16_16(xy, X[j],y[j]); | ||
232 | y[j] *= 2; | ||
233 | pulsesLeft -= iy[j]; | ||
234 | } while (++j<N); | ||
235 | } | ||
236 | celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); | ||
237 | |||
238 | /* This should never happen, but just in case it does (e.g. on silence) | ||
239 | we fill the first bin with pulses. */ | ||
240 | #ifdef FIXED_POINT_DEBUG | ||
241 | celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); | ||
242 | #endif | ||
243 | if (pulsesLeft > N+3) | ||
244 | { | ||
245 | opus_val16 tmp = (opus_val16)pulsesLeft; | ||
246 | yy = MAC16_16(yy, tmp, tmp); | ||
247 | yy = MAC16_16(yy, tmp, y[0]); | ||
248 | iy[0] += pulsesLeft; | ||
249 | pulsesLeft=0; | ||
250 | } | ||
251 | |||
252 | s = 1; | ||
253 | for (i=0;i<pulsesLeft;i++) | ||
254 | { | ||
255 | int best_id; | ||
256 | opus_val32 best_num = -VERY_LARGE16; | ||
257 | opus_val16 best_den = 0; | ||
258 | #ifdef FIXED_POINT | ||
259 | int rshift; | ||
260 | #endif | ||
261 | #ifdef FIXED_POINT | ||
262 | rshift = 1+celt_ilog2(K-pulsesLeft+i+1); | ||
263 | #endif | ||
264 | best_id = 0; | ||
265 | /* The squared magnitude term gets added anyway, so we might as well | ||
266 | add it outside the loop */ | ||
267 | yy = ADD32(yy, 1); | ||
268 | j=0; | ||
269 | do { | ||
270 | opus_val16 Rxy, Ryy; | ||
271 | /* Temporary sums of the new pulse(s) */ | ||
272 | Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); | ||
273 | /* We're multiplying y[j] by two so we don't have to do it here */ | ||
274 | Ryy = ADD16(yy, y[j]); | ||
275 | |||
276 | /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that | ||
277 | Rxy is positive because the sign is pre-computed) */ | ||
278 | Rxy = MULT16_16_Q15(Rxy,Rxy); | ||
279 | /* The idea is to check for num/den >= best_num/best_den, but that way | ||
280 | we can do it without any division */ | ||
281 | /* OPT: Make sure to use conditional moves here */ | ||
282 | if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) | ||
283 | { | ||
284 | best_den = Ryy; | ||
285 | best_num = Rxy; | ||
286 | best_id = j; | ||
287 | } | ||
288 | } while (++j<N); | ||
289 | |||
290 | /* Updating the sums of the new pulse(s) */ | ||
291 | xy = ADD32(xy, EXTEND32(X[best_id])); | ||
292 | /* We're multiplying y[j] by two so we don't have to do it here */ | ||
293 | yy = ADD16(yy, y[best_id]); | ||
294 | |||
295 | /* Only now that we've made the final choice, update y/iy */ | ||
296 | /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ | ||
297 | y[best_id] += 2*s; | ||
298 | iy[best_id]++; | ||
299 | } | ||
300 | |||
301 | /* Put the original sign back */ | ||
302 | j=0; | ||
303 | do { | ||
304 | X[j] = MULT16_16(signx[j],X[j]); | ||
305 | if (signx[j] < 0) | ||
306 | iy[j] = -iy[j]; | ||
307 | } while (++j<N); | ||
308 | encode_pulses(iy, N, K, enc); | ||
309 | |||
310 | #ifdef RESYNTH | ||
311 | normalise_residual(iy, X, N, yy, gain); | ||
312 | exp_rotation(X, N, -1, B, K, spread); | ||
313 | #endif | ||
314 | |||
315 | collapse_mask = extract_collapse_mask(iy, N, B); | ||
316 | RESTORE_STACK; | ||
317 | return collapse_mask; | ||
318 | } | ||
319 | |||
320 | /** Decode pulse vector and combine the result with the pitch vector to produce | ||
321 | the final normalised signal in the current band. */ | ||
322 | unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, | ||
323 | ec_dec *dec, opus_val16 gain) | ||
324 | { | ||
325 | int i; | ||
326 | opus_val32 Ryy; | ||
327 | unsigned collapse_mask; | ||
328 | VARDECL(int, iy); | ||
329 | SAVE_STACK; | ||
330 | |||
331 | celt_assert2(K>0, "alg_unquant() needs at least one pulse"); | ||
332 | celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); | ||
333 | ALLOC(iy, N, int); | ||
334 | decode_pulses(iy, N, K, dec); | ||
335 | Ryy = 0; | ||
336 | i=0; | ||
337 | do { | ||
338 | Ryy = MAC16_16(Ryy, iy[i], iy[i]); | ||
339 | } while (++i < N); | ||
340 | normalise_residual(iy, X, N, Ryy, gain); | ||
341 | exp_rotation(X, N, -1, B, K, spread); | ||
342 | collapse_mask = extract_collapse_mask(iy, N, B); | ||
343 | RESTORE_STACK; | ||
344 | return collapse_mask; | ||
345 | } | ||
346 | |||
347 | void renormalise_vector(celt_norm *X, int N, opus_val16 gain) | ||
348 | { | ||
349 | int i; | ||
350 | #ifdef FIXED_POINT | ||
351 | int k; | ||
352 | #endif | ||
353 | opus_val32 E = EPSILON; | ||
354 | opus_val16 g; | ||
355 | opus_val32 t; | ||
356 | celt_norm *xptr = X; | ||
357 | for (i=0;i<N;i++) | ||
358 | { | ||
359 | E = MAC16_16(E, *xptr, *xptr); | ||
360 | xptr++; | ||
361 | } | ||
362 | #ifdef FIXED_POINT | ||
363 | k = celt_ilog2(E)>>1; | ||
364 | #endif | ||
365 | t = VSHR32(E, 2*(k-7)); | ||
366 | g = MULT16_16_P15(celt_rsqrt_norm(t),gain); | ||
367 | |||
368 | xptr = X; | ||
369 | for (i=0;i<N;i++) | ||
370 | { | ||
371 | *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1)); | ||
372 | xptr++; | ||
373 | } | ||
374 | /*return celt_sqrt(E);*/ | ||
375 | } | ||
376 | |||
377 | int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) | ||
378 | { | ||
379 | int i; | ||
380 | int itheta; | ||
381 | opus_val16 mid, side; | ||
382 | opus_val32 Emid, Eside; | ||
383 | |||
384 | Emid = Eside = EPSILON; | ||
385 | if (stereo) | ||
386 | { | ||
387 | for (i=0;i<N;i++) | ||
388 | { | ||
389 | celt_norm m, s; | ||
390 | m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); | ||
391 | s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); | ||
392 | Emid = MAC16_16(Emid, m, m); | ||
393 | Eside = MAC16_16(Eside, s, s); | ||
394 | } | ||
395 | } else { | ||
396 | for (i=0;i<N;i++) | ||
397 | { | ||
398 | celt_norm m, s; | ||
399 | m = X[i]; | ||
400 | s = Y[i]; | ||
401 | Emid = MAC16_16(Emid, m, m); | ||
402 | Eside = MAC16_16(Eside, s, s); | ||
403 | } | ||
404 | } | ||
405 | mid = celt_sqrt(Emid); | ||
406 | side = celt_sqrt(Eside); | ||
407 | #ifdef FIXED_POINT | ||
408 | /* 0.63662 = 2/pi */ | ||
409 | itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); | ||
410 | #else | ||
411 | itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); | ||
412 | #endif | ||
413 | |||
414 | return itheta; | ||
415 | } | ||