diff options
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/mdct.c')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/mdct.c | 147 |
1 files changed, 55 insertions, 92 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c index 0df77fd5ec..72ea180568 100644 --- a/lib/rbcodec/codecs/libopus/celt/mdct.c +++ b/lib/rbcodec/codecs/libopus/celt/mdct.c | |||
@@ -41,7 +41,7 @@ | |||
41 | 41 | ||
42 | #ifndef SKIP_CONFIG_H | 42 | #ifndef SKIP_CONFIG_H |
43 | #ifdef HAVE_CONFIG_H | 43 | #ifdef HAVE_CONFIG_H |
44 | #include "opus_config.h" | 44 | #include "config.h" |
45 | #endif | 45 | #endif |
46 | #endif | 46 | #endif |
47 | 47 | ||
@@ -110,12 +110,14 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar | |||
110 | int N, N2, N4; | 110 | int N, N2, N4; |
111 | kiss_twiddle_scalar sine; | 111 | kiss_twiddle_scalar sine; |
112 | VARDECL(kiss_fft_scalar, f); | 112 | VARDECL(kiss_fft_scalar, f); |
113 | VARDECL(kiss_fft_scalar, f2); | ||
113 | SAVE_STACK; | 114 | SAVE_STACK; |
114 | N = l->n; | 115 | N = l->n; |
115 | N >>= shift; | 116 | N >>= shift; |
116 | N2 = N>>1; | 117 | N2 = N>>1; |
117 | N4 = N>>2; | 118 | N4 = N>>2; |
118 | ALLOC(f, N2, kiss_fft_scalar); | 119 | ALLOC(f, N2, kiss_fft_scalar); |
120 | ALLOC(f2, N2, kiss_fft_scalar); | ||
119 | /* sin(x) ~= x here */ | 121 | /* sin(x) ~= x here */ |
120 | #ifdef FIXED_POINT | 122 | #ifdef FIXED_POINT |
121 | sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; | 123 | sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; |
@@ -132,7 +134,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar | |||
132 | kiss_fft_scalar * OPUS_RESTRICT yp = f; | 134 | kiss_fft_scalar * OPUS_RESTRICT yp = f; |
133 | const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); | 135 | const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); |
134 | const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; | 136 | const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; |
135 | for(i=0;i<(overlap>>2);i++) | 137 | for(i=0;i<((overlap+3)>>2);i++) |
136 | { | 138 | { |
137 | /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ | 139 | /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ |
138 | *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); | 140 | *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); |
@@ -144,7 +146,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar | |||
144 | } | 146 | } |
145 | wp1 = window; | 147 | wp1 = window; |
146 | wp2 = window+overlap-1; | 148 | wp2 = window+overlap-1; |
147 | for(;i<N4-(overlap>>2);i++) | 149 | for(;i<N4-((overlap+3)>>2);i++) |
148 | { | 150 | { |
149 | /* Real part arranged as a-bR, Imag part arranged as -c-dR */ | 151 | /* Real part arranged as a-bR, Imag part arranged as -c-dR */ |
150 | *yp++ = *xp2; | 152 | *yp++ = *xp2; |
@@ -181,12 +183,12 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar | |||
181 | } | 183 | } |
182 | 184 | ||
183 | /* N/4 complex FFT, down-scales by 4/N */ | 185 | /* N/4 complex FFT, down-scales by 4/N */ |
184 | opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in); | 186 | opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); |
185 | 187 | ||
186 | /* Post-rotate */ | 188 | /* Post-rotate */ |
187 | { | 189 | { |
188 | /* Temp pointers to make it really clear to the compiler what we're doing */ | 190 | /* Temp pointers to make it really clear to the compiler what we're doing */ |
189 | const kiss_fft_scalar * OPUS_RESTRICT fp = in; | 191 | const kiss_fft_scalar * OPUS_RESTRICT fp = f2; |
190 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out; | 192 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out; |
191 | kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); | 193 | kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); |
192 | const kiss_twiddle_scalar *t = &l->trig[0]; | 194 | const kiss_twiddle_scalar *t = &l->trig[0]; |
@@ -208,35 +210,20 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar | |||
208 | } | 210 | } |
209 | #endif | 211 | #endif |
210 | 212 | ||
211 | #define S_F_BUF_SIZE (1920>>1) /* N = 1920 for static modes */ | ||
212 | static kiss_fft_scalar s_f2[S_F_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR; | ||
213 | void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, | 213 | void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, |
214 | const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) | 214 | const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) |
215 | { | 215 | { |
216 | int i; | 216 | int i; |
217 | int N, N2, N4; | 217 | int N, N2, N4; |
218 | int tstride = 1<<shift; | ||
219 | kiss_twiddle_scalar sine; | 218 | kiss_twiddle_scalar sine; |
220 | VARDECL(kiss_fft_scalar, f); | 219 | /* VARDECL(kiss_fft_scalar, f2); |
221 | VARDECL(kiss_fft_scalar, f2); | 220 | SAVE_STACK; */ |
222 | SAVE_STACK; | ||
223 | N = l->n; | 221 | N = l->n; |
224 | N >>= shift; | 222 | N >>= shift; |
225 | N2 = N>>1; | 223 | N2 = N>>1; |
226 | N4 = N>>2; | 224 | N4 = N>>2; |
227 | kiss_fft_scalar s_f[S_F_BUF_SIZE]; | 225 | /* ALLOC(f2, N2, kiss_fft_scalar); */ |
228 | 226 | kiss_fft_scalar f2[N2]; /* worst case 3840b */ | |
229 | if (S_F_BUF_SIZE >= N2) | ||
230 | { | ||
231 | f = s_f; | ||
232 | f2 = s_f2; | ||
233 | } | ||
234 | else | ||
235 | { | ||
236 | ALLOC(f , N2, kiss_fft_scalar); | ||
237 | ALLOC(f2, N2, kiss_fft_scalar); | ||
238 | } | ||
239 | |||
240 | /* sin(x) ~= x here */ | 227 | /* sin(x) ~= x here */ |
241 | #ifdef FIXED_POINT | 228 | #ifdef FIXED_POINT |
242 | sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; | 229 | sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; |
@@ -250,102 +237,78 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala | |||
250 | const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; | 237 | const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; |
251 | const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); | 238 | const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); |
252 | kiss_fft_scalar * OPUS_RESTRICT yp = f2; | 239 | kiss_fft_scalar * OPUS_RESTRICT yp = f2; |
253 | const kiss_twiddle_scalar *t0 = &l->trig[0]; | 240 | const kiss_twiddle_scalar *t = &l->trig[0]; |
254 | const kiss_twiddle_scalar *t1 = &l->trig[N4<<shift]; | ||
255 | for(i=0;i<N4;i++) | 241 | for(i=0;i<N4;i++) |
256 | { | 242 | { |
257 | kiss_fft_scalar yr, yi; | 243 | kiss_fft_scalar yr, yi; |
258 | yr = -S_MUL(*xp2, *t0) + S_MUL(*xp1, *t1); | 244 | yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); |
259 | yi = -S_MUL(*xp2, *t1) - S_MUL(*xp1, *t0); | 245 | yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); |
260 | /* works because the cos is nearly one */ | 246 | /* works because the cos is nearly one */ |
261 | *yp++ = yr - S_MUL(yi,sine); | 247 | *yp++ = yr - S_MUL(yi,sine); |
262 | *yp++ = yi + S_MUL(yr,sine); | 248 | *yp++ = yi + S_MUL(yr,sine); |
263 | xp1+=2*stride; | 249 | xp1+=2*stride; |
264 | xp2-=2*stride; | 250 | xp2-=2*stride; |
265 | t0 += tstride; | ||
266 | t1 -= tstride; | ||
267 | } | 251 | } |
268 | } | 252 | } |
269 | 253 | ||
270 | /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ | 254 | /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ |
271 | opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f); | 255 | opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); |
272 | 256 | ||
273 | /* Post-rotate */ | 257 | /* Post-rotate and de-shuffle from both ends of the buffer at once to make |
258 | it in-place. */ | ||
274 | { | 259 | { |
275 | kiss_fft_scalar * OPUS_RESTRICT fp = f; | 260 | kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); |
276 | const kiss_twiddle_scalar *t0 = &l->trig[0]; | 261 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; |
277 | const kiss_twiddle_scalar *t1 = &l->trig[N4<<shift]; | 262 | const kiss_twiddle_scalar *t = &l->trig[0]; |
278 | for(i=0;i<N4;i++) | 263 | /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the |
264 | middle pair will be computed twice. */ | ||
265 | for(i=0;i<(N4+1)>>1;i++) | ||
279 | { | 266 | { |
280 | kiss_fft_scalar re, im, yr, yi; | 267 | kiss_fft_scalar re, im, yr, yi; |
281 | re = fp[0]; | 268 | kiss_twiddle_scalar t0, t1; |
282 | im = fp[1]; | 269 | re = yp0[0]; |
270 | im = yp0[1]; | ||
271 | t0 = t[i<<shift]; | ||
272 | t1 = t[(N4-i)<<shift]; | ||
283 | /* We'd scale up by 2 here, but instead it's done when mixing the windows */ | 273 | /* We'd scale up by 2 here, but instead it's done when mixing the windows */ |
284 | yr = S_MUL(re, *t0) - S_MUL(im, *t1); | 274 | yr = S_MUL(re,t0) - S_MUL(im,t1); |
285 | yi = S_MUL(im, *t0) + S_MUL(re, *t1); | 275 | yi = S_MUL(im,t0) + S_MUL(re,t1); |
276 | re = yp1[0]; | ||
277 | im = yp1[1]; | ||
286 | /* works because the cos is nearly one */ | 278 | /* works because the cos is nearly one */ |
287 | *fp++ = yr - S_MUL(yi,sine); | 279 | yp0[0] = -(yr - S_MUL(yi,sine)); |
288 | *fp++ = yi + S_MUL(yr,sine); | 280 | yp1[1] = yi + S_MUL(yr,sine); |
289 | t0 += tstride; | ||
290 | t1 -= tstride; | ||
291 | } | ||
292 | } | ||
293 | /* De-shuffle the components for the middle of the window only */ | ||
294 | { | ||
295 | const kiss_fft_scalar * OPUS_RESTRICT fp1 = f; | ||
296 | const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1; | ||
297 | kiss_fft_scalar * OPUS_RESTRICT yp = f2; | ||
298 | for(i = 0; i < N4; i++) | ||
299 | { | ||
300 | *yp++ =-*fp1; | ||
301 | *yp++ = *fp2; | ||
302 | fp1 += 2; | ||
303 | fp2 -= 2; | ||
304 | } | ||
305 | } | ||
306 | out -= (N2-overlap)>>1; | ||
307 | /* Mirror on both sides for TDAC */ | ||
308 | { | ||
309 | kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1; | ||
310 | kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1; | ||
311 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2; | ||
312 | const opus_val16 * OPUS_RESTRICT wp1 = window; | ||
313 | const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; | ||
314 | 281 | ||
315 | i = N4-overlap/2; | 282 | t0 = t[(N4-i-1)<<shift]; |
316 | xp1 -= N4-overlap/2; | 283 | t1 = t[(i+1)<<shift]; |
317 | fp1 -= N4-overlap/2; | 284 | /* We'd scale up by 2 here, but instead it's done when mixing the windows */ |
318 | OPUS_COPY(xp1+1, fp1+1, N4-overlap/2); | 285 | yr = S_MUL(re,t0) - S_MUL(im,t1); |
319 | for(; i < N4; i++) | 286 | yi = S_MUL(im,t0) + S_MUL(re,t1); |
320 | { | 287 | /* works because the cos is nearly one */ |
321 | kiss_fft_scalar x1; | 288 | yp1[0] = -(yr - S_MUL(yi,sine)); |
322 | x1 = *fp1--; | 289 | yp0[1] = yi + S_MUL(yr,sine); |
323 | *yp1++ +=-MULT16_32_Q15(*wp1, x1); | 290 | yp0 += 2; |
324 | *xp1-- += MULT16_32_Q15(*wp2, x1); | 291 | yp1 -= 2; |
325 | wp1++; | ||
326 | wp2--; | ||
327 | } | 292 | } |
328 | } | 293 | } |
294 | |||
295 | /* Mirror on both sides for TDAC */ | ||
329 | { | 296 | { |
330 | kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4; | 297 | kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; |
331 | kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2; | 298 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out; |
332 | kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2); | ||
333 | const opus_val16 * OPUS_RESTRICT wp1 = window; | 299 | const opus_val16 * OPUS_RESTRICT wp1 = window; |
334 | const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; | 300 | const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; |
335 | 301 | ||
336 | i = N4-overlap/2; | 302 | for(i = 0; i < overlap/2; i++) |
337 | OPUS_COPY(xp2, fp2, N4-overlap/2); | ||
338 | xp2 += N4-overlap/2; | ||
339 | fp2 += N4-overlap/2; | ||
340 | for(; i < N4; i++) | ||
341 | { | 303 | { |
342 | kiss_fft_scalar x2; | 304 | kiss_fft_scalar x1, x2; |
343 | x2 = *fp2++; | 305 | x1 = *xp1; |
344 | *yp2-- = MULT16_32_Q15(*wp1, x2); | 306 | x2 = *yp1; |
345 | *xp2++ = MULT16_32_Q15(*wp2, x2); | 307 | *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); |
308 | *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); | ||
346 | wp1++; | 309 | wp1++; |
347 | wp2--; | 310 | wp2--; |
348 | } | 311 | } |
349 | } | 312 | } |
350 | RESTORE_STACK; | 313 | /* RESTORE_STACK; */ |
351 | } | 314 | } |