summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libopus/celt/mdct.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/mdct.c')
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mdct.c147
1 files changed, 55 insertions, 92 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c
index 0df77fd5ec..72ea180568 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.c
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.c
@@ -41,7 +41,7 @@
41 41
42#ifndef SKIP_CONFIG_H 42#ifndef SKIP_CONFIG_H
43#ifdef HAVE_CONFIG_H 43#ifdef HAVE_CONFIG_H
44#include "opus_config.h" 44#include "config.h"
45#endif 45#endif
46#endif 46#endif
47 47
@@ -110,12 +110,14 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
110 int N, N2, N4; 110 int N, N2, N4;
111 kiss_twiddle_scalar sine; 111 kiss_twiddle_scalar sine;
112 VARDECL(kiss_fft_scalar, f); 112 VARDECL(kiss_fft_scalar, f);
113 VARDECL(kiss_fft_scalar, f2);
113 SAVE_STACK; 114 SAVE_STACK;
114 N = l->n; 115 N = l->n;
115 N >>= shift; 116 N >>= shift;
116 N2 = N>>1; 117 N2 = N>>1;
117 N4 = N>>2; 118 N4 = N>>2;
118 ALLOC(f, N2, kiss_fft_scalar); 119 ALLOC(f, N2, kiss_fft_scalar);
120 ALLOC(f2, N2, kiss_fft_scalar);
119 /* sin(x) ~= x here */ 121 /* sin(x) ~= x here */
120#ifdef FIXED_POINT 122#ifdef FIXED_POINT
121 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 123 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
@@ -132,7 +134,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
132 kiss_fft_scalar * OPUS_RESTRICT yp = f; 134 kiss_fft_scalar * OPUS_RESTRICT yp = f;
133 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); 135 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
134 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; 136 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
135 for(i=0;i<(overlap>>2);i++) 137 for(i=0;i<((overlap+3)>>2);i++)
136 { 138 {
137 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ 139 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
138 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); 140 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
@@ -144,7 +146,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
144 } 146 }
145 wp1 = window; 147 wp1 = window;
146 wp2 = window+overlap-1; 148 wp2 = window+overlap-1;
147 for(;i<N4-(overlap>>2);i++) 149 for(;i<N4-((overlap+3)>>2);i++)
148 { 150 {
149 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ 151 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
150 *yp++ = *xp2; 152 *yp++ = *xp2;
@@ -181,12 +183,12 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
181 } 183 }
182 184
183 /* N/4 complex FFT, down-scales by 4/N */ 185 /* N/4 complex FFT, down-scales by 4/N */
184 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in); 186 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
185 187
186 /* Post-rotate */ 188 /* Post-rotate */
187 { 189 {
188 /* Temp pointers to make it really clear to the compiler what we're doing */ 190 /* Temp pointers to make it really clear to the compiler what we're doing */
189 const kiss_fft_scalar * OPUS_RESTRICT fp = in; 191 const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
190 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 192 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
191 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); 193 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
192 const kiss_twiddle_scalar *t = &l->trig[0]; 194 const kiss_twiddle_scalar *t = &l->trig[0];
@@ -208,35 +210,20 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
208} 210}
209#endif 211#endif
210 212
211#define S_F_BUF_SIZE (1920>>1) /* N = 1920 for static modes */
212static kiss_fft_scalar s_f2[S_F_BUF_SIZE] IBSS_ATTR MEM_ALIGN_ATTR;
213void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 213void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
214 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) 214 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
215{ 215{
216 int i; 216 int i;
217 int N, N2, N4; 217 int N, N2, N4;
218 int tstride = 1<<shift;
219 kiss_twiddle_scalar sine; 218 kiss_twiddle_scalar sine;
220 VARDECL(kiss_fft_scalar, f); 219/* VARDECL(kiss_fft_scalar, f2);
221 VARDECL(kiss_fft_scalar, f2); 220 SAVE_STACK; */
222 SAVE_STACK;
223 N = l->n; 221 N = l->n;
224 N >>= shift; 222 N >>= shift;
225 N2 = N>>1; 223 N2 = N>>1;
226 N4 = N>>2; 224 N4 = N>>2;
227 kiss_fft_scalar s_f[S_F_BUF_SIZE]; 225/* ALLOC(f2, N2, kiss_fft_scalar); */
228 226 kiss_fft_scalar f2[N2]; /* worst case 3840b */
229 if (S_F_BUF_SIZE >= N2)
230 {
231 f = s_f;
232 f2 = s_f2;
233 }
234 else
235 {
236 ALLOC(f , N2, kiss_fft_scalar);
237 ALLOC(f2, N2, kiss_fft_scalar);
238 }
239
240 /* sin(x) ~= x here */ 227 /* sin(x) ~= x here */
241#ifdef FIXED_POINT 228#ifdef FIXED_POINT
242 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 229 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
@@ -250,102 +237,78 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
250 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; 237 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
251 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); 238 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
252 kiss_fft_scalar * OPUS_RESTRICT yp = f2; 239 kiss_fft_scalar * OPUS_RESTRICT yp = f2;
253 const kiss_twiddle_scalar *t0 = &l->trig[0]; 240 const kiss_twiddle_scalar *t = &l->trig[0];
254 const kiss_twiddle_scalar *t1 = &l->trig[N4<<shift];
255 for(i=0;i<N4;i++) 241 for(i=0;i<N4;i++)
256 { 242 {
257 kiss_fft_scalar yr, yi; 243 kiss_fft_scalar yr, yi;
258 yr = -S_MUL(*xp2, *t0) + S_MUL(*xp1, *t1); 244 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
259 yi = -S_MUL(*xp2, *t1) - S_MUL(*xp1, *t0); 245 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
260 /* works because the cos is nearly one */ 246 /* works because the cos is nearly one */
261 *yp++ = yr - S_MUL(yi,sine); 247 *yp++ = yr - S_MUL(yi,sine);
262 *yp++ = yi + S_MUL(yr,sine); 248 *yp++ = yi + S_MUL(yr,sine);
263 xp1+=2*stride; 249 xp1+=2*stride;
264 xp2-=2*stride; 250 xp2-=2*stride;
265 t0 += tstride;
266 t1 -= tstride;
267 } 251 }
268 } 252 }
269 253
270 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ 254 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
271 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f); 255 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
272 256
273 /* Post-rotate */ 257 /* Post-rotate and de-shuffle from both ends of the buffer at once to make
258 it in-place. */
274 { 259 {
275 kiss_fft_scalar * OPUS_RESTRICT fp = f; 260 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
276 const kiss_twiddle_scalar *t0 = &l->trig[0]; 261 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
277 const kiss_twiddle_scalar *t1 = &l->trig[N4<<shift]; 262 const kiss_twiddle_scalar *t = &l->trig[0];
278 for(i=0;i<N4;i++) 263 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
264 middle pair will be computed twice. */
265 for(i=0;i<(N4+1)>>1;i++)
279 { 266 {
280 kiss_fft_scalar re, im, yr, yi; 267 kiss_fft_scalar re, im, yr, yi;
281 re = fp[0]; 268 kiss_twiddle_scalar t0, t1;
282 im = fp[1]; 269 re = yp0[0];
270 im = yp0[1];
271 t0 = t[i<<shift];
272 t1 = t[(N4-i)<<shift];
283 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 273 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
284 yr = S_MUL(re, *t0) - S_MUL(im, *t1); 274 yr = S_MUL(re,t0) - S_MUL(im,t1);
285 yi = S_MUL(im, *t0) + S_MUL(re, *t1); 275 yi = S_MUL(im,t0) + S_MUL(re,t1);
276 re = yp1[0];
277 im = yp1[1];
286 /* works because the cos is nearly one */ 278 /* works because the cos is nearly one */
287 *fp++ = yr - S_MUL(yi,sine); 279 yp0[0] = -(yr - S_MUL(yi,sine));
288 *fp++ = yi + S_MUL(yr,sine); 280 yp1[1] = yi + S_MUL(yr,sine);
289 t0 += tstride;
290 t1 -= tstride;
291 }
292 }
293 /* De-shuffle the components for the middle of the window only */
294 {
295 const kiss_fft_scalar * OPUS_RESTRICT fp1 = f;
296 const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1;
297 kiss_fft_scalar * OPUS_RESTRICT yp = f2;
298 for(i = 0; i < N4; i++)
299 {
300 *yp++ =-*fp1;
301 *yp++ = *fp2;
302 fp1 += 2;
303 fp2 -= 2;
304 }
305 }
306 out -= (N2-overlap)>>1;
307 /* Mirror on both sides for TDAC */
308 {
309 kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1;
310 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1;
311 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2;
312 const opus_val16 * OPUS_RESTRICT wp1 = window;
313 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
314 281
315 i = N4-overlap/2; 282 t0 = t[(N4-i-1)<<shift];
316 xp1 -= N4-overlap/2; 283 t1 = t[(i+1)<<shift];
317 fp1 -= N4-overlap/2; 284 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
318 OPUS_COPY(xp1+1, fp1+1, N4-overlap/2); 285 yr = S_MUL(re,t0) - S_MUL(im,t1);
319 for(; i < N4; i++) 286 yi = S_MUL(im,t0) + S_MUL(re,t1);
320 { 287 /* works because the cos is nearly one */
321 kiss_fft_scalar x1; 288 yp1[0] = -(yr - S_MUL(yi,sine));
322 x1 = *fp1--; 289 yp0[1] = yi + S_MUL(yr,sine);
323 *yp1++ +=-MULT16_32_Q15(*wp1, x1); 290 yp0 += 2;
324 *xp1-- += MULT16_32_Q15(*wp2, x1); 291 yp1 -= 2;
325 wp1++;
326 wp2--;
327 } 292 }
328 } 293 }
294
295 /* Mirror on both sides for TDAC */
329 { 296 {
330 kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4; 297 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
331 kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2; 298 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
332 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2);
333 const opus_val16 * OPUS_RESTRICT wp1 = window; 299 const opus_val16 * OPUS_RESTRICT wp1 = window;
334 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; 300 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
335 301
336 i = N4-overlap/2; 302 for(i = 0; i < overlap/2; i++)
337 OPUS_COPY(xp2, fp2, N4-overlap/2);
338 xp2 += N4-overlap/2;
339 fp2 += N4-overlap/2;
340 for(; i < N4; i++)
341 { 303 {
342 kiss_fft_scalar x2; 304 kiss_fft_scalar x1, x2;
343 x2 = *fp2++; 305 x1 = *xp1;
344 *yp2-- = MULT16_32_Q15(*wp1, x2); 306 x2 = *yp1;
345 *xp2++ = MULT16_32_Q15(*wp2, x2); 307 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
308 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
346 wp1++; 309 wp1++;
347 wp2--; 310 wp2--;
348 } 311 }
349 } 312 }
350 RESTORE_STACK; 313/* RESTORE_STACK; */
351} 314}