summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libopus/celt/mdct.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/mdct.c')
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mdct.c191
1 files changed, 110 insertions, 81 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c
index 72ea180568..7fa8eaf6bf 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.c
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.c
@@ -53,18 +53,20 @@
53#include "mathops.h" 53#include "mathops.h"
54#include "stack_alloc.h" 54#include "stack_alloc.h"
55 55
56#if defined(MIPSr1_ASM)
57#include "mips/mdct_mipsr1.h"
58#endif
59
60
56#ifdef CUSTOM_MODES 61#ifdef CUSTOM_MODES
57 62
58int clt_mdct_init(mdct_lookup *l,int N, int maxshift) 63int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
59{ 64{
60 int i; 65 int i;
61 int N4;
62 kiss_twiddle_scalar *trig; 66 kiss_twiddle_scalar *trig;
63#if defined(FIXED_POINT) 67 int shift;
64 int N2=N>>1; 68 int N2=N>>1;
65#endif
66 l->n = N; 69 l->n = N;
67 N4 = N>>2;
68 l->maxshift = maxshift; 70 l->maxshift = maxshift;
69 for (i=0;i<=maxshift;i++) 71 for (i=0;i<=maxshift;i++)
70 { 72 {
@@ -77,17 +79,28 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
77 return 0; 79 return 0;
78#endif 80#endif
79 } 81 }
80 l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); 82 l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar));
81 if (l->trig==NULL) 83 if (l->trig==NULL)
82 return 0; 84 return 0;
83 /* We have enough points that sine isn't necessary */ 85 for (shift=0;shift<=maxshift;shift++)
86 {
87 /* We have enough points that sine isn't necessary */
84#if defined(FIXED_POINT) 88#if defined(FIXED_POINT)
85 for (i=0;i<=N4;i++) 89#if 1
86 trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); 90 for (i=0;i<N2;i++)
91 trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N));
87#else 92#else
88 for (i=0;i<=N4;i++) 93 for (i=0;i<N2;i++)
89 trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); 94 trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N))));
90#endif 95#endif
96#else
97 for (i=0;i<N2;i++)
98 trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N);
99#endif
100 trig += N2;
101 N2 >>= 1;
102 N >>= 1;
103 }
91 return 1; 104 return 1;
92} 105}
93 106
@@ -103,27 +116,37 @@ void clt_mdct_clear(mdct_lookup *l)
103 116
104#if 0 117#if 0
105/* Forward MDCT trashes the input array */ 118/* Forward MDCT trashes the input array */
119#ifndef OVERRIDE_clt_mdct_forward
106void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 120void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
107 const opus_val16 *window, int overlap, int shift, int stride) 121 const opus_val16 *window, int overlap, int shift, int stride)
108{ 122{
109 int i; 123 int i;
110 int N, N2, N4; 124 int N, N2, N4;
111 kiss_twiddle_scalar sine;
112 VARDECL(kiss_fft_scalar, f); 125 VARDECL(kiss_fft_scalar, f);
113 VARDECL(kiss_fft_scalar, f2); 126 VARDECL(kiss_fft_cpx, f2);
127 const kiss_fft_state *st = l->kfft[shift];
128 const kiss_twiddle_scalar *trig;
129 opus_val16 scale;
130#ifdef FIXED_POINT
131 /* Allows us to scale with MULT16_32_Q16(), which is faster than
132 MULT16_32_Q15() on ARM. */
133 int scale_shift = st->scale_shift-1;
134#endif
114 SAVE_STACK; 135 SAVE_STACK;
136 scale = st->scale;
137
115 N = l->n; 138 N = l->n;
116 N >>= shift; 139 trig = l->trig;
140 for (i=0;i<shift;i++)
141 {
142 N >>= 1;
143 trig += N;
144 }
117 N2 = N>>1; 145 N2 = N>>1;
118 N4 = N>>2; 146 N4 = N>>2;
147
119 ALLOC(f, N2, kiss_fft_scalar); 148 ALLOC(f, N2, kiss_fft_scalar);
120 ALLOC(f2, N2, kiss_fft_scalar); 149 ALLOC(f2, N4, kiss_fft_cpx);
121 /* sin(x) ~= x here */
122#ifdef FIXED_POINT
123 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
124#else
125 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
126#endif
127 150
128 /* Consider the input to be composed of four blocks: [a, b, c, d] */ 151 /* Consider the input to be composed of four blocks: [a, b, c, d] */
129 /* Window, shuffle, fold */ 152 /* Window, shuffle, fold */
@@ -168,125 +191,131 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
168 /* Pre-rotation */ 191 /* Pre-rotation */
169 { 192 {
170 kiss_fft_scalar * OPUS_RESTRICT yp = f; 193 kiss_fft_scalar * OPUS_RESTRICT yp = f;
171 const kiss_twiddle_scalar *t = &l->trig[0]; 194 const kiss_twiddle_scalar *t = &trig[0];
172 for(i=0;i<N4;i++) 195 for(i=0;i<N4;i++)
173 { 196 {
197 kiss_fft_cpx yc;
198 kiss_twiddle_scalar t0, t1;
174 kiss_fft_scalar re, im, yr, yi; 199 kiss_fft_scalar re, im, yr, yi;
175 re = yp[0]; 200 t0 = t[i];
176 im = yp[1]; 201 t1 = t[N4+i];
177 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); 202 re = *yp++;
178 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); 203 im = *yp++;
179 /* works because the cos is nearly one */ 204 yr = S_MUL(re,t0) - S_MUL(im,t1);
180 *yp++ = yr + S_MUL(yi,sine); 205 yi = S_MUL(im,t0) + S_MUL(re,t1);
181 *yp++ = yi - S_MUL(yr,sine); 206 yc.r = yr;
207 yc.i = yi;
208 yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
209 yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
210 f2[st->bitrev[i]] = yc;
182 } 211 }
183 } 212 }
184 213
185 /* N/4 complex FFT, down-scales by 4/N */ 214 /* N/4 complex FFT, does not downscale anymore */
186 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); 215 opus_fft_impl(st, f2);
187 216
188 /* Post-rotate */ 217 /* Post-rotate */
189 { 218 {
190 /* Temp pointers to make it really clear to the compiler what we're doing */ 219 /* Temp pointers to make it really clear to the compiler what we're doing */
191 const kiss_fft_scalar * OPUS_RESTRICT fp = f2; 220 const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
192 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 221 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
193 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); 222 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
194 const kiss_twiddle_scalar *t = &l->trig[0]; 223 const kiss_twiddle_scalar *t = &trig[0];
195 /* Temp pointers to make it really clear to the compiler what we're doing */ 224 /* Temp pointers to make it really clear to the compiler what we're doing */
196 for(i=0;i<N4;i++) 225 for(i=0;i<N4;i++)
197 { 226 {
198 kiss_fft_scalar yr, yi; 227 kiss_fft_scalar yr, yi;
199 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); 228 yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
200 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); 229 yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
201 /* works because the cos is nearly one */ 230 *yp1 = yr;
202 *yp1 = yr - S_MUL(yi,sine); 231 *yp2 = yi;
203 *yp2 = yi + S_MUL(yr,sine);; 232 fp++;
204 fp += 2;
205 yp1 += 2*stride; 233 yp1 += 2*stride;
206 yp2 -= 2*stride; 234 yp2 -= 2*stride;
207 } 235 }
208 } 236 }
209 RESTORE_STACK; 237 RESTORE_STACK;
210} 238}
239#endif /* OVERRIDE_clt_mdct_forward */
211#endif 240#endif
212 241
242#ifndef OVERRIDE_clt_mdct_backward
213void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 243void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
214 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) 244 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
215{ 245{
216 int i; 246 int i;
217 int N, N2, N4; 247 int N, N2, N4;
218 kiss_twiddle_scalar sine; 248 const kiss_twiddle_scalar *trig;
219/* VARDECL(kiss_fft_scalar, f2); 249
220 SAVE_STACK; */
221 N = l->n; 250 N = l->n;
222 N >>= shift; 251 trig = l->trig;
252 for (i=0;i<shift;i++)
253 {
254 N >>= 1;
255 trig += N;
256 }
223 N2 = N>>1; 257 N2 = N>>1;
224 N4 = N>>2; 258 N4 = N>>2;
225/* ALLOC(f2, N2, kiss_fft_scalar); */
226 kiss_fft_scalar f2[N2]; /* worst case 3840b */
227 /* sin(x) ~= x here */
228#ifdef FIXED_POINT
229 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
230#else
231 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
232#endif
233 259
234 /* Pre-rotate */ 260 /* Pre-rotate */
235 { 261 {
236 /* Temp pointers to make it really clear to the compiler what we're doing */ 262 /* Temp pointers to make it really clear to the compiler what we're doing */
237 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; 263 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
238 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); 264 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
239 kiss_fft_scalar * OPUS_RESTRICT yp = f2; 265 kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
240 const kiss_twiddle_scalar *t = &l->trig[0]; 266 const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
267 const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
241 for(i=0;i<N4;i++) 268 for(i=0;i<N4;i++)
242 { 269 {
270 int rev;
243 kiss_fft_scalar yr, yi; 271 kiss_fft_scalar yr, yi;
244 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); 272 rev = *bitrev++;
245 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); 273 yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
246 /* works because the cos is nearly one */ 274 yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
247 *yp++ = yr - S_MUL(yi,sine); 275 /* We swap real and imag because we use an FFT instead of an IFFT. */
248 *yp++ = yi + S_MUL(yr,sine); 276 yp[2*rev+1] = yr;
277 yp[2*rev] = yi;
278 /* Storing the pre-rotation directly in the bitrev order. */
249 xp1+=2*stride; 279 xp1+=2*stride;
250 xp2-=2*stride; 280 xp2-=2*stride;
251 } 281 }
252 } 282 }
253 283
254 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ 284 opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
255 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
256 285
257 /* Post-rotate and de-shuffle from both ends of the buffer at once to make 286 /* Post-rotate and de-shuffle from both ends of the buffer at once to make
258 it in-place. */ 287 it in-place. */
259 { 288 {
260 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); 289 kiss_fft_scalar * yp0 = out+(overlap>>1);
261 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; 290 kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
262 const kiss_twiddle_scalar *t = &l->trig[0]; 291 const kiss_twiddle_scalar *t = &trig[0];
263 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the 292 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
264 middle pair will be computed twice. */ 293 middle pair will be computed twice. */
265 for(i=0;i<(N4+1)>>1;i++) 294 for(i=0;i<(N4+1)>>1;i++)
266 { 295 {
267 kiss_fft_scalar re, im, yr, yi; 296 kiss_fft_scalar re, im, yr, yi;
268 kiss_twiddle_scalar t0, t1; 297 kiss_twiddle_scalar t0, t1;
269 re = yp0[0]; 298 /* We swap real and imag because we're using an FFT instead of an IFFT. */
270 im = yp0[1]; 299 re = yp0[1];
271 t0 = t[i<<shift]; 300 im = yp0[0];
272 t1 = t[(N4-i)<<shift]; 301 t0 = t[i];
302 t1 = t[N4+i];
273 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 303 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
274 yr = S_MUL(re,t0) - S_MUL(im,t1); 304 yr = S_MUL(re,t0) + S_MUL(im,t1);
275 yi = S_MUL(im,t0) + S_MUL(re,t1); 305 yi = S_MUL(re,t1) - S_MUL(im,t0);
276 re = yp1[0]; 306 /* We swap real and imag because we're using an FFT instead of an IFFT. */
277 im = yp1[1]; 307 re = yp1[1];
278 /* works because the cos is nearly one */ 308 im = yp1[0];
279 yp0[0] = -(yr - S_MUL(yi,sine)); 309 yp0[0] = yr;
280 yp1[1] = yi + S_MUL(yr,sine); 310 yp1[1] = yi;
281 311
282 t0 = t[(N4-i-1)<<shift]; 312 t0 = t[(N4-i-1)];
283 t1 = t[(i+1)<<shift]; 313 t1 = t[(N2-i-1)];
284 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 314 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
285 yr = S_MUL(re,t0) - S_MUL(im,t1); 315 yr = S_MUL(re,t0) + S_MUL(im,t1);
286 yi = S_MUL(im,t0) + S_MUL(re,t1); 316 yi = S_MUL(re,t1) - S_MUL(im,t0);
287 /* works because the cos is nearly one */ 317 yp1[0] = yr;
288 yp1[0] = -(yr - S_MUL(yi,sine)); 318 yp0[1] = yi;
289 yp0[1] = yi + S_MUL(yr,sine);
290 yp0 += 2; 319 yp0 += 2;
291 yp1 -= 2; 320 yp1 -= 2;
292 } 321 }
@@ -310,5 +339,5 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
310 wp2--; 339 wp2--;
311 } 340 }
312 } 341 }
313/* RESTORE_STACK; */
314} 342}
343#endif /* OVERRIDE_clt_mdct_backward */