diff options
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/mdct.c')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/mdct.c | 332 |
1 files changed, 332 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c new file mode 100644 index 0000000000..abf4e79d8d --- /dev/null +++ b/lib/rbcodec/codecs/libopus/celt/mdct.c | |||
@@ -0,0 +1,332 @@ | |||
1 | /* Copyright (c) 2007-2008 CSIRO | ||
2 | Copyright (c) 2007-2008 Xiph.Org Foundation | ||
3 | Written by Jean-Marc Valin */ | ||
4 | /* | ||
5 | Redistribution and use in source and binary forms, with or without | ||
6 | modification, are permitted provided that the following conditions | ||
7 | are met: | ||
8 | |||
9 | - Redistributions of source code must retain the above copyright | ||
10 | notice, this list of conditions and the following disclaimer. | ||
11 | |||
12 | - Redistributions in binary form must reproduce the above copyright | ||
13 | notice, this list of conditions and the following disclaimer in the | ||
14 | documentation and/or other materials provided with the distribution. | ||
15 | |||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
17 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
20 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
21 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
22 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
23 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
27 | */ | ||
28 | |||
29 | /* This is a simple MDCT implementation that uses a N/4 complex FFT | ||
30 | to do most of the work. It should be relatively straightforward to | ||
31 | plug in pretty much and FFT here. | ||
32 | |||
33 | This replaces the Vorbis FFT (and uses the exact same API), which | ||
34 | was a bit too messy and that was ending up duplicating code | ||
35 | (might as well use the same FFT everywhere). | ||
36 | |||
37 | The algorithm is similar to (and inspired from) Fabrice Bellard's | ||
38 | MDCT implementation in FFMPEG, but has differences in signs, ordering | ||
39 | and scaling in many places. | ||
40 | */ | ||
41 | |||
42 | #ifndef SKIP_CONFIG_H | ||
43 | #ifdef HAVE_CONFIG_H | ||
44 | #include "opus_config.h" | ||
45 | #endif | ||
46 | #endif | ||
47 | |||
48 | #include "mdct.h" | ||
49 | #include "kiss_fft.h" | ||
50 | #include "_kiss_fft_guts.h" | ||
51 | #include <math.h> | ||
52 | #include "os_support.h" | ||
53 | #include "mathops.h" | ||
54 | #include "stack_alloc.h" | ||
55 | |||
56 | #ifdef CUSTOM_MODES | ||
57 | |||
58 | int clt_mdct_init(mdct_lookup *l,int N, int maxshift) | ||
59 | { | ||
60 | int i; | ||
61 | int N4; | ||
62 | kiss_twiddle_scalar *trig; | ||
63 | #if defined(FIXED_POINT) | ||
64 | int N2=N>>1; | ||
65 | #endif | ||
66 | l->n = N; | ||
67 | N4 = N>>2; | ||
68 | l->maxshift = maxshift; | ||
69 | for (i=0;i<=maxshift;i++) | ||
70 | { | ||
71 | if (i==0) | ||
72 | l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); | ||
73 | else | ||
74 | l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); | ||
75 | #ifndef ENABLE_TI_DSPLIB55 | ||
76 | if (l->kfft[i]==NULL) | ||
77 | return 0; | ||
78 | #endif | ||
79 | } | ||
80 | l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); | ||
81 | if (l->trig==NULL) | ||
82 | return 0; | ||
83 | /* We have enough points that sine isn't necessary */ | ||
84 | #if defined(FIXED_POINT) | ||
85 | for (i=0;i<=N4;i++) | ||
86 | trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); | ||
87 | #else | ||
88 | for (i=0;i<=N4;i++) | ||
89 | trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); | ||
90 | #endif | ||
91 | return 1; | ||
92 | } | ||
93 | |||
94 | void clt_mdct_clear(mdct_lookup *l) | ||
95 | { | ||
96 | int i; | ||
97 | for (i=0;i<=l->maxshift;i++) | ||
98 | opus_fft_free(l->kfft[i]); | ||
99 | opus_free((kiss_twiddle_scalar*)l->trig); | ||
100 | } | ||
101 | |||
102 | #endif /* CUSTOM_MODES */ | ||
103 | |||
104 | /* Forward MDCT trashes the input array */ | ||
105 | void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, | ||
106 | const opus_val16 *window, int overlap, int shift, int stride) | ||
107 | { | ||
108 | int i; | ||
109 | int N, N2, N4; | ||
110 | kiss_twiddle_scalar sine; | ||
111 | VARDECL(kiss_fft_scalar, f); | ||
112 | SAVE_STACK; | ||
113 | N = l->n; | ||
114 | N >>= shift; | ||
115 | N2 = N>>1; | ||
116 | N4 = N>>2; | ||
117 | ALLOC(f, N2, kiss_fft_scalar); | ||
118 | /* sin(x) ~= x here */ | ||
119 | #ifdef FIXED_POINT | ||
120 | sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; | ||
121 | #else | ||
122 | sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; | ||
123 | #endif | ||
124 | |||
125 | /* Consider the input to be composed of four blocks: [a, b, c, d] */ | ||
126 | /* Window, shuffle, fold */ | ||
127 | { | ||
128 | /* Temp pointers to make it really clear to the compiler what we're doing */ | ||
129 | const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); | ||
130 | const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); | ||
131 | kiss_fft_scalar * OPUS_RESTRICT yp = f; | ||
132 | const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); | ||
133 | const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; | ||
134 | for(i=0;i<(overlap>>2);i++) | ||
135 | { | ||
136 | /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ | ||
137 | *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); | ||
138 | *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); | ||
139 | xp1+=2; | ||
140 | xp2-=2; | ||
141 | wp1+=2; | ||
142 | wp2-=2; | ||
143 | } | ||
144 | wp1 = window; | ||
145 | wp2 = window+overlap-1; | ||
146 | for(;i<N4-(overlap>>2);i++) | ||
147 | { | ||
148 | /* Real part arranged as a-bR, Imag part arranged as -c-dR */ | ||
149 | *yp++ = *xp2; | ||
150 | *yp++ = *xp1; | ||
151 | xp1+=2; | ||
152 | xp2-=2; | ||
153 | } | ||
154 | for(;i<N4;i++) | ||
155 | { | ||
156 | /* Real part arranged as a-bR, Imag part arranged as -c-dR */ | ||
157 | *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2); | ||
158 | *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); | ||
159 | xp1+=2; | ||
160 | xp2-=2; | ||
161 | wp1+=2; | ||
162 | wp2-=2; | ||
163 | } | ||
164 | } | ||
165 | /* Pre-rotation */ | ||
166 | { | ||
167 | kiss_fft_scalar * OPUS_RESTRICT yp = f; | ||
168 | const kiss_twiddle_scalar *t = &l->trig[0]; | ||
169 | for(i=0;i<N4;i++) | ||
170 | { | ||
171 | kiss_fft_scalar re, im, yr, yi; | ||
172 | re = yp[0]; | ||
173 | im = yp[1]; | ||
174 | yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); | ||
175 | yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); | ||
176 | /* works because the cos is nearly one */ | ||
177 | *yp++ = yr + S_MUL(yi,sine); | ||
178 | *yp++ = yi - S_MUL(yr,sine); | ||
179 | } | ||
180 | } | ||
181 | |||
182 | /* N/4 complex FFT, down-scales by 4/N */ | ||
183 | opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in); | ||
184 | |||
185 | /* Post-rotate */ | ||
186 | { | ||
187 | /* Temp pointers to make it really clear to the compiler what we're doing */ | ||
188 | const kiss_fft_scalar * OPUS_RESTRICT fp = in; | ||
189 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out; | ||
190 | kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); | ||
191 | const kiss_twiddle_scalar *t = &l->trig[0]; | ||
192 | /* Temp pointers to make it really clear to the compiler what we're doing */ | ||
193 | for(i=0;i<N4;i++) | ||
194 | { | ||
195 | kiss_fft_scalar yr, yi; | ||
196 | yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); | ||
197 | yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); | ||
198 | /* works because the cos is nearly one */ | ||
199 | *yp1 = yr - S_MUL(yi,sine); | ||
200 | *yp2 = yi + S_MUL(yr,sine);; | ||
201 | fp += 2; | ||
202 | yp1 += 2*stride; | ||
203 | yp2 -= 2*stride; | ||
204 | } | ||
205 | } | ||
206 | RESTORE_STACK; | ||
207 | } | ||
208 | |||
209 | void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, | ||
210 | const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) | ||
211 | { | ||
212 | int i; | ||
213 | int N, N2, N4; | ||
214 | kiss_twiddle_scalar sine; | ||
215 | VARDECL(kiss_fft_scalar, f); | ||
216 | VARDECL(kiss_fft_scalar, f2); | ||
217 | SAVE_STACK; | ||
218 | N = l->n; | ||
219 | N >>= shift; | ||
220 | N2 = N>>1; | ||
221 | N4 = N>>2; | ||
222 | ALLOC(f, N2, kiss_fft_scalar); | ||
223 | ALLOC(f2, N2, kiss_fft_scalar); | ||
224 | /* sin(x) ~= x here */ | ||
225 | #ifdef FIXED_POINT | ||
226 | sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; | ||
227 | #else | ||
228 | sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; | ||
229 | #endif | ||
230 | |||
231 | /* Pre-rotate */ | ||
232 | { | ||
233 | /* Temp pointers to make it really clear to the compiler what we're doing */ | ||
234 | const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; | ||
235 | const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); | ||
236 | kiss_fft_scalar * OPUS_RESTRICT yp = f2; | ||
237 | const kiss_twiddle_scalar *t = &l->trig[0]; | ||
238 | for(i=0;i<N4;i++) | ||
239 | { | ||
240 | kiss_fft_scalar yr, yi; | ||
241 | yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); | ||
242 | yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); | ||
243 | /* works because the cos is nearly one */ | ||
244 | *yp++ = yr - S_MUL(yi,sine); | ||
245 | *yp++ = yi + S_MUL(yr,sine); | ||
246 | xp1+=2*stride; | ||
247 | xp2-=2*stride; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ | ||
252 | opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f); | ||
253 | |||
254 | /* Post-rotate */ | ||
255 | { | ||
256 | kiss_fft_scalar * OPUS_RESTRICT fp = f; | ||
257 | const kiss_twiddle_scalar *t = &l->trig[0]; | ||
258 | |||
259 | for(i=0;i<N4;i++) | ||
260 | { | ||
261 | kiss_fft_scalar re, im, yr, yi; | ||
262 | re = fp[0]; | ||
263 | im = fp[1]; | ||
264 | /* We'd scale up by 2 here, but instead it's done when mixing the windows */ | ||
265 | yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); | ||
266 | yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); | ||
267 | /* works because the cos is nearly one */ | ||
268 | *fp++ = yr - S_MUL(yi,sine); | ||
269 | *fp++ = yi + S_MUL(yr,sine); | ||
270 | } | ||
271 | } | ||
272 | /* De-shuffle the components for the middle of the window only */ | ||
273 | { | ||
274 | const kiss_fft_scalar * OPUS_RESTRICT fp1 = f; | ||
275 | const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1; | ||
276 | kiss_fft_scalar * OPUS_RESTRICT yp = f2; | ||
277 | for(i = 0; i < N4; i++) | ||
278 | { | ||
279 | *yp++ =-*fp1; | ||
280 | *yp++ = *fp2; | ||
281 | fp1 += 2; | ||
282 | fp2 -= 2; | ||
283 | } | ||
284 | } | ||
285 | out -= (N2-overlap)>>1; | ||
286 | /* Mirror on both sides for TDAC */ | ||
287 | { | ||
288 | kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1; | ||
289 | kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1; | ||
290 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2; | ||
291 | const opus_val16 * OPUS_RESTRICT wp1 = window; | ||
292 | const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; | ||
293 | for(i = 0; i< N4-overlap/2; i++) | ||
294 | { | ||
295 | *xp1 = *fp1; | ||
296 | xp1--; | ||
297 | fp1--; | ||
298 | } | ||
299 | for(; i < N4; i++) | ||
300 | { | ||
301 | kiss_fft_scalar x1; | ||
302 | x1 = *fp1--; | ||
303 | *yp1++ +=-MULT16_32_Q15(*wp1, x1); | ||
304 | *xp1-- += MULT16_32_Q15(*wp2, x1); | ||
305 | wp1++; | ||
306 | wp2--; | ||
307 | } | ||
308 | } | ||
309 | { | ||
310 | kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4; | ||
311 | kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2; | ||
312 | kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2); | ||
313 | const opus_val16 * OPUS_RESTRICT wp1 = window; | ||
314 | const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; | ||
315 | for(i = 0; i< N4-overlap/2; i++) | ||
316 | { | ||
317 | *xp2 = *fp2; | ||
318 | xp2++; | ||
319 | fp2++; | ||
320 | } | ||
321 | for(; i < N4; i++) | ||
322 | { | ||
323 | kiss_fft_scalar x2; | ||
324 | x2 = *fp2++; | ||
325 | *yp2-- = MULT16_32_Q15(*wp1, x2); | ||
326 | *xp2++ = MULT16_32_Q15(*wp2, x2); | ||
327 | wp1++; | ||
328 | wp2--; | ||
329 | } | ||
330 | } | ||
331 | RESTORE_STACK; | ||
332 | } | ||