diff options
Diffstat (limited to 'apps/codecs/liba52/imdct.c')
-rw-r--r-- | apps/codecs/liba52/imdct.c | 429 |
1 files changed, 429 insertions, 0 deletions
diff --git a/apps/codecs/liba52/imdct.c b/apps/codecs/liba52/imdct.c new file mode 100644 index 0000000000..1cb3814907 --- /dev/null +++ b/apps/codecs/liba52/imdct.c | |||
@@ -0,0 +1,429 @@ | |||
1 | /* | ||
2 | * imdct.c | ||
3 | * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> | ||
4 | * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> | ||
5 | * | ||
6 | * The ifft algorithms in this file have been largely inspired by Dan | ||
7 | * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html | ||
8 | * | ||
9 | * This file is part of a52dec, a free ATSC A-52 stream decoder. | ||
10 | * See http://liba52.sourceforge.net/ for updates. | ||
11 | * | ||
12 | * a52dec is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License as published by | ||
14 | * the Free Software Foundation; either version 2 of the License, or | ||
15 | * (at your option) any later version. | ||
16 | * | ||
17 | * a52dec is distributed in the hope that it will be useful, | ||
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
20 | * GNU General Public License for more details. | ||
21 | * | ||
22 | * You should have received a copy of the GNU General Public License | ||
23 | * along with this program; if not, write to the Free Software | ||
24 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
25 | */ | ||
26 | |||
27 | #include "config.h" | ||
28 | |||
29 | #include <math.h> | ||
30 | #include <stdio.h> | ||
31 | #ifdef LIBA52_DJBFFT | ||
32 | #include <fftc4.h> | ||
33 | #include <fftc8.h> | ||
34 | #endif | ||
35 | #ifndef M_PI | ||
36 | #define M_PI 3.1415926535897932384626433832795029 | ||
37 | #endif | ||
38 | #include <inttypes.h> | ||
39 | |||
40 | #include "a52.h" | ||
41 | #include "a52_internal.h" | ||
42 | #include "mm_accel.h" | ||
43 | |||
44 | typedef struct complex_s { | ||
45 | sample_t real; | ||
46 | sample_t imag; | ||
47 | } complex_t; | ||
48 | |||
49 | static uint8_t fftorder[] = { | ||
50 | 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, | ||
51 | 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, | ||
52 | 4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180, | ||
53 | 252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172, | ||
54 | 2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178, | ||
55 | 10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90, | ||
56 | 254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174, | ||
57 | 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 | ||
58 | }; | ||
59 | |||
60 | /* Root values for IFFT */ | ||
61 | static sample_t roots16[3]; | ||
62 | static sample_t roots32[7]; | ||
63 | static sample_t roots64[15]; | ||
64 | static sample_t roots128[31]; | ||
65 | |||
66 | /* Twiddle factors for IMDCT */ | ||
67 | static complex_t pre1[128]; | ||
68 | static complex_t post1[64]; | ||
69 | static complex_t pre2[64]; | ||
70 | static complex_t post2[32]; | ||
71 | |||
72 | static sample_t a52_imdct_window[256]; | ||
73 | |||
74 | static void (* ifft128) (complex_t * buf); | ||
75 | static void (* ifft64) (complex_t * buf); | ||
76 | |||
77 | static inline void ifft2 (complex_t * buf) | ||
78 | { | ||
79 | sample_t r, i; | ||
80 | |||
81 | r = buf[0].real; | ||
82 | i = buf[0].imag; | ||
83 | buf[0].real += buf[1].real; | ||
84 | buf[0].imag += buf[1].imag; | ||
85 | buf[1].real = r - buf[1].real; | ||
86 | buf[1].imag = i - buf[1].imag; | ||
87 | } | ||
88 | |||
89 | static inline void ifft4 (complex_t * buf) | ||
90 | { | ||
91 | sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; | ||
92 | |||
93 | tmp1 = buf[0].real + buf[1].real; | ||
94 | tmp2 = buf[3].real + buf[2].real; | ||
95 | tmp3 = buf[0].imag + buf[1].imag; | ||
96 | tmp4 = buf[2].imag + buf[3].imag; | ||
97 | tmp5 = buf[0].real - buf[1].real; | ||
98 | tmp6 = buf[0].imag - buf[1].imag; | ||
99 | tmp7 = buf[2].imag - buf[3].imag; | ||
100 | tmp8 = buf[3].real - buf[2].real; | ||
101 | |||
102 | buf[0].real = tmp1 + tmp2; | ||
103 | buf[0].imag = tmp3 + tmp4; | ||
104 | buf[2].real = tmp1 - tmp2; | ||
105 | buf[2].imag = tmp3 - tmp4; | ||
106 | buf[1].real = tmp5 + tmp7; | ||
107 | buf[1].imag = tmp6 + tmp8; | ||
108 | buf[3].real = tmp5 - tmp7; | ||
109 | buf[3].imag = tmp6 - tmp8; | ||
110 | } | ||
111 | |||
112 | /* basic radix-2 ifft butterfly */ | ||
113 | |||
114 | #define BUTTERFLY_0(t0,t1,W0,W1,d0,d1) do { \ | ||
115 | t0 = MUL (W1, d1) + MUL (W0, d0); \ | ||
116 | t1 = MUL (W0, d1) - MUL (W1, d0); \ | ||
117 | } while (0) | ||
118 | |||
119 | /* radix-2 ifft butterfly with bias */ | ||
120 | |||
121 | #define BUTTERFLY_B(t0,t1,W0,W1,d0,d1) do { \ | ||
122 | t0 = BIAS (MUL (d1, W1) + MUL (d0, W0)); \ | ||
123 | t1 = BIAS (MUL (d1, W0) - MUL (d0, W1)); \ | ||
124 | } while (0) | ||
125 | |||
126 | /* the basic split-radix ifft butterfly */ | ||
127 | |||
128 | #define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \ | ||
129 | BUTTERFLY_0 (tmp5, tmp6, wr, wi, a2.real, a2.imag); \ | ||
130 | BUTTERFLY_0 (tmp8, tmp7, wr, wi, a3.imag, a3.real); \ | ||
131 | tmp1 = tmp5 + tmp7; \ | ||
132 | tmp2 = tmp6 + tmp8; \ | ||
133 | tmp3 = tmp6 - tmp8; \ | ||
134 | tmp4 = tmp7 - tmp5; \ | ||
135 | a2.real = a0.real - tmp1; \ | ||
136 | a2.imag = a0.imag - tmp2; \ | ||
137 | a3.real = a1.real - tmp3; \ | ||
138 | a3.imag = a1.imag - tmp4; \ | ||
139 | a0.real += tmp1; \ | ||
140 | a0.imag += tmp2; \ | ||
141 | a1.real += tmp3; \ | ||
142 | a1.imag += tmp4; \ | ||
143 | } while (0) | ||
144 | |||
145 | /* split-radix ifft butterfly, specialized for wr=1 wi=0 */ | ||
146 | |||
147 | #define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \ | ||
148 | tmp1 = a2.real + a3.real; \ | ||
149 | tmp2 = a2.imag + a3.imag; \ | ||
150 | tmp3 = a2.imag - a3.imag; \ | ||
151 | tmp4 = a3.real - a2.real; \ | ||
152 | a2.real = a0.real - tmp1; \ | ||
153 | a2.imag = a0.imag - tmp2; \ | ||
154 | a3.real = a1.real - tmp3; \ | ||
155 | a3.imag = a1.imag - tmp4; \ | ||
156 | a0.real += tmp1; \ | ||
157 | a0.imag += tmp2; \ | ||
158 | a1.real += tmp3; \ | ||
159 | a1.imag += tmp4; \ | ||
160 | } while (0) | ||
161 | |||
162 | /* split-radix ifft butterfly, specialized for wr=wi */ | ||
163 | |||
164 | #define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \ | ||
165 | tmp5 = MUL (a2.real + a2.imag, w); \ | ||
166 | tmp6 = MUL (a2.imag - a2.real, w); \ | ||
167 | tmp7 = MUL (a3.real - a3.imag, w); \ | ||
168 | tmp8 = MUL (a3.imag + a3.real, w); \ | ||
169 | tmp1 = tmp5 + tmp7; \ | ||
170 | tmp2 = tmp6 + tmp8; \ | ||
171 | tmp3 = tmp6 - tmp8; \ | ||
172 | tmp4 = tmp7 - tmp5; \ | ||
173 | a2.real = a0.real - tmp1; \ | ||
174 | a2.imag = a0.imag - tmp2; \ | ||
175 | a3.real = a1.real - tmp3; \ | ||
176 | a3.imag = a1.imag - tmp4; \ | ||
177 | a0.real += tmp1; \ | ||
178 | a0.imag += tmp2; \ | ||
179 | a1.real += tmp3; \ | ||
180 | a1.imag += tmp4; \ | ||
181 | } while (0) | ||
182 | |||
183 | static inline void ifft8 (complex_t * buf) | ||
184 | { | ||
185 | sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; | ||
186 | |||
187 | ifft4 (buf); | ||
188 | ifft2 (buf + 4); | ||
189 | ifft2 (buf + 6); | ||
190 | BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); | ||
191 | BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]); | ||
192 | } | ||
193 | |||
194 | static void ifft_pass (complex_t * buf, sample_t * weight, int n) | ||
195 | { | ||
196 | complex_t * buf1; | ||
197 | complex_t * buf2; | ||
198 | complex_t * buf3; | ||
199 | sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; | ||
200 | int i; | ||
201 | |||
202 | buf++; | ||
203 | buf1 = buf + n; | ||
204 | buf2 = buf + 2 * n; | ||
205 | buf3 = buf + 3 * n; | ||
206 | |||
207 | BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]); | ||
208 | |||
209 | i = n - 1; | ||
210 | |||
211 | do { | ||
212 | BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], | ||
213 | weight[0], weight[2*i-n]); | ||
214 | buf++; | ||
215 | buf1++; | ||
216 | buf2++; | ||
217 | buf3++; | ||
218 | weight++; | ||
219 | } while (--i); | ||
220 | } | ||
221 | |||
222 | static void ifft16 (complex_t * buf) | ||
223 | { | ||
224 | ifft8 (buf); | ||
225 | ifft4 (buf + 8); | ||
226 | ifft4 (buf + 12); | ||
227 | ifft_pass (buf, roots16, 4); | ||
228 | } | ||
229 | |||
230 | static void ifft32 (complex_t * buf) | ||
231 | { | ||
232 | ifft16 (buf); | ||
233 | ifft8 (buf + 16); | ||
234 | ifft8 (buf + 24); | ||
235 | ifft_pass (buf, roots32, 8); | ||
236 | } | ||
237 | |||
238 | static void ifft64_c (complex_t * buf) | ||
239 | { | ||
240 | ifft32 (buf); | ||
241 | ifft16 (buf + 32); | ||
242 | ifft16 (buf + 48); | ||
243 | ifft_pass (buf, roots64, 16); | ||
244 | } | ||
245 | |||
246 | static void ifft128_c (complex_t * buf) | ||
247 | { | ||
248 | ifft32 (buf); | ||
249 | ifft16 (buf + 32); | ||
250 | ifft16 (buf + 48); | ||
251 | ifft_pass (buf, roots64, 16); | ||
252 | |||
253 | ifft32 (buf + 64); | ||
254 | ifft32 (buf + 96); | ||
255 | ifft_pass (buf, roots128, 32); | ||
256 | } | ||
257 | |||
258 | void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) | ||
259 | { | ||
260 | int i, k; | ||
261 | sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; | ||
262 | const sample_t * window = a52_imdct_window; | ||
263 | complex_t buf[128]; | ||
264 | |||
265 | for (i = 0; i < 128; i++) { | ||
266 | k = fftorder[i]; | ||
267 | t_r = pre1[i].real; | ||
268 | t_i = pre1[i].imag; | ||
269 | BUTTERFLY_0 (buf[i].real, buf[i].imag, t_r, t_i, data[k], data[255-k]); | ||
270 | } | ||
271 | |||
272 | ifft128 (buf); | ||
273 | |||
274 | /* Post IFFT complex multiply plus IFFT complex conjugate*/ | ||
275 | /* Window and convert to real valued signal */ | ||
276 | for (i = 0; i < 64; i++) { | ||
277 | /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ | ||
278 | t_r = post1[i].real; | ||
279 | t_i = post1[i].imag; | ||
280 | BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf[i].imag, buf[i].real); | ||
281 | BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf[127-i].imag, buf[127-i].real); | ||
282 | |||
283 | w_1 = window[2*i]; | ||
284 | w_2 = window[255-2*i]; | ||
285 | BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]); | ||
286 | delay[2*i] = a_i; | ||
287 | |||
288 | w_1 = window[2*i+1]; | ||
289 | w_2 = window[254-2*i]; | ||
290 | BUTTERFLY_B (data[2*i+1], data[254-2*i], w_1, w_2, b_r, delay[2*i+1]); | ||
291 | delay[2*i+1] = b_i; | ||
292 | } | ||
293 | } | ||
294 | |||
295 | void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias) | ||
296 | { | ||
297 | int i, k; | ||
298 | sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; | ||
299 | const sample_t * window = a52_imdct_window; | ||
300 | complex_t buf1[64], buf2[64]; | ||
301 | |||
302 | /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ | ||
303 | for (i = 0; i < 64; i++) { | ||
304 | k = fftorder[i]; | ||
305 | t_r = pre2[i].real; | ||
306 | t_i = pre2[i].imag; | ||
307 | BUTTERFLY_0 (buf1[i].real, buf1[i].imag, t_r, t_i, data[k], data[254-k]); | ||
308 | BUTTERFLY_0 (buf2[i].real, buf2[i].imag, t_r, t_i, data[k+1], data[255-k]); | ||
309 | } | ||
310 | |||
311 | ifft64 (buf1); | ||
312 | ifft64 (buf2); | ||
313 | |||
314 | /* Post IFFT complex multiply */ | ||
315 | /* Window and convert to real valued signal */ | ||
316 | for (i = 0; i < 32; i++) { | ||
317 | /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ | ||
318 | t_r = post2[i].real; | ||
319 | t_i = post2[i].imag; | ||
320 | BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf1[i].imag, buf1[i].real); | ||
321 | BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf1[63-i].imag, buf1[63-i].real); | ||
322 | BUTTERFLY_0 (c_r, c_i, t_i, t_r, buf2[i].imag, buf2[i].real); | ||
323 | BUTTERFLY_0 (d_r, d_i, t_r, t_i, buf2[63-i].imag, buf2[63-i].real); | ||
324 | |||
325 | w_1 = window[2*i]; | ||
326 | w_2 = window[255-2*i]; | ||
327 | BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]); | ||
328 | delay[2*i] = c_i; | ||
329 | |||
330 | w_1 = window[128+2*i]; | ||
331 | w_2 = window[127-2*i]; | ||
332 | BUTTERFLY_B (data[128+2*i], data[127-2*i], w_1, w_2, a_i, delay[127-2*i]); | ||
333 | delay[127-2*i] = c_r; | ||
334 | |||
335 | w_1 = window[2*i+1]; | ||
336 | w_2 = window[254-2*i]; | ||
337 | BUTTERFLY_B (data[254-2*i], data[2*i+1], w_2, w_1, b_i, delay[2*i+1]); | ||
338 | delay[2*i+1] = d_r; | ||
339 | |||
340 | w_1 = window[129+2*i]; | ||
341 | w_2 = window[126-2*i]; | ||
342 | BUTTERFLY_B (data[129+2*i], data[126-2*i], w_1, w_2, b_r, delay[126-2*i]); | ||
343 | delay[126-2*i] = d_i; | ||
344 | } | ||
345 | } | ||
346 | |||
347 | static double besselI0 (double x) | ||
348 | { | ||
349 | double bessel = 1; | ||
350 | int i = 100; | ||
351 | |||
352 | do | ||
353 | bessel = bessel * x / (i * i) + 1; | ||
354 | while (--i); | ||
355 | return bessel; | ||
356 | } | ||
357 | |||
358 | void a52_imdct_init (uint32_t mm_accel) | ||
359 | { | ||
360 | int i, k; | ||
361 | double sum; | ||
362 | double local_imdct_window[256]; | ||
363 | |||
364 | /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ | ||
365 | sum = 0; | ||
366 | for (i = 0; i < 256; i++) { | ||
367 | sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256)); | ||
368 | local_imdct_window[i] = sum; | ||
369 | } | ||
370 | sum++; | ||
371 | for (i = 0; i < 256; i++) | ||
372 | a52_imdct_window[i] = SAMPLE (sqrt (local_imdct_window[i] / sum)); | ||
373 | |||
374 | for (i = 0; i < 3; i++) | ||
375 | roots16[i] = SAMPLE (cos ((M_PI / 8) * (i + 1))); | ||
376 | |||
377 | for (i = 0; i < 7; i++) | ||
378 | roots32[i] = SAMPLE (cos ((M_PI / 16) * (i + 1))); | ||
379 | |||
380 | for (i = 0; i < 15; i++) | ||
381 | roots64[i] = SAMPLE (cos ((M_PI / 32) * (i + 1))); | ||
382 | |||
383 | for (i = 0; i < 31; i++) | ||
384 | roots128[i] = SAMPLE (cos ((M_PI / 64) * (i + 1))); | ||
385 | |||
386 | for (i = 0; i < 64; i++) { | ||
387 | k = fftorder[i] / 2 + 64; | ||
388 | pre1[i].real = SAMPLE (cos ((M_PI / 256) * (k - 0.25))); | ||
389 | pre1[i].imag = SAMPLE (sin ((M_PI / 256) * (k - 0.25))); | ||
390 | } | ||
391 | |||
392 | for (i = 64; i < 128; i++) { | ||
393 | k = fftorder[i] / 2 + 64; | ||
394 | pre1[i].real = SAMPLE (-cos ((M_PI / 256) * (k - 0.25))); | ||
395 | pre1[i].imag = SAMPLE (-sin ((M_PI / 256) * (k - 0.25))); | ||
396 | } | ||
397 | |||
398 | for (i = 0; i < 64; i++) { | ||
399 | post1[i].real = SAMPLE (cos ((M_PI / 256) * (i + 0.5))); | ||
400 | post1[i].imag = SAMPLE (sin ((M_PI / 256) * (i + 0.5))); | ||
401 | } | ||
402 | |||
403 | for (i = 0; i < 64; i++) { | ||
404 | k = fftorder[i] / 4; | ||
405 | pre2[i].real = SAMPLE (cos ((M_PI / 128) * (k - 0.25))); | ||
406 | pre2[i].imag = SAMPLE (sin ((M_PI / 128) * (k - 0.25))); | ||
407 | } | ||
408 | |||
409 | for (i = 0; i < 32; i++) { | ||
410 | post2[i].real = SAMPLE (cos ((M_PI / 128) * (i + 0.5))); | ||
411 | post2[i].imag = SAMPLE (sin ((M_PI / 128) * (i + 0.5))); | ||
412 | } | ||
413 | |||
414 | #ifdef LIBA52_DJBFFT | ||
415 | if (mm_accel & MM_ACCEL_DJBFFT) { | ||
416 | #ifndef LIBA52_DOUBLE | ||
417 | ifft128 = (void (*) (complex_t *)) fftc4_un128; | ||
418 | ifft64 = (void (*) (complex_t *)) fftc4_un64; | ||
419 | #else | ||
420 | ifft128 = (void (*) (complex_t *)) fftc8_un128; | ||
421 | ifft64 = (void (*) (complex_t *)) fftc8_un64; | ||
422 | #endif | ||
423 | } else | ||
424 | #endif | ||
425 | { | ||
426 | ifft128 = ifft128_c; | ||
427 | ifft64 = ifft64_c; | ||
428 | } | ||
429 | } | ||