summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libfaad/filtbank.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libfaad/filtbank.c')
-rw-r--r--lib/rbcodec/codecs/libfaad/filtbank.c482
1 files changed, 482 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libfaad/filtbank.c b/lib/rbcodec/codecs/libfaad/filtbank.c
new file mode 100644
index 0000000000..fd7a4dc91f
--- /dev/null
+++ b/lib/rbcodec/codecs/libfaad/filtbank.c
@@ -0,0 +1,482 @@
1/*
2** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
3** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com
4**
5** This program is free software; you can redistribute it and/or modify
6** it under the terms of the GNU General Public License as published by
7** the Free Software Foundation; either version 2 of the License, or
8** (at your option) any later version.
9**
10** This program is distributed in the hope that it will be useful,
11** but WITHOUT ANY WARRANTY; without even the implied warranty of
12** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13** GNU General Public License for more details.
14**
15** You should have received a copy of the GNU General Public License
16** along with this program; if not, write to the Free Software
17** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18**
19** Any non-GPL usage of this software or parts of this software is strictly
20** forbidden.
21**
22** Commercial non-GPL licensing of this software is possible.
23** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
24**
25** $Id$
26**/
27
28#include "common.h"
29#include "structs.h"
30
31#include <stdlib.h>
32#include <string.h>
33#ifdef _WIN32_WCE
34#define assert(x)
35#else
36#include <assert.h>
37#endif
38
39#include "filtbank.h"
40#include "decoder.h"
41#include "syntax.h"
42#include "kbd_win.h"
43#include "sine_win.h"
44
45
46/* static variables */
47static real_t transf_buf[2*FRAME_LEN] IBSS_ATTR MEM_ALIGN_ATTR;
48#ifdef LTP_DEC
49static real_t windowed_buf[2*FRAME_LEN] MEM_ALIGN_ATTR = {0};
50#endif
51
52
53/*Windowing functions borrowed from libwmai*/
54#ifdef CPU_ARM
55static inline
56void vector_fmul_add_add(real_t *dst, const real_t *src0, const real_t *src1, const real_t *src2, int len)
57{
58 /* Block sizes are always power of two */
59 asm volatile (
60 "0:"
61 "ldmia %[d]!, {r0, r1};"
62 "ldmia %[w]!, {r4, r5};"
63 /* consume the first data and window value so we can use those
64 * registers again */
65 "smull r8, r9, r0, r4;"
66 "ldmia %[src2]!, {r0, r4};"
67 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
68 "smull r8, r9, r1, r5;"
69 "add r1, r4, r9, lsl #1;"
70 "stmia %[dst]!, {r0, r1};"
71 "subs %[n], %[n], #2;"
72 "bne 0b;"
73 : [d] "+r" (src0), [w] "+r" (src1), [src2] "+r" (src2), [dst] "+r" (dst), [n] "+r" (len)
74 :
75 : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
76}
77static inline
78void vector_fmul_reverse(real_t *dst, const real_t *src0, const real_t *src1,
79 int len)
80{
81 /* Block sizes are always power of two */
82 asm volatile (
83 "add %[s1], %[s1], %[n], lsl #2;"
84 "0:"
85 "ldmia %[s0]!, {r0, r1};"
86 "ldmdb %[s1]!, {r4, r5};"
87 "smull r8, r9, r0, r5;"
88 "mov r0, r9, lsl #1;"
89 "smull r8, r9, r1, r4;"
90 "mov r1, r9, lsl #1;"
91 "stmia %[dst]!, {r0, r1};"
92 "subs %[n], %[n], #2;"
93 "bne 0b;"
94 : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
95 :
96 : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
97}
98
99#elif defined(CPU_COLDFIRE)
100static inline
101void vector_fmul_add_add(real_t *dst, const real_t *src0, const real_t *src1, const real_t *src2, int len)
102{
103 /* Block sizes are always power of two. Smallest block is always way bigger
104 * than four too.*/
105 asm volatile (
106 "0:"
107 "movem.l (%[src0]), %%d0-%%d3;"
108 "movem.l (%[src1]), %%d4-%%d5/%%a0-%%a1;"
109 "mac.l %%d0, %%d4, %%acc0;"
110 "mac.l %%d1, %%d5, %%acc1;"
111 "mac.l %%d2, %%a0, %%acc2;"
112 "mac.l %%d3, %%a1, %%acc3;"
113 "lea.l (16, %[src0]), %[src0];"
114 "lea.l (16, %[src1]), %[src1];"
115 "movclr.l %%acc0, %%d0;"
116 "movclr.l %%acc1, %%d1;"
117 "movclr.l %%acc2, %%d2;"
118 "movclr.l %%acc3, %%d3;"
119 "movem.l (%[src2]), %%d4-%%d5/%%a0-%%a1;"
120 "lea.l (16, %[src2]), %[src2];"
121 "add.l %%d4, %%d0;"
122 "add.l %%d5, %%d1;"
123 "add.l %%a0, %%d2;"
124 "add.l %%a1, %%d3;"
125 "movem.l %%d0-%%d3, (%[dst]);"
126 "lea.l (16, %[dst]), %[dst];"
127 "subq.l #4, %[n];"
128 "jne 0b;"
129 : [src0] "+a" (src0), [src1] "+a" (src1), [src2] "+a" (src2), [dst] "+a" (dst), [n] "+d" (len)
130 :
131 : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
132}
133
134static inline
135void vector_fmul_reverse(real_t *dst, const real_t *src0, const real_t *src1,
136 int len)
137{
138 /* Block sizes are always power of two. Smallest block is always way bigger
139 * than four too.*/
140 asm volatile (
141 "lea.l (-16, %[s1], %[n]*4), %[s1];"
142 "0:"
143 "movem.l (%[s0]), %%d0-%%d3;"
144 "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
145 "mac.l %%d0, %%a1, %%acc0;"
146 "mac.l %%d1, %%a0, %%acc1;"
147 "mac.l %%d2, %%d5, %%acc2;"
148 "mac.l %%d3, %%d4, %%acc3;"
149 "lea.l (16, %[s0]), %[s0];"
150 "lea.l (-16, %[s1]), %[s1];"
151 "movclr.l %%acc0, %%d0;"
152 "movclr.l %%acc1, %%d1;"
153 "movclr.l %%acc2, %%d2;"
154 "movclr.l %%acc3, %%d3;"
155 "movem.l %%d0-%%d3, (%[dst]);"
156 "lea.l (16, %[dst]), %[dst];"
157 "subq.l #4, %[n];"
158 "jne 0b;"
159 : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
160 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
161}
162
163#else
164static inline void vector_fmul_add_add(real_t *dst, const real_t *src0, const real_t *src1, const real_t *src2, int len){
165 int i;
166 for(i=0; i<len; i++)
167 dst[i] = MUL_F(src0[i], src1[i]) + src2[i];
168}
169
170static inline void vector_fmul_reverse(real_t *dst, const real_t *src0, const real_t *src1, int len){
171 int i;
172 src1 += len-1;
173 for(i=0; i<len; i++)
174 dst[i] = MUL_F(src0[i], src1[-i]);
175}
176#endif
177
178#ifdef LTP_DEC
179static INLINE void mdct(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
180{
181 mdct_info *mdct = NULL;
182
183 switch (len)
184 {
185 case 2048:
186 case 1920:
187 mdct = fb->mdct2048;
188 break;
189 case 256:
190 case 240:
191 mdct = fb->mdct256;
192 break;
193#ifdef LD_DEC
194 case 1024:
195 case 960:
196 mdct = fb->mdct1024;
197 break;
198#endif
199 }
200
201 faad_mdct(mdct, in_data, out_data);
202}
203#endif
204
205void ifilter_bank(uint8_t window_sequence, uint8_t window_shape,
206 uint8_t window_shape_prev, real_t *freq_in,
207 real_t *time_out, real_t *overlap,
208 uint8_t object_type, uint16_t frame_len)
209{
210 int32_t i, idx0, idx1;
211 real_t win0, win1, win2;
212
213 const real_t *window_long = NULL;
214 const real_t *window_long_prev = NULL;
215 const real_t *window_short = NULL;
216 const real_t *window_short_prev = NULL;
217
218 int32_t nlong = frame_len;
219 int32_t nshort = frame_len/8;
220 int32_t nflat_ls = (nlong-nshort)/2;
221
222#ifdef PROFILE
223 int64_t count = faad_get_ts();
224#endif
225
226 memset(transf_buf,0,sizeof(transf_buf));
227 /* select windows of current frame and previous frame (Sine or KBD) */
228#ifdef LD_DEC
229 if (object_type == LD)
230 {
231 window_long = fb->ld_window[window_shape];
232 window_long_prev = fb->ld_window[window_shape_prev];
233 } else {
234#else
235 (void) object_type;
236#endif
237
238 /* AAC uses two different window shapes depending on spectal features */
239 if (window_shape == 0) {
240 window_long = sine_long_1024;
241 window_short = sine_short_128;
242 } else {
243 window_long = kbd_long_1024;
244 window_short = kbd_short_128;
245 }
246
247 if (window_shape_prev == 0) {
248 window_long_prev = sine_long_1024;
249 window_short_prev = sine_short_128;
250 } else {
251 window_long_prev = kbd_long_1024;
252 window_short_prev = kbd_short_128;
253 }
254
255#ifdef LD_DEC
256 }
257#endif
258
259#if 0
260 for (i = 0; i < 1024; i++)
261 {
262 printf("%d\n", freq_in[i]);
263 }
264#endif
265
266#if 0
267 printf("%d %d\n", window_sequence, window_shape);
268#endif
269 switch (window_sequence)
270 {
271 case ONLY_LONG_SEQUENCE:
272 /* perform iMDCT */
273 ff_imdct_calc(11, transf_buf, freq_in);
274
275 /* add second half output of previous frame to windowed output of current frame */
276 vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap, nlong);
277
278 /* window the second half and save as overlap for next frame */
279 vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong);
280
281 break;
282
283 case LONG_START_SEQUENCE:
284 /* perform iMDCT */
285 ff_imdct_calc(11, transf_buf, freq_in);
286
287 /* add second half output of previous frame to windowed output of current frame */
288 vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap, nlong);
289
290 /* window the second half and save as overlap for next frame */
291 /* construct second half window using padding with 1's and 0's */
292
293 memcpy(overlap, transf_buf+nlong, nflat_ls*sizeof(real_t));
294
295 vector_fmul_reverse(overlap+nflat_ls, transf_buf+nlong+nflat_ls, window_short, nshort);
296
297 memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t));
298 break;
299
300 case EIGHT_SHORT_SEQUENCE:
301 /* this could be assemblerized too, but this case is extremely uncommon */
302
303 /* perform iMDCT for each short block */
304 idx0 = 0; ff_imdct_calc(8, transf_buf , freq_in );
305 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
306 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
307 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
308 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
309 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
310 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
311 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
312
313 /* Add second half output of previous frame to windowed output of current
314 * frame */
315 /* Step 1: copy */
316 memcpy(time_out, overlap, nflat_ls*sizeof(real_t));
317 /* Step 2: First window half, first half of nshort */
318 for (i = 0; i < nshort/2; i++) {
319 win0 = window_short[nshort-1-i];
320 win1 = window_short[i];
321 win2 = window_short_prev[i];
322 idx0 = nflat_ls + i;
323 idx1 = i;
324 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1], win2); idx0 += nshort; idx1 += (nshort<<1);
325 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
326 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
327 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
328 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1);
329 }
330 /* Step 3: First window half, second half of nshort */
331 for (; i < nshort; i++) {
332 win0 = window_short[nshort-1-i];
333 win1 = window_short[i];
334 idx0 = nflat_ls + i;
335 idx1 = i;
336 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
337 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
338 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
339 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1);
340 }
341
342 /* Window the second half and save as overlap for next frame */
343 /* Step 1: Second window half, first half of nshort */
344 for (i = 0; i < nshort/2; i++) {
345 win0 = window_short[nshort-1-i];
346 win1 = window_short[i];
347 idx0 = nflat_ls + 5*nshort + i - nlong;
348 idx1 = nshort*10 + i;
349 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
350 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
351 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
352 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0);
353 }
354 /* Step 2: Second window half, second half of nshort */
355 for (; i < nshort; i++) {
356 win0 = window_short[nshort-1-i];
357 win1 = window_short[i];
358 idx0 = nflat_ls + 4*nshort + i - nlong;
359 idx1 = nshort*8 + i;
360 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
361 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
362 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
363 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
364 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0);
365 }
366 /* Step 3: Set to zero */
367 memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t));
368
369 break;
370
371 case LONG_STOP_SEQUENCE:
372 /* perform iMDCT */
373 ff_imdct_calc(11, transf_buf, freq_in);
374
375 /* add second half output of previous frame to windowed output of current frame */
376 /* construct first half window using padding with 1's and 0's */
377 memcpy(time_out, overlap, nflat_ls*sizeof(real_t));
378
379 vector_fmul_add_add(time_out+nflat_ls, transf_buf+nflat_ls, window_short_prev, overlap+nflat_ls, nshort);
380
381 /* nflat_ls can be divided by 2. */
382 idx0 = nflat_ls + nshort;
383 for (i = 0; i < nflat_ls; i+=2) {
384 time_out[idx0] = overlap[idx0] + transf_buf[idx0]; idx0++;
385 time_out[idx0] = overlap[idx0] + transf_buf[idx0]; idx0++;
386 }
387
388 /* window the second half and save as overlap for next frame */
389 vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong);
390 break;
391 }
392
393#if 0
394 for (i = 0; i < 1024; i++)
395 {
396 printf("%d\n", time_out[i]);
397 //printf("0x%.8X\n", time_out[i]);
398 }
399#endif
400
401
402#ifdef PROFILE
403 count = faad_get_ts() - count;
404 fb->cycles += count;
405#endif
406}
407
408
409#ifdef LTP_DEC
410/* only works for LTP -> no overlapping, no short blocks */
411void filter_bank_ltp(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
412 uint8_t window_shape_prev, real_t *in_data, real_t *out_mdct,
413 uint8_t object_type, uint16_t frame_len)
414{
415 int16_t i;
416
417 const real_t *window_long = NULL;
418 const real_t *window_long_prev = NULL;
419 const real_t *window_short = NULL;
420 const real_t *window_short_prev = NULL;
421
422 uint16_t nlong = frame_len;
423 uint16_t nshort = frame_len/8;
424 uint16_t nflat_ls = (nlong-nshort)/2;
425
426 //assert(window_sequence != EIGHT_SHORT_SEQUENCE);
427
428 memset(windowed_buf,0,sizeof(windowed_buf));
429#ifdef LD_DEC
430 if (object_type == LD)
431 {
432 window_long = fb->ld_window[window_shape];
433 window_long_prev = fb->ld_window[window_shape_prev];
434 } else {
435#else
436 (void) object_type;
437#endif
438 window_long = fb->long_window[window_shape];
439 window_long_prev = fb->long_window[window_shape_prev];
440 window_short = fb->short_window[window_shape];
441 window_short_prev = fb->short_window[window_shape_prev];
442#ifdef LD_DEC
443 }
444#endif
445
446 switch(window_sequence)
447 {
448 case ONLY_LONG_SEQUENCE:
449 for (i = nlong-1; i >= 0; i--)
450 {
451 windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]);
452 windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]);
453 }
454 mdct(fb, windowed_buf, out_mdct, 2*nlong);
455 break;
456
457 case LONG_START_SEQUENCE:
458 for (i = 0; i < nlong; i++)
459 windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]);
460 for (i = 0; i < nflat_ls; i++)
461 windowed_buf[i+nlong] = in_data[i+nlong];
462 for (i = 0; i < nshort; i++)
463 windowed_buf[i+nlong+nflat_ls] = MUL_F(in_data[i+nlong+nflat_ls], window_short[nshort-1-i]);
464 for (i = 0; i < nflat_ls; i++)
465 windowed_buf[i+nlong+nflat_ls+nshort] = 0;
466 mdct(fb, windowed_buf, out_mdct, 2*nlong);
467 break;
468
469 case LONG_STOP_SEQUENCE:
470 for (i = 0; i < nflat_ls; i++)
471 windowed_buf[i] = 0;
472 for (i = 0; i < nshort; i++)
473 windowed_buf[i+nflat_ls] = MUL_F(in_data[i+nflat_ls], window_short_prev[i]);
474 for (i = 0; i < nflat_ls; i++)
475 windowed_buf[i+nflat_ls+nshort] = in_data[i+nflat_ls+nshort];
476 for (i = 0; i < nlong; i++)
477 windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]);
478 mdct(fb, windowed_buf, out_mdct, 2*nlong);
479 break;
480 }
481}
482#endif