summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-07-11 12:33:35 +0000
committer: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-07-11 12:33:35 +0000
commit: 6cf66eb08ca2fee1c63147dae685afcb4d2046c4 (patch)
tree: d0de7b6d4ccb48a2a2c1def0d15e3c19458ef176
parent: f65ed0d3bc4fd116f3fa6dfb059bb38d4c1262d1 (diff)
download: rockbox-6cf66eb08ca2fee1c63147dae685afcb4d2046c4.tar.gz
download: rockbox-6cf66eb08ca2fee1c63147dae685afcb4d2046c4.zip
Refactor parts of libfaad filterbank. Faster windowing for 8-short-window sequences.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27380 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libfaad/filtbank.c 126
1 file changed, 80 insertions, 46 deletions
diff --git a/apps/codecs/libfaad/filtbank.c b/apps/codecs/libfaad/filtbank.c
index 41c8291d9a..0c1f6c03cf 100644
--- a/apps/codecs/libfaad/filtbank.c
+++ b/apps/codecs/libfaad/filtbank.c
@@ -203,18 +203,17 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape,
203 real_t *time_out, real_t *overlap, 203 real_t *time_out, real_t *overlap,
204 uint8_t object_type, uint16_t frame_len) 204 uint8_t object_type, uint16_t frame_len)
205{ 205{
206 int16_t i; 206 int32_t i, idx0, idx1;
207 207 real_t win0, win1, win2;
208 const real_t *window_long = NULL; 208
209 const real_t *window_long_prev = NULL; 209 const real_t *window_long = NULL;
210 const real_t *window_short = NULL; 210 const real_t *window_long_prev = NULL;
211 const real_t *window_short = NULL;
211 const real_t *window_short_prev = NULL; 212 const real_t *window_short_prev = NULL;
212 213
213 uint16_t nlong = frame_len; 214 int32_t nlong = frame_len;
214 uint16_t nshort = frame_len/8; 215 int32_t nshort = frame_len/8;
215 uint16_t trans = nshort/2; 216 int32_t nflat_ls = (nlong-nshort)/2;
216
217 uint16_t nflat_ls = (nlong-nshort)/2;
218 217
219#ifdef PROFILE 218#ifdef PROFILE
220 int64_t count = faad_get_ts(); 219 int64_t count = faad_get_ts();
@@ -232,8 +231,8 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape,
232 (void) object_type; 231 (void) object_type;
233#endif 232#endif
234 233
235 /*AAC uses two different window shapes depending on spectal features*/ 234 /* AAC uses two different window shapes depending on spectal features */
236 if(window_shape == 0){ 235 if (window_shape == 0) {
237 window_long = sine_long_1024; 236 window_long = sine_long_1024;
238 window_short = sine_short_128; 237 window_short = sine_short_128;
239 } else { 238 } else {
@@ -241,7 +240,7 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape,
241 window_short = kbd_short_128; 240 window_short = kbd_short_128;
242 } 241 }
243 242
244 if(window_shape_prev == 0){ 243 if (window_shape_prev == 0) {
245 window_long_prev = sine_long_1024; 244 window_long_prev = sine_long_1024;
246 window_short_prev = sine_short_128; 245 window_short_prev = sine_short_128;
247 } else { 246 } else {
@@ -279,7 +278,7 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape,
279 278
280 case LONG_START_SEQUENCE: 279 case LONG_START_SEQUENCE:
281 /* perform iMDCT */ 280 /* perform iMDCT */
282 ff_imdct_calc(11, transf_buf, freq_in); 281 ff_imdct_calc(11, transf_buf, freq_in);
283 282
284 /* add second half output of previous frame to windowed output of current frame */ 283 /* add second half output of previous frame to windowed output of current frame */
285 vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap, nlong); 284 vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap, nlong);
@@ -295,41 +294,72 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape,
295 break; 294 break;
296 295
297 case EIGHT_SHORT_SEQUENCE: 296 case EIGHT_SHORT_SEQUENCE:
298 /*this could be assemblerized too, but this case is extremely uncommon*/ 297 /* this could be assemblerized too, but this case is extremely uncommon */
299 298
300 /* perform iMDCT for each short block */ 299 /* perform iMDCT for each short block */
301 ff_imdct_calc(8, transf_buf+2*nshort*0, freq_in+0*nshort); 300 idx0 = 0; ff_imdct_calc(8, transf_buf , freq_in );
302 ff_imdct_calc(8, transf_buf+2*nshort*1, freq_in+1*nshort); 301 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
303 ff_imdct_calc(8, transf_buf+2*nshort*2, freq_in+2*nshort); 302 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
304 ff_imdct_calc(8, transf_buf+2*nshort*3, freq_in+3*nshort); 303 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
305 ff_imdct_calc(8, transf_buf+2*nshort*4, freq_in+4*nshort); 304 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
306 ff_imdct_calc(8, transf_buf+2*nshort*5, freq_in+5*nshort); 305 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
307 ff_imdct_calc(8, transf_buf+2*nshort*6, freq_in+6*nshort); 306 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
308 ff_imdct_calc(8, transf_buf+2*nshort*7, freq_in+7*nshort); 307 idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0);
309 308
310 /* add second half output of previous frame to windowed output of current frame */ 309 /* Add second half output of previous frame to windowed output of current
311 for (i = 0; i < nflat_ls; i++) 310 * frame */
312 time_out[i] = overlap[i]; 311 /* Step 1: copy */
313 for(i = 0; i < nshort; i++) 312 memcpy(time_out, overlap, nflat_ls*sizeof(real_t));
314 { 313 /* Step 2: First window half, first half of nshort */
315 time_out[nflat_ls+ i] = overlap[nflat_ls+ i] + MUL_F(transf_buf[nshort*0+i],window_short_prev[i]); 314 for (i = 0; i < nshort/2; i++) {
316 time_out[nflat_ls+1*nshort+i] = overlap[nflat_ls+nshort*1+i] + MUL_F(transf_buf[nshort*1+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*2+i],window_short[i]); 315 win0 = window_short[nshort-1-i];
317 time_out[nflat_ls+2*nshort+i] = overlap[nflat_ls+nshort*2+i] + MUL_F(transf_buf[nshort*3+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*4+i],window_short[i]); 316 win1 = window_short[i];
318 time_out[nflat_ls+3*nshort+i] = overlap[nflat_ls+nshort*3+i] + MUL_F(transf_buf[nshort*5+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*6+i],window_short[i]); 317 win2 = window_short_prev[i];
319 if (i < trans) 318 idx0 = nflat_ls + i;
320 time_out[nflat_ls+4*nshort+i] = overlap[nflat_ls+nshort*4+i] + MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]); 319 idx1 = i;
320 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1], win2); idx0 += nshort; idx1 += (nshort<<1);
321 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
322 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
323 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
324 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1);
325 }
326 /* Step 3: First window half, second half of nshort */
327 for (; i < nshort; i++) {
328 win0 = window_short[nshort-1-i];
329 win1 = window_short[i];
330 idx0 = nflat_ls + i;
331 idx1 = i;
332 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
333 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
334 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
335 time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1);
321 } 336 }
322 337
323 /* window the second half and save as overlap for next frame */ 338 /* Window the second half and save as overlap for next frame */
324 for(i = 0; i < nshort; i++) 339 /* Step 1: Second window half, first half of nshort */
325 { 340 for (i = 0; i < nshort/2; i++) {
326 if (i >= trans) 341 win0 = window_short[nshort-1-i];
327 overlap[nflat_ls+4*nshort+i-nlong] = MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]); 342 win1 = window_short[i];
328 overlap[nflat_ls+5*nshort+i-nlong] = MUL_F(transf_buf[nshort*9+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*10+i],window_short[i]); 343 idx0 = nflat_ls + 5*nshort + i - nlong;
329 overlap[nflat_ls+6*nshort+i-nlong] = MUL_F(transf_buf[nshort*11+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*12+i],window_short[i]); 344 idx1 = nshort*10 + i;
330 overlap[nflat_ls+7*nshort+i-nlong] = MUL_F(transf_buf[nshort*13+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*14+i],window_short[i]); 345 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
331 overlap[nflat_ls+8*nshort+i-nlong] = MUL_F(transf_buf[nshort*15+i],window_short[nshort-1-i]); 346 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
347 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
348 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0);
349 }
350 /* Step 2: Second window half, second half of nshort */
351 for (; i < nshort; i++) {
352 win0 = window_short[nshort-1-i];
353 win1 = window_short[i];
354 idx0 = nflat_ls + 4*nshort + i - nlong;
355 idx1 = nshort*8 + i;
356 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
357 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
358 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
359 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1);
360 overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0);
332 } 361 }
362 /* Step 3: Set to zero */
333 memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t)); 363 memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t));
334 364
335 break; 365 break;
@@ -344,8 +374,12 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape,
344 374
345 vector_fmul_add_add(time_out+nflat_ls, transf_buf+nflat_ls, window_short_prev, overlap+nflat_ls, nshort); 375 vector_fmul_add_add(time_out+nflat_ls, transf_buf+nflat_ls, window_short_prev, overlap+nflat_ls, nshort);
346 376
347 for (i = 0; i < nflat_ls; i++) 377 /* nflat_ls can be divided by 2. */
348 time_out[nflat_ls+nshort+i] = overlap[nflat_ls+nshort+i] + transf_buf[nflat_ls+nshort+i]; 378 idx0 = nflat_ls + nshort;
379 for (i = 0; i < nflat_ls; i+=2) {
380 time_out[idx0] = overlap[idx0] + transf_buf[idx0]; idx0++;
381 time_out[idx0] = overlap[idx0] + transf_buf[idx0]; idx0++;
382 }
349 383
350 /* window the second half and save as overlap for next frame */ 384 /* window the second half and save as overlap for next frame */
351 vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong); 385 vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong);