diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-07-11 12:33:35 +0000 |
---|---|---|
committer | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-07-11 12:33:35 +0000 |
commit | 6cf66eb08ca2fee1c63147dae685afcb4d2046c4 (patch) | |
tree | d0de7b6d4ccb48a2a2c1def0d15e3c19458ef176 /apps | |
parent | f65ed0d3bc4fd116f3fa6dfb059bb38d4c1262d1 (diff) | |
download | rockbox-6cf66eb08ca2fee1c63147dae685afcb4d2046c4.tar.gz rockbox-6cf66eb08ca2fee1c63147dae685afcb4d2046c4.zip |
Refactor parts of libfaad filterbank. Faster windowing for 8-short-window sequences.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27380 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/libfaad/filtbank.c | 126 |
1 files changed, 80 insertions, 46 deletions
diff --git a/apps/codecs/libfaad/filtbank.c b/apps/codecs/libfaad/filtbank.c index 41c8291d9a..0c1f6c03cf 100644 --- a/apps/codecs/libfaad/filtbank.c +++ b/apps/codecs/libfaad/filtbank.c | |||
@@ -203,18 +203,17 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape, | |||
203 | real_t *time_out, real_t *overlap, | 203 | real_t *time_out, real_t *overlap, |
204 | uint8_t object_type, uint16_t frame_len) | 204 | uint8_t object_type, uint16_t frame_len) |
205 | { | 205 | { |
206 | int16_t i; | 206 | int32_t i, idx0, idx1; |
207 | 207 | real_t win0, win1, win2; | |
208 | const real_t *window_long = NULL; | 208 | |
209 | const real_t *window_long_prev = NULL; | 209 | const real_t *window_long = NULL; |
210 | const real_t *window_short = NULL; | 210 | const real_t *window_long_prev = NULL; |
211 | const real_t *window_short = NULL; | ||
211 | const real_t *window_short_prev = NULL; | 212 | const real_t *window_short_prev = NULL; |
212 | 213 | ||
213 | uint16_t nlong = frame_len; | 214 | int32_t nlong = frame_len; |
214 | uint16_t nshort = frame_len/8; | 215 | int32_t nshort = frame_len/8; |
215 | uint16_t trans = nshort/2; | 216 | int32_t nflat_ls = (nlong-nshort)/2; |
216 | |||
217 | uint16_t nflat_ls = (nlong-nshort)/2; | ||
218 | 217 | ||
219 | #ifdef PROFILE | 218 | #ifdef PROFILE |
220 | int64_t count = faad_get_ts(); | 219 | int64_t count = faad_get_ts(); |
@@ -232,8 +231,8 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape, | |||
232 | (void) object_type; | 231 | (void) object_type; |
233 | #endif | 232 | #endif |
234 | 233 | ||
235 | /*AAC uses two different window shapes depending on spectal features*/ | 234 | /* AAC uses two different window shapes depending on spectal features */ |
236 | if(window_shape == 0){ | 235 | if (window_shape == 0) { |
237 | window_long = sine_long_1024; | 236 | window_long = sine_long_1024; |
238 | window_short = sine_short_128; | 237 | window_short = sine_short_128; |
239 | } else { | 238 | } else { |
@@ -241,7 +240,7 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape, | |||
241 | window_short = kbd_short_128; | 240 | window_short = kbd_short_128; |
242 | } | 241 | } |
243 | 242 | ||
244 | if(window_shape_prev == 0){ | 243 | if (window_shape_prev == 0) { |
245 | window_long_prev = sine_long_1024; | 244 | window_long_prev = sine_long_1024; |
246 | window_short_prev = sine_short_128; | 245 | window_short_prev = sine_short_128; |
247 | } else { | 246 | } else { |
@@ -279,7 +278,7 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape, | |||
279 | 278 | ||
280 | case LONG_START_SEQUENCE: | 279 | case LONG_START_SEQUENCE: |
281 | /* perform iMDCT */ | 280 | /* perform iMDCT */ |
282 | ff_imdct_calc(11, transf_buf, freq_in); | 281 | ff_imdct_calc(11, transf_buf, freq_in); |
283 | 282 | ||
284 | /* add second half output of previous frame to windowed output of current frame */ | 283 | /* add second half output of previous frame to windowed output of current frame */ |
285 | vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap, nlong); | 284 | vector_fmul_add_add(time_out, transf_buf, window_long_prev, overlap, nlong); |
@@ -295,41 +294,72 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape, | |||
295 | break; | 294 | break; |
296 | 295 | ||
297 | case EIGHT_SHORT_SEQUENCE: | 296 | case EIGHT_SHORT_SEQUENCE: |
298 | /*this could be assemblerized too, but this case is extremely uncommon*/ | 297 | /* this could be assemblerized too, but this case is extremely uncommon */ |
299 | 298 | ||
300 | /* perform iMDCT for each short block */ | 299 | /* perform iMDCT for each short block */ |
301 | ff_imdct_calc(8, transf_buf+2*nshort*0, freq_in+0*nshort); | 300 | idx0 = 0; ff_imdct_calc(8, transf_buf , freq_in ); |
302 | ff_imdct_calc(8, transf_buf+2*nshort*1, freq_in+1*nshort); | 301 | idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0); |
303 | ff_imdct_calc(8, transf_buf+2*nshort*2, freq_in+2*nshort); | 302 | idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0); |
304 | ff_imdct_calc(8, transf_buf+2*nshort*3, freq_in+3*nshort); | 303 | idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0); |
305 | ff_imdct_calc(8, transf_buf+2*nshort*4, freq_in+4*nshort); | 304 | idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0); |
306 | ff_imdct_calc(8, transf_buf+2*nshort*5, freq_in+5*nshort); | 305 | idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0); |
307 | ff_imdct_calc(8, transf_buf+2*nshort*6, freq_in+6*nshort); | 306 | idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0); |
308 | ff_imdct_calc(8, transf_buf+2*nshort*7, freq_in+7*nshort); | 307 | idx0 += nshort; ff_imdct_calc(8, transf_buf + (idx0<<1), freq_in + idx0); |
309 | 308 | ||
310 | /* add second half output of previous frame to windowed output of current frame */ | 309 | /* Add second half output of previous frame to windowed output of current |
311 | for (i = 0; i < nflat_ls; i++) | 310 | * frame */ |
312 | time_out[i] = overlap[i]; | 311 | /* Step 1: copy */ |
313 | for(i = 0; i < nshort; i++) | 312 | memcpy(time_out, overlap, nflat_ls*sizeof(real_t)); |
314 | { | 313 | /* Step 2: First window half, first half of nshort */ |
315 | time_out[nflat_ls+ i] = overlap[nflat_ls+ i] + MUL_F(transf_buf[nshort*0+i],window_short_prev[i]); | 314 | for (i = 0; i < nshort/2; i++) { |
316 | time_out[nflat_ls+1*nshort+i] = overlap[nflat_ls+nshort*1+i] + MUL_F(transf_buf[nshort*1+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*2+i],window_short[i]); | 315 | win0 = window_short[nshort-1-i]; |
317 | time_out[nflat_ls+2*nshort+i] = overlap[nflat_ls+nshort*2+i] + MUL_F(transf_buf[nshort*3+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*4+i],window_short[i]); | 316 | win1 = window_short[i]; |
318 | time_out[nflat_ls+3*nshort+i] = overlap[nflat_ls+nshort*3+i] + MUL_F(transf_buf[nshort*5+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*6+i],window_short[i]); | 317 | win2 = window_short_prev[i]; |
319 | if (i < trans) | 318 | idx0 = nflat_ls + i; |
320 | time_out[nflat_ls+4*nshort+i] = overlap[nflat_ls+nshort*4+i] + MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]); | 319 | idx1 = i; |
320 | time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1], win2); idx0 += nshort; idx1 += (nshort<<1); | ||
321 | time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
322 | time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
323 | time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
324 | time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); | ||
325 | } | ||
326 | /* Step 3: First window half, second half of nshort */ | ||
327 | for (; i < nshort; i++) { | ||
328 | win0 = window_short[nshort-1-i]; | ||
329 | win1 = window_short[i]; | ||
330 | idx0 = nflat_ls + i; | ||
331 | idx1 = i; | ||
332 | time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
333 | time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
334 | time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
335 | time_out[idx0] = overlap[idx0] + MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); | ||
321 | } | 336 | } |
322 | 337 | ||
323 | /* window the second half and save as overlap for next frame */ | 338 | /* Window the second half and save as overlap for next frame */ |
324 | for(i = 0; i < nshort; i++) | 339 | /* Step 1: Second window half, first half of nshort */ |
325 | { | 340 | for (i = 0; i < nshort/2; i++) { |
326 | if (i >= trans) | 341 | win0 = window_short[nshort-1-i]; |
327 | overlap[nflat_ls+4*nshort+i-nlong] = MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]); | 342 | win1 = window_short[i]; |
328 | overlap[nflat_ls+5*nshort+i-nlong] = MUL_F(transf_buf[nshort*9+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*10+i],window_short[i]); | 343 | idx0 = nflat_ls + 5*nshort + i - nlong; |
329 | overlap[nflat_ls+6*nshort+i-nlong] = MUL_F(transf_buf[nshort*11+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*12+i],window_short[i]); | 344 | idx1 = nshort*10 + i; |
330 | overlap[nflat_ls+7*nshort+i-nlong] = MUL_F(transf_buf[nshort*13+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*14+i],window_short[i]); | 345 | overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); |
331 | overlap[nflat_ls+8*nshort+i-nlong] = MUL_F(transf_buf[nshort*15+i],window_short[nshort-1-i]); | 346 | overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); |
347 | overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
348 | overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0); | ||
349 | } | ||
350 | /* Step 2: Second window half, second half of nshort */ | ||
351 | for (; i < nshort; i++) { | ||
352 | win0 = window_short[nshort-1-i]; | ||
353 | win1 = window_short[i]; | ||
354 | idx0 = nflat_ls + 4*nshort + i - nlong; | ||
355 | idx1 = nshort*8 + i; | ||
356 | overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
357 | overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
358 | overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
359 | overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0) + MUL_F(transf_buf[idx1], win1); idx0 += nshort; idx1 += (nshort<<1); | ||
360 | overlap[idx0] = MUL_F(transf_buf[idx1-nshort], win0); | ||
332 | } | 361 | } |
362 | /* Step 3: Set to zero */ | ||
333 | memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t)); | 363 | memset(overlap+nflat_ls+nshort, 0, nflat_ls*sizeof(real_t)); |
334 | 364 | ||
335 | break; | 365 | break; |
@@ -344,8 +374,12 @@ void ifilter_bank(uint8_t window_sequence, uint8_t window_shape, | |||
344 | 374 | ||
345 | vector_fmul_add_add(time_out+nflat_ls, transf_buf+nflat_ls, window_short_prev, overlap+nflat_ls, nshort); | 375 | vector_fmul_add_add(time_out+nflat_ls, transf_buf+nflat_ls, window_short_prev, overlap+nflat_ls, nshort); |
346 | 376 | ||
347 | for (i = 0; i < nflat_ls; i++) | 377 | /* nflat_ls can be divided by 2. */ |
348 | time_out[nflat_ls+nshort+i] = overlap[nflat_ls+nshort+i] + transf_buf[nflat_ls+nshort+i]; | 378 | idx0 = nflat_ls + nshort; |
379 | for (i = 0; i < nflat_ls; i+=2) { | ||
380 | time_out[idx0] = overlap[idx0] + transf_buf[idx0]; idx0++; | ||
381 | time_out[idx0] = overlap[idx0] + transf_buf[idx0]; idx0++; | ||
382 | } | ||
349 | 383 | ||
350 | /* window the second half and save as overlap for next frame */ | 384 | /* window the second half and save as overlap for next frame */ |
351 | vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong); | 385 | vector_fmul_reverse(overlap, transf_buf+nlong, window_long, nlong); |