diff options
author | Thom Johansen <thomj@rockbox.org> | 2006-02-20 20:40:29 +0000 |
---|---|---|
committer | Thom Johansen <thomj@rockbox.org> | 2006-02-20 20:40:29 +0000 |
commit | 831cfb8e44c7690d77ddae03e680cf01881bed5b (patch) | |
tree | cc667465ec7c1f257034258642b4139647690315 /apps | |
parent | ec93f4685cf55b9990e37fd84997be750f4eaef8 (diff) | |
download | rockbox-831cfb8e44c7690d77ddae03e680cf01881bed5b.tar.gz rockbox-831cfb8e44c7690d77ddae03e680cf01881bed5b.zip |
Add an ASM-optimised subband filtering loop for ARM targets. Restructure
the other loops a little bit.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8754 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/libmusepack/synth_filter.c | 71 |
1 file changed, 60 insertions, 11 deletions
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c index a2127f01c5..c03355b01b 100644 --- a/apps/codecs/libmusepack/synth_filter.c +++ b/apps/codecs/libmusepack/synth_filter.c | |||
@@ -342,36 +342,85 @@ static void Synthese_Filter_float_internal(MPC_SAMPLE_FORMAT * OutData,MPC_SAMPL | |||
342 | 342 | ||
343 | 343 | ||
344 | 344 | ||
345 | #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) | ||
345 | for ( k = 0; k < 32; k++, D += 16, V++ ) { | 346 | for ( k = 0; k < 32; k++, D += 16, V++ ) { |
346 | #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) | ||
347 | asm volatile ( | 347 | asm volatile ( |
348 | "movem.l (%[D]), %%d0-%%d3\n\t" | 348 | "movem.l (%[D]), %%d0-%%d3 \n\t" |
349 | "move.l (%[V]), %%a5\n\t" | 349 | "move.l (%[V]), %%a5 \n\t" |
350 | "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0\n\t" | 350 | "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t" |
351 | "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t" | 351 | "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t" |
352 | "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t" | 352 | "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t" |
353 | "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t" | 353 | "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t" |
354 | "movem.l (4*4, %[D]), %%d0-%%d3\n\t" | 354 | "movem.l (4*4, %[D]), %%d0-%%d3 \n\t" |
355 | "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t" | 355 | "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t" |
356 | "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t" | 356 | "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t" |
357 | "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t" | 357 | "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t" |
358 | "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t" | 358 | "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t" |
359 | "movem.l (8*4, %[D]), %%d0-%%d3\n\t" | 359 | "movem.l (8*4, %[D]), %%d0-%%d3 \n\t" |
360 | "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t" | 360 | "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t" |
361 | "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t" | 361 | "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t" |
362 | "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t" | 362 | "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t" |
363 | "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t" | 363 | "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t" |
364 | "movem.l (12*4, %[D]), %%d0-%%d3\n\t" | 364 | "movem.l (12*4, %[D]), %%d0-%%d3 \n\t" |
365 | "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t" | 365 | "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t" |
366 | "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t" | 366 | "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t" |
367 | "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t" | 367 | "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t" |
368 | "mac.l %%d3, %%a5, %%acc0\n\t" | 368 | "mac.l %%d3, %%a5, %%acc0 \n\t" |
369 | "movclr.l %%acc0, %%d0\n\t" | 369 | "movclr.l %%acc0, %%d0 \n\t" |
370 | "move.l %%d0, (%[Data])+\n" | 370 | "move.l %%d0, (%[Data])+ \n" |
371 | : [Data] "+a" (Data) | 371 | : [Data] "+a" (Data) |
372 | : [V] "a" (V), [D] "a" (D) | 372 | : [V] "a" (V), [D] "a" (D) |
373 | : "d0", "d1", "d2", "d3", "a5"); | 373 | : "d0", "d1", "d2", "d3", "a5"); |
374 | #else | 374 | #elif defined(CPU_ARM) && !defined(SIMULATOR) |
375 | for ( k = 0; k < 32; k++, V++ ) { | ||
376 | asm volatile ( | ||
377 | "ldmia %[D]!, { r0-r3 } \n\t" | ||
378 | "ldr r4, [%[V]] \n\t" | ||
379 | "smull r5, r6, r0, r4 \n\t" | ||
380 | "ldr r4, [%[V], #96*4] \n\t" | ||
381 | "smlal r5, r6, r1, r4 \n\t" | ||
382 | "ldr r4, [%[V], #128*4] \n\t" | ||
383 | "smlal r5, r6, r2, r4 \n\t" | ||
384 | "ldr r4, [%[V], #224*4] \n\t" | ||
385 | "smlal r5, r6, r3, r4 \n\t" | ||
386 | |||
387 | "ldmia %[D]!, { r0-r3 } \n\t" | ||
388 | "ldr r4, [%[V], #256*4] \n\t" | ||
389 | "smlal r5, r6, r0, r4 \n\t" | ||
390 | "ldr r4, [%[V], #352*4] \n\t" | ||
391 | "smlal r5, r6, r1, r4 \n\t" | ||
392 | "ldr r4, [%[V], #384*4] \n\t" | ||
393 | "smlal r5, r6, r2, r4 \n\t" | ||
394 | "ldr r4, [%[V], #480*4] \n\t" | ||
395 | "smlal r5, r6, r3, r4 \n\t" | ||
396 | |||
397 | "ldmia %[D]!, { r0-r3 } \n\t" | ||
398 | "ldr r4, [%[V], #512*4] \n\t" | ||
399 | "smlal r5, r6, r0, r4 \n\t" | ||
400 | "ldr r4, [%[V], #608*4] \n\t" | ||
401 | "smlal r5, r6, r1, r4 \n\t" | ||
402 | "ldr r4, [%[V], #640*4] \n\t" | ||
403 | "smlal r5, r6, r2, r4 \n\t" | ||
404 | "ldr r4, [%[V], #736*4] \n\t" | ||
405 | "smlal r5, r6, r3, r4 \n\t" | ||
406 | |||
407 | "ldmia %[D]!, { r0-r3 } \n\t" | ||
408 | "ldr r4, [%[V], #768*4] \n\t" | ||
409 | "smlal r5, r6, r0, r4 \n\t" | ||
410 | "ldr r4, [%[V], #864*4] \n\t" | ||
411 | "smlal r5, r6, r1, r4 \n\t" | ||
412 | "ldr r4, [%[V], #896*4] \n\t" | ||
413 | "smlal r5, r6, r2, r4 \n\t" | ||
414 | "ldr r4, [%[V], #992*4] \n\t" | ||
415 | "smlal r5, r6, r3, r4 \n\t" | ||
416 | "mov r4, r6, lsl #1 \n\t" | ||
417 | "orr r4, r4, r5, lsr #31\n\t" | ||
418 | "str r4, [%[Data]], #4 \n" | ||
419 | : [Data] "+r" (Data), [D] "+r" (D) | ||
420 | : [V] "r" (V) | ||
421 | : "r0", "r1", "r2", "r3", "r4", "r5", "r6"); | ||
422 | #else | ||
423 | for ( k = 0; k < 32; k++, D += 16, V++ ) { | ||
375 | *Data = MPC_SHL( | 424 | *Data = MPC_SHL( |
376 | MPC_MULTIPLY_FRACT(V[ 0],D[ 0]) + MPC_MULTIPLY_FRACT(V[ 96],D[ 1]) + MPC_MULTIPLY_FRACT(V[128],D[ 2]) + MPC_MULTIPLY_FRACT(V[224],D[ 3]) | 425 | MPC_MULTIPLY_FRACT(V[ 0],D[ 0]) + MPC_MULTIPLY_FRACT(V[ 96],D[ 1]) + MPC_MULTIPLY_FRACT(V[128],D[ 2]) + MPC_MULTIPLY_FRACT(V[224],D[ 3]) |
377 | + MPC_MULTIPLY_FRACT(V[256],D[ 4]) + MPC_MULTIPLY_FRACT(V[352],D[ 5]) + MPC_MULTIPLY_FRACT(V[384],D[ 6]) + MPC_MULTIPLY_FRACT(V[480],D[ 7]) | 426 | + MPC_MULTIPLY_FRACT(V[256],D[ 4]) + MPC_MULTIPLY_FRACT(V[352],D[ 5]) + MPC_MULTIPLY_FRACT(V[384],D[ 6]) + MPC_MULTIPLY_FRACT(V[480],D[ 7]) |