summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thom Johansen <thomj@rockbox.org> 2006-02-20 20:40:29 +0000
committer: Thom Johansen <thomj@rockbox.org> 2006-02-20 20:40:29 +0000
commit: 831cfb8e44c7690d77ddae03e680cf01881bed5b (patch)
tree: cc667465ec7c1f257034258642b4139647690315
parent: ec93f4685cf55b9990e37fd84997be750f4eaef8 (diff)
download: rockbox-831cfb8e44c7690d77ddae03e680cf01881bed5b.tar.gz
download: rockbox-831cfb8e44c7690d77ddae03e680cf01881bed5b.zip
Add ASM optimised subband filtering loop for ARM targets. Restructure
other loops a little bit.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8754 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libmusepack/synth_filter.c 71
1 file changed, 60 insertions, 11 deletions
diff --git a/apps/codecs/libmusepack/synth_filter.c b/apps/codecs/libmusepack/synth_filter.c
index a2127f01c5..c03355b01b 100644
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@@ -342,36 +342,85 @@ static void Synthese_Filter_float_internal(MPC_SAMPLE_FORMAT * OutData,MPC_SAMPL
342 342
343 343
344 344
345 #if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
345 for ( k = 0; k < 32; k++, D += 16, V++ ) { 346 for ( k = 0; k < 32; k++, D += 16, V++ ) {
346 #if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
347 asm volatile ( 347 asm volatile (
348 "movem.l (%[D]), %%d0-%%d3\n\t" 348 "movem.l (%[D]), %%d0-%%d3 \n\t"
349 "move.l (%[V]), %%a5\n\t" 349 "move.l (%[V]), %%a5 \n\t"
350 "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0\n\t" 350 "mac.l %%d0, %%a5, (96*4, %[V]), %%a5, %%acc0 \n\t"
351 "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t" 351 "mac.l %%d1, %%a5, (128*4, %[V]), %%a5, %%acc0\n\t"
352 "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t" 352 "mac.l %%d2, %%a5, (224*4, %[V]), %%a5, %%acc0\n\t"
353 "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t" 353 "mac.l %%d3, %%a5, (256*4, %[V]), %%a5, %%acc0\n\t"
354 "movem.l (4*4, %[D]), %%d0-%%d3\n\t" 354 "movem.l (4*4, %[D]), %%d0-%%d3 \n\t"
355 "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t" 355 "mac.l %%d0, %%a5, (352*4, %[V]), %%a5, %%acc0\n\t"
356 "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t" 356 "mac.l %%d1, %%a5, (384*4, %[V]), %%a5, %%acc0\n\t"
357 "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t" 357 "mac.l %%d2, %%a5, (480*4, %[V]), %%a5, %%acc0\n\t"
358 "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t" 358 "mac.l %%d3, %%a5, (512*4, %[V]), %%a5, %%acc0\n\t"
359 "movem.l (8*4, %[D]), %%d0-%%d3\n\t" 359 "movem.l (8*4, %[D]), %%d0-%%d3 \n\t"
360 "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t" 360 "mac.l %%d0, %%a5, (608*4, %[V]), %%a5, %%acc0\n\t"
361 "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t" 361 "mac.l %%d1, %%a5, (640*4, %[V]), %%a5, %%acc0\n\t"
362 "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t" 362 "mac.l %%d2, %%a5, (736*4, %[V]), %%a5, %%acc0\n\t"
363 "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t" 363 "mac.l %%d3, %%a5, (768*4, %[V]), %%a5, %%acc0\n\t"
364 "movem.l (12*4, %[D]), %%d0-%%d3\n\t" 364 "movem.l (12*4, %[D]), %%d0-%%d3 \n\t"
365 "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t" 365 "mac.l %%d0, %%a5, (864*4, %[V]), %%a5, %%acc0\n\t"
366 "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t" 366 "mac.l %%d1, %%a5, (896*4, %[V]), %%a5, %%acc0\n\t"
367 "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t" 367 "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
368 "mac.l %%d3, %%a5, %%acc0\n\t" 368 "mac.l %%d3, %%a5, %%acc0 \n\t"
369 "movclr.l %%acc0, %%d0\n\t" 369 "movclr.l %%acc0, %%d0 \n\t"
370 "move.l %%d0, (%[Data])+\n" 370 "move.l %%d0, (%[Data])+ \n"
371 : [Data] "+a" (Data) 371 : [Data] "+a" (Data)
372 : [V] "a" (V), [D] "a" (D) 372 : [V] "a" (V), [D] "a" (D)
373 : "d0", "d1", "d2", "d3", "a5"); 373 : "d0", "d1", "d2", "d3", "a5");
374 #else 374 #elif defined(CPU_ARM) && !defined(SIMULATOR)
375 for ( k = 0; k < 32; k++, V++ ) {
376 asm volatile (
377 "ldmia %[D]!, { r0-r3 } \n\t"
378 "ldr r4, [%[V]] \n\t"
379 "smull r5, r6, r0, r4 \n\t"
380 "ldr r4, [%[V], #96*4] \n\t"
381 "smlal r5, r6, r1, r4 \n\t"
382 "ldr r4, [%[V], #128*4] \n\t"
383 "smlal r5, r6, r2, r4 \n\t"
384 "ldr r4, [%[V], #224*4] \n\t"
385 "smlal r5, r6, r3, r4 \n\t"
386
387 "ldmia %[D]!, { r0-r3 } \n\t"
388 "ldr r4, [%[V], #256*4] \n\t"
389 "smlal r5, r6, r0, r4 \n\t"
390 "ldr r4, [%[V], #352*4] \n\t"
391 "smlal r5, r6, r1, r4 \n\t"
392 "ldr r4, [%[V], #384*4] \n\t"
393 "smlal r5, r6, r2, r4 \n\t"
394 "ldr r4, [%[V], #480*4] \n\t"
395 "smlal r5, r6, r3, r4 \n\t"
396
397 "ldmia %[D]!, { r0-r3 } \n\t"
398 "ldr r4, [%[V], #512*4] \n\t"
399 "smlal r5, r6, r0, r4 \n\t"
400 "ldr r4, [%[V], #608*4] \n\t"
401 "smlal r5, r6, r1, r4 \n\t"
402 "ldr r4, [%[V], #640*4] \n\t"
403 "smlal r5, r6, r2, r4 \n\t"
404 "ldr r4, [%[V], #736*4] \n\t"
405 "smlal r5, r6, r3, r4 \n\t"
406
407 "ldmia %[D]!, { r0-r3 } \n\t"
408 "ldr r4, [%[V], #768*4] \n\t"
409 "smlal r5, r6, r0, r4 \n\t"
410 "ldr r4, [%[V], #864*4] \n\t"
411 "smlal r5, r6, r1, r4 \n\t"
412 "ldr r4, [%[V], #896*4] \n\t"
413 "smlal r5, r6, r2, r4 \n\t"
414 "ldr r4, [%[V], #992*4] \n\t"
415 "smlal r5, r6, r3, r4 \n\t"
416 "mov r4, r6, lsl #1 \n\t"
417 "orr r4, r4, r5, lsr #31\n\t"
418 "str r4, [%[Data]], #4 \n"
419 : [Data] "+r" (Data), [D] "+r" (D)
420 : [V] "r" (V)
421 : "r0", "r1", "r2", "r3", "r4", "r5", "r6");
422 #else
423 for ( k = 0; k < 32; k++, D += 16, V++ ) {
375 *Data = MPC_SHL( 424 *Data = MPC_SHL(
376 MPC_MULTIPLY_FRACT(V[ 0],D[ 0]) + MPC_MULTIPLY_FRACT(V[ 96],D[ 1]) + MPC_MULTIPLY_FRACT(V[128],D[ 2]) + MPC_MULTIPLY_FRACT(V[224],D[ 3]) 425 MPC_MULTIPLY_FRACT(V[ 0],D[ 0]) + MPC_MULTIPLY_FRACT(V[ 96],D[ 1]) + MPC_MULTIPLY_FRACT(V[128],D[ 2]) + MPC_MULTIPLY_FRACT(V[224],D[ 3])
377 + MPC_MULTIPLY_FRACT(V[256],D[ 4]) + MPC_MULTIPLY_FRACT(V[352],D[ 5]) + MPC_MULTIPLY_FRACT(V[384],D[ 6]) + MPC_MULTIPLY_FRACT(V[480],D[ 7]) 426 + MPC_MULTIPLY_FRACT(V[256],D[ 4]) + MPC_MULTIPLY_FRACT(V[352],D[ 5]) + MPC_MULTIPLY_FRACT(V[384],D[ 6]) + MPC_MULTIPLY_FRACT(V[480],D[ 7])