diff options
Diffstat (limited to 'apps/codecs/libmusepack/synth_filter_arm.S')
-rw-r--r-- | apps/codecs/libmusepack/synth_filter_arm.S | 196 |
1 files changed, 195 insertions, 1 deletions
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S index b44e029a43..598f218e45 100644 --- a/apps/codecs/libmusepack/synth_filter_arm.S +++ b/apps/codecs/libmusepack/synth_filter_arm.S | |||
@@ -296,7 +296,7 @@ mpc_decoder_windowing_D: | |||
296 | add r1, r1, #4 /* V++ */ | 296 | add r1, r1, #4 /* V++ */ |
297 | 297 | ||
298 | ldmpc regs=r4-r11 | 298 | ldmpc regs=r4-r11 |
299 | #else /* arm9 and above */ | 299 | #elif ARM_ARCH < 6 /* arm9 and above, below armv6 */ |
300 | mpc_decoder_windowing_D: | 300 | mpc_decoder_windowing_D: |
301 | /* r0 = Data[] */ | 301 | /* r0 = Data[] */ |
302 | /* r1 = V[] */ | 302 | /* r1 = V[] */ |
@@ -501,6 +501,200 @@ mpc_decoder_windowing_D: | |||
501 | add r1, r1, #4 /* V++ */ | 501 | add r1, r1, #4 /* V++ */ |
502 | 502 | ||
503 | ldmpc regs=r4-r11 | 503 | ldmpc regs=r4-r11 |
504 | #else | ||
505 | mpc_decoder_windowing_D: | ||
506 | /* r0 = Data[] (results are stored here, one word per row) */ | ||
507 | /* r1 = V[] (only read; advanced by one word per row) */ | ||
508 | /* r2 = D[] (window coefficients, read with post-incrementing ldmia) */ | ||
509 | /* lr = counter for rows 01..15; doubles as the byte offset from Data[n] to Data[32-n] */ | ||
510 | /************************************************************************ | ||
511 | * Further speed up through making use of symmetries within D[]-window. | ||
512 | * The row V[00] can be extracted as it has symmetries within this single | ||
513 | * row. 8 smull/mlal and 8 ldr's can be saved at the cost of 2 add's. | ||
514 | * The rows V[01..15] are symmetric to V[31..17]. 15 x 16 ldr's can be | ||
515 | * saved at the cost of 15 x 4 + 1 add's. | ||
516 | * The row V[16] can be extracted as it has symmetries within this single | ||
517 | * row. 8 smull/mlal and 8 ldr's can be saved. | ||
518 | * On armv6 use smmulr/smmlar which are faster than smull/smlal and only | ||
519 | * accumulate the top 32 bits of the result, which frees up 2 | ||
520 | * registers so we can ldm larger blocks. | ||
521 | ***********************************************************************/ | ||
522 | stmfd sp!, {r4-r11, lr} /* preserve r4-r11 and the return address */ | ||
523 | |||
524 | /****************************************** | ||
525 | * row 0 with internal symmetry | ||
526 | *****************************************/ | ||
527 | add r2, r2, #4 /* D+=1, r2 = D[01] as D[00] = 0 */ | ||
528 | ldmia r2!, { r3-r6 } /* load D[01..04] */ | ||
529 | ldr r7 , [r1, #96*4] /* 1 */ | ||
530 | ldr r10, [r1, #992*4] /* 15 */ | ||
531 | ldr r11, [r1, #128*4] /* 2 */ | ||
532 | rsb r10, r10, r7 /* V[01] - V[15] */ | ||
533 | ldr r12, [r1, #896*4] /* 14 */ | ||
534 | smmulr r9, r10, r3 | ||
535 | ldr r7 , [r1, #224*4] /* 3 */ | ||
536 | add r12, r12, r11 /* V[02] + V[14] */ | ||
537 | ldr r10, [r1, #864*4] /* 13 */ | ||
538 | smmlar r9, r12, r4, r9 | ||
539 | ldr r11, [r1, #256*4] /* 4 */ | ||
540 | rsb r10, r10, r7 /* V[03] - V[13] */ | ||
541 | ldr r12, [r1, #768*4] /* 12 */ | ||
542 | smmlar r9, r10, r5, r9 | ||
543 | ldr r7 , [r1, #352*4] /* 5 */ | ||
544 | add r12, r12, r11 /* V[04] + V[12] */ | ||
545 | ldr r10, [r1, #736*4] /* 11 */ | ||
546 | smmlar r9, r12, r6, r9 | ||
547 | ldmia r2!, { r3-r6 } /* load D[05..08] */ | ||
548 | ldr r11, [r1, #384*4] /* 6 */ | ||
549 | rsb r10, r10, r7 /* V[05] - V[11] */ | ||
550 | ldr r12, [r1, #640*4] /* 10 */ | ||
551 | smmlar r9, r10, r3, r9 | ||
552 | ldr r7 , [r1, #480*4] /* 7 */ | ||
553 | add r12, r12, r11 /* V[06] + V[10] */ | ||
554 | ldr r10, [r1, #608*4] /* 9 */ | ||
555 | smmlar r9, r12, r4, r9 | ||
556 | rsb r10, r10, r7 /* V[07] - V[09] */ | ||
557 | ldr r11, [r1, #512*4] /* 8 */ | ||
558 | smmlar r9, r10, r5, r9 | ||
559 | add r1, r1, #4 /* V+=1, r1 = V[01] */ | ||
560 | smmlar r9, r11, r6, r9 | ||
561 | add r2, r2, #7*4 /* D+=7, r2 = D[16] */ | ||
562 | mov r9, r9, lsl #2 | ||
563 | str r9, [r0], #4 /* store Data */ | ||
564 | |||
565 | /****************************************** | ||
566 | * rows 01..15 are symmetric to rows 31..17 | ||
567 | * r9 = acc of 01..15 | ||
568 | * r1 = V[01..15] | ||
569 | * r11 = acc of 31..17 | ||
570 | * r12 = V[31..17] | ||
571 | *****************************************/ | ||
572 | mov lr, #15*8 /* 15 iterations, lr -= 8 each; [r0, lr] addresses the mirrored Data[] slot */ | ||
573 | add r12, r1, #30*4 /* r12 = V[31] */ | ||
574 | .loop15: | ||
575 | ldmia r2!, { r3-r6 } /* load D[00..03] */ | ||
576 | ldr r7, [r12, #896*4] /* 14 */ | ||
577 | ldr r8, [r12, #992*4] /* 15 */ | ||
578 | smmulr r11, r7, r4 | ||
579 | ldr r7, [r1] /* 0 */ | ||
580 | smmlar r11, r8, r3, r11 | ||
581 | ldr r8, [r1, #96*4] /* 1 */ | ||
582 | smmulr r9, r7, r3 | ||
583 | ldr r7, [r12, #768*4] /* 12 */ | ||
584 | smmlar r9, r8, r4, r9 | ||
585 | ldr r8, [r12, #864*4] /* 13 */ | ||
586 | smmlar r11, r7, r6, r11 | ||
587 | ldr r7, [r1, #128*4] /* 2 */ | ||
588 | smmlar r11, r8, r5, r11 | ||
589 | ldr r8, [r1, #224*4] /* 3 */ | ||
590 | smmlar r9, r7, r5, r9 | ||
591 | ldr r7, [r1, #256*4] /* 4 */ | ||
592 | smmlar r9, r8, r6, r9 | ||
593 | ldmia r2!, { r3-r6 } /* load D[04..07] */ | ||
594 | ldr r8, [r1, #352*4] /* 5 */ | ||
595 | smmlar r9, r7, r3, r9 | ||
596 | ldr r7, [r12, #640*4] /* 10 */ | ||
597 | smmlar r9, r8, r4, r9 | ||
598 | ldr r8, [r12, #736*4] /* 11 */ | ||
599 | smmlar r11, r7, r4, r11 | ||
600 | ldr r7, [r1, #384*4] /* 6 */ | ||
601 | smmlar r11, r8, r3, r11 | ||
602 | ldr r8, [r1, #480*4] /* 7 */ | ||
603 | smmlar r9, r7, r5, r9 | ||
604 | ldr r7, [r12, #512*4] /* 8 */ | ||
605 | smmlar r9, r8, r6, r9 | ||
606 | ldr r8, [r12, #608*4] /* 9 */ | ||
607 | smmlar r11, r7, r6, r11 | ||
608 | ldr r7, [r12, #384*4] /* 6 */ | ||
609 | smmlar r11, r8, r5, r11 | ||
610 | ldmia r2!, { r3-r6 } /* load D[08..11] */ | ||
611 | ldr r8, [r12, #480*4] /* 7 */ | ||
612 | smmlar r11, r7, r4, r11 | ||
613 | ldr r7, [r1, #512*4] /* 8 */ | ||
614 | smmlar r11, r8, r3, r11 | ||
615 | ldr r8, [r1, #608*4] /* 9 */ | ||
616 | smmlar r9, r7, r3, r9 | ||
617 | ldr r7, [r1, #640*4] /* 10 */ | ||
618 | smmlar r9, r8, r4, r9 | ||
619 | ldr r8, [r1, #736*4] /* 11 */ | ||
620 | smmlar r9, r7, r5, r9 | ||
621 | ldr r7, [r12, #256*4] /* 4 */ | ||
622 | smmlar r9, r8, r6, r9 | ||
623 | ldr r8, [r12, #352*4] /* 5 */ | ||
624 | smmlar r11, r7, r6, r11 | ||
625 | ldr r7, [r1, #768*4] /* 12 */ | ||
626 | smmlar r11, r8, r5, r11 | ||
627 | ldmia r2!, { r3-r6 } /* load D[12..15] */ | ||
628 | ldr r8, [r1, #864*4] /* 13 */ | ||
629 | smmlar r9, r7, r3, r9 | ||
630 | ldr r7, [r12, #128*4] /* 2 */ | ||
631 | smmlar r9, r8, r4, r9 | ||
632 | ldr r8, [r12, #224*4] /* 3 */ | ||
633 | smmlar r11, r7, r4, r11 | ||
634 | ldr r7, [r12] /* 0 */ | ||
635 | smmlar r11, r8, r3, r11 | ||
636 | ldr r8, [r12, #96*4] /* 1 */ | ||
637 | smmlar r11, r7, r6, r11 | ||
638 | ldr r7, [r1, #896*4] /* 14 */ | ||
639 | smmlar r11, r8, r5, r11 | ||
640 | ldr r8, [r1, #992*4] /* 15 */ | ||
641 | smmlar r9, r7, r5, r9 | ||
642 | sub r12, r12, #4 /* r12 = V-- correct addresses for next loop */ | ||
643 | smmlar r9, r8, r6, r9 | ||
644 | add r1, r1, #4 /* r1 = V++ correct addresses for next loop */ | ||
645 | rsb r11, r11, #0 /* r11 = -r11, the sum for rows 31..17 is stored negated */ | ||
646 | /* store Data[01..15] */ | ||
647 | mov r9, r9, lsl #2 | ||
648 | str r9, [r0] /* store Data */ | ||
649 | /* store Data[31..17] */ | ||
650 | mov r11, r11, lsl #2 | ||
651 | str r11, [r0, lr] /* store Data */ | ||
652 | add r0, r0, #4 /* r0++ */ | ||
653 | /* next loop */ | ||
654 | subs lr, lr, #8 | ||
655 | bgt .loop15 | ||
656 | |||
657 | /****************************************** | ||
658 | * V[16] with internal symmetry | ||
659 | *****************************************/ | ||
660 | ldmia r2!, { r3-r6 } /* load D[00..03] */ | ||
661 | ldr r7 , [r1] /* 0 */ | ||
662 | ldr r10, [r1, #992*4] /* 15 */ | ||
663 | ldr r11, [r1, #96*4] /* 1 */ | ||
664 | rsb r10, r10, r7 /* V[00] - V[15] */ | ||
665 | ldr r12, [r1, #896*4] /* 14 */ | ||
666 | smmulr r9, r10, r3 | ||
667 | ldr r7 , [r1, #128*4] /* 2 */ | ||
668 | rsb r12, r12, r11 /* V[01] - V[14] */ | ||
669 | ldr r10, [r1, #864*4] /* 13 */ | ||
670 | smmlar r9, r12, r4, r9 | ||
671 | ldr r11, [r1, #224*4] /* 3 */ | ||
672 | rsb r10, r10, r7 /* V[02] - V[13] */ | ||
673 | ldr r12, [r1, #768*4] /* 12 */ | ||
674 | smmlar r9, r10, r5, r9 | ||
675 | ldr r7 , [r1, #256*4] /* 4 */ | ||
676 | rsb r12, r12, r11 /* V[03] - V[12] */ | ||
677 | ldr r10, [r1, #736*4] /* 11 */ | ||
678 | smmlar r9, r12, r6, r9 | ||
679 | ldmia r2!, { r3-r6 } /* load D[04..07] */ | ||
680 | ldr r11, [r1, #352*4] /* 5 */ | ||
681 | rsb r10, r10, r7 /* V[04] - V[11] */ | ||
682 | ldr r12, [r1, #640*4] /* 10 */ | ||
683 | smmlar r9, r10, r3, r9 | ||
684 | ldr r7 , [r1, #384*4] /* 6 */ | ||
685 | rsb r12, r12, r11 /* V[05] - V[10] */ | ||
686 | ldr r10, [r1, #608*4] /* 9 */ | ||
687 | smmlar r9, r12, r4, r9 | ||
688 | ldr r11, [r1, #480*4] /* 7 */ | ||
689 | rsb r10, r10, r7 /* V[06] - V[09] */ | ||
690 | ldr r12, [r1, #512*4] /* 8 */ | ||
691 | smmlar r9, r10, r5, r9 | ||
692 | rsb r12, r12, r11 /* V[07] - V[08] */ | ||
693 | smmlar r9, r12, r6, r9 | ||
694 | mov r9, r9, lsl #2 | ||
695 | str r9, [r0], #4 /* store Data */ | ||
696 | |||
697 | ldmpc regs=r4-r11 /* NOTE(review): ldmpc is a macro defined outside this view; presumably restores r4-r11 and returns - confirm */ | ||
504 | #endif | 698 | #endif |
505 | .mpc_dewindowing_end: | 699 | .mpc_dewindowing_end: |
506 | .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D | 700 | .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D |