diff options
author | Michael Giacomelli <giac2000@hotmail.com> | 2010-11-20 19:06:05 +0000 |
---|---|---|
committer | Michael Giacomelli <giac2000@hotmail.com> | 2010-11-20 19:06:05 +0000 |
commit | f54cbfa1647a1bcacb7998e0e19b17f0d1bde84a (patch) | |
tree | d001d18e77d8ef865a1ab1060326b90152b5811a | |
parent | ee6b0dae96617a5cfed3d155db9e5dda0b42f2ab (diff) | |
download | rockbox-f54cbfa1647a1bcacb7998e0e19b17f0d1bde84a.tar.gz rockbox-f54cbfa1647a1bcacb7998e0e19b17f0d1bde84a.zip |
Commit first part of FS#11235 by Buschel and me. Improves scheduling on arm9 for two filter macros in libmad that are almost never called. A larger speedup will be possible when the remaining ASM code is improved.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28624 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- | apps/codecs/libmad/synth.c | 88 |
1 files changed, 43 insertions, 45 deletions
diff --git a/apps/codecs/libmad/synth.c b/apps/codecs/libmad/synth.c index ab2bcbe829..cbffee5e65 100644 --- a/apps/codecs/libmad/synth.c +++ b/apps/codecs/libmad/synth.c | |||
@@ -766,57 +766,55 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, | |||
766 | #elif defined(FPM_ARM) | 766 | #elif defined(FPM_ARM) |
767 | 767 | ||
768 | #define PROD_O(hi, lo, f, ptr) \ | 768 | #define PROD_O(hi, lo, f, ptr) \ |
769 | ({ \ | 769 | ({ \ |
770 | mad_fixed_t *__p = (f); \ | 770 | mad_fixed_t *__p = (f); \ |
771 | asm volatile ( \ | 771 | asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ |
772 | "ldmia %2!, {r0, r1, r2, r3} \n\t" \ | 772 | "ldr r4, [%3, #0]\n\t" \ |
773 | "ldr r4, [%3, #0] \n\t" \ | 773 | "ldr r12, [%3, #56]\n\t" \ |
774 | "smull %0, %1, r0, r4 \n\t" \ | 774 | "smull %0, %1, r0, r4\n\t" \ |
775 | "ldr r4, [%3, #56] \n\t" \ | 775 | "ldr r4, [%3, #48]\n\t" \ |
776 | "smlal %0, %1, r1, r4 \n\t" \ | 776 | "smlal %0, %1, r1, r12\n\t" \ |
777 | "ldr r4, [%3, #48] \n\t" \ | 777 | "ldr r12, [%3, #40]\n\t" \ |
778 | "smlal %0, %1, r2, r4 \n\t" \ | 778 | "smlal %0, %1, r2, r4\n\t" \ |
779 | "ldr r4, [%3, #40] \n\t" \ | 779 | "smlal %0, %1, r3, r12\n\t" \ |
780 | "smlal %0, %1, r3, r4 \n\t" \ | 780 | "ldmia %2, {r0, r1, r2, r3}\n\t" \ |
781 | "ldmia %2, {r0, r1, r2, r3} \n\t" \ | 781 | "ldr r4, [%3, #32]\n\t" \ |
782 | "ldr r4, [%3, #32] \n\t" \ | 782 | "ldr r12, [%3, #24]\n\t" \ |
783 | "smlal %0, %1, r0, r4 \n\t" \ | 783 | "smlal %0, %1, r0, r4\n\t" \ |
784 | "ldr r4, [%3, #24] \n\t" \ | 784 | "ldr r4, [%3, #16]\n\t" \ |
785 | "smlal %0, %1, r1, r4 \n\t" \ | 785 | "smlal %0, %1, r1, r12\n\t" \ |
786 | "ldr r4, [%3, #16] \n\t" \ | 786 | "ldr r12, [%3, #8]\n\t" \ |
787 | "smlal %0, %1, r2, r4 \n\t" \ | 787 | "smlal %0, %1, r2, r4\n\t" \ |
788 | "ldr r4, [%3, #8] \n\t" \ | 788 | "smlal %0, %1, r3, r12\n\t" \ |
789 | "smlal %0, %1, r3, r4 \n\t" \ | ||
790 | : "=&r" (lo), "=&r" (hi), "+r" (__p) \ | 789 | : "=&r" (lo), "=&r" (hi), "+r" (__p) \ |
791 | : "r" (ptr) \ | 790 | : "r" (ptr) \ |
792 | : "r0", "r1", "r2", "r3", "r4", "memory"); \ | 791 | : "r0", "r1", "r2", "r3", "r4", "r12"); \ |
793 | }) | 792 | }) |
794 | 793 | ||
795 | #define PROD_A(hi, lo, f, ptr) \ | 794 | #define PROD_A(hi, lo, f, ptr) \ |
796 | ({ \ | 795 | ({ \ |
797 | mad_fixed_t *__p = (f); \ | 796 | mad_fixed_t *__p = (f); \ |
798 | asm volatile ( \ | 797 | asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ |
799 | "ldmia %2!, {r0, r1, r2, r3} \n\t" \ | 798 | "ldr r4, [%3, #0]\n\t" \ |
800 | "ldr r4, [%3, #0] \n\t" \ | 799 | "ldr r12, [%3, #56]\n\t" \ |
801 | "smlal %0, %1, r0, r4 \n\t" \ | 800 | "smlal %0, %1, r0, r4\n\t" \ |
802 | "ldr r4, [%3, #56] \n\t" \ | 801 | "ldr r4, [%3, #48]\n\t" \ |
803 | "smlal %0, %1, r1, r4 \n\t" \ | 802 | "smlal %0, %1, r1, r12\n\t" \ |
804 | "ldr r4, [%3, #48] \n\t" \ | 803 | "ldr r12, [%3, #40]\n\t" \ |
805 | "smlal %0, %1, r2, r4 \n\t" \ | 804 | "smlal %0, %1, r2, r4\n\t" \ |
806 | "ldr r4, [%3, #40] \n\t" \ | 805 | "smlal %0, %1, r3, r12\n\t" \ |
807 | "smlal %0, %1, r3, r4 \n\t" \ | 806 | "ldmia %2, {r0, r1, r2, r3}\n\t" \ |
808 | "ldmia %2, {r0, r1, r2, r3} \n\t" \ | 807 | "ldr r4, [%3, #32]\n\t" \ |
809 | "ldr r4, [%3, #32] \n\t" \ | 808 | "ldr r12, [%3, #24]\n\t" \ |
810 | "smlal %0, %1, r0, r4 \n\t" \ | 809 | "smlal %0, %1, r0, r4\n\t" \ |
811 | "ldr r4, [%3, #24] \n\t" \ | 810 | "ldr r4, [%3, #16]\n\t" \ |
812 | "smlal %0, %1, r1, r4 \n\t" \ | 811 | "smlal %0, %1, r1, r12\n\t" \ |
813 | "ldr r4, [%3, #16] \n\t" \ | 812 | "ldr r12, [%3, #8]\n\t" \ |
814 | "smlal %0, %1, r2, r4 \n\t" \ | 813 | "smlal %0, %1, r2, r4\n\t" \ |
815 | "ldr r4, [%3, #8] \n\t" \ | 814 | "smlal %0, %1, r3, r12\n\t" \ |
816 | "smlal %0, %1, r3, r4 \n\t" \ | ||
817 | : "+r" (lo), "+r" (hi), "+r" (__p) \ | 815 | : "+r" (lo), "+r" (hi), "+r" (__p) \ |
818 | : "r" (ptr) \ | 816 | : "r" (ptr) \ |
819 | : "r0", "r1", "r2", "r3", "r4", "memory"); \ | 817 | : "r0", "r1", "r2", "r3", "r4", "r12"); \ |
820 | }) | 818 | }) |
821 | 819 | ||
822 | void synth_full_odd_sbsample (mad_fixed_t *pcm, | 820 | void synth_full_odd_sbsample (mad_fixed_t *pcm, |