summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Giacomelli <giac2000@hotmail.com> 2010-11-20 19:06:05 +0000
committer: Michael Giacomelli <giac2000@hotmail.com> 2010-11-20 19:06:05 +0000
commit: f54cbfa1647a1bcacb7998e0e19b17f0d1bde84a (patch)
tree: d001d18e77d8ef865a1ab1060326b90152b5811a
parent: ee6b0dae96617a5cfed3d155db9e5dda0b42f2ab (diff)
download: rockbox-f54cbfa1647a1bcacb7998e0e19b17f0d1bde84a.tar.gz
download: rockbox-f54cbfa1647a1bcacb7998e0e19b17f0d1bde84a.zip
Commit first part of FS#11235 by Buschel and me. Improves scheduling on ARM9 for two filter macros in libmad that are almost never called. A larger speedup will be possible when the remaining ASM code is improved.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28624 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libmad/synth.c 88
1 files changed, 43 insertions, 45 deletions
diff --git a/apps/codecs/libmad/synth.c b/apps/codecs/libmad/synth.c
index ab2bcbe829..cbffee5e65 100644
--- a/apps/codecs/libmad/synth.c
+++ b/apps/codecs/libmad/synth.c
@@ -766,57 +766,55 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
766#elif defined(FPM_ARM) 766#elif defined(FPM_ARM)
767 767
768#define PROD_O(hi, lo, f, ptr) \ 768#define PROD_O(hi, lo, f, ptr) \
769 ({ \ 769 ({ \
770 mad_fixed_t *__p = (f); \ 770 mad_fixed_t *__p = (f); \
771 asm volatile ( \ 771 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
772 "ldmia %2!, {r0, r1, r2, r3} \n\t" \ 772 "ldr r4, [%3, #0]\n\t" \
773 "ldr r4, [%3, #0] \n\t" \ 773 "ldr r12, [%3, #56]\n\t" \
774 "smull %0, %1, r0, r4 \n\t" \ 774 "smull %0, %1, r0, r4\n\t" \
775 "ldr r4, [%3, #56] \n\t" \ 775 "ldr r4, [%3, #48]\n\t" \
776 "smlal %0, %1, r1, r4 \n\t" \ 776 "smlal %0, %1, r1, r12\n\t" \
777 "ldr r4, [%3, #48] \n\t" \ 777 "ldr r12, [%3, #40]\n\t" \
778 "smlal %0, %1, r2, r4 \n\t" \ 778 "smlal %0, %1, r2, r4\n\t" \
779 "ldr r4, [%3, #40] \n\t" \ 779 "smlal %0, %1, r3, r12\n\t" \
780 "smlal %0, %1, r3, r4 \n\t" \ 780 "ldmia %2, {r0, r1, r2, r3}\n\t" \
781 "ldmia %2, {r0, r1, r2, r3} \n\t" \ 781 "ldr r4, [%3, #32]\n\t" \
782 "ldr r4, [%3, #32] \n\t" \ 782 "ldr r12, [%3, #24]\n\t" \
783 "smlal %0, %1, r0, r4 \n\t" \ 783 "smlal %0, %1, r0, r4\n\t" \
784 "ldr r4, [%3, #24] \n\t" \ 784 "ldr r4, [%3, #16]\n\t" \
785 "smlal %0, %1, r1, r4 \n\t" \ 785 "smlal %0, %1, r1, r12\n\t" \
786 "ldr r4, [%3, #16] \n\t" \ 786 "ldr r12, [%3, #8]\n\t" \
787 "smlal %0, %1, r2, r4 \n\t" \ 787 "smlal %0, %1, r2, r4\n\t" \
788 "ldr r4, [%3, #8] \n\t" \ 788 "smlal %0, %1, r3, r12\n\t" \
789 "smlal %0, %1, r3, r4 \n\t" \
790 : "=&r" (lo), "=&r" (hi), "+r" (__p) \ 789 : "=&r" (lo), "=&r" (hi), "+r" (__p) \
791 : "r" (ptr) \ 790 : "r" (ptr) \
792 : "r0", "r1", "r2", "r3", "r4", "memory"); \ 791 : "r0", "r1", "r2", "r3", "r4", "r12"); \
793 }) 792 })
794 793
795#define PROD_A(hi, lo, f, ptr) \ 794#define PROD_A(hi, lo, f, ptr) \
796 ({ \ 795 ({ \
797 mad_fixed_t *__p = (f); \ 796 mad_fixed_t *__p = (f); \
798 asm volatile ( \ 797 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
799 "ldmia %2!, {r0, r1, r2, r3} \n\t" \ 798 "ldr r4, [%3, #0]\n\t" \
800 "ldr r4, [%3, #0] \n\t" \ 799 "ldr r12, [%3, #56]\n\t" \
801 "smlal %0, %1, r0, r4 \n\t" \ 800 "smlal %0, %1, r0, r4\n\t" \
802 "ldr r4, [%3, #56] \n\t" \ 801 "ldr r4, [%3, #48]\n\t" \
803 "smlal %0, %1, r1, r4 \n\t" \ 802 "smlal %0, %1, r1, r12\n\t" \
804 "ldr r4, [%3, #48] \n\t" \ 803 "ldr r12, [%3, #40]\n\t" \
805 "smlal %0, %1, r2, r4 \n\t" \ 804 "smlal %0, %1, r2, r4\n\t" \
806 "ldr r4, [%3, #40] \n\t" \ 805 "smlal %0, %1, r3, r12\n\t" \
807 "smlal %0, %1, r3, r4 \n\t" \ 806 "ldmia %2, {r0, r1, r2, r3}\n\t" \
808 "ldmia %2, {r0, r1, r2, r3} \n\t" \ 807 "ldr r4, [%3, #32]\n\t" \
809 "ldr r4, [%3, #32] \n\t" \ 808 "ldr r12, [%3, #24]\n\t" \
810 "smlal %0, %1, r0, r4 \n\t" \ 809 "smlal %0, %1, r0, r4\n\t" \
811 "ldr r4, [%3, #24] \n\t" \ 810 "ldr r4, [%3, #16]\n\t" \
812 "smlal %0, %1, r1, r4 \n\t" \ 811 "smlal %0, %1, r1, r12\n\t" \
813 "ldr r4, [%3, #16] \n\t" \ 812 "ldr r12, [%3, #8]\n\t" \
814 "smlal %0, %1, r2, r4 \n\t" \ 813 "smlal %0, %1, r2, r4\n\t" \
815 "ldr r4, [%3, #8] \n\t" \ 814 "smlal %0, %1, r3, r12\n\t" \
816 "smlal %0, %1, r3, r4 \n\t" \
817 : "+r" (lo), "+r" (hi), "+r" (__p) \ 815 : "+r" (lo), "+r" (hi), "+r" (__p) \
818 : "r" (ptr) \ 816 : "r" (ptr) \
819 : "r0", "r1", "r2", "r3", "r4", "memory"); \ 817 : "r0", "r1", "r2", "r3", "r4", "r12"); \
820 }) 818 })
821 819
822void synth_full_odd_sbsample (mad_fixed_t *pcm, 820void synth_full_odd_sbsample (mad_fixed_t *pcm,