summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2010-05-02 15:45:43 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2010-05-02 15:45:43 +0000
commiteec5eb9ecca46a4d202f7b41b3d0aa4a8626a892 (patch)
tree05a747e702ab0a0266c52094a325bf8d352b9f56
parent0fd111d4310c767828dd83d9cc23f108fe584750 (diff)
downloadrockbox-eec5eb9ecca46a4d202f7b41b3d0aa4a8626a892.tar.gz
rockbox-eec5eb9ecca46a4d202f7b41b3d0aa4a8626a892.zip
Refactor the ARM version of libmad's synthesis filter. Only two asm macros are left, and the asm implementation has been renamed for better clarity. No change in speed or precision.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25777 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libmad/synth.c264
-rw-r--r--apps/codecs/libmad/synth_full_arm.S8
2 files changed, 30 insertions, 242 deletions
diff --git a/apps/codecs/libmad/synth.c b/apps/codecs/libmad/synth.c
index 9d1bdb7c91..15c1f9decc 100644
--- a/apps/codecs/libmad/synth.c
+++ b/apps/codecs/libmad/synth.c
@@ -828,60 +828,8 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
828 828
829#elif defined(FPM_ARM) 829#elif defined(FPM_ARM)
830 830
831#define PROD_ODD_0(hi, lo, f, ptr) \ 831#define PROD_O(hi, lo, f, ptr) \
832 do { \ 832 ({ \
833 mad_fixed_t *__p = (f); \
834 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
835 "ldr r4, [%3, #4]\n\t" \
836 "smull %0, %1, r0, r4\n\t" \
837 "ldr r4, [%3, #60]\n\t" \
838 "smlal %0, %1, r1, r4\n\t" \
839 "ldr r4, [%3, #52]\n\t" \
840 "smlal %0, %1, r2, r4\n\t" \
841 "ldr r4, [%3, #44]\n\t" \
842 "smlal %0, %1, r3, r4\n\t" \
843 "ldmia %2, {r0, r1, r2, r3}\n\t" \
844 "ldr r4, [%3, #36]\n\t" \
845 "smlal %0, %1, r0, r4\n\t" \
846 "ldr r4, [%3, #28]\n\t" \
847 "smlal %0, %1, r1, r4\n\t" \
848 "ldr r4, [%3, #20]\n\t" \
849 "smlal %0, %1, r2, r4\n\t" \
850 "ldr r4, [%3, #12]\n\t" \
851 "smlal %0, %1, r3, r4\n\t" \
852 : "=&r" (lo), "=&r" (hi), "+r" (__p) \
853 : "r" (ptr) \
854 : "r0", "r1", "r2", "r3", "r4"); \
855 } while (0)
856
857#define PROD_ODD_A(hi, lo, f, ptr) \
858 do { \
859 mad_fixed_t *__p = (f); \
860 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
861 "ldr r4, [%3, #4]\n\t" \
862 "smlal %0, %1, r0, r4\n\t" \
863 "ldr r4, [%3, #60]\n\t" \
864 "smlal %0, %1, r1, r4\n\t" \
865 "ldr r4, [%3, #52]\n\t" \
866 "smlal %0, %1, r2, r4\n\t" \
867 "ldr r4, [%3, #44]\n\t" \
868 "smlal %0, %1, r3, r4\n\t" \
869 "ldmia %2, {r0, r1, r2, r3}\n\t" \
870 "ldr r4, [%3, #36]\n\t" \
871 "smlal %0, %1, r0, r4\n\t" \
872 "ldr r4, [%3, #28]\n\t" \
873 "smlal %0, %1, r1, r4\n\t" \
874 "ldr r4, [%3, #20]\n\t" \
875 "smlal %0, %1, r2, r4\n\t" \
876 "ldr r4, [%3, #12]\n\t" \
877 "smlal %0, %1, r3, r4\n\t" \
878 : "+r" (lo), "+r" (hi), "+r" (__p) \
879 : "r" (ptr) \
880 : "r0", "r1", "r2", "r3", "r4"); \
881 } while (0)
882
883#define PROD_EVEN_0(hi, lo, f, ptr) \
884 do { \
885 mad_fixed_t *__p = (f); \ 833 mad_fixed_t *__p = (f); \
886 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ 834 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
887 "ldr r4, [%3, #0]\n\t" \ 835 "ldr r4, [%3, #0]\n\t" \
@@ -904,10 +852,10 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
904 : "=&r" (lo), "=&r" (hi), "+r" (__p) \ 852 : "=&r" (lo), "=&r" (hi), "+r" (__p) \
905 : "r" (ptr) \ 853 : "r" (ptr) \
906 : "r0", "r1", "r2", "r3", "r4"); \ 854 : "r0", "r1", "r2", "r3", "r4"); \
907 } while (0) 855 })
908 856
909#define PROD_EVEN_A(hi, lo, f, ptr) \ 857#define PROD_A(hi, lo, f, ptr) \
910 do { \ 858 ({ \
911 mad_fixed_t *__p = (f); \ 859 mad_fixed_t *__p = (f); \
912 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ 860 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
913 "ldr r4, [%3, #0]\n\t" \ 861 "ldr r4, [%3, #0]\n\t" \
@@ -930,118 +878,18 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
930 : "+r" (lo), "+r" (hi), "+r" (__p) \ 878 : "+r" (lo), "+r" (hi), "+r" (__p) \
931 : "r" (ptr) \ 879 : "r" (ptr) \
932 : "r0", "r1", "r2", "r3", "r4"); \ 880 : "r0", "r1", "r2", "r3", "r4"); \
933 } while (0) 881 })
934 882
935#define PROD_EVENBACK_0(hi, lo, f, ptr) \ 883void synth_full_odd_band (mad_fixed_t *pcm,
936 do { \ 884 mad_fixed_t (*fo)[8],
937 mad_fixed_t *__p = (f); \ 885 mad_fixed_t (*fe)[8],
938 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ 886 mad_fixed_t const (*D0ptr)[32],
939 "ldr r4, [%3, #60]\n\t" \ 887 mad_fixed_t const (*D1ptr)[32]);
940 "smull %0, %1, r0, r4\n\t" \ 888void synth_full_even_band(mad_fixed_t *pcm,
941 "ldr r4, [%3, #68]\n\t" \ 889 mad_fixed_t (*fo)[8],
942 "smlal %0, %1, r1, r4\n\t" \ 890 mad_fixed_t (*fe)[8],
943 "ldr r4, [%3, #76]\n\t" \ 891 mad_fixed_t const (*D0ptr)[32],
944 "smlal %0, %1, r2, r4\n\t" \ 892 mad_fixed_t const (*D1ptr)[32]);
945 "ldr r4, [%3, #84]\n\t" \
946 "smlal %0, %1, r3, r4\n\t" \
947 "ldmia %2, {r0, r1, r2, r3}\n\t" \
948 "ldr r4, [%3, #92]\n\t" \
949 "smlal %0, %1, r0, r4\n\t" \
950 "ldr r4, [%3, #100]\n\t" \
951 "smlal %0, %1, r1, r4\n\t" \
952 "ldr r4, [%3, #108]\n\t" \
953 "smlal %0, %1, r2, r4\n\t" \
954 "ldr r4, [%3, #116]\n\t" \
955 "smlal %0, %1, r3, r4\n\t" \
956 : "=&r" (lo), "=&r" (hi), "+r" (__p) \
957 : "r" (ptr) \
958 : "r0", "r1", "r2", "r3", "r4"); \
959 } while (0)
960
961#define PROD_EVENBACK_A(hi, lo, f, ptr) \
962 do { \
963 mad_fixed_t *__p = (f); \
964 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
965 "ldr r4, [%3, #60]\n\t" \
966 "smlal %0, %1, r0, r4\n\t" \
967 "ldr r4, [%3, #68]\n\t" \
968 "smlal %0, %1, r1, r4\n\t" \
969 "ldr r4, [%3, #76]\n\t" \
970 "smlal %0, %1, r2, r4\n\t" \
971 "ldr r4, [%3, #84]\n\t" \
972 "smlal %0, %1, r3, r4\n\t" \
973 "ldmia %2, {r0, r1, r2, r3}\n\t" \
974 "ldr r4, [%3, #92]\n\t" \
975 "smlal %0, %1, r0, r4\n\t" \
976 "ldr r4, [%3, #100]\n\t" \
977 "smlal %0, %1, r1, r4\n\t" \
978 "ldr r4, [%3, #108]\n\t" \
979 "smlal %0, %1, r2, r4\n\t" \
980 "ldr r4, [%3, #116]\n\t" \
981 "smlal %0, %1, r3, r4\n\t" \
982 : "+r" (lo), "+r" (hi), "+r" (__p) \
983 : "r" (ptr) \
984 : "r0", "r1", "r2", "r3", "r4"); \
985 } while (0)
986
987#define PROD_ODDBACK_0(hi, lo, f, ptr) \
988 do { \
989 mad_fixed_t *__p = (f); \
990 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
991 "ldr r4, [%3, #120]\n\t" \
992 "smull %0, %1, r0, r4\n\t" \
993 "ldr r4, [%3, #64]\n\t" \
994 "smlal %0, %1, r1, r4\n\t" \
995 "ldr r4, [%3, #72]\n\t" \
996 "smlal %0, %1, r2, r4\n\t" \
997 "ldr r4, [%3, #80]\n\t" \
998 "smlal %0, %1, r3, r4\n\t" \
999 "ldmia %2, {r0, r1, r2, r3}\n\t" \
1000 "ldr r4, [%3, #88]\n\t" \
1001 "smlal %0, %1, r0, r4\n\t" \
1002 "ldr r4, [%3, #96]\n\t" \
1003 "smlal %0, %1, r1, r4\n\t" \
1004 "ldr r4, [%3, #104]\n\t" \
1005 "smlal %0, %1, r2, r4\n\t" \
1006 "ldr r4, [%3, #112]\n\t" \
1007 "smlal %0, %1, r3, r4\n\t" \
1008 : "=&r" (lo), "=&r" (hi), "+r" (__p) \
1009 : "r" (ptr) \
1010 : "r0", "r1", "r2", "r3", "r4"); \
1011 } while (0)
1012
1013#define PROD_ODDBACK_A(hi, lo, f, ptr) \
1014 do { \
1015 mad_fixed_t *__p = (f); \
1016 asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \
1017 "ldr r4, [%3, #120]\n\t" \
1018 "smlal %0, %1, r0, r4\n\t" \
1019 "ldr r4, [%3, #64]\n\t" \
1020 "smlal %0, %1, r1, r4\n\t" \
1021 "ldr r4, [%3, #72]\n\t" \
1022 "smlal %0, %1, r2, r4\n\t" \
1023 "ldr r4, [%3, #80]\n\t" \
1024 "smlal %0, %1, r3, r4\n\t" \
1025 "ldmia %2, {r0, r1, r2, r3}\n\t" \
1026 "ldr r4, [%3, #88]\n\t" \
1027 "smlal %0, %1, r0, r4\n\t" \
1028 "ldr r4, [%3, #96]\n\t" \
1029 "smlal %0, %1, r1, r4\n\t" \
1030 "ldr r4, [%3, #104]\n\t" \
1031 "smlal %0, %1, r2, r4\n\t" \
1032 "ldr r4, [%3, #112]\n\t" \
1033 "smlal %0, %1, r3, r4\n\t" \
1034 : "+r" (lo), "+r" (hi), "+r" (__p) \
1035 : "r" (ptr) \
1036 : "r0", "r1", "r2", "r3", "r4"); \
1037 } while (0)
1038
1039void synth_full1(mad_fixed_t *pcm, mad_fixed_t (*fo)[8], mad_fixed_t (*fe)[8],
1040 mad_fixed_t const (*D0ptr)[32],
1041 mad_fixed_t const (*D1ptr)[32]);
1042void synth_full2(mad_fixed_t *pcm, mad_fixed_t (*fo)[8], mad_fixed_t (*fe)[8],
1043 mad_fixed_t const (*D0ptr)[32],
1044 mad_fixed_t const (*D1ptr)[32]);
1045 893
1046static 894static
1047void synth_full(struct mad_synth *synth, struct mad_frame const *frame, 895void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
@@ -1083,99 +931,39 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
1083 if(s & 1) 931 if(s & 1)
1084 { 932 {
1085 ptr = *D0ptr; 933 ptr = *D0ptr;
1086/* 934 PROD_O(hi, lo, *fo, ptr+1);
1087 ML0(hi, lo, (*fx)[0], ptr[ 1]);
1088 MLA(hi, lo, (*fx)[1], ptr[15]);
1089 MLA(hi, lo, (*fx)[2], ptr[13]);
1090 MLA(hi, lo, (*fx)[3], ptr[11]);
1091 MLA(hi, lo, (*fx)[4], ptr[ 9]);
1092 MLA(hi, lo, (*fx)[5], ptr[ 7]);
1093 MLA(hi, lo, (*fx)[6], ptr[ 5]);
1094 MLA(hi, lo, (*fx)[7], ptr[ 3]);
1095*/
1096 PROD_ODD_0(hi, lo, *fx, ptr);
1097 MLN(hi, lo); 935 MLN(hi, lo);
1098/* 936 PROD_A(hi, lo, *fe, ptr);
1099 MLA(hi, lo, (*fe)[0], ptr[ 0]);
1100 MLA(hi, lo, (*fe)[1], ptr[14]);
1101 MLA(hi, lo, (*fe)[2], ptr[12]);
1102 MLA(hi, lo, (*fe)[3], ptr[10]);
1103 MLA(hi, lo, (*fe)[4], ptr[ 8]);
1104 MLA(hi, lo, (*fe)[5], ptr[ 6]);
1105 MLA(hi, lo, (*fe)[6], ptr[ 4]);
1106 MLA(hi, lo, (*fe)[7], ptr[ 2]);
1107*/
1108 PROD_EVEN_A(hi, lo, *fe, ptr);
1109 pcm[0] = SHIFT(MLZ(hi, lo)); 937 pcm[0] = SHIFT(MLZ(hi, lo));
1110 pcm += 16; 938 pcm += 16;
1111 939
1112 synth_full1(pcm, fo, fe, D0ptr, D1ptr); 940 synth_full_odd_band(pcm, fo, fe, D0ptr, D1ptr);
1113 D0ptr += 15; 941 D0ptr += 15;
1114 D1ptr += 15; 942 D1ptr += 15;
1115 fo += 15; 943 fo += 15;
1116 fe += 15; 944 fe += 15;
1117 945
1118 ptr = *(D0ptr + 1); 946 ptr = *(D0ptr + 1);
1119 PROD_ODD_0(hi, lo, *fo, ptr); 947 PROD_O(hi, lo, *fo, ptr+1);
1120/*
1121 ML0(hi, lo, (*fo)[0], ptr[ 1]);
1122 MLA(hi, lo, (*fo)[1], ptr[15]);
1123 MLA(hi, lo, (*fo)[2], ptr[13]);
1124 MLA(hi, lo, (*fo)[3], ptr[11]);
1125 MLA(hi, lo, (*fo)[4], ptr[ 9]);
1126 MLA(hi, lo, (*fo)[5], ptr[ 7]);
1127 MLA(hi, lo, (*fo)[6], ptr[ 5]);
1128 MLA(hi, lo, (*fo)[7], ptr[ 3]);
1129*/
1130 pcm[0] = SHIFT(-MLZ(hi, lo)); 948 pcm[0] = SHIFT(-MLZ(hi, lo));
1131 } 949 }
1132 else 950 else
1133 { 951 {
1134 ptr = *D0ptr; 952 ptr = *D0ptr;
1135/* 953 PROD_O(hi, lo, *fx, ptr);
1136 ML0(hi, lo, (*fx)[0], ptr[ 0]);
1137 MLA(hi, lo, (*fx)[1], ptr[14]);
1138 MLA(hi, lo, (*fx)[2], ptr[12]);
1139 MLA(hi, lo, (*fx)[3], ptr[10]);
1140 MLA(hi, lo, (*fx)[4], ptr[ 8]);
1141 MLA(hi, lo, (*fx)[5], ptr[ 6]);
1142 MLA(hi, lo, (*fx)[6], ptr[ 4]);
1143 MLA(hi, lo, (*fx)[7], ptr[ 2]);
1144*/
1145 PROD_EVEN_0(hi, lo, *fx, ptr);
1146 MLN(hi, lo); 954 MLN(hi, lo);
1147/* 955 PROD_A(hi, lo, *fe, ptr+1);
1148 MLA(hi, lo, (*fe)[0], ptr[ 1]);
1149 MLA(hi, lo, (*fe)[1], ptr[15]);
1150 MLA(hi, lo, (*fe)[2], ptr[13]);
1151 MLA(hi, lo, (*fe)[3], ptr[11]);
1152 MLA(hi, lo, (*fe)[4], ptr[ 9]);
1153 MLA(hi, lo, (*fe)[5], ptr[ 7]);
1154 MLA(hi, lo, (*fe)[6], ptr[ 5]);
1155 MLA(hi, lo, (*fe)[7], ptr[ 3]);
1156*/
1157 PROD_ODD_A(hi, lo, *fe, ptr);
1158 pcm[0] = SHIFT(MLZ(hi, lo)); 956 pcm[0] = SHIFT(MLZ(hi, lo));
1159 pcm += 16; 957 pcm += 16;
1160 958
1161 synth_full2(pcm, fo, fe, D0ptr, D1ptr); 959 synth_full_even_band(pcm, fo, fe, D0ptr, D1ptr);
1162 D0ptr += 15; 960 D0ptr += 15;
1163 D1ptr += 15; 961 D1ptr += 15;
1164 fo += 15; 962 fo += 15;
1165 fe += 15; 963 fe += 15;
1166 964
1167 ptr = *(D0ptr + 1); 965 ptr = *(D0ptr + 1);
1168/* 966 PROD_O(hi, lo, *fo, ptr);
1169 ML0(hi, lo, (*fo)[0], ptr[ 0]);
1170 MLA(hi, lo, (*fo)[1], ptr[14]);
1171 MLA(hi, lo, (*fo)[2], ptr[12]);
1172 MLA(hi, lo, (*fo)[3], ptr[10]);
1173 MLA(hi, lo, (*fo)[4], ptr[ 8]);
1174 MLA(hi, lo, (*fo)[5], ptr[ 6]);
1175 MLA(hi, lo, (*fo)[6], ptr[ 4]);
1176 MLA(hi, lo, (*fo)[7], ptr[ 2]);
1177*/
1178 PROD_EVEN_0(hi, lo, *fo, ptr);
1179 pcm[0] = SHIFT(-MLZ(hi, lo)); 967 pcm[0] = SHIFT(-MLZ(hi, lo));
1180 } 968 }
1181 969
@@ -1185,7 +973,7 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
1185 } 973 }
1186} 974}
1187 975
1188# else 976# else /* not FPM_COLDFIRE_EMAC and not FPM_ARM */
1189 977
1190static 978static
1191void synth_full(struct mad_synth *synth, struct mad_frame const *frame, 979void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S
index 419bf2b96e..c5848d3327 100644
--- a/apps/codecs/libmad/synth_full_arm.S
+++ b/apps/codecs/libmad/synth_full_arm.S
@@ -23,15 +23,15 @@
23 23
24 .section ICODE_SECTION_MPA_ARM,"ax",%progbits 24 .section ICODE_SECTION_MPA_ARM,"ax",%progbits
25 25
26 .global synth_full1 26 .global synth_full_odd_band
27 .global synth_full2 27 .global synth_full_even_band
28 28
29 ;; r0 = pcm 29 ;; r0 = pcm
30 ;; r1 = fo 30 ;; r1 = fo
31 ;; r2 = fe 31 ;; r2 = fe
32 ;; r3 = D0ptr 32 ;; r3 = D0ptr
33 ;; r4 = D1ptr 33 ;; r4 = D1ptr
34synth_full1: 34synth_full_odd_band:
35 stmdb sp!, {r4-r11, lr} 35 stmdb sp!, {r4-r11, lr}
36 ldr r4, [sp, #36] 36 ldr r4, [sp, #36]
37 ldr r5, =synth_full_sp 37 ldr r5, =synth_full_sp
@@ -137,7 +137,7 @@ synth_full1:
137 ldr sp, [r5] 137 ldr sp, [r5]
138 ldmia sp!, {r4-r11, pc} 138 ldmia sp!, {r4-r11, pc}
139 139
140synth_full2: 140synth_full_even_band:
141 stmdb sp!, {r4-r11, lr} 141 stmdb sp!, {r4-r11, lr}
142 ldr r4, [sp, #36] 142 ldr r4, [sp, #36]
143 ldr r5, =synth_full_sp 143 ldr r5, =synth_full_sp