summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2008-11-24 18:40:43 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2008-11-24 18:40:43 +0000
commit: 66c0cf2eb17158eec9d0cd2553481a2caf86e611 (patch)
tree: 9a226b31d0c7c9ef216136586c7336b0acd1db7d
parent: 7bf4e7bda74fe392039d15c063e5c3a6ae3cca58 (diff)
download: rockbox-66c0cf2eb17158eec9d0cd2553481a2caf86e611.tar.gz
rockbox-66c0cf2eb17158eec9d0cd2553481a2caf86e611.zip
Tweak the ARMv6 filter assembly a bit further.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19198 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_armv6.h 49
1 files changed, 22 insertions, 27 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index cd27b271af..61471103bd 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -22,7 +22,7 @@ You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software 22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA 23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24 24
25*/ 25*/
26 26
27/* This version fetches data as 32 bit words, and *requires* v1 to be 27/* This version fetches data as 32 bit words, and *requires* v1 to be
28 * 32 bit aligned, otherwise it will result either in a data abort, or 28 * 32 bit aligned, otherwise it will result either in a data abort, or
@@ -44,9 +44,8 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
44 "beq 20f \n" 44 "beq 20f \n"
45 45
46 "10: \n" 46 "10: \n"
47 "ldrh r4, [%[v2]], #2 \n" 47 "bic %[v2], %[v2], #2 \n"
48 "ldr r5, [%[v2]], #4 \n" 48 "ldmia %[v2]!, {r4-r5} \n"
49 "mov r4, r4, lsl #16 \n"
50 "1: \n" 49 "1: \n"
51 ".rept " ADD_SUB_BLOCKS "\n" 50 ".rept " ADD_SUB_BLOCKS "\n"
52 "ldmia %[v2]!, {r6-r7} \n" 51 "ldmia %[v2]!, {r6-r7} \n"
@@ -114,9 +113,8 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
114 "beq 20f \n" 113 "beq 20f \n"
115 114
116 "10: \n" 115 "10: \n"
117 "ldrh r4, [%[v2]], #2 \n" 116 "bic %[v2], %[v2], #2 \n"
118 "ldr r5, [%[v2]], #4 \n" 117 "ldmia %[v2]!, {r4-r5} \n"
119 "mov r4, r4, lsl #16 \n"
120 "1: \n" 118 "1: \n"
121 ".rept " ADD_SUB_BLOCKS "\n" 119 ".rept " ADD_SUB_BLOCKS "\n"
122 "ldmia %[v2]!, {r6-r7} \n" 120 "ldmia %[v2]!, {r6-r7} \n"
@@ -194,51 +192,48 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
194 "beq 20f \n" 192 "beq 20f \n"
195 193
196 "10: \n" 194 "10: \n"
197 "ldrh r7, [%[v2]], #2 \n" 195 "bic %[v2], %[v2], #2 \n"
198 "ldmia %[v2]!, {r4-r5} \n" 196 "ldmia %[v2]!, {r5-r7} \n"
199 "ldmia %[v1]!, {r0-r1} \n" 197 "ldmia %[v1]!, {r0-r1} \n"
200#if ORDER > 32
201 "mov r7, r7, lsl #16 \n"
202 "1: \n" 198 "1: \n"
203 "pkhbt r8, r4, r7 \n" 199 "pkhbt r8, r6, r5 \n"
204 "ldmia %[v2]!, {r6-r7} \n" 200 "ldmia %[v2]!, {r4-r5} \n"
201#if ORDER > 32
205 "smladx %[res], r0, r8, %[res] \n" 202 "smladx %[res], r0, r8, %[res] \n"
206#else 203#else
207 "pkhbt r8, r4, r7, lsl #16 \n"
208 "ldmia %[v2]!, {r6-r7} \n"
209 "smuadx %[res], r0, r8 \n" 204 "smuadx %[res], r0, r8 \n"
210#endif 205#endif
211 ".rept " MLA_BLOCKS "\n" 206 ".rept " MLA_BLOCKS "\n"
212 "pkhbt r8, r5, r4 \n" 207 "pkhbt r8, r7, r6 \n"
213 "ldmia %[v1]!, {r2-r3} \n" 208 "ldmia %[v1]!, {r2-r3} \n"
214 "smladx %[res], r1, r8, %[res] \n" 209 "smladx %[res], r1, r8, %[res] \n"
215 "pkhbt r8, r6, r5 \n" 210 "pkhbt r8, r4, r7 \n"
216 "ldmia %[v2]!, {r4-r5} \n" 211 "ldmia %[v2]!, {r6-r7} \n"
217 "smladx %[res], r2, r8, %[res] \n" 212 "smladx %[res], r2, r8, %[res] \n"
218 "pkhbt r8, r7, r6 \n" 213 "pkhbt r8, r5, r4 \n"
219 "ldmia %[v1]!, {r0-r1} \n" 214 "ldmia %[v1]!, {r0-r1} \n"
220 "smladx %[res], r3, r8, %[res] \n" 215 "smladx %[res], r3, r8, %[res] \n"
221 "pkhbt r8, r4, r7 \n" 216 "pkhbt r8, r6, r5 \n"
222 "ldmia %[v2]!, {r6-r7} \n" 217 "ldmia %[v2]!, {r4-r5} \n"
223 "smladx %[res], r0, r8, %[res] \n" 218 "smladx %[res], r0, r8, %[res] \n"
224 ".endr \n" 219 ".endr \n"
225 220
226 "pkhbt r8, r5, r4 \n" 221 "pkhbt r8, r7, r6 \n"
227 "ldmia %[v1]!, {r2-r3} \n" 222 "ldmia %[v1]!, {r2-r3} \n"
228 "smladx %[res], r1, r8, %[res] \n" 223 "smladx %[res], r1, r8, %[res] \n"
229 "pkhbt r8, r6, r5 \n" 224 "pkhbt r8, r4, r7 \n"
230#if ORDER > 32 225#if ORDER > 32
231 "subs %[cnt], %[cnt], #1 \n" 226 "subs %[cnt], %[cnt], #1 \n"
232 "ldmneia %[v2]!, {r4-r5} \n" 227 "ldmneia %[v2]!, {r6-r7} \n"
233 "smladx %[res], r2, r8, %[res] \n" 228 "smladx %[res], r2, r8, %[res] \n"
234 "pkhbt r8, r7, r6 \n" 229 "pkhbt r8, r5, r4 \n"
235 "ldmneia %[v1]!, {r0-r1} \n" 230 "ldmneia %[v1]!, {r0-r1} \n"
236 "smladx %[res], r3, r8, %[res] \n" 231 "smladx %[res], r3, r8, %[res] \n"
237 "bne 1b \n" 232 "bne 1b \n"
238#else 233#else
239 "pkhbt r7, r7, r6 \n" 234 "pkhbt r5, r5, r4 \n"
240 "smladx %[res], r2, r8, %[res] \n" 235 "smladx %[res], r2, r8, %[res] \n"
241 "smladx %[res], r3, r7, %[res] \n" 236 "smladx %[res], r3, r5, %[res] \n"
242#endif 237#endif
243 "b 99f \n" 238 "b 99f \n"
244 239