summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2008-10-03 12:30:18 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2008-10-03 12:30:18 +0000
commit: d456460707f79ec48d08baf5d8f28c88c9641e64 (patch)
tree: a922a1829e3a90886f4eacc698f92c6b749dcd93
parent: 7fc446263f99aad5f0b2f9f674fde02e6eac4d5c (diff)
download: rockbox-d456460707f79ec48d08baf5d8f28c88c9641e64.tar.gz
download: rockbox-d456460707f79ec48d08baf5d8f28c88c9641e64.zip
Further speedup for ARMv6 by better pipelining in scalarproduct().
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18697 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_armv6.h 80
1 files changed, 53 insertions, 27 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index e963e10ff0..bf50d9cabd 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -217,54 +217,80 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
217 "beq 20f \n" 217 "beq 20f \n"
218 218
219 "10: \n" 219 "10: \n"
220 "ldrh r4, [%[v2]], #2 \n" 220 "ldrh r2, [%[v2]], #2 \n"
221 "mov r4, r4, lsl #16 \n" 221 "ldr r0, [%[v1]], #4 \n"
222 "ldr r3, [%[v2]], #4 \n"
223 "mov r2, r2, lsl #16 \n"
222 "1: \n" 224 "1: \n"
223 "ldmia %[v1]!, {r0-r3} \n" 225 "ldr r1, [%[v1]], #4 \n"
224 "ldmia %[v2]!, {r5-r8} \n" 226 "smlabt %[res], r0, r2, %[res] \n"
227 "ldr r4, [%[v2]], #4 \n"
228 "smlatb %[res], r0, r3, %[res] \n"
229 "ldr r0, [%[v1]], #4 \n"
230 "smlabt %[res], r1, r3, %[res] \n"
231 "ldr r5, [%[v2]], #4 \n"
232 "smlatb %[res], r1, r4, %[res] \n"
233 "ldr r1, [%[v1]], #4 \n"
225 "smlabt %[res], r0, r4, %[res] \n" 234 "smlabt %[res], r0, r4, %[res] \n"
235 "ldr r6, [%[v2]], #4 \n"
226 "smlatb %[res], r0, r5, %[res] \n" 236 "smlatb %[res], r0, r5, %[res] \n"
237 "ldr r0, [%[v1]], #4 \n"
227 "smlabt %[res], r1, r5, %[res] \n" 238 "smlabt %[res], r1, r5, %[res] \n"
239 "ldr r3, [%[v2]], #4 \n"
228 "smlatb %[res], r1, r6, %[res] \n" 240 "smlatb %[res], r1, r6, %[res] \n"
229 "smlabt %[res], r2, r6, %[res] \n" 241 "mov r2, r6 \n"
230 "smlatb %[res], r2, r7, %[res] \n" 242 "ldr r1, [%[v1]], #4 \n"
231 "smlabt %[res], r3, r7, %[res] \n" 243 "smlabt %[res], r0, r2, %[res] \n"
232 "smlatb %[res], r3, r8, %[res] \n" 244 "ldr r4, [%[v2]], #4 \n"
233 "mov r4, r8 \n" 245 "smlatb %[res], r0, r3, %[res] \n"
234 "ldmia %[v1]!, {r0-r3} \n" 246 "ldr r0, [%[v1]], #4 \n"
235 "ldmia %[v2]!, {r5-r8} \n" 247 "smlabt %[res], r1, r3, %[res] \n"
248 "ldr r5, [%[v2]], #4 \n"
249 "smlatb %[res], r1, r4, %[res] \n"
250 "ldr r1, [%[v1]], #4 \n"
236 "smlabt %[res], r0, r4, %[res] \n" 251 "smlabt %[res], r0, r4, %[res] \n"
252 "ldr r6, [%[v2]], #4 \n"
237 "smlatb %[res], r0, r5, %[res] \n" 253 "smlatb %[res], r0, r5, %[res] \n"
238 "smlabt %[res], r1, r5, %[res] \n"
239 "smlatb %[res], r1, r6, %[res] \n"
240 "smlabt %[res], r2, r6, %[res] \n"
241 "smlatb %[res], r2, r7, %[res] \n"
242 "smlabt %[res], r3, r7, %[res] \n"
243 "smlatb %[res], r3, r8, %[res] \n"
244#if ORDER > 16 254#if ORDER > 16
245 "mov r4, r8 \n"
246 "subs %[cnt], %[cnt], #1 \n" 255 "subs %[cnt], %[cnt], #1 \n"
256 "ldrne r0, [%[v1]], #4 \n"
257 "smlabt %[res], r1, r5, %[res] \n"
258 "ldrne r3, [%[v2]], #4 \n"
259 "smlatb %[res], r1, r6, %[res] \n"
260 "mov r2, r6 \n"
247 "bne 1b \n" 261 "bne 1b \n"
262#else
263 "smlabt %[res], r1, r5, %[res] \n"
264 "smlatb %[res], r1, r6, %[res] \n"
248#endif 265#endif
249 "b 99f \n" 266 "b 99f \n"
250 267
251 "20: \n" 268 "20: \n"
269 "ldmia %[v1]!, {r0-r1} \n"
270 "ldmia %[v2]!, {r4-r5} \n"
252 "1: \n" 271 "1: \n"
253 "ldmia %[v1]!, {r0-r3} \n" 272 "ldmia %[v1]!, {r2-r3} \n"
254 "ldmia %[v2]!, {r4-r7} \n"
255 "smlad %[res], r0, r4, %[res] \n" 273 "smlad %[res], r0, r4, %[res] \n"
274 "ldmia %[v2]!, {r6-r7} \n"
256 "smlad %[res], r1, r5, %[res] \n" 275 "smlad %[res], r1, r5, %[res] \n"
276 "ldmia %[v1]!, {r0-r1} \n"
257 "smlad %[res], r2, r6, %[res] \n" 277 "smlad %[res], r2, r6, %[res] \n"
278 "ldmia %[v2]!, {r4-r5} \n"
258 "smlad %[res], r3, r7, %[res] \n" 279 "smlad %[res], r3, r7, %[res] \n"
259 "ldmia %[v1]!, {r0-r3} \n" 280 "ldmia %[v1]!, {r2-r3} \n"
260 "ldmia %[v2]!, {r4-r7} \n"
261 "smlad %[res], r0, r4, %[res] \n" 281 "smlad %[res], r0, r4, %[res] \n"
282 "ldmia %[v2]!, {r6-r7} \n"
262 "smlad %[res], r1, r5, %[res] \n" 283 "smlad %[res], r1, r5, %[res] \n"
263 "smlad %[res], r2, r6, %[res] \n"
264 "smlad %[res], r3, r7, %[res] \n"
265#if ORDER > 16 284#if ORDER > 16
266 "subs %[cnt], %[cnt], #1 \n" 285 "subs %[cnt], %[cnt], #1 \n"
267 "bne 1b \n" 286 "ldmneia %[v1]!, {r0-r1} \n"
287 "smlad %[res], r2, r6, %[res] \n"
288 "ldmneia %[v2]!, {r4-r5} \n"
289 "smlad %[res], r3, r7, %[res] \n"
290 "bne 1b \n"
291#else
292 "smlad %[res], r2, r6, %[res] \n"
293 "smlad %[res], r3, r7, %[res] \n"
268#endif 294#endif
269 295
270 "99: \n" 296 "99: \n"
@@ -277,8 +303,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
277 [res]"+r"(res) 303 [res]"+r"(res)
278 : /* inputs */ 304 : /* inputs */
279 : /* clobbers */ 305 : /* clobbers */
280 "r0", "r1", "r2", "r3", "r4", 306 "r0", "r1", "r2", "r3",
281 "r5", "r6", "r7", "r8" 307 "r4", "r5", "r6", "r7"
282 ); 308 );
283 return res; 309 return res;
284} 310}