author     Jens Arnold <amiconn@rockbox.org>    2010-03-03 20:52:02 +0000
committer  Jens Arnold <amiconn@rockbox.org>    2010-03-03 20:52:02 +0000
commit     0030ae28b551df94defe25c04c1859ba508e632e (patch)
tree       7700cd67da8c2b191cc68402a83c4c674d470c1d /apps/codecs/demac/libdemac/vector_math16_armv5te.h
parent     d9adfa1c739c54640fff1ac79c7a6ab8c75398b8 (diff)
download   rockbox-0030ae28b551df94defe25c04c1859ba508e632e.tar.gz
           rockbox-0030ae28b551df94defe25c04c1859ba508e632e.zip
Get rid of .rept in inline asm() blocks where possible. Using .rept causes gcc to wrongly estimate the size of the asm(), leading to (potential) compilation problems. This is necessary for the upcoming restructuring, and should fix ARMv6+ sim builds as well. No functional change.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25004 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_armv5te.h')
-rw-r--r--  apps/codecs/demac/libdemac/vector_math16_armv5te.h  29
1 file changed, 15 insertions(+), 14 deletions(-)
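For context on the commit message: gcc estimates the size of an asm() statement from the length of its assembler text, so a .rept N / .endr loop inside that text makes the statement look much shorter than the code the assembler actually emits, which can throw off the compiler's branch-range and size decisions. Repeating the text with a C preprocessor macro keeps the full expansion visible to gcc. A minimal sketch of the two forms, assuming an ARM target; the bump_* functions and their asm bodies are illustrative only, not part of the patch:

    /* Before: the assembler expands the loop, but gcc only sees one short
     * string and under-estimates how much code this asm() produces. */
    static inline int bump_rept(int x)
    {
        asm volatile (
            ".rept 3                \n"
            "add    %[x], %[x], #1  \n"
            ".endr                  \n"
            : [x] "+r" (x));
        return x;
    }

    /* After: the preprocessor pastes the string three times, so the text
     * gcc measures matches the code the assembler emits. */
    #define REPEAT_3(x) x x x

    static inline int bump_macro(int x)
    {
        asm volatile (
            REPEAT_3(
            "add    %[x], %[x], #1  \n"
            )
            : [x] "+r" (x));
        return x;
    }

Both forms assemble to identical machine code; only the size gcc attributes to the asm() block differs, which is exactly the "no functional change" the commit claims.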
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index 2940585a42..0a3679ce63 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -26,6 +26,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define FUSED_VECTOR_MATH
 
+#define REPEAT_3(x) x x x
+#if ORDER > 16
+#define REPEAT_MLA(x) x x x x x x x
+#else
+#define REPEAT_MLA(x) x x x
+#endif
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *requires* v1 to be
  * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
@@ -133,7 +140,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r1, r2, r4)
         "stmia   %[v1]!, {r0,r1}         \n"
 
-        ".rept 3                         \n"
+        REPEAT_3(
         "ldmia   %[v1], {r1,r2}          \n"
         "ldmia   %[f2]!, {r3,r4}         \n"
         "smlabb  %[res], r1, r3, %[res]  \n"
@@ -144,7 +151,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r0, r1, r3)
         ADDHALFREGS(r1, r2, r4)
         "stmia   %[v1]!, {r0,r1}         \n"
-        ".endr                           \n"
+        )
 #if ORDER > 16
         "subs    %[cnt], %[cnt], #1      \n"
         "bne     1b                      \n"
@@ -275,7 +282,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r1, r2, r4)
         "stmia   %[v1]!, {r0,r1}         \n"
 
-        ".rept 3                         \n"
+        REPEAT_3(
         "ldmia   %[v1], {r1,r2}          \n"
         "ldmia   %[f2]!, {r3,r4}         \n"
         "smlabb  %[res], r1, r3, %[res]  \n"
@@ -286,7 +293,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r0, r1, r3)
         SUBHALFREGS(r1, r2, r4)
         "stmia   %[v1]!, {r0,r1}         \n"
-        ".endr                           \n"
+        )
 #if ORDER > 16
         "subs    %[cnt], %[cnt], #1      \n"
         "bne     1b                      \n"
@@ -318,12 +325,6 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
     int cnt = ORDER>>5;
 #endif
 
-#if ORDER > 16
-#define MLA_BLOCKS "7"
-#else
-#define MLA_BLOCKS "3"
-#endif
-
     asm volatile (
 #if ORDER > 32
         "mov     %[res], #0              \n"
@@ -347,14 +348,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
347 "smlabt %[res], r1, r2, %[res] \n" 348 "smlabt %[res], r1, r2, %[res] \n"
348 "smlatb %[res], r1, r3, %[res] \n" 349 "smlatb %[res], r1, r3, %[res] \n"
349 350
350 ".rept " MLA_BLOCKS "\n" 351 REPEAT_MLA(
351 "ldmia %[v1]!, {r0,r1} \n" 352 "ldmia %[v1]!, {r0,r1} \n"
352 "smlabt %[res], r0, r3, %[res] \n" 353 "smlabt %[res], r0, r3, %[res] \n"
353 "ldmia %[v2]!, {r2,r3} \n" 354 "ldmia %[v2]!, {r2,r3} \n"
354 "smlatb %[res], r0, r2, %[res] \n" 355 "smlatb %[res], r0, r2, %[res] \n"
355 "smlabt %[res], r1, r2, %[res] \n" 356 "smlabt %[res], r1, r2, %[res] \n"
356 "smlatb %[res], r1, r3, %[res] \n" 357 "smlatb %[res], r1, r3, %[res] \n"
357 ".endr \n" 358 )
358#if ORDER > 32 359#if ORDER > 32
359 "subs %[cnt], %[cnt], #1 \n" 360 "subs %[cnt], %[cnt], #1 \n"
360 "bne 1b \n" 361 "bne 1b \n"
@@ -374,14 +375,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
374 "smlabb %[res], r1, r3, %[res] \n" 375 "smlabb %[res], r1, r3, %[res] \n"
375 "smlatt %[res], r1, r3, %[res] \n" 376 "smlatt %[res], r1, r3, %[res] \n"
376 377
377 ".rept " MLA_BLOCKS "\n" 378 REPEAT_MLA(
378 "ldmia %[v1]!, {r0,r1} \n" 379 "ldmia %[v1]!, {r0,r1} \n"
379 "ldmia %[v2]!, {r2,r3} \n" 380 "ldmia %[v2]!, {r2,r3} \n"
380 "smlabb %[res], r0, r2, %[res] \n" 381 "smlabb %[res], r0, r2, %[res] \n"
381 "smlatt %[res], r0, r2, %[res] \n" 382 "smlatt %[res], r0, r2, %[res] \n"
382 "smlabb %[res], r1, r3, %[res] \n" 383 "smlabb %[res], r1, r3, %[res] \n"
383 "smlatt %[res], r1, r3, %[res] \n" 384 "smlatt %[res], r1, r3, %[res] \n"
384 ".endr \n" 385 )
385#if ORDER > 32 386#if ORDER > 32
386 "subs %[cnt], %[cnt], #1 \n" 387 "subs %[cnt], %[cnt], #1 \n"
387 "bne 1b \n" 388 "bne 1b \n"