| author | Jens Arnold <amiconn@rockbox.org> | 2010-03-03 20:52:02 +0000 |
|---|---|---|
| committer | Jens Arnold <amiconn@rockbox.org> | 2010-03-03 20:52:02 +0000 |
| commit | 0030ae28b551df94defe25c04c1859ba508e632e (patch) | |
| tree | 7700cd67da8c2b191cc68402a83c4c674d470c1d /apps/codecs/demac/libdemac/vector_math16_armv5te.h | |
| parent | d9adfa1c739c54640fff1ac79c7a6ab8c75398b8 (diff) | |
Get rid of .rept in inline asm() blocks where possible. Using .rept causes gcc to wrongly estimate the size of the asm(), leading to (potential) compilation problems. This is necessary for the upcoming restructuring, and should fix ARMv6+ sim builds as well. No functional change.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25004 a1c6a512-1295-4272-9138-f99709370657
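The change is purely textual: repetition moves from the assembler (`.rept`/`.endr`, expanded only after gcc has already estimated the size of the `asm()` from its string) to the C preprocessor (`REPEAT_3`/`REPEAT_MLA`, expanded before gcc ever sees the string, so the estimate matches the code actually emitted). A minimal sketch of the pattern, assuming an ARM target; `sum3_rept` and `sum3_macro` are illustrative names, not libdemac code:

```c
#include <stdint.h>

/* gcc sizes an asm() from its text, but the assembler expands .rept
 * afterwards, so the code emitted here is ~3x larger than gcc's
 * estimate -- bad for short-branch range checks around the block. */
static inline int32_t sum3_rept(const int32_t *p)
{
    int32_t res = 0;
    asm volatile (
        ".rept 3                 \n"
        "ldr  r3, [%[p]], #4     \n" /* load *p, post-increment */
        "add  %[res], %[res], r3 \n"
        ".endr                   \n"
        : [res] "+r" (res), [p] "+r" (p)
        :
        : "r3", "memory"
    );
    return res;
}

#define REPEAT_3(x) x x x  /* same macro the patch introduces */

/* The preprocessor pastes three copies into the string, so gcc's
 * size estimate agrees with what the assembler emits. */
static inline int32_t sum3_macro(const int32_t *p)
{
    int32_t res = 0;
    asm volatile (
        REPEAT_3(
        "ldr  r3, [%[p]], #4     \n"
        "add  %[res], %[res], r3 \n"
        )
        : [res] "+r" (res), [p] "+r" (p)
        :
        : "r3", "memory"
    );
    return res;
}
```

Both functions assemble to identical machine code; only gcc's view of the `asm()` size changes, which is why the commit is a no-functional-change cleanup.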
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_armv5te.h')
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math16_armv5te.h | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
```diff
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index 2940585a42..0a3679ce63 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -26,6 +26,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define FUSED_VECTOR_MATH
 
+#define REPEAT_3(x) x x x
+#if ORDER > 16
+#define REPEAT_MLA(x) x x x x x x x
+#else
+#define REPEAT_MLA(x) x x x
+#endif
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *requires* v1 to be
  * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
@@ -133,7 +140,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -144,7 +151,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r0, r1, r3)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -275,7 +282,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -286,7 +293,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r0, r1, r3)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -318,12 +325,6 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
     int cnt = ORDER>>5;
 #endif
 
-#if ORDER > 16
-#define MLA_BLOCKS "7"
-#else
-#define MLA_BLOCKS "3"
-#endif
-
     asm volatile (
 #if ORDER > 32
         "mov %[res], #0 \n"
@@ -347,14 +348,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "smlabt %[res], r0, r3, %[res] \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlatb %[res], r0, r2, %[res] \n"
         "smlabt %[res], r1, r2, %[res] \n"
        "smlatb %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -374,14 +375,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlabb %[res], r0, r2, %[res] \n"
         "smlatt %[res], r0, r2, %[res] \n"
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
```
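For reference, the semantics the hand-scheduled ARMv5TE code implements, reconstructed as a plain-C sketch from the comments visible in the hunks and from libdemac's generic vector math (illustrative only, with the `_ref` suffixes added here; `ORDER` is the APE filter order):

```c
#include <stdint.h>

#ifndef ORDER
#define ORDER 16  /* filter order; a multiple of 16 in libdemac */
#endif

static inline int32_t scalarproduct_ref(int16_t *v1, int16_t *v2)
{
    int32_t res = 0;
    for (int i = 0; i < ORDER; i++)
        res += v1[i] * v2[i];  /* 16x16->32 multiply-accumulate */
    return res;
}

/* Fused: scalar product of v1 and f2, plus v1 += s2 in the same pass. */
static inline int32_t vector_sp_add_ref(int16_t *v1, int16_t *f2, int16_t *s2)
{
    int32_t res = 0;
    for (int i = 0; i < ORDER; i++) {
        res += v1[i] * f2[i];
        v1[i] += s2[i];
    }
    return res;
}

/* Fused: scalar product of v1 and f2, plus v1 -= s2 in the same pass. */
static inline int32_t vector_sp_sub_ref(int16_t *v1, int16_t *f2, int16_t *s2)
{
    int32_t res = 0;
    for (int i = 0; i < ORDER; i++) {
        res += v1[i] * f2[i];
        v1[i] -= s2[i];
    }
    return res;
}
```

Fusing the scalar product with the vector update saves one full pass over `v1`, which is what the `FUSED_VECTOR_MATH` define advertises to the filter code.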