author    Jens Arnold <amiconn@rockbox.org>  2010-03-03 20:52:02 +0000
committer Jens Arnold <amiconn@rockbox.org>  2010-03-03 20:52:02 +0000
commit    0030ae28b551df94defe25c04c1859ba508e632e (patch)
tree      7700cd67da8c2b191cc68402a83c4c674d470c1d
parent    d9adfa1c739c54640fff1ac79c7a6ab8c75398b8 (diff)
download  rockbox-0030ae28b551df94defe25c04c1859ba508e632e.tar.gz
          rockbox-0030ae28b551df94defe25c04c1859ba508e632e.zip
Get rid of .rept in inline asm() blocks where possible. Using .rept causes gcc to wrongly estimate the size of the asm(), leading to (potential) compilation problems. This is necessary for the upcoming restructuring, and should fix ARMv6+ sim builds as well. No functional change.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25004 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--  apps/codecs/demac/libdemac/vector_math16_armv5te.h  29
-rw-r--r--  apps/codecs/demac/libdemac/vector_math16_armv6.h    34
-rw-r--r--  apps/codecs/demac/libdemac/vector_math16_cf.h       20
-rw-r--r--  apps/codecs/demac/libdemac/vector_math32_armv4.h    18
4 files changed, 53 insertions, 48 deletions
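
For context: gcc sizes an asm() statement from the length of its string
literal, but a ".rept N" directive is only expanded later, by the
assembler, so gcc can badly underestimate how much code the block emits
(and, for example, place branches out of range). Repeating the text with
the C preprocessor instead keeps the literal, and thus gcc's estimate,
accurate. A minimal sketch of the pattern, assuming an ARM target and
GCC; REPEAT_3 and add3 are illustrative names, not code from this commit:

    /* Repeats its argument textually, so gcc sees the full asm string. */
    #define REPEAT_3(x) x x x

    static inline int add3(int acc, int step)
    {
        asm (
            /* before: ".rept 3 \n" ... ".endr \n" (expanded by gas)  */
            /* after: the instruction pasted 3 times by cpp           */
            REPEAT_3(
            "add %0, %0, %1 \n"
            )
            : "+r" (acc)   /* accumulator, read and written */
            : "r" (step)   /* step, added three times       */
        );
        return acc;
    }
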
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index 2940585a42..0a3679ce63 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -26,6 +26,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define FUSED_VECTOR_MATH
 
+#define REPEAT_3(x) x x x
+#if ORDER > 16
+#define REPEAT_MLA(x) x x x x x x x
+#else
+#define REPEAT_MLA(x) x x x
+#endif
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *requires* v1 to be
  * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
@@ -133,7 +140,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -144,7 +151,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r0, r1, r3)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -275,7 +282,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -286,7 +293,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r0, r1, r3)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -318,12 +325,6 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
     int cnt = ORDER>>5;
 #endif
 
-#if ORDER > 16
-#define MLA_BLOCKS "7"
-#else
-#define MLA_BLOCKS "3"
-#endif
-
     asm volatile (
 #if ORDER > 32
         "mov %[res], #0 \n"
@@ -347,14 +348,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "smlabt %[res], r0, r3, %[res] \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlatb %[res], r0, r2, %[res] \n"
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -374,14 +375,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlabb %[res], r0, r2, %[res] \n"
         "smlatt %[res], r0, r2, %[res] \n"
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index 0ace6c5811..2ce62728cb 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -22,14 +22,14 @@ You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 */
 
 #define FUSED_VECTOR_MATH
 
 #if ORDER > 16
-#define BLOCK_REPEAT "3"
+#define REPEAT_BLOCK(x) x x x
 #else
-#define BLOCK_REPEAT "1"
+#define REPEAT_BLOCK(x) x
 #endif
 
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
@@ -77,7 +77,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
         "pkhtb r2, r2, r3 \n"
@@ -104,7 +104,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r0, r0, r6 \n"
         "sadd16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
@@ -148,7 +148,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
         "smlad %[res], r3, r5, %[res] \n"
@@ -165,7 +165,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r0, r0, r6 \n"
         "sadd16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
@@ -246,7 +246,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
         "pkhtb r2, r2, r3 \n"
@@ -273,7 +273,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r0, r0, r6 \n"
         "ssub16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
@@ -317,7 +317,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
         "smlad %[res], r3, r5, %[res] \n"
@@ -334,7 +334,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r0, r0, r6 \n"
         "ssub16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
@@ -400,7 +400,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #else
         "smuadx %[res], r0, r3 \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "pkhtb r0, r6, r7 \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smladx %[res], r1, r0, %[res] \n"
@@ -413,8 +413,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "pkhtb r3, r5, r6 \n"
         "ldrd r4, [%[v2]], #8 \n"
         "smladx %[res], r0, r3, %[res] \n"
-        ".endr \n"
+        )
 
         "pkhtb r0, r6, r7 \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smladx %[res], r1, r0, %[res] \n"
@@ -434,7 +434,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #endif
 
-        "b 99f \n"
+        "b 99f \n"
 
         "20: \n"
         "ldrd r0, [%[v1]], #8 \n"
         "ldmia %[v2]!, {r5-r7} \n"
@@ -446,7 +446,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #else
         "smuad %[res], r0, r5 \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldrd r4, [%[v2]], #8 \n"
         "smlad %[res], r1, r6, %[res] \n"
         "ldrd r0, [%[v1]], #8 \n"
@@ -455,7 +455,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlad %[res], r3, r4, %[res] \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smlad %[res], r0, r5, %[res] \n"
-        ".endr \n"
+        )
 
 #if ORDER > 32
         "ldrd r4, [%[v2]], #8 \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 6e8216c9cc..4d77d3be31 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */
 
+#define REPEAT_2(x) x x
+#define REPEAT_3(x) x x x
+#define REPEAT_7(x) x x x x x x x
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *recommends* v1 to be
  * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit
@@ -64,7 +68,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.w (%[s2])+, %%d1 \n"
         "swap %%d1 \n"
     "1: \n"
-        ".rept 2 \n"
+        REPEAT_2(
         "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
         "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
         "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -82,7 +86,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.l %%d6, (%[v1])+ \n"
         ADDHALFXREGS(%%a1, %%d1, %%d7)
         "move.l %%d7, (%[v1])+ \n"
-        ".endr \n"
+        )
 
 #if ORDER > 16
         "subq.l #1, %[res] \n"
@@ -193,7 +197,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.w (%[s2])+, %%d1 \n"
         "swap %%d1 \n"
     "1: \n"
-        ".rept 2 \n"
+        REPEAT_2(
         "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
         "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
         "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -211,7 +215,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.l %%d6, (%[v1])+ \n"
         SUBHALFXREGS(%%a1, %%d1, %%d7)
         "move.l %%d7, (%[v1])+ \n"
-        ".endr \n"
+        )
 
 #if ORDER > 16
         "subq.l #1, %[res] \n"
@@ -305,10 +309,10 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l (%[v1])+, %%d0 \n"
         "move.w (%[v2])+, %%d1 \n"
     "1: \n"
-        ".rept 7 \n"
+        REPEAT_7(
         "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
         "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
-        ".endr \n"
+        )
 
         "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
 #if ORDER > 16
@@ -324,12 +328,12 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l (%[v1])+, %%d0 \n"
         "move.l (%[v2])+, %%d1 \n"
     "1: \n"
-        ".rept 3 \n"
+        REPEAT_3(
         "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
         "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
         "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
         "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-        ".endr \n"
+        )
 
         "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
         "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
index 207fca3038..cb5fe9e0ee 100644
--- a/apps/codecs/demac/libdemac/vector_math32_armv4.h
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -27,11 +27,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 #define FUSED_VECTOR_MATH
 
 #if ORDER > 32
-#define BLOCK_REPEAT "8"
+#define REPEAT_BLOCK(x) x x x x x x x x
 #elif ORDER > 16
-#define BLOCK_REPEAT "7"
+#define REPEAT_BLOCK(x) x x x x x x x
 #else
-#define BLOCK_REPEAT "3"
+#define REPEAT_BLOCK(x) x x x
 #endif
 
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance) */
@@ -60,7 +60,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
         "add r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1], {r0-r3} \n"
         "ldmia %[f2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
@@ -73,7 +73,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
         "add r2, r2, r6 \n"
         "add r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -120,7 +120,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
         "sub r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1], {r0-r3} \n"
         "ldmia %[f2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
@@ -133,7 +133,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
         "sub r2, r2, r6 \n"
         "sub r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -173,14 +173,14 @@ static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
         "mla %[res], r6, r2, %[res] \n"
         "mla %[res], r7, r3, %[res] \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1]!, {r0-r3} \n"
         "ldmia %[v2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
         "mla %[res], r5, r1, %[res] \n"
         "mla %[res], r6, r2, %[res] \n"
         "mla %[res], r7, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"