summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_armv5te.h 29
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_armv6.h 34
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_cf.h 20
-rw-r--r-- apps/codecs/demac/libdemac/vector_math32_armv4.h 18
4 files changed, 53 insertions, 48 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index 2940585a42..0a3679ce63 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -26,6 +26,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
26 26
27#define FUSED_VECTOR_MATH 27#define FUSED_VECTOR_MATH
28 28
29#define REPEAT_3(x) x x x
30#if ORDER > 16
31#define REPEAT_MLA(x) x x x x x x x
32#else
33#define REPEAT_MLA(x) x x x
34#endif
35
29/* Calculate scalarproduct, then add a 2nd vector (fused for performance) 36/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
30 * This version fetches data as 32 bit words, and *requires* v1 to be 37 * This version fetches data as 32 bit words, and *requires* v1 to be
31 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit 38 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
@@ -133,7 +140,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
133 ADDHALFREGS(r1, r2, r4) 140 ADDHALFREGS(r1, r2, r4)
134 "stmia %[v1]!, {r0,r1} \n" 141 "stmia %[v1]!, {r0,r1} \n"
135 142
136 ".rept 3 \n" 143 REPEAT_3(
137 "ldmia %[v1], {r1,r2} \n" 144 "ldmia %[v1], {r1,r2} \n"
138 "ldmia %[f2]!, {r3,r4} \n" 145 "ldmia %[f2]!, {r3,r4} \n"
139 "smlabb %[res], r1, r3, %[res] \n" 146 "smlabb %[res], r1, r3, %[res] \n"
@@ -144,7 +151,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
144 ADDHALFREGS(r0, r1, r3) 151 ADDHALFREGS(r0, r1, r3)
145 ADDHALFREGS(r1, r2, r4) 152 ADDHALFREGS(r1, r2, r4)
146 "stmia %[v1]!, {r0,r1} \n" 153 "stmia %[v1]!, {r0,r1} \n"
147 ".endr \n" 154 )
148#if ORDER > 16 155#if ORDER > 16
149 "subs %[cnt], %[cnt], #1 \n" 156 "subs %[cnt], %[cnt], #1 \n"
150 "bne 1b \n" 157 "bne 1b \n"
@@ -275,7 +282,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
275 SUBHALFREGS(r1, r2, r4) 282 SUBHALFREGS(r1, r2, r4)
276 "stmia %[v1]!, {r0,r1} \n" 283 "stmia %[v1]!, {r0,r1} \n"
277 284
278 ".rept 3 \n" 285 REPEAT_3(
279 "ldmia %[v1], {r1,r2} \n" 286 "ldmia %[v1], {r1,r2} \n"
280 "ldmia %[f2]!, {r3,r4} \n" 287 "ldmia %[f2]!, {r3,r4} \n"
281 "smlabb %[res], r1, r3, %[res] \n" 288 "smlabb %[res], r1, r3, %[res] \n"
@@ -286,7 +293,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
286 SUBHALFREGS(r0, r1, r3) 293 SUBHALFREGS(r0, r1, r3)
287 SUBHALFREGS(r1, r2, r4) 294 SUBHALFREGS(r1, r2, r4)
288 "stmia %[v1]!, {r0,r1} \n" 295 "stmia %[v1]!, {r0,r1} \n"
289 ".endr \n" 296 )
290#if ORDER > 16 297#if ORDER > 16
291 "subs %[cnt], %[cnt], #1 \n" 298 "subs %[cnt], %[cnt], #1 \n"
292 "bne 1b \n" 299 "bne 1b \n"
@@ -318,12 +325,6 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
318 int cnt = ORDER>>5; 325 int cnt = ORDER>>5;
319#endif 326#endif
320 327
321#if ORDER > 16
322#define MLA_BLOCKS "7"
323#else
324#define MLA_BLOCKS "3"
325#endif
326
327 asm volatile ( 328 asm volatile (
328#if ORDER > 32 329#if ORDER > 32
329 "mov %[res], #0 \n" 330 "mov %[res], #0 \n"
@@ -347,14 +348,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
347 "smlabt %[res], r1, r2, %[res] \n" 348 "smlabt %[res], r1, r2, %[res] \n"
348 "smlatb %[res], r1, r3, %[res] \n" 349 "smlatb %[res], r1, r3, %[res] \n"
349 350
350 ".rept " MLA_BLOCKS "\n" 351 REPEAT_MLA(
351 "ldmia %[v1]!, {r0,r1} \n" 352 "ldmia %[v1]!, {r0,r1} \n"
352 "smlabt %[res], r0, r3, %[res] \n" 353 "smlabt %[res], r0, r3, %[res] \n"
353 "ldmia %[v2]!, {r2,r3} \n" 354 "ldmia %[v2]!, {r2,r3} \n"
354 "smlatb %[res], r0, r2, %[res] \n" 355 "smlatb %[res], r0, r2, %[res] \n"
355 "smlabt %[res], r1, r2, %[res] \n" 356 "smlabt %[res], r1, r2, %[res] \n"
356 "smlatb %[res], r1, r3, %[res] \n" 357 "smlatb %[res], r1, r3, %[res] \n"
357 ".endr \n" 358 )
358#if ORDER > 32 359#if ORDER > 32
359 "subs %[cnt], %[cnt], #1 \n" 360 "subs %[cnt], %[cnt], #1 \n"
360 "bne 1b \n" 361 "bne 1b \n"
@@ -374,14 +375,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
374 "smlabb %[res], r1, r3, %[res] \n" 375 "smlabb %[res], r1, r3, %[res] \n"
375 "smlatt %[res], r1, r3, %[res] \n" 376 "smlatt %[res], r1, r3, %[res] \n"
376 377
377 ".rept " MLA_BLOCKS "\n" 378 REPEAT_MLA(
378 "ldmia %[v1]!, {r0,r1} \n" 379 "ldmia %[v1]!, {r0,r1} \n"
379 "ldmia %[v2]!, {r2,r3} \n" 380 "ldmia %[v2]!, {r2,r3} \n"
380 "smlabb %[res], r0, r2, %[res] \n" 381 "smlabb %[res], r0, r2, %[res] \n"
381 "smlatt %[res], r0, r2, %[res] \n" 382 "smlatt %[res], r0, r2, %[res] \n"
382 "smlabb %[res], r1, r3, %[res] \n" 383 "smlabb %[res], r1, r3, %[res] \n"
383 "smlatt %[res], r1, r3, %[res] \n" 384 "smlatt %[res], r1, r3, %[res] \n"
384 ".endr \n" 385 )
385#if ORDER > 32 386#if ORDER > 32
386 "subs %[cnt], %[cnt], #1 \n" 387 "subs %[cnt], %[cnt], #1 \n"
387 "bne 1b \n" 388 "bne 1b \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index 0ace6c5811..2ce62728cb 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -22,14 +22,14 @@ You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software 22along with this program; if not, write to the Free Software
23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA 23Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
24 24
25*/ 25*/
26 26
27#define FUSED_VECTOR_MATH 27#define FUSED_VECTOR_MATH
28 28
29#if ORDER > 16 29#if ORDER > 16
30#define BLOCK_REPEAT "3" 30#define REPEAT_BLOCK(x) x x x
31#else 31#else
32#define BLOCK_REPEAT "1" 32#define REPEAT_BLOCK(x) x
33#endif 33#endif
34 34
35/* Calculate scalarproduct, then add a 2nd vector (fused for performance) 35/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
@@ -77,7 +77,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
77 "sadd16 r1, r1, r5 \n" 77 "sadd16 r1, r1, r5 \n"
78 "strd r0, [%[v1]], #8 \n" 78 "strd r0, [%[v1]], #8 \n"
79 79
80 ".rept " BLOCK_REPEAT "\n" 80 REPEAT_BLOCK(
81 "ldmia %[s2]!, {r5,r6} \n" 81 "ldmia %[s2]!, {r5,r6} \n"
82 "pkhtb r4, r4, r2 \n" 82 "pkhtb r4, r4, r2 \n"
83 "pkhtb r2, r2, r3 \n" 83 "pkhtb r2, r2, r3 \n"
@@ -104,7 +104,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
104 "sadd16 r0, r0, r6 \n" 104 "sadd16 r0, r0, r6 \n"
105 "sadd16 r1, r1, r5 \n" 105 "sadd16 r1, r1, r5 \n"
106 "strd r0, [%[v1]], #8 \n" 106 "strd r0, [%[v1]], #8 \n"
107 ".endr \n" 107 )
108 108
109 "ldmia %[s2]!, {r5,r6} \n" 109 "ldmia %[s2]!, {r5,r6} \n"
110 "pkhtb r4, r4, r2 \n" 110 "pkhtb r4, r4, r2 \n"
@@ -148,7 +148,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
148 "sadd16 r1, r1, r7 \n" 148 "sadd16 r1, r1, r7 \n"
149 "strd r0, [%[v1]], #8 \n" 149 "strd r0, [%[v1]], #8 \n"
150 150
151 ".rept " BLOCK_REPEAT "\n" 151 REPEAT_BLOCK(
152 "smlad %[res], r2, r4, %[res] \n" 152 "smlad %[res], r2, r4, %[res] \n"
153 "ldrd r6, [%[s2]], #8 \n" 153 "ldrd r6, [%[s2]], #8 \n"
154 "smlad %[res], r3, r5, %[res] \n" 154 "smlad %[res], r3, r5, %[res] \n"
@@ -165,7 +165,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
165 "sadd16 r0, r0, r6 \n" 165 "sadd16 r0, r0, r6 \n"
166 "sadd16 r1, r1, r7 \n" 166 "sadd16 r1, r1, r7 \n"
167 "strd r0, [%[v1]], #8 \n" 167 "strd r0, [%[v1]], #8 \n"
168 ".endr \n" 168 )
169 169
170 "smlad %[res], r2, r4, %[res] \n" 170 "smlad %[res], r2, r4, %[res] \n"
171 "ldrd r6, [%[s2]], #8 \n" 171 "ldrd r6, [%[s2]], #8 \n"
@@ -246,7 +246,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
246 "ssub16 r1, r1, r5 \n" 246 "ssub16 r1, r1, r5 \n"
247 "strd r0, [%[v1]], #8 \n" 247 "strd r0, [%[v1]], #8 \n"
248 248
249 ".rept " BLOCK_REPEAT "\n" 249 REPEAT_BLOCK(
250 "ldmia %[s2]!, {r5,r6} \n" 250 "ldmia %[s2]!, {r5,r6} \n"
251 "pkhtb r4, r4, r2 \n" 251 "pkhtb r4, r4, r2 \n"
252 "pkhtb r2, r2, r3 \n" 252 "pkhtb r2, r2, r3 \n"
@@ -273,7 +273,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
273 "ssub16 r0, r0, r6 \n" 273 "ssub16 r0, r0, r6 \n"
274 "ssub16 r1, r1, r5 \n" 274 "ssub16 r1, r1, r5 \n"
275 "strd r0, [%[v1]], #8 \n" 275 "strd r0, [%[v1]], #8 \n"
276 ".endr \n" 276 )
277 277
278 "ldmia %[s2]!, {r5,r6} \n" 278 "ldmia %[s2]!, {r5,r6} \n"
279 "pkhtb r4, r4, r2 \n" 279 "pkhtb r4, r4, r2 \n"
@@ -317,7 +317,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
317 "ssub16 r1, r1, r7 \n" 317 "ssub16 r1, r1, r7 \n"
318 "strd r0, [%[v1]], #8 \n" 318 "strd r0, [%[v1]], #8 \n"
319 319
320 ".rept " BLOCK_REPEAT "\n" 320 REPEAT_BLOCK(
321 "smlad %[res], r2, r4, %[res] \n" 321 "smlad %[res], r2, r4, %[res] \n"
322 "ldrd r6, [%[s2]], #8 \n" 322 "ldrd r6, [%[s2]], #8 \n"
323 "smlad %[res], r3, r5, %[res] \n" 323 "smlad %[res], r3, r5, %[res] \n"
@@ -334,7 +334,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
334 "ssub16 r0, r0, r6 \n" 334 "ssub16 r0, r0, r6 \n"
335 "ssub16 r1, r1, r7 \n" 335 "ssub16 r1, r1, r7 \n"
336 "strd r0, [%[v1]], #8 \n" 336 "strd r0, [%[v1]], #8 \n"
337 ".endr \n" 337 )
338 338
339 "smlad %[res], r2, r4, %[res] \n" 339 "smlad %[res], r2, r4, %[res] \n"
340 "ldrd r6, [%[s2]], #8 \n" 340 "ldrd r6, [%[s2]], #8 \n"
@@ -400,7 +400,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
400#else 400#else
401 "smuadx %[res], r0, r3 \n" 401 "smuadx %[res], r0, r3 \n"
402#endif 402#endif
403 ".rept " BLOCK_REPEAT "\n" 403 REPEAT_BLOCK(
404 "pkhtb r0, r6, r7 \n" 404 "pkhtb r0, r6, r7 \n"
405 "ldrd r2, [%[v1]], #8 \n" 405 "ldrd r2, [%[v1]], #8 \n"
406 "smladx %[res], r1, r0, %[res] \n" 406 "smladx %[res], r1, r0, %[res] \n"
@@ -413,8 +413,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
413 "pkhtb r3, r5, r6 \n" 413 "pkhtb r3, r5, r6 \n"
414 "ldrd r4, [%[v2]], #8 \n" 414 "ldrd r4, [%[v2]], #8 \n"
415 "smladx %[res], r0, r3, %[res] \n" 415 "smladx %[res], r0, r3, %[res] \n"
416 ".endr \n" 416 )
417 417
418 "pkhtb r0, r6, r7 \n" 418 "pkhtb r0, r6, r7 \n"
419 "ldrd r2, [%[v1]], #8 \n" 419 "ldrd r2, [%[v1]], #8 \n"
420 "smladx %[res], r1, r0, %[res] \n" 420 "smladx %[res], r1, r0, %[res] \n"
@@ -434,7 +434,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
434#endif 434#endif
435 435
436 "b 99f \n" 436 "b 99f \n"
437 437
438 "20: \n" 438 "20: \n"
439 "ldrd r0, [%[v1]], #8 \n" 439 "ldrd r0, [%[v1]], #8 \n"
440 "ldmia %[v2]!, {r5-r7} \n" 440 "ldmia %[v2]!, {r5-r7} \n"
@@ -446,7 +446,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
446#else 446#else
447 "smuad %[res], r0, r5 \n" 447 "smuad %[res], r0, r5 \n"
448#endif 448#endif
449 ".rept " BLOCK_REPEAT "\n" 449 REPEAT_BLOCK(
450 "ldrd r4, [%[v2]], #8 \n" 450 "ldrd r4, [%[v2]], #8 \n"
451 "smlad %[res], r1, r6, %[res] \n" 451 "smlad %[res], r1, r6, %[res] \n"
452 "ldrd r0, [%[v1]], #8 \n" 452 "ldrd r0, [%[v1]], #8 \n"
@@ -455,7 +455,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
455 "smlad %[res], r3, r4, %[res] \n" 455 "smlad %[res], r3, r4, %[res] \n"
456 "ldrd r2, [%[v1]], #8 \n" 456 "ldrd r2, [%[v1]], #8 \n"
457 "smlad %[res], r0, r5, %[res] \n" 457 "smlad %[res], r0, r5, %[res] \n"
458 ".endr \n" 458 )
459 459
460#if ORDER > 32 460#if ORDER > 32
461 "ldrd r4, [%[v2]], #8 \n" 461 "ldrd r4, [%[v2]], #8 \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 6e8216c9cc..4d77d3be31 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
28 28
29#define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */ 29#define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */
30 30
31#define REPEAT_2(x) x x
32#define REPEAT_3(x) x x x
33#define REPEAT_7(x) x x x x x x x
34
31/* Calculate scalarproduct, then add a 2nd vector (fused for performance) 35/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
32 * This version fetches data as 32 bit words, and *recommends* v1 to be 36 * This version fetches data as 32 bit words, and *recommends* v1 to be
33 * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit 37 * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit
@@ -64,7 +68,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
64 "move.w (%[s2])+, %%d1 \n" 68 "move.w (%[s2])+, %%d1 \n"
65 "swap %%d1 \n" 69 "swap %%d1 \n"
66 "1: \n" 70 "1: \n"
67 ".rept 2 \n" 71 REPEAT_2(
68 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" 72 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
69 "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n" 73 "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
70 "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n" 74 "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -82,7 +86,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
82 "move.l %%d6, (%[v1])+ \n" 86 "move.l %%d6, (%[v1])+ \n"
83 ADDHALFXREGS(%%a1, %%d1, %%d7) 87 ADDHALFXREGS(%%a1, %%d1, %%d7)
84 "move.l %%d7, (%[v1])+ \n" 88 "move.l %%d7, (%[v1])+ \n"
85 ".endr \n" 89 )
86 90
87#if ORDER > 16 91#if ORDER > 16
88 "subq.l #1, %[res] \n" 92 "subq.l #1, %[res] \n"
@@ -193,7 +197,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
193 "move.w (%[s2])+, %%d1 \n" 197 "move.w (%[s2])+, %%d1 \n"
194 "swap %%d1 \n" 198 "swap %%d1 \n"
195 "1: \n" 199 "1: \n"
196 ".rept 2 \n" 200 REPEAT_2(
197 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" 201 "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
198 "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n" 202 "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
199 "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n" 203 "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -211,7 +215,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
211 "move.l %%d6, (%[v1])+ \n" 215 "move.l %%d6, (%[v1])+ \n"
212 SUBHALFXREGS(%%a1, %%d1, %%d7) 216 SUBHALFXREGS(%%a1, %%d1, %%d7)
213 "move.l %%d7, (%[v1])+ \n" 217 "move.l %%d7, (%[v1])+ \n"
214 ".endr \n" 218 )
215 219
216#if ORDER > 16 220#if ORDER > 16
217 "subq.l #1, %[res] \n" 221 "subq.l #1, %[res] \n"
@@ -305,10 +309,10 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
305 "move.l (%[v1])+, %%d0 \n" 309 "move.l (%[v1])+, %%d0 \n"
306 "move.w (%[v2])+, %%d1 \n" 310 "move.w (%[v2])+, %%d1 \n"
307 "1: \n" 311 "1: \n"
308 ".rept 7 \n" 312 REPEAT_7(
309 "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" 313 "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
310 "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" 314 "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
311 ".endr \n" 315 )
312 316
313 "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" 317 "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
314#if ORDER > 16 318#if ORDER > 16
@@ -324,12 +328,12 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
324 "move.l (%[v1])+, %%d0 \n" 328 "move.l (%[v1])+, %%d0 \n"
325 "move.l (%[v2])+, %%d1 \n" 329 "move.l (%[v2])+, %%d1 \n"
326 "1: \n" 330 "1: \n"
327 ".rept 3 \n" 331 REPEAT_3(
328 "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" 332 "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
329 "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" 333 "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
330 "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" 334 "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
331 "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n" 335 "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
332 ".endr \n" 336 )
333 337
334 "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" 338 "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
335 "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" 339 "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
index 207fca3038..cb5fe9e0ee 100644
--- a/apps/codecs/demac/libdemac/vector_math32_armv4.h
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -27,11 +27,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
27#define FUSED_VECTOR_MATH 27#define FUSED_VECTOR_MATH
28 28
29#if ORDER > 32 29#if ORDER > 32
30#define BLOCK_REPEAT "8" 30#define REPEAT_BLOCK(x) x x x x x x x x
31#elif ORDER > 16 31#elif ORDER > 16
32#define BLOCK_REPEAT "7" 32#define REPEAT_BLOCK(x) x x x x x x x
33#else 33#else
34#define BLOCK_REPEAT "3" 34#define REPEAT_BLOCK(x) x x x
35#endif 35#endif
36 36
37/* Calculate scalarproduct, then add a 2nd vector (fused for performance) */ 37/* Calculate scalarproduct, then add a 2nd vector (fused for performance) */
@@ -60,7 +60,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
60 "add r3, r3, r7 \n" 60 "add r3, r3, r7 \n"
61 "stmia %[v1]!, {r0-r3} \n" 61 "stmia %[v1]!, {r0-r3} \n"
62#endif 62#endif
63 ".rept " BLOCK_REPEAT "\n" 63 REPEAT_BLOCK(
64 "ldmia %[v1], {r0-r3} \n" 64 "ldmia %[v1], {r0-r3} \n"
65 "ldmia %[f2]!, {r4-r7} \n" 65 "ldmia %[f2]!, {r4-r7} \n"
66 "mla %[res], r4, r0, %[res] \n" 66 "mla %[res], r4, r0, %[res] \n"
@@ -73,7 +73,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
73 "add r2, r2, r6 \n" 73 "add r2, r2, r6 \n"
74 "add r3, r3, r7 \n" 74 "add r3, r3, r7 \n"
75 "stmia %[v1]!, {r0-r3} \n" 75 "stmia %[v1]!, {r0-r3} \n"
76 ".endr \n" 76 )
77#if ORDER > 32 77#if ORDER > 32
78 "subs %[cnt], %[cnt], #1 \n" 78 "subs %[cnt], %[cnt], #1 \n"
79 "bne 1b \n" 79 "bne 1b \n"
@@ -120,7 +120,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
120 "sub r3, r3, r7 \n" 120 "sub r3, r3, r7 \n"
121 "stmia %[v1]!, {r0-r3} \n" 121 "stmia %[v1]!, {r0-r3} \n"
122#endif 122#endif
123 ".rept " BLOCK_REPEAT "\n" 123 REPEAT_BLOCK(
124 "ldmia %[v1], {r0-r3} \n" 124 "ldmia %[v1], {r0-r3} \n"
125 "ldmia %[f2]!, {r4-r7} \n" 125 "ldmia %[f2]!, {r4-r7} \n"
126 "mla %[res], r4, r0, %[res] \n" 126 "mla %[res], r4, r0, %[res] \n"
@@ -133,7 +133,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
133 "sub r2, r2, r6 \n" 133 "sub r2, r2, r6 \n"
134 "sub r3, r3, r7 \n" 134 "sub r3, r3, r7 \n"
135 "stmia %[v1]!, {r0-r3} \n" 135 "stmia %[v1]!, {r0-r3} \n"
136 ".endr \n" 136 )
137#if ORDER > 32 137#if ORDER > 32
138 "subs %[cnt], %[cnt], #1 \n" 138 "subs %[cnt], %[cnt], #1 \n"
139 "bne 1b \n" 139 "bne 1b \n"
@@ -173,14 +173,14 @@ static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
173 "mla %[res], r6, r2, %[res] \n" 173 "mla %[res], r6, r2, %[res] \n"
174 "mla %[res], r7, r3, %[res] \n" 174 "mla %[res], r7, r3, %[res] \n"
175#endif 175#endif
176 ".rept " BLOCK_REPEAT "\n" 176 REPEAT_BLOCK(
177 "ldmia %[v1]!, {r0-r3} \n" 177 "ldmia %[v1]!, {r0-r3} \n"
178 "ldmia %[v2]!, {r4-r7} \n" 178 "ldmia %[v2]!, {r4-r7} \n"
179 "mla %[res], r4, r0, %[res] \n" 179 "mla %[res], r4, r0, %[res] \n"
180 "mla %[res], r5, r1, %[res] \n" 180 "mla %[res], r5, r1, %[res] \n"
181 "mla %[res], r6, r2, %[res] \n" 181 "mla %[res], r6, r2, %[res] \n"
182 "mla %[res], r7, r3, %[res] \n" 182 "mla %[res], r7, r3, %[res] \n"
183 ".endr \n" 183 )
184#if ORDER > 32 184#if ORDER > 32
185 "subs %[cnt], %[cnt], #1 \n" 185 "subs %[cnt], %[cnt], #1 \n"
186 "bne 1b \n" 186 "bne 1b \n"