Diffstat (limited to 'apps/codecs/demac/libdemac')
-rw-r--r--  apps/codecs/demac/libdemac/vector_math16_armv5te.h  |  29
-rw-r--r--  apps/codecs/demac/libdemac/vector_math16_armv6.h    |  34
-rw-r--r--  apps/codecs/demac/libdemac/vector_math16_cf.h       |  20
-rw-r--r--  apps/codecs/demac/libdemac/vector_math32_armv4.h    |  18
4 files changed, 53 insertions, 48 deletions
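
The pattern is the same in all four files: loop unrolling inside the asm volatile blocks moves from the GNU assembler's ".rept N" ... ".endr" directives to function-like C preprocessor macros (REPEAT_2, REPEAT_3, REPEAT_7, REPEAT_MLA, REPEAT_BLOCK) that paste their argument a fixed number of times, with ORDER-dependent repeat counts handled by defining the macro under #if. This works because the preprocessor expands the macro before compilation and adjacent string literals concatenate, so the repeated fragments still form a single asm template. A minimal, self-contained sketch of the idiom follows; the demo string and program are illustrative only, not part of the patch:

    #include <stdio.h>

    /* Same repetition idiom as the patch: paste the argument 3 times. */
    #define REPEAT_3(x) x x x

    int main(void)
    {
        /* REPEAT_3("abc") expands to "abc" "abc" "abc"; adjacent string
         * literals concatenate into the single literal "abcabcabc".
         * Inside an asm volatile(...) template the same rule merges the
         * repeated instruction strings into one block of assembly. */
        puts(REPEAT_3("abc"));  /* prints abcabcabc */
        return 0;
    }
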
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index 2940585a42..0a3679ce63 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -26,6 +26,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define FUSED_VECTOR_MATH
 
+#define REPEAT_3(x) x x x
+#if ORDER > 16
+#define REPEAT_MLA(x) x x x x x x x
+#else
+#define REPEAT_MLA(x) x x x
+#endif
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *requires* v1 to be
  * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
@@ -133,7 +140,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -144,7 +151,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r0, r1, r3)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -275,7 +282,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -286,7 +293,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r0, r1, r3)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -318,12 +325,6 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
     int cnt = ORDER>>5;
 #endif
 
-#if ORDER > 16
-#define MLA_BLOCKS "7"
-#else
-#define MLA_BLOCKS "3"
-#endif
-
     asm volatile (
 #if ORDER > 32
         "mov %[res], #0 \n"
@@ -347,14 +348,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "smlabt %[res], r0, r3, %[res] \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlatb %[res], r0, r2, %[res] \n"
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -374,14 +375,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlabb %[res], r0, r2, %[res] \n"
         "smlatt %[res], r0, r2, %[res] \n"
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv6.h b/apps/codecs/demac/libdemac/vector_math16_armv6.h
index 0ace6c5811..2ce62728cb 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv6.h
@@ -22,14 +22,14 @@ You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 */
 
 #define FUSED_VECTOR_MATH
 
 #if ORDER > 16
-#define BLOCK_REPEAT "3"
+#define REPEAT_BLOCK(x) x x x
 #else
-#define BLOCK_REPEAT "1"
+#define REPEAT_BLOCK(x) x
 #endif
 
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
@@ -77,7 +77,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
         "pkhtb r2, r2, r3 \n"
@@ -104,7 +104,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r0, r0, r6 \n"
         "sadd16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
@@ -148,7 +148,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
         "smlad %[res], r3, r5, %[res] \n"
@@ -165,7 +165,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "sadd16 r0, r0, r6 \n"
         "sadd16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
@@ -246,7 +246,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
         "pkhtb r2, r2, r3 \n"
@@ -273,7 +273,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r0, r0, r6 \n"
         "ssub16 r1, r1, r5 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "ldmia %[s2]!, {r5,r6} \n"
         "pkhtb r4, r4, r2 \n"
@@ -317,7 +317,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
 
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
         "smlad %[res], r3, r5, %[res] \n"
@@ -334,7 +334,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "ssub16 r0, r0, r6 \n"
         "ssub16 r1, r1, r7 \n"
         "strd r0, [%[v1]], #8 \n"
-        ".endr \n"
+        )
 
         "smlad %[res], r2, r4, %[res] \n"
         "ldrd r6, [%[s2]], #8 \n"
@@ -400,7 +400,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #else
         "smuadx %[res], r0, r3 \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "pkhtb r0, r6, r7 \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smladx %[res], r1, r0, %[res] \n"
@@ -413,8 +413,8 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "pkhtb r3, r5, r6 \n"
         "ldrd r4, [%[v2]], #8 \n"
         "smladx %[res], r0, r3, %[res] \n"
-        ".endr \n"
+        )
 
         "pkhtb r0, r6, r7 \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smladx %[res], r1, r0, %[res] \n"
@@ -434,7 +434,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #endif
 
         "b 99f \n"
 
         "20: \n"
         "ldrd r0, [%[v1]], #8 \n"
         "ldmia %[v2]!, {r5-r7} \n"
@@ -446,7 +446,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 #else
         "smuad %[res], r0, r5 \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldrd r4, [%[v2]], #8 \n"
         "smlad %[res], r1, r6, %[res] \n"
         "ldrd r0, [%[v1]], #8 \n"
@@ -455,7 +455,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlad %[res], r3, r4, %[res] \n"
         "ldrd r2, [%[v1]], #8 \n"
         "smlad %[res], r0, r5, %[res] \n"
-        ".endr \n"
+        )
 
 #if ORDER > 32
         "ldrd r4, [%[v2]], #8 \n"
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 6e8216c9cc..4d77d3be31 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */
 
+#define REPEAT_2(x) x x
+#define REPEAT_3(x) x x x
+#define REPEAT_7(x) x x x x x x x
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *recommends* v1 to be
  * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit
@@ -64,7 +68,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.w (%[s2])+, %%d1 \n"
         "swap %%d1 \n"
         "1: \n"
-        ".rept 2 \n"
+        REPEAT_2(
         "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
         "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
         "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -82,7 +86,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.l %%d6, (%[v1])+ \n"
         ADDHALFXREGS(%%a1, %%d1, %%d7)
         "move.l %%d7, (%[v1])+ \n"
-        ".endr \n"
+        )
 
 #if ORDER > 16
         "subq.l #1, %[res] \n"
@@ -193,7 +197,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.w (%[s2])+, %%d1 \n"
         "swap %%d1 \n"
         "1: \n"
-        ".rept 2 \n"
+        REPEAT_2(
         "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
         "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
         "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
@@ -211,7 +215,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         "move.l %%d6, (%[v1])+ \n"
         SUBHALFXREGS(%%a1, %%d1, %%d7)
         "move.l %%d7, (%[v1])+ \n"
-        ".endr \n"
+        )
 
 #if ORDER > 16
         "subq.l #1, %[res] \n"
@@ -305,10 +309,10 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l (%[v1])+, %%d0 \n"
         "move.w (%[v2])+, %%d1 \n"
         "1: \n"
-        ".rept 7 \n"
+        REPEAT_7(
         "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
         "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
-        ".endr \n"
+        )
 
         "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
 #if ORDER > 16
@@ -324,12 +328,12 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "move.l (%[v1])+, %%d0 \n"
         "move.l (%[v2])+, %%d1 \n"
         "1: \n"
-        ".rept 3 \n"
+        REPEAT_3(
         "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
         "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
         "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
         "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-        ".endr \n"
+        )
 
         "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
         "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
diff --git a/apps/codecs/demac/libdemac/vector_math32_armv4.h b/apps/codecs/demac/libdemac/vector_math32_armv4.h
index 207fca3038..cb5fe9e0ee 100644
--- a/apps/codecs/demac/libdemac/vector_math32_armv4.h
+++ b/apps/codecs/demac/libdemac/vector_math32_armv4.h
@@ -27,11 +27,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 #define FUSED_VECTOR_MATH
 
 #if ORDER > 32
-#define BLOCK_REPEAT "8"
+#define REPEAT_BLOCK(x) x x x x x x x x
 #elif ORDER > 16
-#define BLOCK_REPEAT "7"
+#define REPEAT_BLOCK(x) x x x x x x x
 #else
-#define BLOCK_REPEAT "3"
+#define REPEAT_BLOCK(x) x x x
 #endif
 
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance) */
@@ -60,7 +60,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
         "add r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1], {r0-r3} \n"
         "ldmia %[f2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
@@ -73,7 +73,7 @@ static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
         "add r2, r2, r6 \n"
         "add r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -120,7 +120,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
         "sub r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1], {r0-r3} \n"
         "ldmia %[f2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
@@ -133,7 +133,7 @@ static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
         "sub r2, r2, r6 \n"
         "sub r3, r3, r7 \n"
         "stmia %[v1]!, {r0-r3} \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -173,14 +173,14 @@ static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
         "mla %[res], r6, r2, %[res] \n"
         "mla %[res], r7, r3, %[res] \n"
 #endif
-        ".rept " BLOCK_REPEAT "\n"
+        REPEAT_BLOCK(
         "ldmia %[v1]!, {r0-r3} \n"
         "ldmia %[v2]!, {r4-r7} \n"
         "mla %[res], r4, r0, %[res] \n"
         "mla %[res], r5, r1, %[res] \n"
         "mla %[res], r6, r2, %[res] \n"
         "mla %[res], r7, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"