Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_armv5te.h')
-rw-r--r--  apps/codecs/demac/libdemac/vector_math16_armv5te.h | 29
1 file changed, 15 insertions(+), 14 deletions(-)
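For orientation, here is a minimal sketch (not part of the commit) of the repetition idiom the patch switches to: instead of the GNU assembler's ".rept N ... .endr" directives inside the asm template, a C preprocessor macro pastes its argument N times, and the adjacent string literals then concatenate into a single asm template. The REPEAT_3/REPEAT_MLA definitions below are taken from the patch; the ORDER value and the repeat_demo() wrapper are illustrative assumptions only.

/* Sketch: preprocessor-level repetition of inline-asm fragments.
 * REPEAT_3(x) expands to "x x x"; because the argument is a run of
 * adjacent string literals, the three copies concatenate into one asm
 * template, matching what ".rept 3 ... .endr" would emit at assembly
 * time. ORDER is normally defined by the including file. */
#define ORDER 16                        /* assumed filter order for the demo */
#define REPEAT_3(x) x x x
#if ORDER > 16
#define REPEAT_MLA(x) x x x x x x x     /* 7 extra MLA blocks */
#else
#define REPEAT_MLA(x) x x x             /* 3 extra MLA blocks */
#endif

static inline void repeat_demo(void)
{
    asm volatile (
        REPEAT_3(
        "nop \n"                        /* this line appears three times */
        )
    );
}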
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index 2940585a42..0a3679ce63 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -26,6 +26,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
 
 #define FUSED_VECTOR_MATH
 
+#define REPEAT_3(x) x x x
+#if ORDER > 16
+#define REPEAT_MLA(x) x x x x x x x
+#else
+#define REPEAT_MLA(x) x x x
+#endif
+
 /* Calculate scalarproduct, then add a 2nd vector (fused for performance)
  * This version fetches data as 32 bit words, and *requires* v1 to be
  * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
@@ -133,7 +140,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -144,7 +151,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
         ADDHALFREGS(r0, r1, r3)
         ADDHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -275,7 +282,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
 
-        ".rept 3 \n"
+        REPEAT_3(
         "ldmia %[v1], {r1,r2} \n"
         "ldmia %[f2]!, {r3,r4} \n"
         "smlabb %[res], r1, r3, %[res] \n"
@@ -286,7 +293,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
         SUBHALFREGS(r0, r1, r3)
         SUBHALFREGS(r1, r2, r4)
         "stmia %[v1]!, {r0,r1} \n"
-        ".endr \n"
+        )
 #if ORDER > 16
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -318,12 +325,6 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
     int cnt = ORDER>>5;
 #endif
 
-#if ORDER > 16
-#define MLA_BLOCKS "7"
-#else
-#define MLA_BLOCKS "3"
-#endif
-
     asm volatile (
 #if ORDER > 32
         "mov %[res], #0 \n"
@@ -347,14 +348,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "smlabt %[res], r0, r3, %[res] \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlatb %[res], r0, r2, %[res] \n"
         "smlabt %[res], r1, r2, %[res] \n"
         "smlatb %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
@@ -374,14 +375,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
 
-        ".rept " MLA_BLOCKS "\n"
+        REPEAT_MLA(
         "ldmia %[v1]!, {r0,r1} \n"
         "ldmia %[v2]!, {r2,r3} \n"
         "smlabb %[res], r0, r2, %[res] \n"
        "smlatt %[res], r0, r2, %[res] \n"
         "smlabb %[res], r1, r3, %[res] \n"
         "smlatt %[res], r1, r3, %[res] \n"
-        ".endr \n"
+        )
 #if ORDER > 32
         "subs %[cnt], %[cnt], #1 \n"
         "bne 1b \n"
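For context on what the routine touched above actually computes, here is a hedged, portable C sketch of the fused operation described by the comment in the first hunk: accumulate the scalar product of v1 and f2 while adding the second vector s2 to v1 in the same pass. The name vector_sp_add_ref and the ORDER fallback of 16 are assumptions for illustration; the real ARMv5TE routine additionally requires the stated 32-bit alignment and unrolls the loop with ldmia/smla<x><y>.

#include <stdint.h>

#ifndef ORDER
#define ORDER 16   /* assumed filter order; the real build defines it elsewhere */
#endif

/* Reference sketch (not part of the commit): scalar product of v1 and f2,
 * with the second vector s2 added to v1 in the same pass. */
static inline int32_t vector_sp_add_ref(int16_t *v1,
                                        const int16_t *f2,
                                        const int16_t *s2)
{
    int32_t res = 0;
    for (int i = 0; i < ORDER; i++) {
        res += (int32_t)v1[i] * f2[i];  /* scalar product term */
        v1[i] += s2[i];                 /* fused add of the 2nd vector */
    }
    return res;
}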