summary refs log tree commit diff
path: root/apps/codecs/demac/libdemac/vector_math16_armv5te.h
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_armv5te.h')
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_armv5te.h 29
1 files changed, 15 insertions, 14 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_armv5te.h b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
index 2940585a42..0a3679ce63 100644
--- a/apps/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/apps/codecs/demac/libdemac/vector_math16_armv5te.h
@@ -26,6 +26,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
26 26
27#define FUSED_VECTOR_MATH 27#define FUSED_VECTOR_MATH
28 28
29#define REPEAT_3(x) x x x
30#if ORDER > 16
31#define REPEAT_MLA(x) x x x x x x x
32#else
33#define REPEAT_MLA(x) x x x
34#endif
35
29/* Calculate scalarproduct, then add a 2nd vector (fused for performance) 36/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
30 * This version fetches data as 32 bit words, and *requires* v1 to be 37 * This version fetches data as 32 bit words, and *requires* v1 to be
31 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit 38 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
@@ -133,7 +140,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
133 ADDHALFREGS(r1, r2, r4) 140 ADDHALFREGS(r1, r2, r4)
134 "stmia %[v1]!, {r0,r1} \n" 141 "stmia %[v1]!, {r0,r1} \n"
135 142
136 ".rept 3 \n" 143 REPEAT_3(
137 "ldmia %[v1], {r1,r2} \n" 144 "ldmia %[v1], {r1,r2} \n"
138 "ldmia %[f2]!, {r3,r4} \n" 145 "ldmia %[f2]!, {r3,r4} \n"
139 "smlabb %[res], r1, r3, %[res] \n" 146 "smlabb %[res], r1, r3, %[res] \n"
@@ -144,7 +151,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
144 ADDHALFREGS(r0, r1, r3) 151 ADDHALFREGS(r0, r1, r3)
145 ADDHALFREGS(r1, r2, r4) 152 ADDHALFREGS(r1, r2, r4)
146 "stmia %[v1]!, {r0,r1} \n" 153 "stmia %[v1]!, {r0,r1} \n"
147 ".endr \n" 154 )
148#if ORDER > 16 155#if ORDER > 16
149 "subs %[cnt], %[cnt], #1 \n" 156 "subs %[cnt], %[cnt], #1 \n"
150 "bne 1b \n" 157 "bne 1b \n"
@@ -275,7 +282,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
275 SUBHALFREGS(r1, r2, r4) 282 SUBHALFREGS(r1, r2, r4)
276 "stmia %[v1]!, {r0,r1} \n" 283 "stmia %[v1]!, {r0,r1} \n"
277 284
278 ".rept 3 \n" 285 REPEAT_3(
279 "ldmia %[v1], {r1,r2} \n" 286 "ldmia %[v1], {r1,r2} \n"
280 "ldmia %[f2]!, {r3,r4} \n" 287 "ldmia %[f2]!, {r3,r4} \n"
281 "smlabb %[res], r1, r3, %[res] \n" 288 "smlabb %[res], r1, r3, %[res] \n"
@@ -286,7 +293,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
286 SUBHALFREGS(r0, r1, r3) 293 SUBHALFREGS(r0, r1, r3)
287 SUBHALFREGS(r1, r2, r4) 294 SUBHALFREGS(r1, r2, r4)
288 "stmia %[v1]!, {r0,r1} \n" 295 "stmia %[v1]!, {r0,r1} \n"
289 ".endr \n" 296 )
290#if ORDER > 16 297#if ORDER > 16
291 "subs %[cnt], %[cnt], #1 \n" 298 "subs %[cnt], %[cnt], #1 \n"
292 "bne 1b \n" 299 "bne 1b \n"
@@ -318,12 +325,6 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
318 int cnt = ORDER>>5; 325 int cnt = ORDER>>5;
319#endif 326#endif
320 327
321#if ORDER > 16
322#define MLA_BLOCKS "7"
323#else
324#define MLA_BLOCKS "3"
325#endif
326
327 asm volatile ( 328 asm volatile (
328#if ORDER > 32 329#if ORDER > 32
329 "mov %[res], #0 \n" 330 "mov %[res], #0 \n"
@@ -347,14 +348,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
347 "smlabt %[res], r1, r2, %[res] \n" 348 "smlabt %[res], r1, r2, %[res] \n"
348 "smlatb %[res], r1, r3, %[res] \n" 349 "smlatb %[res], r1, r3, %[res] \n"
349 350
350 ".rept " MLA_BLOCKS "\n" 351 REPEAT_MLA(
351 "ldmia %[v1]!, {r0,r1} \n" 352 "ldmia %[v1]!, {r0,r1} \n"
352 "smlabt %[res], r0, r3, %[res] \n" 353 "smlabt %[res], r0, r3, %[res] \n"
353 "ldmia %[v2]!, {r2,r3} \n" 354 "ldmia %[v2]!, {r2,r3} \n"
354 "smlatb %[res], r0, r2, %[res] \n" 355 "smlatb %[res], r0, r2, %[res] \n"
355 "smlabt %[res], r1, r2, %[res] \n" 356 "smlabt %[res], r1, r2, %[res] \n"
356 "smlatb %[res], r1, r3, %[res] \n" 357 "smlatb %[res], r1, r3, %[res] \n"
357 ".endr \n" 358 )
358#if ORDER > 32 359#if ORDER > 32
359 "subs %[cnt], %[cnt], #1 \n" 360 "subs %[cnt], %[cnt], #1 \n"
360 "bne 1b \n" 361 "bne 1b \n"
@@ -374,14 +375,14 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
374 "smlabb %[res], r1, r3, %[res] \n" 375 "smlabb %[res], r1, r3, %[res] \n"
375 "smlatt %[res], r1, r3, %[res] \n" 376 "smlatt %[res], r1, r3, %[res] \n"
376 377
377 ".rept " MLA_BLOCKS "\n" 378 REPEAT_MLA(
378 "ldmia %[v1]!, {r0,r1} \n" 379 "ldmia %[v1]!, {r0,r1} \n"
379 "ldmia %[v2]!, {r2,r3} \n" 380 "ldmia %[v2]!, {r2,r3} \n"
380 "smlabb %[res], r0, r2, %[res] \n" 381 "smlabb %[res], r0, r2, %[res] \n"
381 "smlatt %[res], r0, r2, %[res] \n" 382 "smlatt %[res], r0, r2, %[res] \n"
382 "smlabb %[res], r1, r3, %[res] \n" 383 "smlabb %[res], r1, r3, %[res] \n"
383 "smlatt %[res], r1, r3, %[res] \n" 384 "smlatt %[res], r1, r3, %[res] \n"
384 ".endr \n" 385 )
385#if ORDER > 32 386#if ORDER > 32
386 "subs %[cnt], %[cnt], #1 \n" 387 "subs %[cnt], %[cnt], #1 \n"
387 "bne 1b \n" 388 "bne 1b \n"