diff options
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_cf.h')
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math16_cf.h | 20 |
1 files changed, 12 insertions, 8 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h index 6e8216c9cc..4d77d3be31 100644 --- a/apps/codecs/demac/libdemac/vector_math16_cf.h +++ b/apps/codecs/demac/libdemac/vector_math16_cf.h | |||
@@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
28 | 28 | ||
29 | #define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */ | 29 | #define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */ |
30 | 30 | ||
31 | #define REPEAT_2(x) x x | ||
32 | #define REPEAT_3(x) x x x | ||
33 | #define REPEAT_7(x) x x x x x x x | ||
34 | |||
31 | /* Calculate scalarproduct, then add a 2nd vector (fused for performance) | 35 | /* Calculate scalarproduct, then add a 2nd vector (fused for performance) |
32 | * This version fetches data as 32 bit words, and *recommends* v1 to be | 36 | * This version fetches data as 32 bit words, and *recommends* v1 to be |
33 | * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit | 37 | * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit |
@@ -64,7 +68,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) | |||
64 | "move.w (%[s2])+, %%d1 \n" | 68 | "move.w (%[s2])+, %%d1 \n" |
65 | "swap %%d1 \n" | 69 | "swap %%d1 \n" |
66 | "1: \n" | 70 | "1: \n" |
67 | ".rept 2 \n" | 71 | REPEAT_2( |
68 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" | 72 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" |
69 | "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n" | 73 | "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n" |
70 | "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n" | 74 | "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n" |
@@ -82,7 +86,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2) | |||
82 | "move.l %%d6, (%[v1])+ \n" | 86 | "move.l %%d6, (%[v1])+ \n" |
83 | ADDHALFXREGS(%%a1, %%d1, %%d7) | 87 | ADDHALFXREGS(%%a1, %%d1, %%d7) |
84 | "move.l %%d7, (%[v1])+ \n" | 88 | "move.l %%d7, (%[v1])+ \n" |
85 | ".endr \n" | 89 | ) |
86 | 90 | ||
87 | #if ORDER > 16 | 91 | #if ORDER > 16 |
88 | "subq.l #1, %[res] \n" | 92 | "subq.l #1, %[res] \n" |
@@ -193,7 +197,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) | |||
193 | "move.w (%[s2])+, %%d1 \n" | 197 | "move.w (%[s2])+, %%d1 \n" |
194 | "swap %%d1 \n" | 198 | "swap %%d1 \n" |
195 | "1: \n" | 199 | "1: \n" |
196 | ".rept 2 \n" | 200 | REPEAT_2( |
197 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" | 201 | "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n" |
198 | "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n" | 202 | "mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n" |
199 | "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n" | 203 | "mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n" |
@@ -211,7 +215,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2) | |||
211 | "move.l %%d6, (%[v1])+ \n" | 215 | "move.l %%d6, (%[v1])+ \n" |
212 | SUBHALFXREGS(%%a1, %%d1, %%d7) | 216 | SUBHALFXREGS(%%a1, %%d1, %%d7) |
213 | "move.l %%d7, (%[v1])+ \n" | 217 | "move.l %%d7, (%[v1])+ \n" |
214 | ".endr \n" | 218 | ) |
215 | 219 | ||
216 | #if ORDER > 16 | 220 | #if ORDER > 16 |
217 | "subq.l #1, %[res] \n" | 221 | "subq.l #1, %[res] \n" |
@@ -305,10 +309,10 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
305 | "move.l (%[v1])+, %%d0 \n" | 309 | "move.l (%[v1])+, %%d0 \n" |
306 | "move.w (%[v2])+, %%d1 \n" | 310 | "move.w (%[v2])+, %%d1 \n" |
307 | "1: \n" | 311 | "1: \n" |
308 | ".rept 7 \n" | 312 | REPEAT_7( |
309 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" | 313 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
310 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" | 314 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" |
311 | ".endr \n" | 315 | ) |
312 | 316 | ||
313 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" | 317 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
314 | #if ORDER > 16 | 318 | #if ORDER > 16 |
@@ -324,12 +328,12 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
324 | "move.l (%[v1])+, %%d0 \n" | 328 | "move.l (%[v1])+, %%d0 \n" |
325 | "move.l (%[v2])+, %%d1 \n" | 329 | "move.l (%[v2])+, %%d1 \n" |
326 | "1: \n" | 330 | "1: \n" |
327 | ".rept 3 \n" | 331 | REPEAT_3( |
328 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" | 332 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" |
329 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" | 333 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
330 | "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" | 334 | "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" |
331 | "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n" | 335 | "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
332 | ".endr \n" | 336 | ) |
333 | 337 | ||
334 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" | 338 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" |
335 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" | 339 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" |