diff options
author | Jens Arnold <amiconn@rockbox.org> | 2008-11-19 21:31:33 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2008-11-19 21:31:33 +0000 |
commit | 2a5053f58c1a33334776cc90264c67dde815cef3 (patch) | |
tree | 7acc0727874ff6b307eff293a18172a3239cd895 /apps/codecs/demac/libdemac/vector_math16_cf.h | |
parent | 14d37cb4555703d216e954db15ccca2c34642dc3 (diff) | |
download | rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.gz rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.zip |
Several tweaks and cleanups:
* Use .rept instead of repeated macros for repeating blocks.
* Use MUL (variant) instead of MLA (variant) in the first step of the ARM scalarproduct() if there's no loop.
* Unroll ARM assembler functions to 32 where not already done, plus the generic scalarproduct().
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19144 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac/vector_math16_cf.h')
-rw-r--r-- | apps/codecs/demac/libdemac/vector_math16_cf.h | 55 |
1 file changed, 22 insertions, 33 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h index 0c3aaca223..11e7f07adf 100644 --- a/apps/codecs/demac/libdemac/vector_math16_cf.h +++ b/apps/codecs/demac/libdemac/vector_math16_cf.h | |||
@@ -67,7 +67,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2) | |||
67 | "move.l %%d3, (%[v1])+ \n" | 67 | "move.l %%d3, (%[v1])+ \n" |
68 | "lea.l (16, %[v2]), %[v2] \n" | 68 | "lea.l (16, %[v2]), %[v2] \n" |
69 | "move.l %%d4, %%d0 \n" | 69 | "move.l %%d4, %%d0 \n" |
70 | 70 | ||
71 | "movem.l (%[v1]), %%a0-%%a3 \n" | 71 | "movem.l (%[v1]), %%a0-%%a3 \n" |
72 | "movem.l (%[v2]), %%d1-%%d4 \n" | 72 | "movem.l (%[v2]), %%d1-%%d4 \n" |
73 | ADDHALFXREGS(%%a0, %%d1, %%d0) | 73 | ADDHALFXREGS(%%a0, %%d1, %%d0) |
@@ -175,7 +175,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2) | |||
175 | "move.l %%d3, (%[v1])+ \n" | 175 | "move.l %%d3, (%[v1])+ \n" |
176 | "lea.l (16, %[v2]), %[v2] \n" | 176 | "lea.l (16, %[v2]), %[v2] \n" |
177 | "move.l %%d4, %%d0 \n" | 177 | "move.l %%d4, %%d0 \n" |
178 | 178 | ||
179 | "movem.l (%[v2]), %%d1-%%d4 \n" | 179 | "movem.l (%[v2]), %%d1-%%d4 \n" |
180 | "movem.l (%[v1]), %%a0-%%a3 \n" | 180 | "movem.l (%[v1]), %%a0-%%a3 \n" |
181 | SUBHALFXREGS(%%a0, %%d1, %%d0) | 181 | SUBHALFXREGS(%%a0, %%d1, %%d0) |
@@ -207,7 +207,6 @@ static inline void vector_sub(int16_t* v1, int16_t* v2) | |||
207 | "move.l %%d2, (%[v1])+ \n" | 207 | "move.l %%d2, (%[v1])+ \n" |
208 | SUBHALFREGS(%%a3, %%d4, %%d3) | 208 | SUBHALFREGS(%%a3, %%d4, %%d3) |
209 | "move.l %%d3, (%[v1])+ \n" | 209 | "move.l %%d3, (%[v1])+ \n" |
210 | |||
211 | "lea.l (16, %[v2]), %[v2] \n" | 210 | "lea.l (16, %[v2]), %[v2] \n" |
212 | 211 | ||
213 | "movem.l (%[v2]), %%d1-%%d4 \n" | 212 | "movem.l (%[v2]), %%d1-%%d4 \n" |
@@ -248,22 +247,16 @@ static inline void vector_sub(int16_t* v1, int16_t* v2) | |||
248 | * in signed integer mode - call above macro before use. */ | 247 | * in signed integer mode - call above macro before use. */ |
249 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | 248 | static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) |
250 | { | 249 | { |
251 | int res = 0; | 250 | int res; |
252 | #if ORDER > 32 | 251 | #if ORDER > 32 |
253 | int cnt = ORDER>>5; | 252 | int cnt = ORDER>>5; |
254 | #endif | 253 | #endif |
255 | 254 | ||
256 | #define MACBLOCK4 \ | 255 | #if ORDER > 16 |
257 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" \ | 256 | #define MAC_BLOCKS "7" |
258 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" \ | 257 | #else |
259 | "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" \ | 258 | #define MAC_BLOCKS "3" |
260 | "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n" | 259 | #endif |
261 | |||
262 | #define MACBLOCK4_U2 \ | ||
263 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \ | ||
264 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" \ | ||
265 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \ | ||
266 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" | ||
267 | 260 | ||
268 | asm volatile ( | 261 | asm volatile ( |
269 | "move.l %[v2], %%d0 \n" | 262 | "move.l %[v2], %%d0 \n" |
@@ -274,15 +267,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
274 | "move.l (%[v1])+, %%d0 \n" | 267 | "move.l (%[v1])+, %%d0 \n" |
275 | "move.w (%[v2])+, %%d1 \n" | 268 | "move.w (%[v2])+, %%d1 \n" |
276 | "1: \n" | 269 | "1: \n" |
277 | #if ORDER > 16 | 270 | ".rept " MAC_BLOCKS "\n" |
278 | MACBLOCK4_U2 | 271 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
279 | MACBLOCK4_U2 | 272 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" |
280 | MACBLOCK4_U2 | 273 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
281 | MACBLOCK4_U2 | 274 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" |
282 | #endif | 275 | ".endr \n" |
283 | MACBLOCK4_U2 | 276 | |
284 | MACBLOCK4_U2 | ||
285 | MACBLOCK4_U2 | ||
286 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" | 277 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
287 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" | 278 | "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" |
288 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" | 279 | "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
@@ -299,15 +290,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2) | |||
299 | "move.l (%[v1])+, %%d0 \n" | 290 | "move.l (%[v1])+, %%d0 \n" |
300 | "move.l (%[v2])+, %%d1 \n" | 291 | "move.l (%[v2])+, %%d1 \n" |
301 | "1: \n" | 292 | "1: \n" |
302 | #if ORDER > 16 | 293 | ".rept " MAC_BLOCKS "\n" |
303 | MACBLOCK4 | 294 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" |
304 | MACBLOCK4 | 295 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
305 | MACBLOCK4 | 296 | "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" |
306 | MACBLOCK4 | 297 | "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
307 | #endif | 298 | ".endr \n" |
308 | MACBLOCK4 | 299 | |
309 | MACBLOCK4 | ||
310 | MACBLOCK4 | ||
311 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" | 300 | "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" |
312 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" | 301 | "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" |
313 | #if ORDER > 32 | 302 | #if ORDER > 32 |