Several tweaks and cleanups:
* Use .rept instead of repeated macros for repeating blocks.
* Use MUL (variant) instead of MLA (variant) in the first step of the ARM scalarproduct() if there's no loop.
* Unroll ARM assembler functions to 32 where not already done, plus the generic scalarproduct().

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19144 a1c6a512-1295-4272-9138-f99709370657
author: Jens Arnold <amiconn@rockbox.org> 2008-11-19 21:31:33 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2008-11-19 21:31:33 +0000
commit: 2a5053f58c1a33334776cc90264c67dde815cef3 (patch)
tree: 7acc0727874ff6b307eff293a18172a3239cd895 /apps/codecs/demac/libdemac/vector_math16_cf.h
parent: 14d37cb4555703d216e954db15ccca2c34642dc3 (diff)
download: rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.gz
rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.zip
1 files changed, 22 insertions, 33 deletions
diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 0c3aaca223..11e7f07adf 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -67,7 +67,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
        "move.l  %%d3, (%[v1])+      \n"
        "lea.l   (16, %[v2]), %[v2]  \n"
        "move.l  %%d4, %%d0          \n"
-        
        "movem.l (%[v1]), %%a0-%%a3  \n"
        "movem.l (%[v2]), %%d1-%%d4  \n"
        ADDHALFXREGS(%%a0, %%d1, %%d0)
@@ -175,7 +175,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
        "move.l  %%d3, (%[v1])+      \n"
        "lea.l   (16, %[v2]), %[v2]  \n"
        "move.l  %%d4, %%d0          \n"
-        
        "movem.l (%[v2]), %%d1-%%d4  \n"
        "movem.l (%[v1]), %%a0-%%a3  \n"
        SUBHALFXREGS(%%a0, %%d1, %%d0)
@@ -207,7 +207,6 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
        "move.l  %%d2, (%[v1])+      \n"
        SUBHALFREGS(%%a3, %%d4, %%d3)
        "move.l  %%d3, (%[v1])+      \n"
        "lea.l   (16, %[v2]), %[v2]  \n"
        "movem.l (%[v2]), %%d1-%%d4  \n"
@@ -248,22 +247,16 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
 * in signed integer mode - call above macro before use. */
 static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
 {
-    int res = 0;
+    int res;
 #if ORDER > 32
    int cnt = ORDER>>5;
 #endif
-#define MACBLOCK4                                        \
+#if ORDER > 16
-        "mac.w   %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" \
+#define MAC_BLOCKS "7"
-        "mac.w   %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
+#else
-        "mac.w   %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" \
+#define MAC_BLOCKS "3"
-        "mac.w   %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+#endif
-#define MACBLOCK4_U2                                     \
-        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
-        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" \
-        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
-        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
    asm volatile (
        "move.l  %[v2], %%d0                         \n"
@@ -274,15 +267,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
        "move.l  (%[v1])+, %%d0                      \n"
        "move.w  (%[v2])+, %%d1                      \n"
    "1:                                              \n"
-#if ORDER > 16
+        ".rept " MAC_BLOCKS                         "\n"
-        MACBLOCK4_U2
+        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-        MACBLOCK4_U2
+        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
-        MACBLOCK4_U2
+        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-        MACBLOCK4_U2
+        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
-#endif
+        ".endr                                       \n"
-        MACBLOCK4_U2
-        MACBLOCK4_U2
-        MACBLOCK4_U2
        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
@@ -299,15 +290,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
        "move.l  (%[v1])+, %%d0                      \n"
        "move.l  (%[v2])+, %%d1                      \n"
    "1:                                              \n"
-#if ORDER > 16
+        ".rept " MAC_BLOCKS                         "\n"
-        MACBLOCK4
+        "mac.w   %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
-        MACBLOCK4
+        "mac.w   %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-        MACBLOCK4
+        "mac.w   %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
-        MACBLOCK4
+        "mac.w   %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
-#endif
+        ".endr                                       \n"
-        MACBLOCK4
-        MACBLOCK4
-        MACBLOCK4
        "mac.w   %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
        "mac.w   %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
 #if ORDER > 32
author	Jens Arnold <amiconn@rockbox.org>	2008-11-19 21:31:33 +0000
committer	Jens Arnold <amiconn@rockbox.org>	2008-11-19 21:31:33 +0000
commit	2a5053f58c1a33334776cc90264c67dde815cef3 (patch)
tree	7acc0727874ff6b307eff293a18172a3239cd895 /apps/codecs/demac/libdemac/vector_math16_cf.h
parent	14d37cb4555703d216e954db15ccca2c34642dc3 (diff)
download	rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.tar.gz rockbox-2a5053f58c1a33334776cc90264c67dde815cef3.zip

diff --git a/apps/codecs/demac/libdemac/vector_math16_cf.h b/apps/codecs/demac/libdemac/vector_math16_cf.h
index 0c3aaca223..11e7f07adf 100644
--- a/apps/codecs/demac/libdemac/vector_math16_cf.h
+++ b/apps/codecs/demac/libdemac/vector_math16_cf.h
@@ -67,7 +67,7 @@ static inline void vector_add(int16_t* v1, int16_t* v2)
67	"move.l %%d3, (%[v1])+ \n"	67	"move.l %%d3, (%[v1])+ \n"
68	"lea.l (16, %[v2]), %[v2] \n"	68	"lea.l (16, %[v2]), %[v2] \n"
69	"move.l %%d4, %%d0 \n"	69	"move.l %%d4, %%d0 \n"
70		70
71	"movem.l (%[v1]), %%a0-%%a3 \n"	71	"movem.l (%[v1]), %%a0-%%a3 \n"
72	"movem.l (%[v2]), %%d1-%%d4 \n"	72	"movem.l (%[v2]), %%d1-%%d4 \n"
73	ADDHALFXREGS(%%a0, %%d1, %%d0)	73	ADDHALFXREGS(%%a0, %%d1, %%d0)
@@ -175,7 +175,7 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
175	"move.l %%d3, (%[v1])+ \n"	175	"move.l %%d3, (%[v1])+ \n"
176	"lea.l (16, %[v2]), %[v2] \n"	176	"lea.l (16, %[v2]), %[v2] \n"
177	"move.l %%d4, %%d0 \n"	177	"move.l %%d4, %%d0 \n"
178		178
179	"movem.l (%[v2]), %%d1-%%d4 \n"	179	"movem.l (%[v2]), %%d1-%%d4 \n"
180	"movem.l (%[v1]), %%a0-%%a3 \n"	180	"movem.l (%[v1]), %%a0-%%a3 \n"
181	SUBHALFXREGS(%%a0, %%d1, %%d0)	181	SUBHALFXREGS(%%a0, %%d1, %%d0)
@@ -207,7 +207,6 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
207	"move.l %%d2, (%[v1])+ \n"	207	"move.l %%d2, (%[v1])+ \n"
208	SUBHALFREGS(%%a3, %%d4, %%d3)	208	SUBHALFREGS(%%a3, %%d4, %%d3)
209	"move.l %%d3, (%[v1])+ \n"	209	"move.l %%d3, (%[v1])+ \n"
210
211	"lea.l (16, %[v2]), %[v2] \n"	210	"lea.l (16, %[v2]), %[v2] \n"
212		211
213	"movem.l (%[v2]), %%d1-%%d4 \n"	212	"movem.l (%[v2]), %%d1-%%d4 \n"
@@ -248,22 +247,16 @@ static inline void vector_sub(int16_t* v1, int16_t* v2)
248	* in signed integer mode - call above macro before use. */	247	* in signed integer mode - call above macro before use. */
249	static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)	248	static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
250	{	249	{
251	int res = 0;	250	int res;
252	#if ORDER > 32	251	#if ORDER > 32
253	int cnt = ORDER>>5;	252	int cnt = ORDER>>5;
254	#endif	253	#endif
255		254
256	#define MACBLOCK4 \	255	#if ORDER > 16
257	"mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n" \	256	#define MAC_BLOCKS "7"
258	"mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n" \	257	#else
259	"mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n" \	258	#define MAC_BLOCKS "3"
260	"mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"	259	#endif
261
262	#define MACBLOCK4_U2 \
263	"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
264	"mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n" \
265	"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n" \
266	"mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
267		260
268	asm volatile (	261	asm volatile (
269	"move.l %[v2], %%d0 \n"	262	"move.l %[v2], %%d0 \n"
@@ -274,15 +267,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
274	"move.l (%[v1])+, %%d0 \n"	267	"move.l (%[v1])+, %%d0 \n"
275	"move.w (%[v2])+, %%d1 \n"	268	"move.w (%[v2])+, %%d1 \n"
276	"1: \n"	269	"1: \n"
277	#if ORDER > 16	270	".rept " MAC_BLOCKS "\n"
278	MACBLOCK4_U2	271	"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
279	MACBLOCK4_U2	272	"mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
280	MACBLOCK4_U2	273	"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
281	MACBLOCK4_U2	274	"mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
282	#endif	275	".endr \n"
283	MACBLOCK4_U2	276
284	MACBLOCK4_U2
285	MACBLOCK4_U2
286	"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"	277	"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
287	"mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"	278	"mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
288	"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"	279	"mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
@@ -299,15 +290,13 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
299	"move.l (%[v1])+, %%d0 \n"	290	"move.l (%[v1])+, %%d0 \n"
300	"move.l (%[v2])+, %%d1 \n"	291	"move.l (%[v2])+, %%d1 \n"
301	"1: \n"	292	"1: \n"
302	#if ORDER > 16	293	".rept " MAC_BLOCKS "\n"
303	MACBLOCK4	294	"mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
304	MACBLOCK4	295	"mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
305	MACBLOCK4	296	"mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
306	MACBLOCK4	297	"mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
307	#endif	298	".endr \n"
308	MACBLOCK4	299
309	MACBLOCK4
310	MACBLOCK4
311	"mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"	300	"mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
312	"mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"	301	"mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
313	#if ORDER > 32	302	#if ORDER > 32