From 025eed5c745513ff181762a172d2eda7b886c40d Mon Sep 17 00:00:00 2001
From: Nils Wallménius <nils@rockbox.org>
Date: Mon, 26 Jul 2010 11:15:25 +0000
Subject: libwmapro: coldfire asm for vector_fixmul_window, gives a speedup of
 ~13%, drop the add_bias argument for the vector_fixmul_window function, since
 it was always 0

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27573 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/libwmapro/wmapro_math.h | 43 +++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 7 deletions(-)

(limited to 'apps/codecs/libwmapro/wmapro_math.h')

diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h
index 823c002c09..71cc3d33d7 100644
--- a/apps/codecs/libwmapro/wmapro_math.h
+++ b/apps/codecs/libwmapro/wmapro_math.h
@@ -19,10 +19,39 @@ static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
     return (int32_t)temp;
 }
 
+#ifdef CPU_COLDFIRE
+static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, 
+                                   const int32_t *src1, const int32_t *win, 
+                                   int len)
+{
+    int i, j;
+    dst += len;  /* dst[i] (i < 0) now writes the first len outputs, dst[j] the next len */
+    win += len;  /* win is likewise read on both sides of its midpoint via i and j */
+    src0+= len;  /* src0 is only read with the negative index i; src1 stays unshifted */
+        for(i=-len, j=len-1; i<0; i++, j--) { /* i walks -len..-1 while j walks len-1..0 */
+        int32_t s0 = src0[i];
+        int32_t s1 = src1[j];
+        int32_t wi = -win[i];  /* window coefficients are negated before use */
+        int32_t wj = -win[j];
 
+        asm volatile ("mac.l    %[s0], %[wj], %%acc0\n\t"  /* acc0 += s0 * wj */
+                      "msac.l   %[s1], %[wi], %%acc0\n\t"  /* acc0 -= s1 * wi */
+                      "mac.l    %[s0], %[wi], %%acc1\n\t"  /* acc1 += s0 * wi */
+                      "mac.l    %[s1], %[wj], %%acc1\n\t"  /* acc1 += s1 * wj */
+                      "movclr.l %%acc0, %[s0]\n\t"         /* fetch acc0 and clear it for the next pass */
+                      "move.l   %[s0], (%[dst_i])\n\t"     /* dst[i] = s0*wj - s1*wi */
+                      "movclr.l %%acc1, %[s0]\n\t"         /* fetch acc1 and clear it for the next pass */
+                      "move.l   %[s0], (%[dst_j])\n\t"     /* dst[j] = s0*wi + s1*wj */
+                      : [s0] "+r" (s0) /* s0 is read, then overwritten by movclr.l, so it is a read-write ("+r") operand */
+                      : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),
+                        [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)
+                      : "cc", "memory"); /* "memory": the asm stores through dst_i/dst_j itself. NOTE(review): presumably relies on acc0/acc1 being clear on entry and on the EMAC mode for the scaling - confirm */
+    }
+}
+#else
 static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, 
                                    const int32_t *src1, const int32_t *win, 
-                                   int32_t add_bias, int len)
+                                   int len)
 {
     int i, j;
     dst += len;
@@ -31,13 +60,13 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
         for(i=-len, j=len-1; i<0; i++, j--) {
         int32_t s0 = src0[i];
         int32_t s1 = src1[j];
-        int32_t wi = win[i];
-        int32_t wj = win[j];
-        dst[i] = fixmulshift(s0,-1*wj,31) - fixmulshift(s1,-1*wi,31) + (add_bias<<16);
-        dst[j] = fixmulshift(s0,-1*wi,31) + fixmulshift(s1,-1*wj,31) + (add_bias<<16);
-    }   
-    
+        int32_t wi = -win[i];
+        int32_t wj = -win[j];
+        dst[i] = fixmulshift(s0,wj,31) - fixmulshift(s1,wi,31);
+        dst[j] = fixmulshift(s0,wi,31) + fixmulshift(s1,wj,31);
+    }
 }
+#endif
 
 static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, int32_t mul,
                                         int len, int shift)
-- 
cgit v1.2.3