1 files changed, 48 insertions, 18 deletions
diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h
index 6f8d6dbe0f..c94fa80271 100644
--- a/apps/codecs/libwmapro/wmapro_math.h
+++ b/apps/codecs/libwmapro/wmapro_math.h
@@ -180,7 +180,7 @@
    }
 #endif /* CPU_COLDFIRE, CPU_ARM */
-#ifdef CPU_COLDFIRE
+#if defined(CPU_COLDFIRE)
 static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, 
                                   const int32_t *src1, const int32_t *win, 
                                   int len)
@@ -190,23 +190,23 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
    win += len;
    src0+= len;
        for(i=-len, j=len-1; i<0; i++, j--) {
-        int32_t s0 = src0[i];
+            int32_t s0 = src0[i];
-        int32_t s1 = src1[j];
+            int32_t s1 = src1[j];
-        int32_t wi = -win[i];
+            int32_t wi = -win[i];
-        int32_t wj = -win[j];
+            int32_t wj = -win[j];
+            asm volatile (
-        asm volatile ("mac.l    %[s0], %[wj], %%acc0\n\t"
+                "mac.l    %[s0], %[wj], %%acc0\n\t"
-                      "msac.l   %[s1], %[wi], %%acc0\n\t"
+                "msac.l   %[s1], %[wi], %%acc0\n\t"
-                      "mac.l    %[s0], %[wi], %%acc1\n\t"
+                "mac.l    %[s0], %[wi], %%acc1\n\t"
-                      "mac.l    %[s1], %[wj], %%acc1\n\t"
+                "mac.l    %[s1], %[wj], %%acc1\n\t"
-                      "movclr.l %%acc0, %[s0]\n\t"
+                "movclr.l %%acc0, %[s0]\n\t"
-                      "move.l   %[s0], (%[dst_i])\n\t"
+                "move.l   %[s0], (%[dst_i])\n\t"
-                      "movclr.l %%acc1, %[s0]\n\t"
+                "movclr.l %%acc1, %[s0]\n\t"
-                      "move.l   %[s0], (%[dst_j])\n\t"
+                "move.l   %[s0], (%[dst_j])\n\t"
-                      : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */
+                : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */
-                      : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),
+                : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),
-                        [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)
+                  [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)
-                      : "cc", "memory");
+                : "cc", "memory");
    }
 }
 #else
@@ -229,6 +229,35 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
 }
 #endif
+#if defined(CPU_ARM)
+static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, 
+                                        int32_t mul, int len)
+{   /* ARM asm: dst[k] = low 32 bits of (64-bit src[k] * mul) >> 24, k < len */
+    /* len is _always_ a multiple of 4, because len is the difference of sfb's
+     * which themselves are always a multiple of 4. */
+    int i;   /* only counts elements; the asm itself advances dst and src */
+    for (i=0; i<len; i+=4) {   /* four elements per pass, no tail handling */
+        asm volatile (
+            "ldmia %[src]!, {r1-r4}    \n\t"   /* load 4 inputs, advance src */
+            "smull r0, r5, r1, %[mul] \n\t"    /* r5:r0 = r1 * mul, signed 64-bit */
+            "mov   r0, r0, lsr #24    \n\t"    /* top 8 bits of the low word */
+            "orr   r0, r0, r5, lsl #8 \n\t"    /* merge high word: product >> 24 */
+            "smull r1, r5, r2, %[mul] \n\t"    /* same three steps, 2nd element */
+            "mov   r1, r1, lsr #24    \n\t"
+            "orr   r1, r1, r5, lsl #8 \n\t"
+            "smull r2, r5, r3, %[mul] \n\t"    /* 3rd element */
+            "mov   r2, r2, lsr #24    \n\t"
+            "orr   r2, r2, r5, lsl #8 \n\t"
+            "smull r3, r5, r4, %[mul] \n\t"    /* 4th element */
+            "mov   r3, r3, lsr #24    \n\t"
+            "orr   r3, r3, r5, lsl #8 \n\t"
+            "stmia %[dst]!, {r0-r3}    \n"     /* store 4 results, advance dst */
+            : [dst]"+r"(dst), [src]"+r"(src)   /* read-write: changed by the '!' writeback */
+            : [mul]"r"(mul)                    /* read-only multiplier */
+            : "r0", "r1", "r2", "r3", "r4", "r5", "memory"); /* scratch regs used above */
+    }
+}
+#else
 static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, 
                                        int32_t mul, int len)
 {
@@ -242,6 +271,7 @@ static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
        dst[i+3] = fixmul24(src[i+3], mul);
    }
 }
+#endif /* CPU_ARM */
 static inline int av_clip(int a, int amin, int amax)
 {

diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h index 6f8d6dbe0f..c94fa80271 100644 --- a/apps/codecs/libwmapro/wmapro_math.h +++ b/apps/codecs/libwmapro/wmapro_math.h
@@ -180,7 +180,7 @@
180	}	180	}
181	#endif /* CPU_COLDFIRE, CPU_ARM */	181	#endif /* CPU_COLDFIRE, CPU_ARM */
182		182
183	#ifdef CPU_COLDFIRE	183	#if defined(CPU_COLDFIRE)
184	static inline void vector_fixmul_window(int32_t dst, const int32_t src0,	184	static inline void vector_fixmul_window(int32_t dst, const int32_t src0,
185	const int32_t src1, const int32_t win,	185	const int32_t src1, const int32_t win,
186	int len)	186	int len)
@@ -190,23 +190,23 @@ static inline void vector_fixmul_window(int32_t dst, const int32_t src0,
190	win += len;	190	win += len;
191	src0+= len;	191	src0+= len;
192	for(i=-len, j=len-1; i<0; i++, j--) {	192	for(i=-len, j=len-1; i<0; i++, j--) {
193	int32_t s0 = src0[i];	193	int32_t s0 = src0[i];
194	int32_t s1 = src1[j];	194	int32_t s1 = src1[j];
195	int32_t wi = -win[i];	195	int32_t wi = -win[i];
196	int32_t wj = -win[j];	196	int32_t wj = -win[j];
197		197	asm volatile (
198	asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t"	198	"mac.l %[s0], %[wj], %%acc0\n\t"
199	"msac.l %[s1], %[wi], %%acc0\n\t"	199	"msac.l %[s1], %[wi], %%acc0\n\t"
200	"mac.l %[s0], %[wi], %%acc1\n\t"	200	"mac.l %[s0], %[wi], %%acc1\n\t"
201	"mac.l %[s1], %[wj], %%acc1\n\t"	201	"mac.l %[s1], %[wj], %%acc1\n\t"
202	"movclr.l %%acc0, %[s0]\n\t"	202	"movclr.l %%acc0, %[s0]\n\t"
203	"move.l %[s0], (%[dst_i])\n\t"	203	"move.l %[s0], (%[dst_i])\n\t"
204	"movclr.l %%acc1, %[s0]\n\t"	204	"movclr.l %%acc1, %[s0]\n\t"
205	"move.l %[s0], (%[dst_j])\n\t"	205	"move.l %[s0], (%[dst_j])\n\t"
206	: [s0] "+r" (s0) /* this register is clobbered so specify it as an input */	206	: [s0] "+r" (s0) /* this register is clobbered so specify it as an input */
207	: [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),	207	: [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),
208	[s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)	208	[s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)
209	: "cc", "memory");	209	: "cc", "memory");
210	}	210	}
211	}	211	}
212	#else	212	#else
@@ -229,6 +229,35 @@ static inline void vector_fixmul_window(int32_t dst, const int32_t src0,
229	}	229	}
230	#endif	230	#endif
231		231
		232	#if defined(CPU_ARM)
		233	static inline void vector_fixmul_scalar(int32_t dst, const int32_t src,
		234	int32_t mul, int len)
		235	{
		236	/* len is _always_ a multiple of 4, because len is the difference of sfb's
		237	* which themselves are always a multiple of 4. */
		238	int i;
		239	for (i=0; i<len; i+=4) {
		240	asm volatile (
		241	"ldmia %[src]!, {r1-r4} \n\t"
		242	"smull r0, r5, r1, %[mul] \n\t"
		243	"mov r0, r0, lsr #24 \n\t"
		244	"orr r0, r0, r5, lsl #8 \n\t"
		245	"smull r1, r5, r2, %[mul] \n\t"
		246	"mov r1, r1, lsr #24 \n\t"
		247	"orr r1, r1, r5, lsl #8 \n\t"
		248	"smull r2, r5, r3, %[mul] \n\t"
		249	"mov r2, r2, lsr #24 \n\t"
		250	"orr r2, r2, r5, lsl #8 \n\t"
		251	"smull r3, r5, r4, %[mul] \n\t"
		252	"mov r3, r3, lsr #24 \n\t"
		253	"orr r3, r3, r5, lsl #8 \n\t"
		254	"stmia %[dst]!, {r0-r3} \n"
		255	: [dst]"+r"(dst), [src]"+r"(src)
		256	: [mul]"r"(mul)
		257	: "r0", "r1", "r2", "r3", "r4", "r5", "memory");
		258	}
		259	}
		260	#else
232	static inline void vector_fixmul_scalar(int32_t dst, const int32_t src,	261	static inline void vector_fixmul_scalar(int32_t dst, const int32_t src,
233	int32_t mul, int len)	262	int32_t mul, int len)
234	{	263	{
@@ -242,6 +271,7 @@ static inline void vector_fixmul_scalar(int32_t dst, const int32_t src,
242	dst[i+3] = fixmul24(src[i+3], mul);	271	dst[i+3] = fixmul24(src[i+3], mul);
243	}	272	}
244	}	273	}
		274	#endif /* CPU_ARM */
245		275
246	static inline int av_clip(int a, int amin, int amax)	276	static inline int av_clip(int a, int amin, int amax)
247	{	277	{