From 3bb8020f787514cd853d17e6d5ee9df29b156e28 Mon Sep 17 00:00:00 2001 From: Andree Buschmann Date: Wed, 28 Jul 2010 19:36:15 +0000 Subject: Add ARM assembler to libwmapro vector_fixmul_scalar(). Speeds up decoding by 1% on PP5022. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27603 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libwmapro/wmapro_math.h | 66 +++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 18 deletions(-) (limited to 'apps') diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h index 6f8d6dbe0f..c94fa80271 100644 --- a/apps/codecs/libwmapro/wmapro_math.h +++ b/apps/codecs/libwmapro/wmapro_math.h @@ -180,7 +180,7 @@ } #endif /* CPU_COLDFIRE, CPU_ARM */ -#ifdef CPU_COLDFIRE +#if defined(CPU_COLDFIRE) static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len) @@ -190,23 +190,23 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, win += len; src0+= len; for(i=-len, j=len-1; i<0; i++, j--) { - int32_t s0 = src0[i]; - int32_t s1 = src1[j]; - int32_t wi = -win[i]; - int32_t wj = -win[j]; - - asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t" - "msac.l %[s1], %[wi], %%acc0\n\t" - "mac.l %[s0], %[wi], %%acc1\n\t" - "mac.l %[s1], %[wj], %%acc1\n\t" - "movclr.l %%acc0, %[s0]\n\t" - "move.l %[s0], (%[dst_i])\n\t" - "movclr.l %%acc1, %[s0]\n\t" - "move.l %[s0], (%[dst_j])\n\t" - : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */ - : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), - [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) - : "cc", "memory"); + int32_t s0 = src0[i]; + int32_t s1 = src1[j]; + int32_t wi = -win[i]; + int32_t wj = -win[j]; + asm volatile ( + "mac.l %[s0], %[wj], %%acc0\n\t" + "msac.l %[s1], %[wi], %%acc0\n\t" + "mac.l %[s0], %[wi], %%acc1\n\t" + "mac.l %[s1], %[wj], %%acc1\n\t" + "movclr.l %%acc0, %[s0]\n\t" + "move.l %[s0], (%[dst_i])\n\t" + "movclr.l %%acc1, %[s0]\n\t" + "move.l %[s0], (%[dst_j])\n\t" + : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */ + : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), + [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) + : "cc", "memory"); } } #else @@ -229,6 +229,35 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, } #endif +#if defined(CPU_ARM) +static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, + int32_t mul, int len) +{ + /* len is _always_ a multiple of 4, because len is the difference of sfb's + * which themselves are always a multiple of 4. */ + int i; + for (i=0; i