From e7cdd6cbc6040c3c6225580ba155edfdfd35efb1 Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Wed, 4 Jul 2007 19:23:18 +0000 Subject: Assemblerised CMUL() for ARM, giving ~20% speedup. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13787 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libwma/wmadeci.c | 24 +++++++++++++++++++++++- apps/codecs/libwma/wmafixed.h | 24 ------------------------ 2 files changed, 23 insertions(+), 25 deletions(-) (limited to 'apps/codecs') diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c index 6647ed4b40..29651382e2 100644 --- a/apps/codecs/libwma/wmadeci.c +++ b/apps/codecs/libwma/wmadeci.c @@ -50,7 +50,29 @@ uint32_t bswap_32(uint32_t x) return (b1 >> 24) | (b2 >> 8) | (b3 << 8) | (b4 << 24); } -#ifdef CPU_COLDFIRE +#ifdef CPU_ARM +static inline +void CMUL(fixed32 *x, fixed32 *y, + fixed32 a, fixed32 b, + fixed32 t, fixed32 v) +{ + /* This version loses one bit of precision. Could be solved at the cost + * of 2 extra cycles if it becomes an issue. */ + int x1, y1, l; + asm( + "smull %[l], %[y1], %[b], %[t] \n" + "smlal %[l], %[y1], %[a], %[v] \n" + "rsb %[b], %[b], #0 \n" + "smull %[l], %[x1], %[a], %[t] \n" + "smlal %[l], %[x1], %[b], %[v] \n" + : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b) + : [a] "r" (a), [t] "r" (t), [v] "r" (v) + : "cc" + ); + *x = x1 << 1; + *y = y1 << 1; +} +#elif defined CPU_COLDFIRE static inline void CMUL(fixed32 *x, fixed32 *y, fixed32 a, fixed32 b, diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h index 887973a78a..99ddec759e 100644 --- a/apps/codecs/libwma/wmafixed.h +++ b/apps/codecs/libwma/wmafixed.h @@ -61,30 +61,6 @@ long fsincos(unsigned long phase, fixed32 *cos); __result; \ }) -/* - Special fixmul32 that does a 16.16 x 1.31 multiply that returns a 16.16 value. - this is needed because the fft constants are all normalized to be less then 1 - and can't fit into a 16 bit number without excessive rounding - - -*/ - - -# define fixmul32b(x, y) \ - ({ int32_t __hi; \ - uint32_t __lo; \ - int32_t __result; \ - asm ("smull %0, %1, %3, %4\n\t" \ - "movs %0, %0, lsr %5\n\t" \ - "adc %2, %0, %1, lsl %6" \ - : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \ - : "%r" (x), "r" (y), \ - "M" (31), "M" (1) \ - : "cc"); \ - __result; \ - }) - - #elif defined(CPU_COLDFIRE) static inline int32_t fixmul32(int32_t x, int32_t y) { -- cgit v1.2.3