From 30d286d859aad049fb549f48080a196f6074a9fa Mon Sep 17 00:00:00 2001
From: Nils Wallménius
Date: Mon, 26 Jul 2010 23:00:22 +0000
Subject: libwmapro: slightly shorter and faster inline asm fixed point multiplication routines, speedup is ~0.5%. Also don't lie to gcc about which vars are changed by the asm.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27584 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/libwmapro/wmapro_math.h | 47 +++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

(limited to 'apps')

diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h
index 30b9a987ee..5220560998 100644
--- a/apps/codecs/libwmapro/wmapro_math.h
+++ b/apps/codecs/libwmapro/wmapro_math.h
@@ -95,37 +95,34 @@
 /* Calculates: result = (X*Y)>>16 */
 #define fixmul16(X,Y) \
 ({ \
- int32_t t1, t2; \
+ int32_t t, x = (X); \
 asm volatile ( \
- "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
- "mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
- "movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
- "moveq.l #15,%[t2] \n\t" \
- "asl.l %[t2],%[t1] \n\t" /* hi <<= 15, plus one free */ \
- "moveq.l #16,%[t2] \n\t" \
- "lsr.l %[t2],%[x] \n\t" /* (unsigned)lo >>= 16 */ \
- "or.l %[x],%[t1] \n\t" /* combine result */ \
- : [t1]"=&d"(t1), [t2]"=&d"(t2) \
- : [x] "d" ((X)), [y] "d" ((Y))); \
- t1; \
+ "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
+ "mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
+ "movclr.l %%acc0,%[t] \n\t" /* get higher half */ \
+ "lsr.l #1,%[t] \n\t" /* hi >>= 1 to compensate emac shift */ \
+ "move.w %[t],%[x] \n\t" /* combine halfwords */\
+ "swap %[x] \n\t" \
+ : [t]"=&d"(t), [x] "+d" (x) \
+ : [y] "d" ((Y))); \
+ x; \
 })
 
 /* Calculates: result = (X*Y)>>24 */
 #define fixmul24(X,Y) \
 ({ \
- int32_t t1, t2; \
+ int32_t t, x = (X); \
 asm volatile ( \
- "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
- "mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
- "movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
- "moveq.l #7,%[t2] \n\t" \
- "asl.l %[t2],%[t1] \n\t" /* hi <<= 7, plus one free */ \
- "moveq.l #24,%[t2] \n\t" \
- "lsr.l %[t2],%[x] \n\t" /* (unsigned)lo >>= 24 */ \
- "or.l %[x],%[t1] \n\t" /* combine result */ \
- : [t1]"=&d"(t1), [t2]"=&d"(t2) \
- : [x] "d" ((X)), [y] "d" ((Y))); \
- t1; \
+ "mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
+ "mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
+ "moveq.l #24,%[t] \n\t" \
+ "lsr.l %[t],%[x] \n\t" /* (unsigned)lo >>= 24 */ \
+ "movclr.l %%acc0,%[t] \n\t" /* get higher half */ \
+ "asl.l #7,%[t] \n\t" /* hi <<= 7, plus one free */ \
+ "or.l %[x],%[t] \n\t" /* combine result */ \
+ : [t]"=&d"(t), [x] "+d" (x) \
+ : [y] "d" ((Y))); \
+ t; \
 })
 
 /* Calculates: result = (X*Y)>>32 */
@@ -239,7 +236,7 @@ static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
 {
 int i;
 for(i=0; i