From 6087d8960328b427f495d46d74664a4b0e32af17 Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Mon, 26 Jul 2010 17:22:35 +0000 Subject: libatrac: Coldfire asm for atrac3_iqmf_dewindowing, speeds up decoding of my atrac3 rm file by 45%. Based on preliminary patch by Andree Buschmann git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27578 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libatrac/atrac3.c | 53 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c index 4c0b7d079f..b93b058bb2 100644 --- a/apps/codecs/libatrac/atrac3.c +++ b/apps/codecs/libatrac/atrac3.c @@ -124,6 +124,57 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; int32_t *in, int32_t *win, unsigned int nIn); +#elif defined (CPU_COLDFIRE) + #define MULTIPLY_ADD_BLOCK \ + "movem.l (%[win]), %%d0-%%d7 \n\t" \ + "lea.l (8*4, %[win]), %[win] \n\t" \ + "mac.l %%d0, %%a5, (%[in])+, %%a5, %%acc0\n\t" \ + "mac.l %%d1, %%a5, (%[in])+, %%a5, %%acc1\n\t" \ + "mac.l %%d2, %%a5, (%[in])+, %%a5, %%acc0\n\t" \ + "mac.l %%d3, %%a5, (%[in])+, %%a5, %%acc1\n\t" \ + "mac.l %%d4, %%a5, (%[in])+, %%a5, %%acc0\n\t" \ + "mac.l %%d5, %%a5, (%[in])+, %%a5, %%acc1\n\t" \ + "mac.l %%d6, %%a5, (%[in])+, %%a5, %%acc0\n\t" \ + "mac.l %%d7, %%a5, (%[in])+, %%a5, %%acc1\n\t" \ + + + static inline void + atrac3_iqmf_dewindowing(int32_t *out, + int32_t *in, + int32_t *win, + unsigned int nIn) + { + int32_t j; + int32_t *_in, *_win; + for (j = nIn; j != 0; j--, in+=2, out+=2) { + _in = in; + _win = win; + + asm volatile ( + "move.l (%[in])+, %%a5 \n\t" /* preload first in value */ + MULTIPLY_ADD_BLOCK /* 0.. 
7 */ + MULTIPLY_ADD_BLOCK /* 8..15 */ + MULTIPLY_ADD_BLOCK /* 16..23 */ + MULTIPLY_ADD_BLOCK /* 24..31 */ + MULTIPLY_ADD_BLOCK /* 32..39 */ + /* 40..47 */ + "movem.l (%[win]), %%d0-%%d7 \n\t" + "mac.l %%d0, %%a5, (%[in])+, %%a5, %%acc0 \n\t" + "mac.l %%d1, %%a5, (%[in])+, %%a5, %%acc1 \n\t" + "mac.l %%d2, %%a5, (%[in])+, %%a5, %%acc0 \n\t" + "mac.l %%d3, %%a5, (%[in])+, %%a5, %%acc1 \n\t" + "mac.l %%d4, %%a5, (%[in])+, %%a5, %%acc0 \n\t" + "mac.l %%d5, %%a5, (%[in])+, %%a5, %%acc1 \n\t" + "mac.l %%d6, %%a5, (%[in])+, %%a5, %%acc0 \n\t" + "mac.l %%d7, %%a5, %%acc1 \n\t" + "movclr.l %%acc0, %%d1 \n\t" /* s1 */ + "movclr.l %%acc1, %%d0 \n\t" /* s2 */ + "movem.l %%d0-%%d1, (%[out]) \n\t" + : [in] "+a" (_in), [win] "+a" (_win) + : [out] "a" (out) + : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "a5", "memory"); + } + } #else #define MULTIPLY_ADD_BLOCK(y1, y2, x, c, k) \ y1 += fixmul31(c[k], x[k]); k++; \ @@ -134,7 +185,7 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; y2 += fixmul31(c[k], x[k]); k++; \ y1 += fixmul31(c[k], x[k]); k++; \ y2 += fixmul31(c[k], x[k]); k++; - + static inline void atrac3_iqmf_dewindowing(int32_t *out, int32_t *in, -- cgit v1.2.3