From 3f08357f2b89277d6196a46b099f7143ef14f223 Mon Sep 17 00:00:00 2001 From: Nils Wallménius Date: Thu, 15 Sep 2011 08:31:39 +0000 Subject: codeclib: more cf asm for the mdct, saves ~0.7MHz decoding vorbis on h300. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30552 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/lib/mdct.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) (limited to 'apps/codecs/lib/mdct.c') diff --git a/apps/codecs/lib/mdct.c b/apps/codecs/lib/mdct.c index 5524afbbc4..777aec4a55 100644 --- a/apps/codecs/lib/mdct.c +++ b/apps/codecs/lib/mdct.c @@ -41,7 +41,7 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) { int n8, n4, n2, n, j; const fixed32 *in1, *in2; - + (void)j; n = 1 << nbits; n2 = n >> 1; @@ -79,6 +79,62 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) const uint16_t * p_revtab=revtab; { const uint16_t * const p_revtab_end = p_revtab + n8; +#ifdef CPU_COLDFIRE + asm volatile ("move.l (%[in2]), %%d0\n\t" + "move.l (%[in1]), %%d1\n\t" + "bra.s 1f\n\t" + "0:\n\t" + "movem.l (%[T]), %%d2-%%d3\n\t" + + "addq.l #8, %[in1]\n\t" + "subq.l #8, %[in2]\n\t" + + "lea (%[step]*4, %[T]), %[T]\n\t" + + "mac.l %%d0, %%d3, (%[T]), %%d4, %%acc0;" + "msac.l %%d1, %%d2, (4, %[T]), %%d5, %%acc0;" + "mac.l %%d1, %%d3, (%[in1]), %%d1, %%acc1;" + "mac.l %%d0, %%d2, (%[in2]), %%d0, %%acc1;" + + "addq.l #8, %[in1]\n\t" + "subq.l #8, %[in2]\n\t" + + "mac.l %%d0, %%d5, %%acc2;" + "msac.l %%d1, %%d4, (%[p_revtab])+, %%d2, %%acc2;" + "mac.l %%d1, %%d5, (%[in1]), %%d1, %%acc3;" + "mac.l %%d0, %%d4, (%[in2]), %%d0, %%acc3;" + + "clr.l %%d3\n\t" + "move.w %%d2, %%d3\n\t" + "eor.l %%d3, %%d2\n\t" + "swap %%d2\n\t" + "lsr.l %[revtab_shift], %%d2\n\t" + + "movclr.l %%acc0, %%d4;" + "movclr.l %%acc1, %%d5;" + "lsl.l #3, %%d2\n\t" + "lea (%%d2, %[z]), %%a1\n\t" + "movem.l %%d4-%%d5, (%%a1)\n\t" + + "lsr.l %[revtab_shift], %%d3\n\t" + + "movclr.l %%acc2, %%d4;" + "movclr.l %%acc3, %%d5;" + "lsl.l #3, %%d3\n\t" + "lea (%%d3, %[z]), %%a1\n\t" + "movem.l %%d4-%%d5, (%%a1)\n\t" + + "lea (%[step]*4, %[T]), %[T]\n\t" + + "1:\n\t" + "cmp.l %[p_revtab_end], %[p_revtab]\n\t" + "bcs.s 0b\n\t" + : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T), + [p_revtab] "+a" (p_revtab) + : [z] "a" (z), [step] "d" (step), [revtab_shift] "d" (revtab_shift), + [p_revtab_end] "r" (p_revtab_end) + : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory"); +#else while(LIKELY(p_revtab < p_revtab_end)) { j = (*p_revtab)>>revtab_shift; @@ -94,9 +150,66 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) in2 -= 2; p_revtab++; } +#endif } { const uint16_t * const p_revtab_end = p_revtab + n8; +#ifdef CPU_COLDFIRE + asm volatile ("move.l (%[in2]), %%d0\n\t" + "move.l (%[in1]), %%d1\n\t" + "bra.s 1f\n\t" + "0:\n\t" + "movem.l (%[T]), %%d2-%%d3\n\t" + + "addq.l #8, %[in1]\n\t" + "subq.l #8, %[in2]\n\t" + + "lea (%[step]*4, %[T]), %[T]\n\t" + + "mac.l %%d0, %%d2, (%[T]), %%d4, %%acc0;" + "msac.l %%d1, %%d3, (4, %[T]), %%d5, %%acc0;" + "mac.l %%d1, %%d2, (%[in1]), %%d1, %%acc1;" + "mac.l %%d0, %%d3, (%[in2]), %%d0, %%acc1;" + + "addq.l #8, %[in1]\n\t" + "subq.l #8, %[in2]\n\t" + + "mac.l %%d0, %%d4, %%acc2;" + "msac.l %%d1, %%d5, (%[p_revtab])+, %%d2, %%acc2;" + "mac.l %%d1, %%d4, (%[in1]), %%d1, %%acc3;" + "mac.l %%d0, %%d5, (%[in2]), %%d0, %%acc3;" + + "clr.l %%d3\n\t" + "move.w %%d2, %%d3\n\t" + "eor.l %%d3, %%d2\n\t" + "swap %%d2\n\t" + "lsr.l %[revtab_shift], %%d2\n\t" + + "movclr.l %%acc0, %%d4;" + "movclr.l %%acc1, %%d5;" + "lsl.l #3, %%d2\n\t" + "lea (%%d2, %[z]), %%a1\n\t" + "movem.l %%d4-%%d5, (%%a1)\n\t" + + "lsr.l %[revtab_shift], %%d3\n\t" + + "movclr.l %%acc2, %%d4;" + "movclr.l %%acc3, %%d5;" + "lsl.l #3, %%d3\n\t" + "lea (%%d3, %[z]), %%a1\n\t" + "movem.l %%d4-%%d5, (%%a1)\n\t" + + "lea (%[step]*4, %[T]), %[T]\n\t" + + "1:\n\t" + "cmp.l %[p_revtab_end], %[p_revtab]\n\t" + "bcs.s 0b\n\t" + : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T), + [p_revtab] "+a" (p_revtab) + : [z] "a" (z), [step] "d" (-step), [revtab_shift] "d" (revtab_shift), + [p_revtab_end] "r" (p_revtab_end) + : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory"); +#else while(LIKELY(p_revtab < p_revtab_end)) { j = (*p_revtab)>>revtab_shift; @@ -112,6 +225,7 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) in2 -= 2; p_revtab++; } +#endif } -- cgit v1.2.3