summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNils Wallménius <nils@rockbox.org>2011-09-22 17:32:02 +0000
committerNils Wallménius <nils@rockbox.org>2011-09-22 17:32:02 +0000
commitc2bd876e89e39fe0639cd60a1092841dd778c61f (patch)
tree004fcf5501ca78b2222da2a8c98a43d121078862
parent3afdedaef2d0eaaf84c7ac5d04adf22d330912e9 (diff)
downloadrockbox-c2bd876e89e39fe0639cd60a1092841dd778c61f.tar.gz
rockbox-c2bd876e89e39fe0639cd60a1092841dd778c61f.zip
libtremor: cf asm for ff_vector_fmul_window_c, gives a speedup of ~0.3MHz.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30582 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libtremor/asm_mcf5249.h53
1 files changed, 53 insertions, 0 deletions
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h
index 66de07615f..38aa117d84 100644
--- a/apps/codecs/libtremor/asm_mcf5249.h
+++ b/apps/codecs/libtremor/asm_mcf5249.h
@@ -26,6 +26,59 @@
26#ifndef _V_WIDE_MATH 26#ifndef _V_WIDE_MATH
27#define _V_WIDE_MATH 27#define _V_WIDE_MATH
28 28
29#define INCL_OPTIMIZED_VECTOR_FMUL_WINDOW
30static inline void ff_vector_fmul_window_c(ogg_int32_t *dst, const ogg_int32_t *src0,
31 const ogg_int32_t *src1, const ogg_int32_t *win, int len)
32{
33 /* Windowed overlap multiply of src0 and src1 into dst, written as
      ColdFire EMAC inline assembly.

      Pointer walk, as set up below:
        - src0 and win0 are read upwards from src0[0] and win[0];
        - src1 and win1 are read downwards (pre-decrement) from
          src1[len-1] and win[2*len-1];
        - dst0 is written upwards from dst[0], dst1 downwards from
          dst[2*len-1].
      So dst and win are accessed over 2*len elements, src0 and src1 over
      len elements each.

      For each step, with s0 = *src0, s1 = *src1, wi = *win0, wj = *win1:
        *dst0 = s0*wj - s1*wi   (accumulators acc0 / acc2)
        *dst1 = s0*wi + s1*wj   (accumulators acc1 / acc3)
      NOTE(review): the scaling of the stored products depends on the EMAC
      mode, which is configured outside this function - confirm.

      The original author states that len is always a power of 2 and
      always >= 16, so the loop body is unrolled 4 times; len is decremented
      by 4 per pass and must therefore be a multiple of 4.

      The first s0 and wj are loaded before the len test, and each mac/msac
      also loads the operand for the following step, so the last pass
      performs two loads that are never used (see the comments on those
      instructions). */
34 ogg_int32_t *dst0 = dst, *dst1 = dst + 2*len;
35 const ogg_int32_t *win0 = win, *win1 = win + 2*len;
36 src1 += len;
37 asm volatile ("move.l (%[src0])+, %%d0\n\t" /* prime d0 = first s0 */
38 "move.l -(%[win1]), %%d3\n\t" /* prime d3 = first wj */
39 "tst.l %[len]\n\t" /* set flags for the bgt at label 1 */
40 "bra.s 1f\n\t"
41 "0:\n\t"
42 "mac.l %%d0, %%d3, (%[win0])+, %%d2, %%acc0\n\t" /* step 1: acc0 += s0*wj, load wi */
43 "mac.l %%d0, %%d2, -(%[src1]), %%d1, %%acc1\n\t" /* acc1 += s0*wi, load s1 */
44 "msac.l %%d1, %%d2, (%[src0])+, %%d0, %%acc0\n\t" /* acc0 -= s1*wi, load next s0 */
45 "mac.l %%d1, %%d3, -(%[win1]), %%d3, %%acc1\n\t" /* acc1 += s1*wj, load next wj */
46 "mac.l %%d0, %%d3, (%[win0])+, %%d2, %%acc2\n\t" /* step 2: same pattern on acc2/acc3 */
47 "mac.l %%d0, %%d2, -(%[src1]), %%d1, %%acc3\n\t"
48 "msac.l %%d1, %%d2, (%[src0])+, %%d0, %%acc2\n\t"
49 "mac.l %%d1, %%d3, -(%[win1]), %%d3, %%acc3\n\t"
50 "movclr.l %%acc0, %%d1\n\t" /* keep ascending results of steps 1-2 in d1/d2 */
51 "movclr.l %%acc2, %%d2\n\t"
52 "subq.l #8, %[dst1]\n\t"
53 "movclr.l %%acc1, %%d5\n\t" /* descending results: step 2 (d4) lands at the lower address */
54 "movclr.l %%acc3, %%d4\n\t"
55 "movem.l %%d4-%%d5, (%[dst1])\n\t"
56 "mac.l %%d0, %%d3, (%[win0])+, %%d5, %%acc0\n\t" /* steps 3-4: d4/d5 are the temporaries, d1/d2 still hold results */
57 "mac.l %%d0, %%d5, -(%[src1]), %%d4, %%acc1\n\t"
58 "msac.l %%d4, %%d5, (%[src0])+, %%d0, %%acc0\n\t"
59 "mac.l %%d4, %%d3, -(%[win1]), %%d3, %%acc1\n\t"
60 "mac.l %%d0, %%d3, (%[win0])+, %%d5, %%acc2\n\t"
61 "mac.l %%d0, %%d5, -(%[src1]), %%d4, %%acc3\n\t"
62 "msac.l %%d4, %%d5, (%[src0])+, %%d0, %%acc2\n\t" /* will read one past end of src0 */
63 "mac.l %%d4, %%d3, -(%[win1]), %%d3, %%acc3\n\t" /* will read one into win0 */
64 "movclr.l %%acc0, %%d4\n\t"
65 "movclr.l %%acc2, %%d5\n\t"
66 "movem.l %%d1-%%d2/%%d4-%%d5, (%[dst0])\n\t" /* store the four ascending results of this pass */
67 "lea (16, %[dst0]), %[dst0]\n\t"
68 "subq.l #8, %[dst1]\n\t"
69 "movclr.l %%acc1, %%d2\n\t"
70 "movclr.l %%acc3, %%d1\n\t"
71 "movem.l %%d1-%%d2, (%[dst1])\n\t" /* descending results of steps 3-4 */
72 "subq.l #4, %[len]\n\t" /* four steps done; flags feed the bgt below */
73 "1:\n\t"
74 "bgt.s 0b\n\t" /* loop while len > 0 */
75 : [dst0] "+a" (dst0), [dst1] "+a" (dst1),
76 [src0] "+a" (src0), [src1] "+a" (src1),
77 [win0] "+a" (win0), [win1] "+a" (win1),
78 [len] "+d" (len)
79 :: "d0", "d1", "d2", "d3", "d4", "d5", "cc", "memory" );
80}
81
29#define MB() 82#define MB()
30 83
31#endif 84#endif