diff options
Diffstat (limited to 'apps/codecs/libtremor/asm_mcf5249.h')
-rw-r--r-- | apps/codecs/libtremor/asm_mcf5249.h | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h index 66de07615f..38aa117d84 100644 --- a/apps/codecs/libtremor/asm_mcf5249.h +++ b/apps/codecs/libtremor/asm_mcf5249.h | |||
@@ -26,6 +26,59 @@ | |||
26 | #ifndef _V_WIDE_MATH | 26 | #ifndef _V_WIDE_MATH |
27 | #define _V_WIDE_MATH | 27 | #define _V_WIDE_MATH |
28 | 28 | ||
29 | #define INCL_OPTIMIZED_VECTOR_FMUL_WINDOW | ||
/*
 * Inline-assembly implementation of ff_vector_fmul_window_c for the MCF5249,
 * built on the MAC unit's four accumulators (acc0-acc3).
 *
 * Operand streams, as set up by the pointer initialisation below:
 *   s0  read forward  from src0
 *   s1  read backward starting just below src1 + len
 *   w0  read forward  from win
 *   w1  read backward starting just below win + 2*len
 *
 * For every step the loop accumulates
 *   s0*w1 - s1*w0   and stores it forward,  starting at dst
 *   s0*w0 + s1*w1   and stores it backward, starting just below dst + 2*len
 *
 * One pass of the loop performs four such steps and subtracts 4 from len,
 * so dst and win are each walked over 2*len elements in total.
 *
 * The first s0 and w1 loads are issued before len is tested, so they are
 * performed even when len <= 0.
 *
 * NOTE(review): the results depend on acc0-acc3 being clear on entry and on
 * the MAC unit mode (fractional/integer, saturation) configured elsewhere;
 * neither is visible here - confirm against the codec initialisation.
 */
30 | static inline void ff_vector_fmul_window_c(ogg_int32_t *dst, const ogg_int32_t *src0, | ||
31 | const ogg_int32_t *src1, const ogg_int32_t *win, int len) | ||
32 | { | ||
33 | /* len is always a power of 2 and always >= 16 so this is unrolled 4 times*/ | ||
/* dst0/win0 advance upward from the start; dst1/win1 and src1 are moved to
   their far ends and advance downward via pre-decrement addressing. */
34 | ogg_int32_t *dst0 = dst, *dst1 = dst + 2*len; | ||
35 | const ogg_int32_t *win0 = win, *win1 = win + 2*len; | ||
36 | src1 += len; | ||
/* Preload the first s0 into d0 and the first w1 into d3 so the loop body can
   start multiplying immediately. */
37 | asm volatile ("move.l (%[src0])+, %%d0\n\t" | ||
38 | "move.l -(%[win1]), %%d3\n\t" | ||
/* Set the condition codes from len and jump straight to the loop test at 1:,
   so the body is skipped entirely when len <= 0. */
39 | "tst.l %[len]\n\t" | ||
40 | "bra.s 1f\n\t" | ||
41 | "0:\n\t" | ||
/* Steps 1 and 2. acc0/acc2 collect s0*w1 - s1*w0, acc1/acc3 collect
   s0*w0 + s1*w1. Every mac.l/msac.l also loads the operand needed by a
   following instruction; the multiply uses the register contents from
   before that load (see e.g. d3 being both factor and load target). */
42 | "mac.l %%d0, %%d3, (%[win0])+, %%d2, %%acc0\n\t" | ||
43 | "mac.l %%d0, %%d2, -(%[src1]), %%d1, %%acc1\n\t" | ||
44 | "msac.l %%d1, %%d2, (%[src0])+, %%d0, %%acc0\n\t" | ||
45 | "mac.l %%d1, %%d3, -(%[win1]), %%d3, %%acc1\n\t" | ||
46 | "mac.l %%d0, %%d3, (%[win0])+, %%d2, %%acc2\n\t" | ||
47 | "mac.l %%d0, %%d2, -(%[src1]), %%d1, %%acc3\n\t" | ||
48 | "msac.l %%d1, %%d2, (%[src0])+, %%d0, %%acc2\n\t" | ||
49 | "mac.l %%d1, %%d3, -(%[win1]), %%d3, %%acc3\n\t" | ||
/* Fetch-and-clear the accumulators. The two forward results are parked in
   d1/d2 until the 16-byte store further down. The two backward results are
   written now: d4 (acc3, the later step) lands at the lower address and
   d5 (acc1) above it, because dst1 runs downward. */
50 | "movclr.l %%acc0, %%d1\n\t" | ||
51 | "movclr.l %%acc2, %%d2\n\t" | ||
52 | "subq.l #8, %[dst1]\n\t" | ||
53 | "movclr.l %%acc1, %%d5\n\t" | ||
54 | "movclr.l %%acc3, %%d4\n\t" | ||
55 | "movem.l %%d4-%%d5, (%[dst1])\n\t" | ||
/* Steps 3 and 4: same pattern as above, but with d4/d5 as scratch registers
   because d1/d2 still hold the pending forward results. */
56 | "mac.l %%d0, %%d3, (%[win0])+, %%d5, %%acc0\n\t" | ||
57 | "mac.l %%d0, %%d5, -(%[src1]), %%d4, %%acc1\n\t" | ||
58 | "msac.l %%d4, %%d5, (%[src0])+, %%d0, %%acc0\n\t" | ||
59 | "mac.l %%d4, %%d3, -(%[win1]), %%d3, %%acc1\n\t" | ||
60 | "mac.l %%d0, %%d3, (%[win0])+, %%d5, %%acc2\n\t" | ||
61 | "mac.l %%d0, %%d5, -(%[src1]), %%d4, %%acc3\n\t" | ||
62 | "msac.l %%d4, %%d5, (%[src0])+, %%d0, %%acc2\n\t" /* will read one past end of src0 */ | ||
63 | "mac.l %%d4, %%d3, -(%[win1]), %%d3, %%acc3\n\t" /* will read one into win0 */ | ||
/* Write all four forward results (d1, d2, d4, d5) in one store and advance
   dst0 by 16 bytes, then write the remaining two backward results with the
   later step (acc3 -> d1) again at the lower address. */
64 | "movclr.l %%acc0, %%d4\n\t" | ||
65 | "movclr.l %%acc2, %%d5\n\t" | ||
66 | "movem.l %%d1-%%d2/%%d4-%%d5, (%[dst0])\n\t" | ||
67 | "lea (16, %[dst0]), %[dst0]\n\t" | ||
68 | "subq.l #8, %[dst1]\n\t" | ||
69 | "movclr.l %%acc1, %%d2\n\t" | ||
70 | "movclr.l %%acc3, %%d1\n\t" | ||
71 | "movem.l %%d1-%%d2, (%[dst1])\n\t" | ||
/* Four steps done; the flags left by this subq.l are what bgt.s tests. */
72 | "subq.l #4, %[len]\n\t" | ||
73 | "1:\n\t" | ||
74 | "bgt.s 0b\n\t" | ||
/* Every pointer is a read/write address-register operand ("+a") and len a
   read/write data-register operand ("+d"), since the asm modifies them all. */
75 | : [dst0] "+a" (dst0), [dst1] "+a" (dst1), | ||
76 | [src0] "+a" (src0), [src1] "+a" (src1), | ||
77 | [win0] "+a" (win0), [win1] "+a" (win1), | ||
78 | [len] "+d" (len) | ||
/* d0-d5 are used as scratch, the condition codes change, and memory is
   written through dst0/dst1. */
79 | :: "d0", "d1", "d2", "d3", "d4", "d5", "cc", "memory" ); | ||
80 | } | ||
81 | |||
29 | #define MB() | 82 | #define MB() |
30 | 83 | ||
31 | #endif | 84 | #endif |