diff options
author | Nils Wallménius <nils@rockbox.org> | 2010-07-26 11:15:25 +0000 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2010-07-26 11:15:25 +0000 |
commit | 025eed5c745513ff181762a172d2eda7b886c40d (patch) | |
tree | 79cf0e0eb23c33f25e84db1bd900a8a4b49bdd17 /apps/codecs/libwmapro | |
parent | 53b5abd93c424c6add261df52b07f4a413f1189b (diff) | |
download | rockbox-025eed5c745513ff181762a172d2eda7b886c40d.tar.gz rockbox-025eed5c745513ff181762a172d2eda7b886c40d.zip |
libwmapro: add Coldfire asm for vector_fixmul_window, which gives a speedup of ~13%. Also drop the add_bias argument of the vector_fixmul_window function, since it was always 0.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27573 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libwmapro')
-rw-r--r-- | apps/codecs/libwmapro/wmapro_math.h | 43 | ||||
-rw-r--r-- | apps/codecs/libwmapro/wmaprodec.c | 6 |
2 files changed, 41 insertions, 8 deletions
diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h index 823c002c09..71cc3d33d7 100644 --- a/apps/codecs/libwmapro/wmapro_math.h +++ b/apps/codecs/libwmapro/wmapro_math.h | |||
@@ -19,10 +19,39 @@ static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt) | |||
19 | return (int32_t)temp; | 19 | return (int32_t)temp; |
20 | } | 20 | } |
21 | 21 | ||
22 | #ifdef CPU_COLDFIRE | ||
23 | static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, | ||
24 | const int32_t *src1, const int32_t *win, | ||
25 | int len) | ||
26 | { | ||
27 | int i, j; | ||
28 | dst += len; | ||
29 | win += len; | ||
30 | src0+= len; | ||
31 | for(i=-len, j=len-1; i<0; i++, j--) { | ||
32 | int32_t s0 = src0[i]; | ||
33 | int32_t s1 = src1[j]; | ||
34 | int32_t wi = -win[i]; | ||
35 | int32_t wj = -win[j]; | ||
22 | 36 | ||
37 | asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t" | ||
38 | "msac.l %[s1], %[wi], %%acc0\n\t" | ||
39 | "mac.l %[s0], %[wi], %%acc1\n\t" | ||
40 | "mac.l %[s1], %[wj], %%acc1\n\t" | ||
41 | "movclr.l %%acc0, %[s0]\n\t" | ||
42 | "move.l %[s0], (%[dst_i])\n\t" | ||
43 | "movclr.l %%acc1, %[s0]\n\t" | ||
44 | "move.l %[s0], (%[dst_j])\n\t" | ||
45 | : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */ | ||
46 | : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), | ||
47 | [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) | ||
48 | : "cc", "memory"); | ||
49 | } | ||
50 | } | ||
51 | #else | ||
23 | static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, | 52 | static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, |
24 | const int32_t *src1, const int32_t *win, | 53 | const int32_t *src1, const int32_t *win, |
25 | int32_t add_bias, int len) | 54 | int len) |
26 | { | 55 | { |
27 | int i, j; | 56 | int i, j; |
28 | dst += len; | 57 | dst += len; |
@@ -31,13 +60,13 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, | |||
31 | for(i=-len, j=len-1; i<0; i++, j--) { | 60 | for(i=-len, j=len-1; i<0; i++, j--) { |
32 | int32_t s0 = src0[i]; | 61 | int32_t s0 = src0[i]; |
33 | int32_t s1 = src1[j]; | 62 | int32_t s1 = src1[j]; |
34 | int32_t wi = win[i]; | 63 | int32_t wi = -win[i]; |
35 | int32_t wj = win[j]; | 64 | int32_t wj = -win[j]; |
36 | dst[i] = fixmulshift(s0,-1*wj,31) - fixmulshift(s1,-1*wi,31) + (add_bias<<16); | 65 | dst[i] = fixmulshift(s0,wj,31) - fixmulshift(s1,wi,31); |
37 | dst[j] = fixmulshift(s0,-1*wi,31) + fixmulshift(s1,-1*wj,31) + (add_bias<<16); | 66 | dst[j] = fixmulshift(s0,wi,31) + fixmulshift(s1,wj,31); |
38 | } | 67 | } |
39 | |||
40 | } | 68 | } |
69 | #endif | ||
41 | 70 | ||
42 | static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, int32_t mul, | 71 | static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, int32_t mul, |
43 | int len, int shift) | 72 | int len, int shift) |
diff --git a/apps/codecs/libwmapro/wmaprodec.c b/apps/codecs/libwmapro/wmaprodec.c index 88becb7af6..b7879a2644 100644 --- a/apps/codecs/libwmapro/wmaprodec.c +++ b/apps/codecs/libwmapro/wmaprodec.c | |||
@@ -288,6 +288,10 @@ int decode_init(asf_waveformatex_t *wfx) | |||
288 | int log2_max_num_subframes; | 288 | int log2_max_num_subframes; |
289 | int num_possible_block_sizes; | 289 | int num_possible_block_sizes; |
290 | 290 | ||
291 | #if defined(CPU_COLDFIRE) | ||
292 | coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE); | ||
293 | #endif | ||
294 | |||
291 | init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE); | 295 | init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE); |
292 | 296 | ||
293 | if (wfx->datalen >= 18) { | 297 | if (wfx->datalen >= 18) { |
@@ -1050,7 +1054,7 @@ static void wmapro_window(WMAProDecodeCtx *s) | |||
1050 | winlen >>= 1; | 1054 | winlen >>= 1; |
1051 | 1055 | ||
1052 | vector_fixmul_window(xstart, xstart, xstart + winlen, | 1056 | vector_fixmul_window(xstart, xstart, xstart + winlen, |
1053 | window, 0, winlen); | 1057 | window, winlen); |
1054 | 1058 | ||
1055 | s->channel[c].prev_block_len = s->subframe_len; | 1059 | s->channel[c].prev_block_len = s->subframe_len; |
1056 | 1060 | ||