diff options
Diffstat (limited to 'apps/codecs/Tremor/asm_mcf5249.h')
-rw-r--r-- | apps/codecs/Tremor/asm_mcf5249.h | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h new file mode 100644 index 0000000000..09c74671bc --- /dev/null +++ b/apps/codecs/Tremor/asm_mcf5249.h | |||
@@ -0,0 +1,257 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * | ||
9 | * Copyright (C) 2005 by Pedro Vasconcelos | ||
10 | * | ||
11 | * All files in this archive are subject to the GNU General Public License. | ||
12 | * See the file COPYING in the source tree root for full license agreement. | ||
13 | * | ||
14 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
15 | * KIND, either express or implied. | ||
16 | * | ||
17 | ****************************************************************************/ | ||
18 | /* asm routines for wide math on the MCF5249 */ | ||
19 | |||
20 | #include "os_types.h" | ||
21 | |||
22 | #if CONFIG_CPU == MCF5249 && !defined(SIMULATOR) | ||
23 | |||
24 | #ifndef _V_WIDE_MATH | ||
25 | #define _V_WIDE_MATH | ||
26 | |||
/* MB(): memory-barrier hook.  It currently expands to nothing.  The disabled
 * definition below is an empty asm statement with a "memory" clobber, i.e. a
 * compiler-level barrier; it is kept for reference only. */
//#define MB() asm volatile ("" : : : "memory")
#define MB()
29 | |||
30 | static inline void mcf5249_init_mac(void) { | ||
31 | int r; | ||
32 | asm volatile ("move.l #0x20, %%macsr;" // frac, truncate, no saturation | ||
33 | "movclr.l %%acc0, %[r];" // clear accumulators | ||
34 | "move.l %%acc0, %%acc1;" | ||
35 | "move.l %%acc0, %%acc2;" | ||
36 | "move.l %%acc0, %%acc3;" | ||
37 | : [r] "=r" (r)); | ||
38 | } | ||
39 | |||
40 | static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { | ||
41 | ogg_int32_t r; | ||
42 | asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply into acc | ||
43 | "movclr.l %%acc0, %[r];" // move & clear acc | ||
44 | "asr.l #1, %[r];" // no overflow test | ||
45 | : [r] "=d" (r) | ||
46 | : [x] "r" (x), [y] "r" (y) | ||
47 | : "cc"); | ||
48 | return r; | ||
49 | } | ||
50 | |||
51 | static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { | ||
52 | ogg_int32_t r; | ||
53 | asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply | ||
54 | "movclr.l %%acc0, %[r];" // move and clear | ||
55 | : [r] "=r" (r) | ||
56 | : [x] "r" (x), [y] "r" (y) | ||
57 | : "cc"); | ||
58 | return r; | ||
59 | } | ||
60 | |||
61 | |||
62 | static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { | ||
63 | ogg_int32_t r; | ||
64 | asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply | ||
65 | "movclr.l %%acc0, %[r];" // get higher half | ||
66 | "mulu.l %[y], %[x];" // get lower half | ||
67 | "asl.l #8, %[r];" // hi << 17 | ||
68 | "asl.l #8, %[r];" | ||
69 | "lsr.l #8, %[x];" // (unsigned)lo >> 15 | ||
70 | "lsr.l #7, %[x];" | ||
71 | "or.l %[x], %[r];" // or | ||
72 | : [r] "=&d" (r), [x] "+d" (x) | ||
73 | : [y] "d" (y) | ||
74 | : "cc"); | ||
75 | return r; | ||
76 | } | ||
77 | |||
78 | |||
79 | static inline | ||
80 | void XPROD31(ogg_int32_t a, ogg_int32_t b, | ||
81 | ogg_int32_t t, ogg_int32_t v, | ||
82 | ogg_int32_t *x, ogg_int32_t *y) | ||
83 | { | ||
84 | ogg_int32_t r; | ||
85 | asm volatile ("mac.l %[a], %[t], %%acc0;" | ||
86 | "mac.l %[b], %[v], %%acc0;" | ||
87 | "mac.l %[b], %[t], %%acc1;" | ||
88 | "msac.l %[a], %[v], %%acc1;" | ||
89 | "movclr.l %%acc0, %[r];" | ||
90 | "move.l %[r], (%[x]);" | ||
91 | "movclr.l %%acc1, %[r];" | ||
92 | "move.l %[r], (%[y]);" | ||
93 | : [r] "=&r" (r) | ||
94 | : [x] "a" (x), [y] "a" (y), | ||
95 | [a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v) | ||
96 | : "cc", "memory"); | ||
97 | } | ||
98 | |||
99 | |||
100 | static inline | ||
101 | void XNPROD31(ogg_int32_t a, ogg_int32_t b, | ||
102 | ogg_int32_t t, ogg_int32_t v, | ||
103 | ogg_int32_t *x, ogg_int32_t *y) | ||
104 | { | ||
105 | ogg_int32_t r; | ||
106 | asm volatile ("mac.l %[a], %[t], %%acc0;" | ||
107 | "msac.l %[b], %[v], %%acc0;" | ||
108 | "mac.l %[b], %[t], %%acc1;" | ||
109 | "mac.l %[a], %[v], %%acc1;" | ||
110 | "movclr.l %%acc0, %[r];" | ||
111 | "move.l %[r], (%[x]);" | ||
112 | "movclr.l %%acc1, %[r];" | ||
113 | "move.l %[r], (%[y]);" | ||
114 | : [r] "=&r" (r) | ||
115 | : [x] "a" (x), [y] "a" (y), | ||
116 | [a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v) | ||
117 | : "cc", "memory"); | ||
118 | } | ||
119 | |||
120 | |||
/* XPROD32(_a, _b, _t, _v, _x, _y): cross product built from four MULT32()
 * calls:
 *   _x = MULT32(_a,_t) + MULT32(_b,_v)
 *   _y = MULT32(_b,_t) - MULT32(_a,_v)
 *
 * Usage notes:
 *  - _x and _y are assigned directly, so they must be lvalues (unlike
 *    XPROD31/XNPROD31, which take pointers).
 *  - _a, _b, _t and _v are each expanded twice; avoid arguments with side
 *    effects.
 *  - _x is written before _y is computed, so _x must not name the same
 *    object as _a or _b.
 *  - The expansion is a bare { } block rather than a single statement.
 *
 * Open question from the original author: is there no faster way of doing
 * this using the MAC? */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
  { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
    (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
125 | |||
126 | |||
127 | /* asm versions of vector multiplication for window.c */ | ||
128 | /* assumes MAC is initialized & accumulators cleared */ | ||
/* In-place forward vector multiply: data[i] = data[i] * window[i] for
 * i = 0 .. n-1, computed on the MAC unit.
 *
 * data   - vector to scale; overwritten with the products
 * window - multipliers, read at ascending addresses
 * n      - element count; expected to be >= 0 (a negative n is not rejected
 *          and falls into the tail code below)
 *
 * Four elements are processed per loop iteration using acc0..acc3, then up
 * to three leftover elements are handled one at a time through acc0.  The
 * accumulators must be clear on entry and are left cleared.
 *
 * The routine is software-pipelined: the next four data and window words are
 * loaded before it is known whether they are needed, so up to four words
 * past the n-th element of both arrays are read (never written). */
static inline
void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
  asm volatile ("movem.l (%[d]), %%d0-%%d3;" // pre-fetch first 4 data words
                "movem.l (%[w]), %%a0-%%a3;" // pre-fetch first 4 window words
                "lea.l (4*4, %[w]), %[w];"   // movem does not advance w
                "bra 1f;" // jump to loop condition
                "0:" // loop body
                // multiply, and in the same instruction load the window
                // word for the next group into the register just consumed
                "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
                "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
                "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
                "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
                "movclr.l %%acc0, %%d0;" // get the products, clear accs
                "movclr.l %%acc1, %%d1;"
                "movclr.l %%acc2, %%d2;"
                "movclr.l %%acc3, %%d3;"
                // store the 4 products, advance, pre-fetch the next 4
                "movem.l %%d0-%%d3, (%[d]);"
                "lea.l (4*4, %[d]), %[d];"
                "movem.l (%[d]), %%d0-%%d3;"
                "subq.l #4, %[n];" // done 4 elements
                "1: cmpi.l #4, %[n];" // at least 4 elements left?
                "bge 0b;"
                // multiply final elements; d0-d2 / a0-a2 already hold them
                "tst.l %[n];"
                "beq 1f;" // n=0
                "mac.l %%d0, %%a0, %%acc0;"
                "movclr.l %%acc0, %%d0;"
                "move.l %%d0, (%[d])+;"
                "subq.l #1, %[n];"
                "beq 1f;" // n=1
                "mac.l %%d1, %%a1, %%acc0;"
                "movclr.l %%acc0, %%d1;"
                "move.l %%d1, (%[d])+;"
                "subq.l #1, %[n];"
                "beq 1f;" // n=2
                // otherwise n = 3
                "mac.l %%d2, %%a2, %%acc0;"
                "movclr.l %%acc0, %%d2;"
                "move.l %%d2, (%[d])+;"
                "1:"
                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                "cc", "memory");
}
175 | |||
/* In-place backward vector multiply: data[i] = data[i] * window[-i] for
 * i = 0 .. n-1, computed on the MAC unit.  data is walked upwards while the
 * window is walked downwards.
 *
 * data   - vector to scale; overwritten with the products
 * window - points at the multiplier for data[0]; the following multipliers
 *          are read at descending addresses
 * n      - element count; expected to be >= 0 (a negative n is not rejected
 *          and falls into the tail code below)
 *
 * Four elements are processed per loop iteration using acc0..acc3, then up
 * to three leftover elements are handled one at a time through acc0.  The
 * accumulators must be clear on entry and are left cleared.
 *
 * The routine is software-pipelined: the next four data and window words are
 * loaded before it is known whether they are needed, so up to four words
 * past the n-th element of data and before the last used window word are
 * read (never written). */
static inline
void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
  asm volatile ("lea.l (-3*4, %[w]), %[w];" // step back so movem ends at window[0]
                "movem.l (%[d]), %%d0-%%d3;" // pre-fetch registers
                "movem.l (%[w]), %%a0-%%a3;" // a3 = window[0] ... a0 = window[-3]
                "bra 1f;" // jump to loop condition
                "0:" // loop body
                // multiply, and in the same instruction load the window
                // word for the next group into the register just consumed
                "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
                "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
                "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
                "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
                "movclr.l %%acc0, %%d0;" // get the products, clear accs
                "movclr.l %%acc1, %%d1;"
                "movclr.l %%acc2, %%d2;"
                "movclr.l %%acc3, %%d3;"
                // store the 4 products, advance, pre-fetch the next 4
                "movem.l %%d0-%%d3, (%[d]);"
                "lea.l (4*4, %[d]), %[d];"
                "movem.l (%[d]), %%d0-%%d3;"
                "subq.l #4, %[n];" // done 4 elements
                "1: cmpi.l #4, %[n];" // at least 4 elements left?
                "bge 0b;"
                // multiply final elements; d0-d2 / a3-a1 already hold them
                "tst.l %[n];"
                "beq 1f;" // n=0
                "mac.l %%d0, %%a3, %%acc0;"
                "movclr.l %%acc0, %%d0;"
                "move.l %%d0, (%[d])+;"
                "subq.l #1, %[n];"
                "beq 1f;" // n=1
                "mac.l %%d1, %%a2, %%acc0;"
                "movclr.l %%acc0, %%d1;"
                "move.l %%d1, (%[d])+;"
                "subq.l #1, %[n];"
                "beq 1f;" // n=2
                // otherwise n = 3
                "mac.l %%d2, %%a1, %%acc0;"
                "movclr.l %%acc0, %%d2;"
                "move.l %%d2, (%[d])+;"
                "1:"
                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                "cc", "memory");
}
222 | |||
223 | |||
224 | static inline | ||
225 | void mcf5249_vect_zero(ogg_int32_t *ptr, int n) | ||
226 | { | ||
227 | asm volatile ("clr.l %%d0;" | ||
228 | "clr.l %%d1;" | ||
229 | "clr.l %%d2;" | ||
230 | "clr.l %%d3;" | ||
231 | // loop start | ||
232 | "tst.l %[n];" | ||
233 | "bra 1f;" | ||
234 | "0: movem.l %%d0-%%d3, (%[ptr]);" | ||
235 | "lea (4*4, %[ptr]), %[ptr];" | ||
236 | "subq.l #4, %[n];" | ||
237 | "1: bgt 0b;" | ||
238 | // remaing elements | ||
239 | "tst.l %[n];" | ||
240 | "beq 1f;" // n=0 | ||
241 | "clr.l (%[ptr])+;" | ||
242 | "subq.l #1, %[n];" | ||
243 | "beq 1f;" // n=1 | ||
244 | "clr.l (%[ptr])+;" | ||
245 | "subq.l #1, %[n];" | ||
246 | "beq 1f;" // n=2 | ||
247 | // otherwise n = 3 | ||
248 | "clr.l (%[ptr])+;" | ||
249 | "1:" | ||
250 | : [n] "+d" (n), [ptr] "+a" (ptr) | ||
251 | : | ||
252 | : "%d0","%d1","%d2","%d3","cc","memory"); | ||
253 | } | ||
254 | |||
255 | |||
256 | #endif | ||
257 | #endif | ||