diff options
Diffstat (limited to 'apps/codecs/libwma/asm_mcf5249.h')
-rw-r--r-- | apps/codecs/libwma/asm_mcf5249.h | 327 |
1 files changed, 0 insertions, 327 deletions
diff --git a/apps/codecs/libwma/asm_mcf5249.h b/apps/codecs/libwma/asm_mcf5249.h deleted file mode 100644 index 6b5c600685..0000000000 --- a/apps/codecs/libwma/asm_mcf5249.h +++ /dev/null | |||
@@ -1,327 +0,0 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * | ||
9 | * Copyright (C) 2005 by Pedro Vasconcelos | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License | ||
13 | * as published by the Free Software Foundation; either version 2 | ||
14 | * of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
17 | * KIND, either express or implied. | ||
18 | * | ||
19 | ****************************************************************************/ | ||
20 | /* asm routines for wide math on the MCF5249 */ | ||
21 | |||
22 | //#include "os_types.h" | ||
23 | |||
24 | #if defined(CPU_COLDFIRE) | ||
25 | |||
26 | /* attribute for 16-byte alignment */ | ||
27 | #define LINE_ATTR __attribute__ ((aligned (16))) | ||
28 | |||
29 | #ifndef _V_WIDE_MATH | ||
30 | #define _V_WIDE_MATH | ||
31 | |||
32 | #define MB() | ||
33 | |||
34 | static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { | ||
35 | |||
36 | asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */ | ||
37 | "movclr.l %%acc0, %[x];" /* move & clear acc */ | ||
38 | "asr.l #1, %[x];" /* no overflow test */ | ||
39 | : [x] "+&d" (x) | ||
40 | : [y] "r" (y) | ||
41 | : "cc"); | ||
42 | return x; | ||
43 | } | ||
44 | |||
45 | static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { | ||
46 | |||
47 | asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */ | ||
48 | "movclr.l %%acc0, %[x];" /* move and clear */ | ||
49 | : [x] "+&r" (x) | ||
50 | : [y] "r" (y) | ||
51 | : "cc"); | ||
52 | return x; | ||
53 | } | ||
54 | |||
55 | |||
56 | static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { | ||
57 | ogg_int32_t r; | ||
58 | |||
59 | asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */ | ||
60 | "mulu.l %[y], %[x];" /* get lower half, avoid emac stall */ | ||
61 | "movclr.l %%acc0, %[r];" /* get higher half */ | ||
62 | "asl.l #8, %[r];" /* hi<<16, plus one free */ | ||
63 | "asl.l #8, %[r];" | ||
64 | "lsr.l #8, %[x];" /* (unsigned)lo >> 15 */ | ||
65 | "lsr.l #7, %[x];" | ||
66 | "or.l %[x], %[r];" /* logical-or results */ | ||
67 | : [r] "=&d" (r), [x] "+d" (x) | ||
68 | : [y] "d" (y) | ||
69 | : "cc"); | ||
70 | return r; | ||
71 | } | ||
72 | |||
73 | |||
74 | static inline | ||
75 | void XPROD31(ogg_int32_t a, ogg_int32_t b, | ||
76 | ogg_int32_t t, ogg_int32_t v, | ||
77 | ogg_int32_t *x, ogg_int32_t *y) | ||
78 | { | ||
79 | asm volatile ("mac.l %[a], %[t], %%acc0;" | ||
80 | "mac.l %[b], %[v], %%acc0;" | ||
81 | "mac.l %[b], %[t], %%acc1;" | ||
82 | "msac.l %[a], %[v], %%acc1;" | ||
83 | "movclr.l %%acc0, %[a];" | ||
84 | "move.l %[a], (%[x]);" | ||
85 | "movclr.l %%acc1, %[a];" | ||
86 | "move.l %[a], (%[y]);" | ||
87 | : [a] "+&r" (a) | ||
88 | : [x] "a" (x), [y] "a" (y), | ||
89 | [b] "r" (b), [t] "r" (t), [v] "r" (v) | ||
90 | : "cc", "memory"); | ||
91 | } | ||
92 | |||
93 | |||
94 | static inline | ||
95 | void XNPROD31(ogg_int32_t a, ogg_int32_t b, | ||
96 | ogg_int32_t t, ogg_int32_t v, | ||
97 | ogg_int32_t *x, ogg_int32_t *y) | ||
98 | { | ||
99 | asm volatile ("mac.l %[a], %[t], %%acc0;" | ||
100 | "msac.l %[b], %[v], %%acc0;" | ||
101 | "mac.l %[b], %[t], %%acc1;" | ||
102 | "mac.l %[a], %[v], %%acc1;" | ||
103 | "movclr.l %%acc0, %[a];" | ||
104 | "move.l %[a], (%[x]);" | ||
105 | "movclr.l %%acc1, %[a];" | ||
106 | "move.l %[a], (%[y]);" | ||
107 | : [a] "+&r" (a) | ||
108 | : [x] "a" (x), [y] "a" (y), | ||
109 | [b] "r" (b), [t] "r" (t), [v] "r" (v) | ||
110 | : "cc", "memory"); | ||
111 | } | ||
112 | |||
113 | |||
#if 0 /* canonical Tremor definition, kept for reference */
#define XPROD32(_a, _b, _t, _v, _x, _y)     \
  { (_x)=MULT32(_a,_t)+MULT32(_b,_v);       \
    (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
#endif

/*
 * EMAC version of XPROD32:
 *   _x = (a*t + b*v) >> 1   (acc0)
 *   _y = (b*t - a*v) >> 1   (acc1)
 * Unlike the canonical definition above, each pair of products is summed in
 * the accumulator first and shifted once afterwards.  This could lose the
 * LSB through overflow, but that is not expected to happen in practice.  If
 * anyone thinks they can hear a bug caused by this, please try the canonical
 * version instead.
 *
 * _x and _y must be lvalues.  The expansion already ends in ';'.
 */
#define XPROD32(_a, _b, _t, _v, _x, _y)                 \
  asm volatile ("mac.l %[a], %[t], %%acc0;"             \
                "mac.l %[b], %[v], %%acc0;"             \
                "mac.l %[b], %[t], %%acc1;"             \
                "msac.l %[a], %[v], %%acc1;"            \
                "movclr.l %%acc0, %[x];"                \
                "asr.l #1, %[x];"                       \
                "movclr.l %%acc1, %[y];"                \
                "asr.l #1, %[y];"                       \
                : [x] "=&d" (_x),                       \
                  [y] "=&d" (_y)                        \
                : [a] "r" (_a),                         \
                  [b] "r" (_b),                         \
                  [t] "r" (_t),                         \
                  [v] "r" (_v)                          \
                : "cc");
136 | |||
137 | #ifndef _V_VECT_OPS | ||
138 | #define _V_VECT_OPS | ||
139 | |||
140 | /* asm versions of vector operations for block.c, window.c */ | ||
141 | /* assumes MAC is initialized & accumulators cleared */ | ||
142 | static inline | ||
143 | void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | ||
144 | { | ||
145 | /* align to 16 bytes */ | ||
146 | while(n>0 && (int)x&16) { | ||
147 | *x++ += *y++; | ||
148 | n--; | ||
149 | } | ||
150 | asm volatile ("bra 1f;" | ||
151 | "0:" /* loop start */ | ||
152 | "movem.l (%[x]), %%d0-%%d3;" /* fetch values */ | ||
153 | "movem.l (%[y]), %%a0-%%a3;" | ||
154 | /* add */ | ||
155 | "add.l %%a0, %%d0;" | ||
156 | "add.l %%a1, %%d1;" | ||
157 | "add.l %%a2, %%d2;" | ||
158 | "add.l %%a3, %%d3;" | ||
159 | /* store and advance */ | ||
160 | "movem.l %%d0-%%d3, (%[x]);" | ||
161 | "lea.l (4*4, %[x]), %[x];" | ||
162 | "lea.l (4*4, %[y]), %[y];" | ||
163 | "subq.l #4, %[n];" /* done 4 elements */ | ||
164 | "1: cmpi.l #4, %[n];" | ||
165 | "bge 0b;" | ||
166 | : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y) | ||
167 | : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", | ||
168 | "cc", "memory"); | ||
169 | /* add final elements */ | ||
170 | while (n>0) { | ||
171 | *x++ += *y++; | ||
172 | n--; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | static inline | ||
177 | void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | ||
178 | { | ||
179 | /* align to 16 bytes */ | ||
180 | while(n>0 && (int)x&16) { | ||
181 | *x++ = *y++; | ||
182 | n--; | ||
183 | } | ||
184 | asm volatile ("bra 1f;" | ||
185 | "0:" /* loop start */ | ||
186 | "movem.l (%[y]), %%d0-%%d3;" /* fetch values */ | ||
187 | "movem.l %%d0-%%d3, (%[x]);" /* store */ | ||
188 | "lea.l (4*4, %[x]), %[x];" /* advance */ | ||
189 | "lea.l (4*4, %[y]), %[y];" | ||
190 | "subq.l #4, %[n];" /* done 4 elements */ | ||
191 | "1: cmpi.l #4, %[n];" | ||
192 | "bge 0b;" | ||
193 | : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y) | ||
194 | : : "%d0", "%d1", "%d2", "%d3", "cc", "memory"); | ||
195 | /* copy final elements */ | ||
196 | while (n>0) { | ||
197 | *x++ = *y++; | ||
198 | n--; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | |||
203 | static inline | ||
204 | void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) | ||
205 | { | ||
206 | /* ensure data is aligned to 16-bytes */ | ||
207 | while(n>0 && (int)data%16) { | ||
208 | *data = MULT31(*data, *window); | ||
209 | data++; | ||
210 | window++; | ||
211 | n--; | ||
212 | } | ||
213 | asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */ | ||
214 | "movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */ | ||
215 | "lea.l (4*4, %[w]), %[w];" | ||
216 | "bra 1f;" /* jump to loop condition */ | ||
217 | "0:" /* loop body */ | ||
218 | /* multiply and load next window values */ | ||
219 | "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;" | ||
220 | "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;" | ||
221 | "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;" | ||
222 | "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;" | ||
223 | "movclr.l %%acc0, %%d0;" /* get the products */ | ||
224 | "movclr.l %%acc1, %%d1;" | ||
225 | "movclr.l %%acc2, %%d2;" | ||
226 | "movclr.l %%acc3, %%d3;" | ||
227 | /* store and advance */ | ||
228 | "movem.l %%d0-%%d3, (%[d]);" | ||
229 | "lea.l (4*4, %[d]), %[d];" | ||
230 | "movem.l (%[d]), %%d0-%%d3;" | ||
231 | "subq.l #4, %[n];" /* done 4 elements */ | ||
232 | "1: cmpi.l #4, %[n];" | ||
233 | "bge 0b;" | ||
234 | /* multiply final elements */ | ||
235 | "tst.l %[n];" | ||
236 | "beq 1f;" /* n=0 */ | ||
237 | "mac.l %%d0, %%a0, %%acc0;" | ||
238 | "movclr.l %%acc0, %%d0;" | ||
239 | "move.l %%d0, (%[d])+;" | ||
240 | "subq.l #1, %[n];" | ||
241 | "beq 1f;" /* n=1 */ | ||
242 | "mac.l %%d1, %%a1, %%acc0;" | ||
243 | "movclr.l %%acc0, %%d1;" | ||
244 | "move.l %%d1, (%[d])+;" | ||
245 | "subq.l #1, %[n];" | ||
246 | "beq 1f;" /* n=2 */ | ||
247 | /* otherwise n = 3 */ | ||
248 | "mac.l %%d2, %%a2, %%acc0;" | ||
249 | "movclr.l %%acc0, %%d2;" | ||
250 | "move.l %%d2, (%[d])+;" | ||
251 | "1:" | ||
252 | : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window) | ||
253 | : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", | ||
254 | "cc", "memory"); | ||
255 | } | ||
256 | |||
257 | static inline | ||
258 | void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) | ||
259 | { | ||
260 | /* ensure at least data is aligned to 16-bytes */ | ||
261 | while(n>0 && (int)data%16) { | ||
262 | *data = MULT31(*data, *window); | ||
263 | data++; | ||
264 | window--; | ||
265 | n--; | ||
266 | } | ||
267 | asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */ | ||
268 | "movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */ | ||
269 | "movem.l (%[w]), %%a0-%%a3;" | ||
270 | "bra 1f;" /* jump to loop condition */ | ||
271 | "0:" /* loop body */ | ||
272 | /* multiply and load next window value */ | ||
273 | "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;" | ||
274 | "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;" | ||
275 | "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;" | ||
276 | "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;" | ||
277 | "movclr.l %%acc0, %%d0;" /* get the products */ | ||
278 | "movclr.l %%acc1, %%d1;" | ||
279 | "movclr.l %%acc2, %%d2;" | ||
280 | "movclr.l %%acc3, %%d3;" | ||
281 | /* store and advance */ | ||
282 | "movem.l %%d0-%%d3, (%[d]);" | ||
283 | "lea.l (4*4, %[d]), %[d];" | ||
284 | "movem.l (%[d]), %%d0-%%d3;" | ||
285 | "subq.l #4, %[n];" /* done 4 elements */ | ||
286 | "1: cmpi.l #4, %[n];" | ||
287 | "bge 0b;" | ||
288 | /* multiply final elements */ | ||
289 | "tst.l %[n];" | ||
290 | "beq 1f;" /* n=0 */ | ||
291 | "mac.l %%d0, %%a3, %%acc0;" | ||
292 | "movclr.l %%acc0, %%d0;" | ||
293 | "move.l %%d0, (%[d])+;" | ||
294 | "subq.l #1, %[n];" | ||
295 | "beq 1f;" /* n=1 */ | ||
296 | "mac.l %%d1, %%a2, %%acc0;" | ||
297 | "movclr.l %%acc0, %%d1;" | ||
298 | "move.l %%d1, (%[d])+;" | ||
299 | "subq.l #1, %[n];" | ||
300 | "beq 1f;" /* n=2 */ | ||
301 | /* otherwise n = 3 */ | ||
302 | "mac.l %%d2, %%a1, %%acc0;" | ||
303 | "movclr.l %%acc0, %%d2;" | ||
304 | "move.l %%d2, (%[d])+;" | ||
305 | "1:" | ||
306 | : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window) | ||
307 | : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", | ||
308 | "cc", "memory"); | ||
309 | } | ||
310 | |||
311 | #endif | ||
312 | |||
313 | #endif | ||
314 | |||
315 | #ifndef _V_CLIP_MATH | ||
316 | #define _V_CLIP_MATH | ||
317 | |||
318 | /* this is portable C and simple; why not use this as default? */ | ||
319 | static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) { | ||
320 | register ogg_int32_t hi=32767, lo=-32768; | ||
321 | return (x>=hi ? hi : (x<=lo ? lo : x)); | ||
322 | } | ||
323 | |||
324 | #endif | ||
325 | #else | ||
326 | #define LINE_ATTR | ||
327 | #endif | ||