diff options
Diffstat (limited to 'apps/codecs/lib/asm_mcf5249.h')
-rw-r--r-- | apps/codecs/lib/asm_mcf5249.h | 353 |
1 files changed, 0 insertions, 353 deletions
diff --git a/apps/codecs/lib/asm_mcf5249.h b/apps/codecs/lib/asm_mcf5249.h deleted file mode 100644 index 841c413a94..0000000000 --- a/apps/codecs/lib/asm_mcf5249.h +++ /dev/null | |||
@@ -1,353 +0,0 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * | ||
9 | * Copyright (C) 2005 by Pedro Vasconcelos | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public License | ||
13 | * as published by the Free Software Foundation; either version 2 | ||
14 | * of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
17 | * KIND, either express or implied. | ||
18 | * | ||
19 | ****************************************************************************/ | ||
20 | /* asm routines for wide math on the MCF5249 */ | ||
21 | |||
22 | #if defined(CPU_COLDFIRE) | ||
23 | |||
#define INCL_OPTIMIZED_MULT32
/* Full-precision fixed-point multiply: returns the top 32 bits of the
 * 64-bit product, i.e. (x*y) >> 32.  The EMAC runs in fractional mode
 * (product shifted left by one before accumulation), so the result is
 * arithmetic-shifted right once to compensate.
 * NOTE(review): assumes %acc0 is clear on entry and EMAC rounding is
 * disabled (see MULT31_SHIFT15) — confirm against the codec init code. */
static inline int32_t MULT32(int32_t x, int32_t y) {

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply & shift */
                  "movclr.l %%acc0, %[x];"     /* move & clear acc */
                  "asr.l #1, %[x];"            /* undo fractional-mode <<1; no overflow test */
                  : [x] "+&d" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}
35 | |||
#define INCL_OPTIMIZED_MULT31
/* Multiply of two signed 1.31 fixed-point fractions: (x*y) >> 31.
 * The EMAC's fractional-mode left shift means the accumulator's high
 * longword already holds the product >> 31, so no correction shift is
 * needed here (contrast MULT32). */
static inline int32_t MULT31(int32_t x, int32_t y) {
    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "movclr.l %%acc0, %[x];"     /* move and clear */
                  : [x] "+&r" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}
45 | |||
#define INCL_OPTIMIZED_MULT31_SHIFT15
/* NOTE: this requires that the emac is *NOT* rounding */
/* Returns (x*y) >> 15.  The EMAC supplies the high half of the product
 * (already effectively >> 31 in fractional mode); a plain mulu.l
 * recomputes the low 32 bits — both to get the bits the EMAC discards
 * and to avoid stalling while waiting on the accumulator.  The two
 * halves are then merged:
 *   swap   : positions the high half as (hi << 16)
 *   2x lsr : (unsigned)lo >> 15, split as 8+7 (lsr immediate max is 8)
 *   move.w : drops the low word of the swapped half in favour of lo's
 *            bits — works as a logical OR because they don't overlap. */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
    int32_t r;

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
                  "movclr.l %%acc0, %[r];"     /* get higher half */
                  "swap %[r];"                 /* hi<<16, plus one free */
                  "lsr.l #8, %[x];"            /* (unsigned)lo >> 15 */
                  "lsr.l #7, %[x];"
                  "move.w %[x], %[r];"         /* logical-or results */
                  : [r] "=&d" (r), [x] "+d" (x)
                  : [y] "d" (y)
                  : "cc");
    return r;
}
63 | |||
#define INCL_OPTIMIZED_MULT31_SHIFT16
/* Returns (x*y) >> 16.  Like MULT31_SHIFT15: EMAC high half plus a
 * mulu.l low half.  The lsr #1 first converts the fractional-mode high
 * half to the true product >> 32; move.w/swap then paste its low word
 * above the top 16 bits of the mulu.l result, yielding bits [47:16]. */
static inline int32_t MULT31_SHIFT16(int32_t x, int32_t y) {
    int32_t r;

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
                  "movclr.l %%acc0, %[r];"     /* get higher half */
                  "lsr.l #1, %[r];"            /* hi >> 1, to compensate emac shift */
                  "move.w %[r], %[x];"         /* x = x & 0xffff0000 | r & 0xffff */
                  "swap %[x];"                 /* x = (unsigned)x << 16 | (unsigned)x >> 16 */
                  : [r] "=&d" (r), [x] "+d" (x)
                  : [y] "d" (y)
                  : "cc");
    return x;
}
79 | |||
#define INCL_OPTIMIZED_XPROD31
/* 1.31 fixed-point complex-rotation butterfly:
 *   *x = MULT31(a,t) + MULT31(b,v)
 *   *y = MULT31(b,t) - MULT31(a,v)
 * The two sums are accumulated at full width in acc0/acc1 (mac adds,
 * msac subtracts) before extraction, so no intermediate precision is
 * lost.  Results are stored through x/y directly from the asm. */
static inline
void XPROD31(int32_t a, int32_t b,
             int32_t t, int32_t v,
             int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "mac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "msac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"   /* a is reused as a scratch reg */
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}
99 | |||
#define INCL_OPTIMIZED_XNPROD31
/* Negated-rotation counterpart of XPROD31:
 *   *x = MULT31(a,t) - MULT31(b,v)
 *   *y = MULT31(b,t) + MULT31(a,v)
 * Same structure as XPROD31 with the msac (subtract) moved from acc1
 * to acc0. */
static inline
void XNPROD31(int32_t a, int32_t b,
              int32_t t, int32_t v,
              int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "msac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "mac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"   /* a is reused as a scratch reg */
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}
119 | |||
120 | |||
/* this could lose the LSB by overflow, but i don't think it'll ever happen.
   if anyone think they can hear a bug caused by this, please try the above
   version. */
#define INCL_OPTIMIZED_XPROD32
/* Butterfly with MULT32 scaling:
 *   _x = (_a*_t + _b*_v) >> 32
 *   _y = (_b*_t - _a*_v) >> 32
 * Sums form at full width in acc0/acc1; the asr #1 then undoes the
 * EMAC fractional shift.  The extracted 32-bit value is the sum's high
 * half, hence the LSB-overflow caveat above. */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "mac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "msac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "asr.l #1, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  "asr.l #1, %[y];" \
                  : [x] "=d" (_x), [y] "=d" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");
138 | |||
#define INCL_OPTIMIZED_XPROD31_R
/* XPROD31 variant that returns results in variables instead of storing
 * through pointers:
 *   _x = MULT31(_a,_t) + MULT31(_b,_v)
 *   _y = MULT31(_b,_t) - MULT31(_a,_v)
 * No shift correction needed: fractional mode already leaves the
 * accumulator high half at MULT31 scaling. */
#define XPROD31_R(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "mac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "msac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  : [x] "=r" (_x), [y] "=r" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");
151 | |||
#define INCL_OPTIMIZED_XNPROD31_R
/* XNPROD31 variant returning results in variables:
 *   _x = MULT31(_a,_t) - MULT31(_b,_v)
 *   _y = MULT31(_b,_t) + MULT31(_a,_v)
 * Mirrors XPROD31_R with the msac on acc0 instead of acc1. */
#define XNPROD31_R(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "msac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "mac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  : [x] "=r" (_x), [y] "=r" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");
164 | |||
165 | #ifndef _V_VECT_OPS | ||
166 | #define _V_VECT_OPS | ||
167 | |||
168 | /* asm versions of vector operations for block.c, window.c */ | ||
169 | /* assumes MAC is initialized & accumulators cleared */ | ||
170 | static inline | ||
171 | void vect_add(int32_t *x, const int32_t *y, int n) | ||
172 | { | ||
173 | /* align to 16 bytes */ | ||
174 | while(n>0 && (int)x&15) { | ||
175 | *x++ += *y++; | ||
176 | n--; | ||
177 | } | ||
178 | asm volatile ("bra 1f;" | ||
179 | "0:" /* loop start */ | ||
180 | "movem.l (%[x]), %%d0-%%d3;" /* fetch values */ | ||
181 | "movem.l (%[y]), %%a0-%%a3;" | ||
182 | /* add */ | ||
183 | "add.l %%a0, %%d0;" | ||
184 | "add.l %%a1, %%d1;" | ||
185 | "add.l %%a2, %%d2;" | ||
186 | "add.l %%a3, %%d3;" | ||
187 | /* store and advance */ | ||
188 | "movem.l %%d0-%%d3, (%[x]);" | ||
189 | "lea.l (4*4, %[x]), %[x];" | ||
190 | "lea.l (4*4, %[y]), %[y];" | ||
191 | "subq.l #4, %[n];" /* done 4 elements */ | ||
192 | "1: cmpi.l #4, %[n];" | ||
193 | "bge 0b;" | ||
194 | : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y) | ||
195 | : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", | ||
196 | "cc", "memory"); | ||
197 | /* add final elements */ | ||
198 | while (n>0) { | ||
199 | *x++ += *y++; | ||
200 | n--; | ||
201 | } | ||
202 | } | ||
203 | |||
204 | static inline | ||
205 | void vect_copy(int32_t *x, const int32_t *y, int n) | ||
206 | { | ||
207 | /* align to 16 bytes */ | ||
208 | while(n>0 && (int)x&15) { | ||
209 | *x++ = *y++; | ||
210 | n--; | ||
211 | } | ||
212 | asm volatile ("bra 1f;" | ||
213 | "0:" /* loop start */ | ||
214 | "movem.l (%[y]), %%d0-%%d3;" /* fetch values */ | ||
215 | "movem.l %%d0-%%d3, (%[x]);" /* store */ | ||
216 | "lea.l (4*4, %[x]), %[x];" /* advance */ | ||
217 | "lea.l (4*4, %[y]), %[y];" | ||
218 | "subq.l #4, %[n];" /* done 4 elements */ | ||
219 | "1: cmpi.l #4, %[n];" | ||
220 | "bge 0b;" | ||
221 | : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y) | ||
222 | : : "%d0", "%d1", "%d2", "%d3", "cc", "memory"); | ||
223 | /* copy final elements */ | ||
224 | while (n>0) { | ||
225 | *x++ = *y++; | ||
226 | n--; | ||
227 | } | ||
228 | } | ||
229 | |||
230 | static inline | ||
231 | void vect_mult_fw(int32_t *data, const int32_t *window, int n) | ||
232 | { | ||
233 | /* ensure data is aligned to 16-bytes */ | ||
234 | while(n>0 && (int)data&15) { | ||
235 | *data = MULT31(*data, *window); | ||
236 | data++; | ||
237 | window++; | ||
238 | n--; | ||
239 | } | ||
240 | asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */ | ||
241 | "movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */ | ||
242 | "lea.l (4*4, %[w]), %[w];" | ||
243 | "bra 1f;" /* jump to loop condition */ | ||
244 | "0:" /* loop body */ | ||
245 | /* multiply and load next window values */ | ||
246 | "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;" | ||
247 | "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;" | ||
248 | "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;" | ||
249 | "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;" | ||
250 | "movclr.l %%acc0, %%d0;" /* get the products */ | ||
251 | "movclr.l %%acc1, %%d1;" | ||
252 | "movclr.l %%acc2, %%d2;" | ||
253 | "movclr.l %%acc3, %%d3;" | ||
254 | /* store and advance */ | ||
255 | "movem.l %%d0-%%d3, (%[d]);" | ||
256 | "lea.l (4*4, %[d]), %[d];" | ||
257 | "movem.l (%[d]), %%d0-%%d3;" | ||
258 | "subq.l #4, %[n];" /* done 4 elements */ | ||
259 | "1: cmpi.l #4, %[n];" | ||
260 | "bge 0b;" | ||
261 | /* multiply final elements */ | ||
262 | "tst.l %[n];" | ||
263 | "beq 1f;" /* n=0 */ | ||
264 | "mac.l %%d0, %%a0, %%acc0;" | ||
265 | "movclr.l %%acc0, %%d0;" | ||
266 | "move.l %%d0, (%[d])+;" | ||
267 | "subq.l #1, %[n];" | ||
268 | "beq 1f;" /* n=1 */ | ||
269 | "mac.l %%d1, %%a1, %%acc0;" | ||
270 | "movclr.l %%acc0, %%d1;" | ||
271 | "move.l %%d1, (%[d])+;" | ||
272 | "subq.l #1, %[n];" | ||
273 | "beq 1f;" /* n=2 */ | ||
274 | /* otherwise n = 3 */ | ||
275 | "mac.l %%d2, %%a2, %%acc0;" | ||
276 | "movclr.l %%acc0, %%d2;" | ||
277 | "move.l %%d2, (%[d])+;" | ||
278 | "1:" | ||
279 | : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window) | ||
280 | : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", | ||
281 | "cc", "memory"); | ||
282 | } | ||
283 | |||
284 | static inline | ||
285 | void vect_mult_bw(int32_t *data, const int32_t *window, int n) | ||
286 | { | ||
287 | /* ensure at least data is aligned to 16-bytes */ | ||
288 | while(n>0 && (int)data&15) { | ||
289 | *data = MULT31(*data, *window); | ||
290 | data++; | ||
291 | window--; | ||
292 | n--; | ||
293 | } | ||
294 | asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */ | ||
295 | "movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */ | ||
296 | "movem.l (%[w]), %%a0-%%a3;" | ||
297 | "bra 1f;" /* jump to loop condition */ | ||
298 | "0:" /* loop body */ | ||
299 | /* multiply and load next window value */ | ||
300 | "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;" | ||
301 | "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;" | ||
302 | "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;" | ||
303 | "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;" | ||
304 | "movclr.l %%acc0, %%d0;" /* get the products */ | ||
305 | "movclr.l %%acc1, %%d1;" | ||
306 | "movclr.l %%acc2, %%d2;" | ||
307 | "movclr.l %%acc3, %%d3;" | ||
308 | /* store and advance */ | ||
309 | "movem.l %%d0-%%d3, (%[d]);" | ||
310 | "lea.l (4*4, %[d]), %[d];" | ||
311 | "movem.l (%[d]), %%d0-%%d3;" | ||
312 | "subq.l #4, %[n];" /* done 4 elements */ | ||
313 | "1: cmpi.l #4, %[n];" | ||
314 | "bge 0b;" | ||
315 | /* multiply final elements */ | ||
316 | "tst.l %[n];" | ||
317 | "beq 1f;" /* n=0 */ | ||
318 | "mac.l %%d0, %%a3, %%acc0;" | ||
319 | "movclr.l %%acc0, %%d0;" | ||
320 | "move.l %%d0, (%[d])+;" | ||
321 | "subq.l #1, %[n];" | ||
322 | "beq 1f;" /* n=1 */ | ||
323 | "mac.l %%d1, %%a2, %%acc0;" | ||
324 | "movclr.l %%acc0, %%d1;" | ||
325 | "move.l %%d1, (%[d])+;" | ||
326 | "subq.l #1, %[n];" | ||
327 | "beq 1f;" /* n=2 */ | ||
328 | /* otherwise n = 3 */ | ||
329 | "mac.l %%d2, %%a1, %%acc0;" | ||
330 | "movclr.l %%acc0, %%d2;" | ||
331 | "move.l %%d2, (%[d])+;" | ||
332 | "1:" | ||
333 | : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window) | ||
334 | : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3", | ||
335 | "cc", "memory"); | ||
336 | } | ||
337 | |||
338 | #endif | ||
339 | |||
340 | /* not used anymore */ | ||
341 | /* | ||
342 | #ifndef _V_CLIP_MATH | ||
343 | #define _V_CLIP_MATH | ||
344 | |||
345 | * this is portable C and simple; why not use this as default? | ||
346 | static inline int32_t CLIP_TO_15(register int32_t x) { | ||
347 | register int32_t hi=32767, lo=-32768; | ||
348 | return (x>=hi ? hi : (x<=lo ? lo : x)); | ||
349 | } | ||
350 | |||
351 | #endif | ||
352 | */ | ||
353 | #endif | ||