Diffstat (limited to 'apps/codecs/lib/asm_mcf5249.h')
-rw-r--r--  apps/codecs/lib/asm_mcf5249.h  353
1 file changed, 0 insertions(+), 353 deletions(-)
diff --git a/apps/codecs/lib/asm_mcf5249.h b/apps/codecs/lib/asm_mcf5249.h
deleted file mode 100644
index 841c413a94..0000000000
--- a/apps/codecs/lib/asm_mcf5249.h
+++ /dev/null
@@ -1,353 +0,0 @@
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * Copyright (C) 2005 by Pedro Vasconcelos
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
/* asm routines for wide math on the MCF5249 */

#if defined(CPU_COLDFIRE)

#define INCL_OPTIMIZED_MULT32
static inline int32_t MULT32(int32_t x, int32_t y) {

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply & shift */
                  "movclr.l %%acc0, %[x];"     /* move & clear acc */
                  "asr.l #1, %[x];"            /* no overflow test */
                  : [x] "+&d" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}
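
/* For reference, a minimal portable sketch of what MULT32 computes,
   assuming the EMAC is in fractional mode (products shifted left by one)
   with rounding disabled, as the NOTE below implies. The _ref name is
   illustrative only, not part of the original API; it also assumes
   int64_t is available alongside int32_t. */
static inline int32_t MULT32_ref(int32_t x, int32_t y)
{
    /* high 32 bits of the 64-bit product; the asr above undoes the
       fractional-mode left shift */
    return (int32_t)(((int64_t)x * y) >> 32);
}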

#define INCL_OPTIMIZED_MULT31
static inline int32_t MULT31(int32_t x, int32_t y) {
    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "movclr.l %%acc0, %[x];"     /* move and clear */
                  : [x] "+&r" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}
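
/* Reference sketch (same assumptions and naming caveat as MULT32_ref):
   without the compensating asr, the fractional-mode shift leaves the
   result scaled by one extra bit, i.e. a 31-bit shift overall. */
static inline int32_t MULT31_ref(int32_t x, int32_t y)
{
    return (int32_t)(((int64_t)x * y) >> 31);
}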

#define INCL_OPTIMIZED_MULT31_SHIFT15
/* NOTE: this requires that the emac is *NOT* rounding */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
    int32_t r;

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
                  "movclr.l %%acc0, %[r];"     /* get higher half */
                  "swap %[r];"                 /* hi<<16, plus one free */
                  "lsr.l #8, %[x];"            /* (unsigned)lo >> 15 */
                  "lsr.l #7, %[x];"
                  "move.w %[x], %[r];"         /* logical-or results */
                  : [r] "=&d" (r), [x] "+d" (x)
                  : [y] "d" (y)
                  : "cc");
    return r;
}
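
/* Reference sketch: the swap/lsr/move.w sequence above splices the high
   and low halves of the product back together, so the net effect is a
   plain 15-bit shift of the 64-bit product, truncated to 32 bits.
   Illustrative name, same assumptions as above. */
static inline int32_t MULT31_SHIFT15_ref(int32_t x, int32_t y)
{
    return (int32_t)(((int64_t)x * y) >> 15);
}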

#define INCL_OPTIMIZED_MULT31_SHIFT16
static inline int32_t MULT31_SHIFT16(int32_t x, int32_t y) {
    int32_t r;

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
                  "movclr.l %%acc0, %[r];"     /* get higher half */
                  "lsr.l #1, %[r];"            /* hi >> 1, to compensate emac shift */
                  "move.w %[r], %[x];"         /* x = x & 0xffff0000 | r & 0xffff */
                  "swap %[x];"                 /* x = (unsigned)x << 16 | (unsigned)x >> 16 */
                  : [r] "=&d" (r), [x] "+d" (x)
                  : [y] "d" (y)
                  : "cc");
    return x;
}
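
/* Reference sketch: here move.w/swap recombine the halves with a 16-bit
   split instead, giving a 16-bit shift of the 64-bit product, truncated
   to 32 bits. Illustrative name, same assumptions as above. */
static inline int32_t MULT31_SHIFT16_ref(int32_t x, int32_t y)
{
    return (int32_t)(((int64_t)x * y) >> 16);
}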

#define INCL_OPTIMIZED_XPROD31
static inline
void XPROD31(int32_t a, int32_t b,
             int32_t t, int32_t v,
             int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "mac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "msac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}
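
/* Reference sketch of the accumulator usage above: acc0 collects a*t + b*v,
   acc1 collects b*t - a*v (mac then msac), both scaled down by 31 bits.
   Illustrative name, same assumptions as the MULT sketches. */
static inline void XPROD31_ref(int32_t a, int32_t b, int32_t t, int32_t v,
                               int32_t *x, int32_t *y)
{
    *x = (int32_t)(((int64_t)a * t + (int64_t)b * v) >> 31);
    *y = (int32_t)(((int64_t)b * t - (int64_t)a * v) >> 31);
}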

#define INCL_OPTIMIZED_XNPROD31
static inline
void XNPROD31(int32_t a, int32_t b,
              int32_t t, int32_t v,
              int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "msac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "mac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}
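
/* Reference sketch: the msac now sits in acc0, flipping the signs relative
   to XPROD31: acc0 = a*t - b*v, acc1 = b*t + a*v. Illustrative name. */
static inline void XNPROD31_ref(int32_t a, int32_t b, int32_t t, int32_t v,
                                int32_t *x, int32_t *y)
{
    *x = (int32_t)(((int64_t)a * t - (int64_t)b * v) >> 31);
    *y = (int32_t)(((int64_t)b * t + (int64_t)a * v) >> 31);
}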


/* This could lose the LSB by overflow, but I don't think it'll ever happen.
   If anyone thinks they can hear a bug caused by this, please try the above
   version. */
#define INCL_OPTIMIZED_XPROD32
#define XPROD32(_a, _b, _t, _v, _x, _y)            \
    asm volatile ("mac.l %[a], %[t], %%acc0;"      \
                  "mac.l %[b], %[v], %%acc0;"      \
                  "mac.l %[b], %[t], %%acc1;"      \
                  "msac.l %[a], %[v], %%acc1;"     \
                  "movclr.l %%acc0, %[x];"         \
                  "asr.l #1, %[x];"                \
                  "movclr.l %%acc1, %[y];"         \
                  "asr.l #1, %[y];"                \
                  : [x] "=d" (_x), [y] "=d" (_y)   \
                  : [a] "r" (_a), [b] "r" (_b),    \
                    [t] "r" (_t), [v] "r" (_v)     \
                  : "cc");
138
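/* Reference sketch: like XPROD31 but with the extra asr, so both cross
   products are scaled down by 32 bits instead of 31. The macro takes plain
   lvalues; a function form is shown here for clarity only (illustrative
   name, not part of the original API). */
static inline void XPROD32_ref(int32_t a, int32_t b, int32_t t, int32_t v,
                               int32_t *x, int32_t *y)
{
    *x = (int32_t)(((int64_t)a * t + (int64_t)b * v) >> 32);
    *y = (int32_t)(((int64_t)b * t - (int64_t)a * v) >> 32);
}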

#define INCL_OPTIMIZED_XPROD31_R
#define XPROD31_R(_a, _b, _t, _v, _x, _y)          \
    asm volatile ("mac.l %[a], %[t], %%acc0;"      \
                  "mac.l %[b], %[v], %%acc0;"      \
                  "mac.l %[b], %[t], %%acc1;"      \
                  "msac.l %[a], %[v], %%acc1;"     \
                  "movclr.l %%acc0, %[x];"         \
                  "movclr.l %%acc1, %[y];"         \
                  : [x] "=r" (_x), [y] "=r" (_y)   \
                  : [a] "r" (_a), [b] "r" (_b),    \
                    [t] "r" (_t), [v] "r" (_v)     \
                  : "cc");

#define INCL_OPTIMIZED_XNPROD31_R
#define XNPROD31_R(_a, _b, _t, _v, _x, _y)         \
    asm volatile ("mac.l %[a], %[t], %%acc0;"      \
                  "msac.l %[b], %[v], %%acc0;"     \
                  "mac.l %[b], %[t], %%acc1;"      \
                  "mac.l %[a], %[v], %%acc1;"      \
                  "movclr.l %%acc0, %[x];"         \
                  "movclr.l %%acc1, %[y];"         \
                  : [x] "=r" (_x), [y] "=r" (_y)   \
                  : [a] "r" (_a), [b] "r" (_b),    \
                    [t] "r" (_t), [v] "r" (_v)     \
                  : "cc");
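
/* Reference sketch covering both _R variants: identical arithmetic to
   XPROD31/XNPROD31, but the results land in the _x/_y lvalues directly
   instead of going through pointers. Shown for XPROD31_R; XNPROD31_R
   swaps the signs of the v terms. The _ref name is illustrative only. */
#define XPROD31_R_ref(_a, _b, _t, _v, _x, _y)                                  \
    do {                                                                       \
        (_x) = (int32_t)(((int64_t)(_a) * (_t) + (int64_t)(_b) * (_v)) >> 31); \
        (_y) = (int32_t)(((int64_t)(_b) * (_t) - (int64_t)(_a) * (_v)) >> 31); \
    } while (0)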
164
#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
static inline
void vect_add(int32_t *x, const int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ += *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                            /* loop start */
                  "movem.l (%[x]), %%d0-%%d3;"    /* fetch values */
                  "movem.l (%[y]), %%a0-%%a3;"
                  /* add */
                  "add.l %%a0, %%d0;"
                  "add.l %%a1, %%d1;"
                  "add.l %%a2, %%d2;"
                  "add.l %%a3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[x]);"
                  "lea.l (4*4, %[x]), %[x];"
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"              /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
    /* add final elements */
    while (n>0) {
        *x++ += *y++;
        n--;
    }
}
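
/* Reference sketch: the whole function reduces to the scalar tail loop; the
   asm simply processes four elements per iteration via movem bursts.
   Illustrative name, not part of the original API. */
static inline void vect_add_ref(int32_t *x, const int32_t *y, int n)
{
    while (n > 0) {
        *x++ += *y++;
        n--;
    }
}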

static inline
void vect_copy(int32_t *x, const int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ = *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                            /* loop start */
                  "movem.l (%[y]), %%d0-%%d3;"    /* fetch values */
                  "movem.l %%d0-%%d3, (%[x]);"    /* store */
                  "lea.l (4*4, %[x]), %[x];"      /* advance */
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"              /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
    /* copy final elements */
    while (n>0) {
        *x++ = *y++;
        n--;
    }
}
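
/* Reference sketch: a burst copy, equivalent to copying n int32_t values
   one at a time (a memcpy would do the same for the aligned middle run).
   Illustrative name only. */
static inline void vect_copy_ref(int32_t *x, const int32_t *y, int n)
{
    while (n > 0) {
        *x++ = *y++;
        n--;
    }
}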

static inline
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
    /* ensure data is aligned to 16 bytes */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window++;
        n--;
    }
    asm volatile ("movem.l (%[d]), %%d0-%%d3;"  /* loop start */
                  "movem.l (%[w]), %%a0-%%a3;"  /* pre-fetch registers */
                  "lea.l (4*4, %[w]), %[w];"
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window values */
                  "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
                  "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
                  "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
                  "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n=0 */
                  "mac.l %%d0, %%a0, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=1 */
                  "mac.l %%d1, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}
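
/* Reference sketch: element-wise windowing with the window read forward.
   It matches the scalar alignment loop above applied to all n elements;
   the _ref name is illustrative only. */
static inline void vect_mult_fw_ref(int32_t *data, const int32_t *window, int n)
{
    while (n > 0) {
        *data = MULT31(*data, *window);
        data++;
        window++;
        n--;
    }
}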

static inline
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
    /* ensure at least the data pointer is aligned to 16 bytes */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window--;
        n--;
    }
    asm volatile ("lea.l (-3*4, %[w]), %[w];"   /* loop start */
                  "movem.l (%[d]), %%d0-%%d3;"  /* pre-fetch registers */
                  "movem.l (%[w]), %%a0-%%a3;"
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window value */
                  "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
                  "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
                  "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
                  "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n=0 */
                  "mac.l %%d0, %%a3, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=1 */
                  "mac.l %%d1, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}
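
/* Reference sketch: same as vect_mult_fw but with the window traversed
   backward; illustrative name only. */
static inline void vect_mult_bw_ref(int32_t *data, const int32_t *window, int n)
{
    while (n > 0) {
        *data = MULT31(*data, *window);
        data++;
        window--;
        n--;
    }
}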

#endif

/* not used anymore */
/*
#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

* this is portable C and simple; why not use this as default?
static inline int32_t CLIP_TO_15(register int32_t x) {
    register int32_t hi=32767, lo=-32768;
    return (x>=hi ? hi : (x<=lo ? lo : x));
}

#endif
*/
#endif