Diffstat (limited to 'apps/codecs/libtremor/asm_mcf5249.h')
-rw-r--r--  apps/codecs/libtremor/asm_mcf5249.h  327
1 file changed, 327 insertions, 0 deletions
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h
new file mode 100644
index 0000000000..64dfb1b785
--- /dev/null
+++ b/apps/codecs/libtremor/asm_mcf5249.h
@@ -0,0 +1,327 @@
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * Copyright (C) 2005 by Pedro Vasconcelos
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
/* asm routines for wide math on the MCF5249 */

#include "os_types.h"

#if defined(CPU_COLDFIRE)

/* attribute for 16-byte alignment */
#define LINE_ATTR __attribute__ ((aligned (16)))

#ifndef _V_WIDE_MATH
#define _V_WIDE_MATH

#define MB()

static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {

  asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply & shift */
                "movclr.l %%acc0, %[x];"     /* move & clear acc */
                "asr.l #1, %[x];"            /* no overflow test */
                : [x] "+&d" (x)
                : [y] "r" (y)
                : "cc");
  return x;
}
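
/* For reference, a portable C sketch of what MULT32 computes: the top 32
   bits of the 64-bit product, i.e. (x*y)>>32. This matches the asm above
   assuming the EMAC runs in fractional mode, as the vector ops below note.
   Kept under #if 0, documentation only; the asm is what gets compiled. */
#if 0
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)(((ogg_int64_t)x * y) >> 32);
}
#endif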

static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {

  asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                "movclr.l %%acc0, %[x];"     /* move and clear */
                : [x] "+&r" (x)
                : [y] "r" (y)
                : "cc");
  return x;
}
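
/* Portable C sketch of MULT31: a 1.31 x 1.31 fixed-point multiply keeping
   the top 31 bits of the product, i.e. (x*y)>>31. Documentation only. */
#if 0
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)(((ogg_int64_t)x * y) >> 31);
}
#endif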


static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  ogg_int32_t r;

  asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
                "movclr.l %%acc0, %[r];"     /* get higher half */
                "asl.l #8, %[r];"            /* hi<<16, plus one free */
                "asl.l #8, %[r];"
                "lsr.l #8, %[x];"            /* (unsigned)lo >> 15 */
                "lsr.l #7, %[x];"
                "or.l %[x], %[r];"           /* logical-or results */
                : [r] "=&d" (r), [x] "+d" (x)
                : [y] "d" (y)
                : "cc");
  return r;
}
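
/* Portable C sketch of MULT31_SHIFT15: the 64-bit product shifted down by
   15, which the asm above reassembles from the high half (shifted left 16)
   or'ed with the low half (shifted right 15). Documentation only. */
#if 0
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  return (ogg_int32_t)(((ogg_int64_t)x * y) >> 15);
}
#endif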


static inline
void XPROD31(ogg_int32_t a, ogg_int32_t b,
             ogg_int32_t t, ogg_int32_t v,
             ogg_int32_t *x, ogg_int32_t *y)
{
  asm volatile ("mac.l %[a], %[t], %%acc0;"
                "mac.l %[b], %[v], %%acc0;"
                "mac.l %[b], %[t], %%acc1;"
                "msac.l %[a], %[v], %%acc1;"
                "movclr.l %%acc0, %[a];"
                "move.l %[a], (%[x]);"
                "movclr.l %%acc1, %[a];"
                "move.l %[a], (%[y]);"
                : [a] "+&r" (a)
                : [x] "a" (x), [y] "a" (y),
                  [b] "r" (b), [t] "r" (t), [v] "r" (v)
                : "cc", "memory");
}
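
/* Portable C sketch of XPROD31, a complex-style cross product built from
   two MULT31 pairs; the asm fuses the four multiplies into the two EMAC
   accumulators. Documentation only. */
#if 0
static inline
void XPROD31(ogg_int32_t a, ogg_int32_t b,
             ogg_int32_t t, ogg_int32_t v,
             ogg_int32_t *x, ogg_int32_t *y)
{
  *x = MULT31(a, t) + MULT31(b, v);
  *y = MULT31(b, t) - MULT31(a, v);
}
#endif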


static inline
void XNPROD31(ogg_int32_t a, ogg_int32_t b,
              ogg_int32_t t, ogg_int32_t v,
              ogg_int32_t *x, ogg_int32_t *y)
{
  asm volatile ("mac.l %[a], %[t], %%acc0;"
                "msac.l %[b], %[v], %%acc0;"
                "mac.l %[b], %[t], %%acc1;"
                "mac.l %[a], %[v], %%acc1;"
                "movclr.l %%acc0, %[a];"
                "move.l %[a], (%[x]);"
                "movclr.l %%acc1, %[a];"
                "move.l %[a], (%[y]);"
                : [a] "+&r" (a)
                : [x] "a" (x), [y] "a" (y),
                  [b] "r" (b), [t] "r" (t), [v] "r" (v)
                : "cc", "memory");
}
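
/* Portable C sketch of XNPROD31: same shape as XPROD31 with the signs of
   the two v terms flipped. Documentation only. */
#if 0
static inline
void XNPROD31(ogg_int32_t a, ogg_int32_t b,
              ogg_int32_t t, ogg_int32_t v,
              ogg_int32_t *x, ogg_int32_t *y)
{
  *x = MULT31(a, t) - MULT31(b, v);
  *y = MULT31(b, t) + MULT31(a, v);
}
#endif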


#if 0 /* canonical Tremor definition */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
  { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
    (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
#endif

/* this could lose the LSB by overflow, but I don't think it'll ever happen.
   If anyone thinks they can hear a bug caused by this, please try the above
   version. */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
  asm volatile ("mac.l %[a], %[t], %%acc0;" \
                "mac.l %[b], %[v], %%acc0;" \
                "mac.l %[b], %[t], %%acc1;" \
                "msac.l %[a], %[v], %%acc1;" \
                "movclr.l %%acc0, %[x];" \
                "asr.l #1, %[x];" \
                "movclr.l %%acc1, %[y];" \
                "asr.l #1, %[y];" \
                : [x] "=&d" (_x), [y] "=&d" (_y) \
                : [a] "r" (_a), [b] "r" (_b), \
                  [t] "r" (_t), [v] "r" (_v) \
                : "cc");

#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
static inline
void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
{
  /* align to 16 bytes */
  while(n>0 && (int)x&15) {
    *x++ += *y++;
    n--;
  }
  asm volatile ("bra 1f;"
                "0:"                          /* loop start */
                "movem.l (%[x]), %%d0-%%d3;"  /* fetch values */
                "movem.l (%[y]), %%a0-%%a3;"
                /* add */
                "add.l %%a0, %%d0;"
                "add.l %%a1, %%d1;"
                "add.l %%a2, %%d2;"
                "add.l %%a3, %%d3;"
                /* store and advance */
                "movem.l %%d0-%%d3, (%[x]);"
                "lea.l (4*4, %[x]), %[x];"
                "lea.l (4*4, %[y]), %[y];"
                "subq.l #4, %[n];"            /* done 4 elements */
                "1: cmpi.l #4, %[n];"
                "bge 0b;"
                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                    "cc", "memory");
  /* add final elements */
  while (n>0) {
    *x++ += *y++;
    n--;
  }
}

static inline
void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
{
  /* align to 16 bytes */
  while(n>0 && (int)x&15) {
    *x++ = *y++;
    n--;
  }
  asm volatile ("bra 1f;"
                "0:"                          /* loop start */
                "movem.l (%[y]), %%d0-%%d3;"  /* fetch values */
                "movem.l %%d0-%%d3, (%[x]);"  /* store */
                "lea.l (4*4, %[x]), %[x];"    /* advance */
                "lea.l (4*4, %[y]), %[y];"
                "subq.l #4, %[n];"            /* done 4 elements */
                "1: cmpi.l #4, %[n];"
                "bge 0b;"
                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
  /* copy final elements */
  while (n>0) {
    *x++ = *y++;
    n--;
  }
}


static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
  /* ensure data is 16-byte aligned */
  while(n>0 && (int)data%16) {
    *data = MULT31(*data, *window);
    data++;
    window++;
    n--;
  }
  asm volatile ("movem.l (%[d]), %%d0-%%d3;"  /* loop start */
                "movem.l (%[w]), %%a0-%%a3;"  /* pre-fetch registers */
                "lea.l (4*4, %[w]), %[w];"
                "bra 1f;"                     /* jump to loop condition */
                "0:"                          /* loop body */
                /* multiply and load next window values */
                "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
                "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
                "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
                "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
                "movclr.l %%acc0, %%d0;"      /* get the products */
                "movclr.l %%acc1, %%d1;"
                "movclr.l %%acc2, %%d2;"
                "movclr.l %%acc3, %%d3;"
                /* store and advance */
                "movem.l %%d0-%%d3, (%[d]);"
                "lea.l (4*4, %[d]), %[d];"
                "movem.l (%[d]), %%d0-%%d3;"
                "subq.l #4, %[n];"            /* done 4 elements */
                "1: cmpi.l #4, %[n];"
                "bge 0b;"
                /* multiply final elements */
                "tst.l %[n];"
                "beq 1f;"                     /* n=0 */
                "mac.l %%d0, %%a0, %%acc0;"
                "movclr.l %%acc0, %%d0;"
                "move.l %%d0, (%[d])+;"
                "subq.l #1, %[n];"
                "beq 1f;"                     /* n=1 */
                "mac.l %%d1, %%a1, %%acc0;"
                "movclr.l %%acc0, %%d1;"
                "move.l %%d1, (%[d])+;"
                "subq.l #1, %[n];"
                "beq 1f;"                     /* n=2 */
                /* otherwise n = 3 */
                "mac.l %%d2, %%a2, %%acc0;"
                "movclr.l %%acc0, %%d2;"
                "move.l %%d2, (%[d])+;"
                "1:"
                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                    "cc", "memory");
}
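
/* Portable C sketch of vect_mult_fw: apply the window forwards, one MULT31
   per element, exactly the loop used above for the unaligned head elements.
   Documentation only. */
#if 0
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
  while (n > 0) {
    *data = MULT31(*data, *window);
    data++;
    window++;
    n--;
  }
}
#endif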

static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
  /* ensure data (at least) is 16-byte aligned */
  while(n>0 && (int)data%16) {
    *data = MULT31(*data, *window);
    data++;
    window--;
    n--;
  }
  asm volatile ("lea.l (-3*4, %[w]), %[w];"   /* loop start */
                "movem.l (%[d]), %%d0-%%d3;"  /* pre-fetch registers */
                "movem.l (%[w]), %%a0-%%a3;"
                "bra 1f;"                     /* jump to loop condition */
                "0:"                          /* loop body */
                /* multiply and load next window value */
                "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
                "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
                "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
                "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
                "movclr.l %%acc0, %%d0;"      /* get the products */
                "movclr.l %%acc1, %%d1;"
                "movclr.l %%acc2, %%d2;"
                "movclr.l %%acc3, %%d3;"
                /* store and advance */
                "movem.l %%d0-%%d3, (%[d]);"
                "lea.l (4*4, %[d]), %[d];"
                "movem.l (%[d]), %%d0-%%d3;"
                "subq.l #4, %[n];"            /* done 4 elements */
                "1: cmpi.l #4, %[n];"
                "bge 0b;"
                /* multiply final elements */
                "tst.l %[n];"
                "beq 1f;"                     /* n=0 */
                "mac.l %%d0, %%a3, %%acc0;"
                "movclr.l %%acc0, %%d0;"
                "move.l %%d0, (%[d])+;"
                "subq.l #1, %[n];"
                "beq 1f;"                     /* n=1 */
                "mac.l %%d1, %%a2, %%acc0;"
                "movclr.l %%acc0, %%d1;"
                "move.l %%d1, (%[d])+;"
                "subq.l #1, %[n];"
                "beq 1f;"                     /* n=2 */
                /* otherwise n = 3 */
                "mac.l %%d2, %%a1, %%acc0;"
                "movclr.l %%acc0, %%d2;"
                "move.l %%d2, (%[d])+;"
                "1:"
                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                    "cc", "memory");
}
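
/* Portable C sketch of vect_mult_bw: same as vect_mult_fw but walking the
   window backwards, matching the unaligned-head loop above. Documentation
   only. */
#if 0
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
  while (n > 0) {
    *data = MULT31(*data, *window);
    data++;
    window--;
    n--;
  }
}
#endif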

#endif /* _V_VECT_OPS */

#endif /* _V_WIDE_MATH */

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

/* this is portable C and simple; why not use this as default? */
static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) {
  register ogg_int32_t hi=32767, lo=-32768;
  return (x>=hi ? hi : (x<=lo ? lo : x));
}

#endif /* _V_CLIP_MATH */
#else  /* !CPU_COLDFIRE */
#define LINE_ATTR
#endif /* CPU_COLDFIRE */