summaryrefslogtreecommitdiff
path: root/apps/codecs/Tremor/asm_mcf5249.h
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/Tremor/asm_mcf5249.h')
-rw-r--r--apps/codecs/Tremor/asm_mcf5249.h257
1 files changed, 257 insertions, 0 deletions
diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h
new file mode 100644
index 0000000000..09c74671bc
--- /dev/null
+++ b/apps/codecs/Tremor/asm_mcf5249.h
@@ -0,0 +1,257 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 *
9 * Copyright (C) 2005 by Pedro Vasconcelos
10 *
11 * All files in this archive are subject to the GNU General Public License.
12 * See the file COPYING in the source tree root for full license agreement.
13 *
14 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
15 * KIND, either express or implied.
16 *
17 ****************************************************************************/
18/* asm routines for wide math on the MCF5249 */
19
20#include "os_types.h"
21
22#if CONFIG_CPU == MCF5249 && !defined(SIMULATOR)
23
24#ifndef _V_WIDE_MATH
25#define _V_WIDE_MATH
26
/* MB() expands to nothing in this build. The commented-out alternative
 * below is an empty asm statement with a "memory" clobber. */
27//#define MB() asm volatile ("" : : : "memory")
28#define MB()
29
/*
 * Prepare the MAC unit for the routines in this file.
 *
 * Writes 0x20 to MACSR (fractional mode, truncation, no saturation, per the
 * inline note) and zeroes all four accumulators: acc0 is cleared with
 * movclr and its now-zero value is copied to acc1..acc3.
 */
30static inline void mcf5249_init_mac(void) {
31 int r; /* scratch: receives the old acc0 value, which is discarded */
32 asm volatile ("move.l #0x20, %%macsr;" // frac, truncate, no saturation
33 "movclr.l %%acc0, %[r];" // clear accumulators
34 "move.l %%acc0, %%acc1;" // acc0 is zero here; propagate it
35 "move.l %%acc0, %%acc2;"
36 "move.l %%acc0, %%acc3;"
37 : [r] "=r" (r));
38}
39
/*
 * Multiply x by y on the MAC and return the accumulator value shifted right
 * arithmetically by one bit, i.e. one bit further down than MULT31 returns.
 * NOTE(review): presumably (x*y)>>32 given the name and the fractional mode
 * selected in mcf5249_init_mac -- confirm against the MAC documentation.
 *
 * The product is accumulated into acc0, so acc0 must be zero on entry;
 * movclr reads it out and leaves it cleared again.
 * r is placed in a data register ("=d") and shifted in place by asr.l.
 */
40static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
41 ogg_int32_t r;
42 asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply into acc
43 "movclr.l %%acc0, %[r];" // move & clear acc
44 "asr.l #1, %[r];" // no overflow test
45 : [r] "=d" (r)
46 : [x] "r" (x), [y] "r" (y)
47 : "cc");
48 return r;
49}
50
/*
 * Multiply x by y on the MAC and return the value read back from acc0.
 * NOTE(review): presumably (x*y)>>31 given the name and the fractional mode
 * selected in mcf5249_init_mac -- confirm against the MAC documentation.
 *
 * The product is accumulated into acc0, so acc0 must be zero on entry;
 * movclr reads it out and leaves it cleared again.
 */
51static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
52 ogg_int32_t r;
53 asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
54 "movclr.l %%acc0, %[r];" // move and clear
55 : [r] "=r" (r)
56 : [x] "r" (x), [y] "r" (y)
57 : "cc");
58 return r;
59}
60
61
/*
 * Multiply x by y and return a 32-bit result assembled from two partial
 * products: the value read from the MAC ("higher half") shifted left by 16,
 * OR-ed with the mulu.l result ("lower half") shifted right logically by 15.
 * NOTE(review): presumably this equals (x*y)>>15 as the name suggests --
 * confirm against the MAC documentation.
 *
 * acc0 must be zero on entry and is left cleared by movclr.
 * x is overwritten inside the asm ("+d"); only the local copy is affected.
 * r is early-clobber ("=&d") because it is written before x and y are read
 * for the last time.
 */
62static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
63 ogg_int32_t r;
64 asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
65 "movclr.l %%acc0, %[r];" // get higher half
66 "mulu.l %[y], %[x];" // get lower half
67 "asl.l #8, %[r];" // hi << 16 (two shifts of 8)
68 "asl.l #8, %[r];"
69 "lsr.l #8, %[x];" // (unsigned)lo >> 15
70 "lsr.l #7, %[x];"
71 "or.l %[x], %[r];" // or
72 : [r] "=&d" (r), [x] "+d" (x)
73 : [y] "d" (y)
74 : "cc");
75 return r;
76}
77
78
/*
 * Cross product on the MAC, stored through pointers:
 *   *x = a*t + b*v   (accumulated in acc0)
 *   *y = b*t - a*v   (accumulated in acc1)
 * Each sum is read back with movclr, in the same scaling MULT31 uses.
 *
 * acc0 and acc1 must be zero on entry; movclr leaves both cleared.
 * The results are written to memory inside the asm, hence the "memory"
 * clobber. r is only a scratch register for the two stores.
 */
79static inline
80void XPROD31(ogg_int32_t a, ogg_int32_t b,
81 ogg_int32_t t, ogg_int32_t v,
82 ogg_int32_t *x, ogg_int32_t *y)
83{
84 ogg_int32_t r;
85 asm volatile ("mac.l %[a], %[t], %%acc0;" // acc0 = a*t
86 "mac.l %[b], %[v], %%acc0;" // acc0 += b*v
87 "mac.l %[b], %[t], %%acc1;" // acc1 = b*t
88 "msac.l %[a], %[v], %%acc1;" // acc1 -= a*v
89 "movclr.l %%acc0, %[r];"
90 "move.l %[r], (%[x]);" // store first result to *x
91 "movclr.l %%acc1, %[r];"
92 "move.l %[r], (%[y]);" // store second result to *y
93 : [r] "=&r" (r)
94 : [x] "a" (x), [y] "a" (y),
95 [a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v)
96 : "cc", "memory");
97}
98
99
/*
 * Cross product with the opposite signs to XPROD31, stored through pointers:
 *   *x = a*t - b*v   (accumulated in acc0)
 *   *y = b*t + a*v   (accumulated in acc1)
 * Each sum is read back with movclr, in the same scaling MULT31 uses.
 *
 * acc0 and acc1 must be zero on entry; movclr leaves both cleared.
 * The results are written to memory inside the asm, hence the "memory"
 * clobber. r is only a scratch register for the two stores.
 */
100static inline
101void XNPROD31(ogg_int32_t a, ogg_int32_t b,
102 ogg_int32_t t, ogg_int32_t v,
103 ogg_int32_t *x, ogg_int32_t *y)
104{
105 ogg_int32_t r;
106 asm volatile ("mac.l %[a], %[t], %%acc0;" // acc0 = a*t
107 "msac.l %[b], %[v], %%acc0;" // acc0 -= b*v
108 "mac.l %[b], %[t], %%acc1;" // acc1 = b*t
109 "mac.l %[a], %[v], %%acc1;" // acc1 += a*v
110 "movclr.l %%acc0, %[r];"
111 "move.l %[r], (%[x]);" // store first result to *x
112 "movclr.l %%acc1, %[r];"
113 "move.l %[r], (%[y]);" // store second result to *y
114 : [r] "=&r" (r)
115 : [x] "a" (x), [y] "a" (y),
116 [a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v)
117 : "cc", "memory");
118}
119
120
121/* no faster way of doing this using the MAC? */
/*
 * XPROD32: cross product built from four MULT32 calls:
 *   _x = MULT32(_a,_t) + MULT32(_b,_v)
 *   _y = MULT32(_b,_t) - MULT32(_a,_v)
 * _x and _y are assigned directly, so they must be lvalues (not pointers,
 * unlike XPROD31). Each of _a, _b, _t, _v is evaluated twice, so arguments
 * with side effects are unsafe. The expansion is a bare brace block.
 */
122#define XPROD32(_a, _b, _t, _v, _x, _y) \
123 { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
124 (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
125
126
127/* asm versions of vector multiplication for window.c */
128/* assumes MAC is initialized & accumulators cleared */
/*
 * In-place forward windowing: data[i] = data[i] * window[i] for i in
 * [0, n), with each product read back from the MAC via movclr (same
 * scaling as MULT31).
 *
 *   data    values to scale; overwritten with the products
 *   window  multipliers, read in ascending address order as 32-bit words
 *           (assumes LOOKUP_T is a 32-bit type -- TODO confirm)
 *   n       number of elements
 *
 * Four elements are processed per loop pass while n >= 4; the remaining
 * 0..3 elements are handled one at a time through acc0.
 *
 * NOTE: operands are loaded one group ahead of use. The initial movem.l
 * loads and the reloads at the end of every pass are unconditional, so up
 * to four words past the n-th element of both data and window are read
 * (never written).
 *
 * d0-d3 and a0-a3 are used as fixed working registers and are declared as
 * clobbers. The pointer and count arguments are advanced inside the asm;
 * only the local copies change.
 */
129static inline
130void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
131{
132 asm volatile ("movem.l (%[d]), %%d0-%%d3;" // loop start
133 "movem.l (%[w]), %%a0-%%a3;" // pre-fetch registers
134 "lea.l (4*4, %[w]), %[w];"
135 "bra 1f;" // jump to loop condition
136 "0:" // loop body
137 // multiply and load next window values
138 "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
139 "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
140 "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
141 "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
142 "movclr.l %%acc0, %%d0;" // get the products
143 "movclr.l %%acc1, %%d1;"
144 "movclr.l %%acc2, %%d2;"
145 "movclr.l %%acc3, %%d3;"
146 // store and advance
147 "movem.l %%d0-%%d3, (%[d]);"
148 "lea.l (4*4, %[d]), %[d];"
149 "movem.l (%[d]), %%d0-%%d3;" // pre-fetch next data group
150 "subq.l #4, %[n];" // done 4 elements
151 "1: cmpi.l #4, %[n];"
152 "bge 0b;"
153 // multiply final elements
154 "tst.l %[n];"
155 "beq 1f;" // n=0
156 "mac.l %%d0, %%a0, %%acc0;"
157 "movclr.l %%acc0, %%d0;"
158 "move.l %%d0, (%[d])+;"
159 "subq.l #1, %[n];"
160 "beq 1f;" // n=1
161 "mac.l %%d1, %%a1, %%acc0;"
162 "movclr.l %%acc0, %%d1;"
163 "move.l %%d1, (%[d])+;"
164 "subq.l #1, %[n];"
165 "beq 1f;" // n=2
166 // otherwise n = 3
167 "mac.l %%d2, %%a2, %%acc0;"
168 "movclr.l %%acc0, %%d2;"
169 "move.l %%d2, (%[d])+;"
170 "1:"
171 : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
172 : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
173 "cc", "memory");
174}
175
/*
 * In-place backward windowing: data[i] = data[i] * window[-i] for i in
 * [0, n), with each product read back from the MAC via movclr (same
 * scaling as MULT31). The MAC must be initialized and acc0..acc3 must be
 * zero on entry, since every product is accumulated before being read out.
 *
 *   data    values to scale, walked upwards; overwritten with the products
 *   window  points at the multiplier for data[0]; subsequent multipliers
 *           are read at decreasing addresses as 32-bit words
 *           (assumes LOOKUP_T is a 32-bit type -- TODO confirm)
 *   n       number of elements
 *
 * Four elements are processed per loop pass while n >= 4; the remaining
 * 0..3 elements are handled one at a time through acc0. Because the window
 * group is loaded from ascending addresses, a3 holds the multiplier for the
 * first data word of a group and a0 the one for the fourth.
 *
 * NOTE: operands are loaded one group ahead of use. The initial loads and
 * the reloads in every pass are unconditional, so up to four words past the
 * n-th element of data, and up to four words below the last window element
 * used, are read (never written).
 *
 * d0-d3 and a0-a3 are used as fixed working registers and are declared as
 * clobbers. The pointer and count arguments are modified inside the asm;
 * only the local copies change.
 */
176static inline
177void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
178{
179 asm volatile ("lea.l (-3*4, %[w]), %[w];" // loop start
180 "movem.l (%[d]), %%d0-%%d3;" // pre-fetch registers
181 "movem.l (%[w]), %%a0-%%a3;"
182 "bra 1f;" // jump to loop condition
183 "0:" // loop body
184 // multiply and load next window value
185 "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
186 "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
187 "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
188 "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
189 "movclr.l %%acc0, %%d0;" // get the products
190 "movclr.l %%acc1, %%d1;"
191 "movclr.l %%acc2, %%d2;"
192 "movclr.l %%acc3, %%d3;"
193 // store and advance
194 "movem.l %%d0-%%d3, (%[d]);"
195 "lea.l (4*4, %[d]), %[d];"
196 "movem.l (%[d]), %%d0-%%d3;" // pre-fetch next data group
197 "subq.l #4, %[n];" // done 4 elements
198 "1: cmpi.l #4, %[n];"
199 "bge 0b;"
200 // multiply final elements
201 "tst.l %[n];"
202 "beq 1f;" // n=0
203 "mac.l %%d0, %%a3, %%acc0;"
204 "movclr.l %%acc0, %%d0;"
205 "move.l %%d0, (%[d])+;"
206 "subq.l #1, %[n];"
207 "beq 1f;" // n=1
208 "mac.l %%d1, %%a2, %%acc0;"
209 "movclr.l %%acc0, %%d1;"
210 "move.l %%d1, (%[d])+;"
211 "subq.l #1, %[n];"
212 "beq 1f;" // n=2
213 // otherwise n = 3
214 "mac.l %%d2, %%a1, %%acc0;"
215 "movclr.l %%acc0, %%d2;"
216 "move.l %%d2, (%[d])+;"
217 "1:"
218 : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
219 : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
220 "cc", "memory");
221}
222
223
224static inline
225void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
226{
227 asm volatile ("clr.l %%d0;"
228 "clr.l %%d1;"
229 "clr.l %%d2;"
230 "clr.l %%d3;"
231 // loop start
232 "tst.l %[n];"
233 "bra 1f;"
234 "0: movem.l %%d0-%%d3, (%[ptr]);"
235 "lea (4*4, %[ptr]), %[ptr];"
236 "subq.l #4, %[n];"
237 "1: bgt 0b;"
238 // remaing elements
239 "tst.l %[n];"
240 "beq 1f;" // n=0
241 "clr.l (%[ptr])+;"
242 "subq.l #1, %[n];"
243 "beq 1f;" // n=1
244 "clr.l (%[ptr])+;"
245 "subq.l #1, %[n];"
246 "beq 1f;" // n=2
247 // otherwise n = 3
248 "clr.l (%[ptr])+;"
249 "1:"
250 : [n] "+d" (n), [ptr] "+a" (ptr)
251 :
252 : "%d0","%d1","%d2","%d3","cc","memory");
253}
254
255
256#endif
257#endif