summaryrefslogtreecommitdiff
path: root/apps/codecs/lib/fft-ffmpeg_cf.h
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/lib/fft-ffmpeg_cf.h')
-rw-r--r--apps/codecs/lib/fft-ffmpeg_cf.h370
1 files changed, 0 insertions, 370 deletions
diff --git a/apps/codecs/lib/fft-ffmpeg_cf.h b/apps/codecs/lib/fft-ffmpeg_cf.h
deleted file mode 100644
index a29464a23d..0000000000
--- a/apps/codecs/lib/fft-ffmpeg_cf.h
+++ /dev/null
@@ -1,370 +0,0 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2010 Nils Wallménius
11 *
12 * Coldfire v2 optimisations for ffmpeg's fft (used in fft-ffmpeg.c)
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
18 *
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
21 *
22 ****************************************************************************/
23
24#ifdef CPU_COLDFIRE
#define FFT_FFMPEG_INCL_OPTIMISED_FFT4

/* In-place 4-point FFT butterfly over z[0..3].
 * One movem.l pulls the four complex (re,im) pairs into d0-d7, all the
 * add/sub butterflies run entirely in registers (a0 as the only scratch
 * address register), and one movem.l writes the results back.  This size
 * needs no multiplies, so the EMAC unit is untouched. */
static inline void fft4(FFTComplex * z)
{
    asm volatile ("movem.l (%[z]), %%d0-%%d7\n\t" /* d0/d1=z[0], d2/d3=z[1], d4/d5=z[2], d6/d7=z[3] */
                  "move.l %%d0, %%a0\n\t"
                  "add.l %%d2, %%d0\n\t"  /* d0 == t1 */
                  "neg.l %%d2\n\t"
                  "add.l %%a0, %%d2\n\t"  /* d2 == t3, a0 free */
                  "move.l %%d6, %%a0\n\t"
                  "sub.l %%d4, %%d6\n\t"  /* d6 == t8 */
                  "add.l %%d4, %%a0\n\t"  /* a0 == t6 */

                  "move.l %%d0, %%d4\n\t"
                  "sub.l %%a0, %%d4\n\t"  /* z[2].re done */
                  "add.l %%a0, %%d0\n\t"  /* z[0].re done, a0 free */

                  "move.l %%d5, %%a0\n\t"
                  "sub.l %%d7, %%d5\n\t"  /* d5 == t7 */
                  "add.l %%d7, %%a0\n\t"  /* a0 == t5 */

                  "move.l %%d1, %%d7\n\t"
                  "sub.l %%d3, %%d7\n\t"  /* d7 == t4 */
                  "add.l %%d3, %%d1\n\t"  /* d1 == t2 */

                  "move.l %%d7, %%d3\n\t"
                  "sub.l %%d6, %%d7\n\t"  /* z[3].im done */
                  "add.l %%d6, %%d3\n\t"  /* z[1].im done */

                  "move.l %%d2, %%d6\n\t"
                  "sub.l %%d5, %%d6\n\t"  /* z[3].re done */
                  "add.l %%d5, %%d2\n\t"  /* z[1].re done */

                  "move.l %%d1, %%d5\n\t"
                  "sub.l %%a0, %%d5\n\t"  /* z[2].im done */
                  "add.l %%a0, %%d1\n\t"  /* z[0].im done */

                  /* d0-d7 now hold the transformed z[0..3]; write back in one go */
                  "movem.l %%d0-%%d7, (%[z])\n\t"
                  : :[z] "a" (z)
                  : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
                    "a0", "cc", "memory");

}
67
#define FFT_FFMPEG_INCL_OPTIMISED_FFT8

/* In-place 8-point FFT over z[0..7].
 * Strategy: load z[4..7] first (offset 4*8 bytes), form their sums and
 * differences, and immediately queue the four cPI2_8 products into the EMAC
 * accumulators acc0-acc3 so z[5]/z[7] never need an intermediate store/load.
 * Then an inlined copy of fft4() runs on z[0..3], the two halves are
 * combined, and results are stored as they are finished.
 * NOTE(review): cPI2_8 is defined by the including file (fft-ffmpeg.c);
 * presumably a fixed-point cos(pi/4) twiddle constant -- confirm there. */
static inline void fft8(FFTComplex *z)
{
    asm volatile ("movem.l (4*8, %[z]), %%d0-%%d7\n\t" /* d0..d7 = z[4..7] */
                  "move.l %%d0, %%a1\n\t"
                  "add.l %%d2, %%a1\n\t"  /* a1 == t1 */
                  "sub.l %%d2, %%d0\n\t"  /* d0 == z[5].re */

                  "move.l %%d1, %%a2\n\t"
                  "add.l %%d3, %%a2\n\t"  /* a2 == t2 */
                  "sub.l %%d3, %%d1\n\t"  /* d1 == z[5].im */

                  "move.l %%d4, %%d2\n\t"
                  "add.l %%d6, %%d2\n\t"  /* d2 == t3 */
                  "sub.l %%d6, %%d4\n\t"  /* d4 == z[7].re */

                  "move.l %%d5, %%d3\n\t"
                  "add.l %%d7, %%d3\n\t"  /* d3 == t4 */
                  "sub.l %%d7, %%d5\n\t"  /* d5 == z[7].im */

                  "move.l %%d2, %%a4\n\t"
                  "sub.l %%a1, %%a4\n\t"  /* a4 == t8 */
                  "add.l %%d2, %%a1\n\t"  /* a1 == t1, d2 free */

                  "move.l %%a2, %%a3\n\t"
                  "sub.l %%d3, %%a3\n\t"  /* a3 == t7 */
                  "add.l %%d3, %%a2\n\t"  /* a2 == t2, d3 free */

                  /* emac block from TRANSFORM_EQUAL, do this now
                     so we don't need to store and load z[5] and z[7] */
                  "move.l %[_cPI2_8], %%d2\n\t"
                  "mac.l %%d2, %%d0, %%acc0\n\t"
                  "mac.l %%d2, %%d1, %%acc1\n\t"
                  "mac.l %%d2, %%d4, %%acc2\n\t"
                  "mac.l %%d2, %%d5, %%acc3\n\t"

                  /* fft4 on z[0..3], clobbers all d regs and a0
                     (same sequence as fft4() above) */
                  "movem.l (%[z]), %%d0-%%d7\n\t"
                  "move.l %%d0, %%a0\n\t"
                  "add.l %%d2, %%d0\n\t"  /* d0 == t1 */
                  "neg.l %%d2\n\t"
                  "add.l %%a0, %%d2\n\t"  /* d2 == t3, a0 free */
                  "move.l %%d6, %%a0\n\t"
                  "sub.l %%d4, %%d6\n\t"  /* d6 == t8 */
                  "add.l %%d4, %%a0\n\t"  /* a0 == t6 */

                  "move.l %%d0, %%d4\n\t"
                  "sub.l %%a0, %%d4\n\t"  /* z[2].re done */
                  "add.l %%a0, %%d0\n\t"  /* z[0].re done, a0 free */

                  "move.l %%d5, %%a0\n\t"
                  "sub.l %%d7, %%d5\n\t"  /* d5 == t7 */
                  "add.l %%d7, %%a0\n\t"  /* a0 == t5 */

                  "move.l %%d1, %%d7\n\t"
                  "sub.l %%d3, %%d7\n\t"  /* d7 == t4 */
                  "add.l %%d3, %%d1\n\t"  /* d1 == t2 */

                  "move.l %%d7, %%d3\n\t"
                  "sub.l %%d6, %%d7\n\t"  /* z[3].im done */
                  "add.l %%d6, %%d3\n\t"  /* z[1].im done */

                  "move.l %%d2, %%d6\n\t"
                  "sub.l %%d5, %%d6\n\t"  /* z[3].re done */
                  "add.l %%d5, %%d2\n\t"  /* z[1].re done */

                  "move.l %%d1, %%d5\n\t"
                  "sub.l %%a0, %%d5\n\t"  /* z[2].im done */
                  "add.l %%a0, %%d1\n\t"  /* z[0].im done */
                  /* end of fft4, but don't store yet */

                  /* combine fft4 results with the t1/t2/t7/t8 kept in a1-a4 */
                  "move.l %%d0, %%a0\n\t"
                  "add.l %%a1, %%d0\n\t"
                  "sub.l %%a1, %%a0\n\t"  /* z[4].re, z[0].re done, a1 free */

                  "move.l %%d1, %%a1\n\t"
                  "add.l %%a2, %%d1\n\t"
                  "sub.l %%a2, %%a1\n\t"  /* z[4].im, z[0].im done, a2 free */

                  "move.l %%d4, %%a2\n\t"
                  "add.l %%a3, %%d4\n\t"
                  "sub.l %%a3, %%a2\n\t"  /* z[6].re, z[2].re done, a3 free */

                  "move.l %%d5, %%a3\n\t"
                  "add.l %%a4, %%d5\n\t"
                  "sub.l %%a4, %%a3\n\t"  /* z[6].im, z[2].im done, a4 free */

                  "movem.l %%d0-%%d1, (%[z])\n\t"      /* save z[0] */
                  "movem.l %%d4-%%d5, (2*8, %[z])\n\t" /* save z[2] */
                  "movem.l %%a0-%%a1, (4*8, %[z])\n\t" /* save z[4] */
                  "movem.l %%a2-%%a3, (6*8, %[z])\n\t" /* save z[6] */

                  /* TRANSFORM_EQUAL: drain the four products queued above */
                  "movclr.l %%acc0, %%d0\n\t"
                  "movclr.l %%acc1, %%d1\n\t"
                  "movclr.l %%acc2, %%d4\n\t"
                  "movclr.l %%acc3, %%d5\n\t"

                  "move.l %%d1, %%a0\n\t"
                  "add.l %%d0, %%a0\n\t"  /* a0 == t1 */
                  "sub.l %%d0, %%d1\n\t"  /* d1 == t2 */

                  "move.l %%d4, %%d0\n\t"
                  "add.l %%d5, %%d0\n\t"  /* d0 == t6 */
                  "sub.l %%d5, %%d4\n\t"  /* d4 == t5 */

                  "move.l %%d4, %%a1\n\t"
                  "sub.l %%a0, %%a1\n\t"  /* a1 == temp1 */
                  "add.l %%a0, %%d4\n\t"  /* d4 == temp2 */

                  "move.l %%d2, %%a2\n\t"
                  "sub.l %%d4, %%a2\n\t"  /* a2 == z[5].re */
                  "add.l %%d4, %%d2\n\t"  /* z[1].re done */

                  "move.l %%d7, %%d5\n\t"
                  "sub.l %%a1, %%d5\n\t"  /* d5 == z[7].im */
                  "add.l %%a1, %%d7\n\t"  /* z[3].im done */

                  "move.l %%d1, %%a0\n\t"
                  "sub.l %%d0, %%a0\n\t"  /* a0 == temp1 */
                  "add.l %%d0, %%d1\n\t"  /* d1 == temp2 */

                  "move.l %%d6, %%d4\n\t"
                  "sub.l %%a0, %%d4\n\t"  /* d4 == z[7].re */
                  "add.l %%a0, %%d6\n\t"  /* z[3].re done */

                  "move.l %%d3, %%a3\n\t"
                  "sub.l %%d1, %%a3\n\t"  /* a3 == z[5].im */
                  "add.l %%d1, %%d3\n\t"  /* z[1].im done */

                  "movem.l %%d2-%%d3, (1*8, %[z])\n\t" /* save z[1] */
                  "movem.l %%d6-%%d7, (3*8, %[z])\n\t" /* save z[3] */
                  "movem.l %%a2-%%a3, (5*8, %[z])\n\t" /* save z[5] */
                  "movem.l %%d4-%%d5, (7*8, %[z])\n\t" /* save z[7] */
                  : :[z] "a" (z), [_cPI2_8] "i" (cPI2_8)
                  : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
                    "a0", "a1", "a2", "a3", "a4", "cc", "memory");
}
206
#define FFT_FFMPEG_INCL_OPTIMISED_TRANSFORM

/* One radix-4 combine step: updates z[0], z[n], z[2n] and z[3n] in place,
 * rotating z[2n]/z[3n] by the twiddle pair (wre, wim) through the EMAC
 * pipeline (acc0..acc3), then forming the four sum/difference outputs.
 * The wre/wim operands are "+r" because they are reused as scratch once
 * their products are queued.  %[z] is advanced by the two (%[z])+ loads
 * (8 bytes = one FFTComplex), so the returned pointer is z+1 -- the next
 * element for the caller's butterfly loop. */
static inline FFTComplex* TRANSFORM(FFTComplex * z, unsigned int n, FFTSample wre, FFTSample wim)
{
    asm volatile ("move.l (%[z2]), %%d5\n\t"
                  /* queue the four twiddle products while streaming in operands */
                  "mac.l %%d5, %[wre], (4, %[z2]), %%d4, %%acc0\n\t"
                  "mac.l %%d4, %[wim], %%acc0\n\t"
                  "mac.l %%d4, %[wre], (%[z3]), %%d6, %%acc1\n\t"
                  "msac.l %%d5, %[wim], (4, %[z3]), %%d7, %%acc1\n\t"
                  "mac.l %%d6, %[wre], (%[z])+, %%d4, %%acc2\n\t"
                  "msac.l %%d7, %[wim], (%[z])+, %%d5, %%acc2\n\t"
                  "mac.l %%d7, %[wre], %%acc3\n\t"
                  "mac.l %%d6, %[wim], %%acc3\n\t"

                  "movclr.l %%acc0, %[wre]\n\t" /* t1 */
                  "movclr.l %%acc2, %[wim]\n\t" /* t5 */

                  "move.l %%d4, %%d6\n\t"
                  "move.l %[wim], %%d7\n\t"
                  "sub.l %[wre], %[wim]\n\t"    /* t5 = t5-t1 */
                  "add.l %[wre], %%d7\n\t"
                  "sub.l %%d7, %%d6\n\t"        /* d6 = a0re - (t5+t1) => a2re */
                  "add.l %%d7, %%d4\n\t"        /* d4 = a0re + (t5+t1) => a0re */

                  "movclr.l %%acc3, %%d7\n\t"   /* t6 */
                  "movclr.l %%acc1, %%d3\n\t"   /* t2 */

                  "move.l %%d3, %[wre]\n\t"
                  "add.l %%d7, %[wre]\n\t"
                  "sub.l %%d7, %%d3\n\t"        /* t2 = t6-t2 */
                  "move.l %%d5, %%d7\n\t"
                  "sub.l %[wre], %%d7\n\t"      /* d7 = a0im - (t2+t6) => a2im */

                  "movem.l %%d6-%%d7, (%[z2])\n\t" /* store z2 */
                  "add.l %[wre], %%d5\n\t"      /* d5 = a0im + (t2+t6) => a0im */
                  "movem.l %%d4-%%d5, (-8, %[z])\n\t" /* store z0 (z already advanced) */

                  "movem.l (%[z1]), %%d4-%%d5\n\t" /* load z1 */
                  "move.l %%d4, %%d6\n\t"

                  "sub.l %%d3, %%d6\n\t"        /* d6 = a1re - (t2-t6) => a3re */
                  "add.l %%d3, %%d4\n\t"        /* d4 = a1re + (t2-t6) => a1re */

                  "move.l %%d5, %%d7\n\t"
                  "sub.l %[wim], %%d7\n\t"
                  "movem.l %%d6-%%d7, (%[z3])\n\t" /* store z3 */
                  "add.l %[wim], %%d5\n\t"
                  "movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */

                  : [wre] "+r" (wre), [wim] "+r" (wim), /* we clobber these after using them */
                    [z] "+a" (z)
                  : [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n])
                  : "d3", "d4", "d5", "d6", "d7", "cc", "memory");
    return z;
}
262
263static inline FFTComplex* TRANSFORM_W01(FFTComplex * z, unsigned int n, const FFTSample * w)
264{
265 return TRANSFORM(z, n, w[0], w[1]);
266}
267
268static inline FFTComplex* TRANSFORM_W10(FFTComplex * z, unsigned int n, const FFTSample * w)
269{
270 return TRANSFORM(z, n, w[1], w[0]);
271}
272
/* TRANSFORM specialised for the twiddle pair (1, 0): no rotation is
 * needed, so the whole butterfly is plain adds/subs on data registers and
 * the EMAC unit stays idle.  Updates z[0], z[n], z[2n], z[3n] in place and
 * returns z+1 for the caller's loop. */
static inline FFTComplex* TRANSFORM_ZERO(FFTComplex * z, unsigned int n)
{
    asm volatile("movem.l (%[z]), %%d4-%%d5\n\t"     /* load z0 */
                 "move.l %%d4, %%d6\n\t"
                 "movem.l (%[z2]), %%d2-%%d3\n\t"    /* load z2 */
                 "movem.l (%[z3]), %%d0-%%d1\n\t"    /* load z3 */
                 "move.l %%d0, %%d7\n\t"
                 "sub.l %%d2, %%d0\n\t"
                 "add.l %%d2, %%d7\n\t"
                 "sub.l %%d7, %%d6\n\t"              /* d6 = a0re - (t5+t1) => a2re */
                 "add.l %%d7, %%d4\n\t"              /* d4 = a0re + (t5+t1) => a0re */

                 "move.l %%d5, %%d7\n\t"
                 "move.l %%d3, %%d2\n\t"
                 "add.l %%d1, %%d2\n\t"
                 "sub.l %%d2, %%d7\n\t"              /* d7 = a0im - (t2+t6) => a2im */
                 "movem.l %%d6-%%d7, (%[z2])\n\t"    /* store z2 */
                 "add.l %%d2, %%d5\n\t"              /* d5 = a0im + (t2+t6) => a0im */
                 "movem.l %%d4-%%d5, (%[z])\n\t"     /* store z0 */

                 "movem.l (%[z1]), %%d4-%%d5\n\t"    /* load z1 */
                 "move.l %%d4, %%d6\n\t"
                 "sub.l %%d1, %%d3\n\t"
                 "sub.l %%d3, %%d6\n\t"              /* d6 = a1re - (t2-t6) => a3re */
                 "add.l %%d3, %%d4\n\t"              /* d4 = a1re + (t2-t6) => a1re */

                 "move.l %%d5, %%d7\n\t"
                 "sub.l %%d0, %%d7\n\t"
                 "movem.l %%d6-%%d7, (%[z3])\n\t"    /* store z3 */
                 "add.l %%d0, %%d5\n\t"

                 "movem.l %%d4-%%d5, (%[z1])\n\t"    /* store z1 */

                 :
                 : [z] "a" (z), [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n])
                 : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
    return z+1;
}
311
/* TRANSFORM specialised for wre == wim == cPI2_8 (the 45-degree twiddle):
 * all four products share the single constant, queued through acc0-acc3
 * while the next operands stream in.  Updates z[0], z[n], z[2n], z[3n]
 * in place and returns z+1 for the caller's loop.
 * NOTE(review): cPI2_8 comes from the including file -- see fft8(). */
static inline FFTComplex* TRANSFORM_EQUAL(FFTComplex * z, unsigned int n)
{
    asm volatile ("movem.l (%[z2]), %%d0-%%d1\n\t"
                  "move.l %[_cPI2_8], %%d2\n\t"
                  /* queue z2*c and z3*c, loading z3 and z0 on the fly */
                  "mac.l %%d0, %%d2, (%[z3]), %%d0, %%acc0\n\t"
                  "mac.l %%d1, %%d2, (4, %[z3]), %%d1, %%acc1\n\t"
                  "mac.l %%d0, %%d2, (%[z]), %%d4, %%acc2\n\t"
                  "mac.l %%d1, %%d2, (4, %[z]), %%d5, %%acc3\n\t"

                  "movclr.l %%acc0, %%d0\n\t"
                  "movclr.l %%acc1, %%d1\n\t"
                  "movclr.l %%acc2, %%d2\n\t"
                  "movclr.l %%acc3, %%d3\n\t"

                  "move.l %%d0, %%d7\n\t"
                  "add.l %%d1, %%d0\n\t"  /* d0 == t1 */
                  "sub.l %%d7, %%d1\n\t"  /* d1 == t2 */

                  "move.l %%d3, %%d7\n\t"
                  "add.l %%d2, %%d3\n\t"  /* d3 == t6 */
                  "sub.l %%d7, %%d2\n\t"  /* d2 == t5 */

                  "move.l %%d4, %%d6\n\t"
                  "move.l %%d2, %%d7\n\t"
                  "sub.l %%d0, %%d2\n\t"  /* t5 = t5-t1 */
                  "add.l %%d0, %%d7\n\t"
                  "sub.l %%d7, %%d6\n\t"  /* d6 = a0re - (t5+t1) => a2re */
                  "add.l %%d7, %%d4\n\t"  /* d4 = a0re + (t5+t1) => a0re */

                  "move.l %%d1, %%d0\n\t"
                  "add.l %%d3, %%d0\n\t"
                  "sub.l %%d3, %%d1\n\t"  /* t2 = t6-t2 */
                  "move.l %%d5, %%d7\n\t"
                  "sub.l %%d0, %%d7\n\t"  /* d7 = a0im - (t2+t6) => a2im */

                  "movem.l %%d6-%%d7, (%[z2])\n\t" /* store z2 */
                  "add.l %%d0, %%d5\n\t"  /* d5 = a0im + (t2+t6) => a0im */
                  "movem.l %%d4-%%d5, (%[z])\n\t"  /* store z0 */

                  "movem.l (%[z1]), %%d4-%%d5\n\t" /* load z1 */
                  "move.l %%d4, %%d6\n\t"

                  "sub.l %%d1, %%d6\n\t"  /* d6 = a1re - (t2-t6) => a3re */
                  "add.l %%d1, %%d4\n\t"  /* d4 = a1re + (t2-t6) => a1re */

                  "move.l %%d5, %%d7\n\t"
                  "sub.l %%d2, %%d7\n\t"
                  "movem.l %%d6-%%d7, (%[z3])\n\t" /* store z3 */
                  "add.l %%d2, %%d5\n\t"
                  "movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */

                  :: [z] "a" (z), [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n]),
                     [_cPI2_8] "i" (cPI2_8)
                  : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");

    return z+1;
}
369
#endif /* CPU_COLDFIRE */