summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2007-10-17 15:46:09 +0000
committerJens Arnold <amiconn@rockbox.org>2007-10-17 15:46:09 +0000
commite1b4bf7a4dca8cf66b184864594a0cb551d04134 (patch)
treee16989e7bdcb1d0e1a8aee325f610c8a50f03dec
parenta25b76e3bd7a7a381a23e494919f6e01e80c9655 (diff)
downloadrockbox-e1b4bf7a4dca8cf66b184864594a0cb551d04134.tar.gz
rockbox-e1b4bf7a4dca8cf66b184864594a0cb551d04134.zip
Mpegplayer: Assembler optimised motion compensation for coldfire (just the variants that are assemblerised for ARM) for a nice speedup.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15168 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/plugins/mpegplayer/SOURCES5
-rw-r--r--apps/plugins/mpegplayer/motion_comp_coldfire_c.c38
-rw-r--r--apps/plugins/mpegplayer/motion_comp_coldfire_s.S434
3 files changed, 474 insertions, 3 deletions
diff --git a/apps/plugins/mpegplayer/SOURCES b/apps/plugins/mpegplayer/SOURCES
index 3d5a4c2375..e7e2a7a0de 100644
--- a/apps/plugins/mpegplayer/SOURCES
+++ b/apps/plugins/mpegplayer/SOURCES
@@ -6,11 +6,10 @@ motion_comp.c
6 6
7#ifdef CPU_COLDFIRE 7#ifdef CPU_COLDFIRE
8idct_coldfire.S 8idct_coldfire.S
9motion_comp_coldfire_c.c
10motion_comp_coldfire_s.S
9#elif defined CPU_ARM 11#elif defined CPU_ARM
10idct_arm.S 12idct_arm.S
11#endif
12
13#ifdef CPU_ARM
14motion_comp_arm_c.c 13motion_comp_arm_c.c
15motion_comp_arm_s.S 14motion_comp_arm_s.S
16#else /* other CPU or SIM */ 15#else /* other CPU or SIM */
diff --git a/apps/plugins/mpegplayer/motion_comp_coldfire_c.c b/apps/plugins/mpegplayer/motion_comp_coldfire_c.c
new file mode 100644
index 0000000000..b97e3510e7
--- /dev/null
+++ b/apps/plugins/mpegplayer/motion_comp_coldfire_c.c
@@ -0,0 +1,38 @@
1/*
2 * Based on:
3 * motion_comp_arm.c
4 * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
5 *
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
7 * See http://libmpeg2.sourceforge.net/ for updates.
8 *
9 * mpeg2dec is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * mpeg2dec is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23#include <inttypes.h>
24#include "mpeg2.h"
25#include "attributes.h"
26#include "mpeg2_internal.h"
27#include "motion_comp.h"
28
29/* definitions of the actual mc functions */
30
/* MC_FUNC(op, xy) is a macro from motion_comp.h; it presumably expands to
 * the motion-compensation routine bodies MC_<op>_<xy>_8 / MC_<op>_<xy>_16
 * (TODO confirm against motion_comp.h).  The "put o" and "put x" variants
 * are provided in assembler (motion_comp_coldfire_s.S), so their C
 * expansions are commented out below; all other variants stay in C. */
31/* MC_FUNC (put, o) <= ASM */
32MC_FUNC (avg, o)
33/* MC_FUNC (put, x) <= ASM */
34MC_FUNC (avg, x)
35MC_FUNC (put, y)
36MC_FUNC (avg, y)
37MC_FUNC (put, xy)
38MC_FUNC (avg, xy)
diff --git a/apps/plugins/mpegplayer/motion_comp_coldfire_s.S b/apps/plugins/mpegplayer/motion_comp_coldfire_s.S
new file mode 100644
index 0000000000..ecb46c91be
--- /dev/null
+++ b/apps/plugins/mpegplayer/motion_comp_coldfire_s.S
@@ -0,0 +1,434 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2007 Jens Arnold
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
| LEFT8_PW: shift the packed-byte pair dW1:dW2 left by one byte, keeping
| the high longword:  dW1 = (dW1 << 8) | (dW2 >> 24).  dW2 is preserved
| so it can be chained as the dW1 of the next invocation.
20.macro LEFT8_PW dW1, dW2 | needs %d0 == 24, clobbers %d2
21 lsl.l #8, \dW1 | changes dW1, keeps dW2
22 move.l \dW2, %d2
23 lsr.l %d0, %d2 | %d2 = dW2 >> 24 (top byte of dW2)
24 or.l %d2, \dW1
25.endm
26
| LEFT24_PW: shift the packed-byte pair dW1:dW2 left by three bytes,
| keeping the high longword:  dW1 = (dW1 << 24) | (dW2 >> 8).  dW2 is
| preserved so it can be chained as the dW1 of the next invocation.
27.macro LEFT24_PW dW1, dW2 | needs %d0 == 24, clobbers %d2
28 lsl.l %d0, \dW1 | changes dW1, keeps dW2
29 move.l \dW2, %d2
30 lsr.l #8, %d2 | %d2 = dW2 >> 8 (top 3 bytes of dW2)
31 or.l %d2, \dW1
32.endm
33
34/*****************************************************************************/
35
|-----------------------------------------------------------------------
| MC_put_o_8 -- copy an 8-byte-wide block, no interpolation.
| Stack args: 4(%sp)=dest, 8(%sp)=source, 12(%sp)=stride, 16(%sp)=height
| (argument roles per the register-load comments below).
| The source pointer is rounded down to a longword boundary; the two low
| address bits select one of four specialised copy loops through the
| pc-relative jump table.  Each loop reads aligned longwords and shifts
| the byte stream into place before storing 8 bytes per row to dest
| (dest is assumed longword-aligned -- TODO confirm against callers).
|-----------------------------------------------------------------------
36 .align 2
37 .global MC_put_o_8
38 .type MC_put_o_8, @function
39
40MC_put_o_8:
41 movem.l (4,%sp), %a0-%a1 | dest, source
42 move.l %a1, %d0
43 and.l #3, %d0 | %d0 = source misalignment (0..3)
44 sub.l %d0, %a1 | align source
45 jmp.l (2, %pc, %d0.l*4) | dispatch: jump table of bra.w entries
46 bra.w .po8_0
47 bra.w .po8_1
48 bra.w .po8_2
49 | last table entry coincides with target
50
| Source 3 bytes past alignment: read 3 longwords, shift left 24 bits.
51.po8_3:
52 lea.l (-5*4,%sp), %sp
53 movem.l %d2-%d5/%a2, (%sp) | save some registers
54 move.l (5*4+12,%sp), %a2 | stride
55 move.l (5*4+16,%sp), %d1 | height
56 moveq.l #24, %d0 | shift amount
571:
58 movem.l (%a1), %d3-%d5 | fetch 12 aligned source bytes
59 add.l %a2, %a1
60 LEFT24_PW %d3, %d4
61 lsl.l %d0, %d4 | open-coded tail of the chain:
62 lsr.l #8, %d5 | %d4 = (%d4 << 24) | (%d5 >> 8)
63 or.l %d5, %d4
64 movem.l %d3-%d4, (%a0) | store the 8 recovered bytes
65 add.l %a2, %a0
66 subq.l #1, %d1
67 bne.s 1b
68 movem.l (%sp), %d2-%d5/%a2
69 lea.l (5*4,%sp), %sp
70 rts
71
| Source 2 bytes past alignment: shift left 16 bits via swap/move.w.
72.po8_2:
73 lea.l (-3*4,%sp), %sp
74 movem.l %d2-%d4, (%sp) | save some registers
75 movem.l (3*4+12,%sp), %d0-%d1 | stride, height
761:
77 movem.l (%a1), %d2-%d4 | fetch 12 aligned source bytes
78 add.l %d0, %a1
79 swap %d2 | swap halves, then pull the high
80 swap %d3 | word of the next register into
81 move.w %d3, %d2 | the low word: a 16-bit left
82 swap %d4 | shift across the register chain
83 move.w %d4, %d3
84 movem.l %d2-%d3, (%a0)
85 add.l %d0, %a0
86 subq.l #1, %d1
87 bne.s 1b
88 movem.l (%sp), %d2-%d4
89 lea.l (3*4,%sp), %sp
90 rts
91
| Source 1 byte past alignment: read 3 longwords, shift left 8 bits.
92.po8_1:
93 lea.l (-5*4,%sp), %sp
94 movem.l %d2-%d5/%a2, (%sp) | save some registers
95 move.l (5*4+12,%sp), %a2 | stride
96 move.l (5*4+16,%sp), %d1 | height
97 moveq.l #24, %d0 | shift amount
981:
99 movem.l (%a1), %d3-%d5 | fetch 12 aligned source bytes
100 add.l %a2, %a1
101 LEFT8_PW %d3, %d4
102 lsl.l #8, %d4 | open-coded tail of the chain:
103 lsr.l %d0, %d5 | %d4 = (%d4 << 8) | (%d5 >> 24)
104 or.l %d5, %d4
105 movem.l %d3-%d4, (%a0)
106 add.l %a2, %a0
107 subq.l #1, %d1
108 bne.s 1b
109 movem.l (%sp), %d2-%d5/%a2
110 lea.l (5*4,%sp), %sp
111 rts
112
| Source already longword-aligned: plain two-longword copy per row.
113.po8_0:
114 movem.l (12,%sp), %d0-%d1 | stride, height
115 subq.l #4, %d0 | adjust for increment within the loop
1161:
117 move.l (%a1)+, (%a0)+ | post-increment advances by 4, so
118 move.l (%a1), (%a0) | adding stride-4 lands on next row
119 add.l %d0, %a0
120 add.l %d0, %a1
121 subq.l #1, %d1
122 bne.s 1b
123 rts
124
125/*****************************************************************************/
126
|-----------------------------------------------------------------------
| MC_put_o_16 -- copy a 16-byte-wide block, no interpolation.
| Stack args: 4(%sp)=dest, 8(%sp)=source, 12(%sp)=stride, 16(%sp)=height
| (argument roles per the register-load comments below).
| Same structure as MC_put_o_8, but four longwords per row: the source
| is longword-aligned and the two low address bits pick one of four
| loops that realign the byte stream with LEFT8_PW/LEFT24_PW or the
| swap/move.w 16-bit shift before storing to dest.
|-----------------------------------------------------------------------
127 .align 2
128 .global MC_put_o_16
129 .type MC_put_o_16, @function
130
131MC_put_o_16:
132 lea.l (-7*4,%sp), %sp
133 movem.l %d2-%d7/%a2, (%sp) | save some registers
134 movem.l (7*4+4,%sp), %a0-%a2| dest, source, stride
135 move.l (7*4+16,%sp), %d1 | height
136 move.l %a1, %d0
137 and.l #3, %d0 | %d0 = source misalignment (0..3)
138 sub.l %d0, %a1 | align source
139 jmp.l (2, %pc, %d0.l*4) | dispatch: jump table of bra.w entries
140 bra.w .po16_0
141 bra.w .po16_1
142 bra.w .po16_2
143 | last table entry coincides with target
144
| Source 3 bytes past alignment: read 5 longwords, shift left 24 bits.
145.po16_3:
146 moveq.l #24, %d0 | shift amount
1471:
148 movem.l (%a1), %d3-%d7 | fetch 20 aligned source bytes
149 add.l %a2, %a1
150 LEFT24_PW %d3, %d4
151 LEFT24_PW %d4, %d5
152 LEFT24_PW %d5, %d6
153 lsl.l %d0, %d6 | open-coded tail of the chain:
154 lsr.l #8, %d7 | %d6 = (%d6 << 24) | (%d7 >> 8)
155 or.l %d7, %d6
156 movem.l %d3-%d6, (%a0) | store the 16 recovered bytes
157 add.l %a2, %a0
158 subq.l #1, %d1
159 bne.s 1b
160 movem.l (%sp), %d2-%d7/%a2
161 lea.l (7*4,%sp), %sp
162 rts
163
| Source 2 bytes past alignment: shift left 16 bits via swap/move.w.
164.po16_2:
1651:
166 movem.l (%a1), %d3-%d7 | fetch 20 aligned source bytes
167 add.l %a2, %a1
168 swap %d3 | swap halves, then pull the high
169 swap %d4 | word of the next register into
170 move.w %d4, %d3 | the low word: a 16-bit left
171 swap %d5 | shift across the register chain
172 move.w %d5, %d4
173 swap %d6
174 move.w %d6, %d5
175 swap %d7
176 move.w %d7, %d6
177 movem.l %d3-%d6, (%a0)
178 add.l %a2, %a0
179 subq.l #1, %d1
180 bne.s 1b
181 movem.l (%sp), %d2-%d7/%a2
182 lea.l (7*4,%sp), %sp
183 rts
184
| Source 1 byte past alignment: read 5 longwords, shift left 8 bits.
185.po16_1:
186 moveq.l #24, %d0 | shift amount
1871:
188 movem.l (%a1), %d3-%d7 | fetch 20 aligned source bytes
189 add.l %a2, %a1
190 LEFT8_PW %d3, %d4
191 LEFT8_PW %d4, %d5
192 LEFT8_PW %d5, %d6
193 lsl.l #8, %d6 | open-coded tail of the chain:
194 lsr.l %d0, %d7 | %d6 = (%d6 << 8) | (%d7 >> 24)
195 or.l %d7, %d6
196 movem.l %d3-%d6, (%a0)
197 add.l %a2, %a0
198 subq.l #1, %d1
199 bne.s 1b
200 movem.l (%sp), %d2-%d7/%a2
201 lea.l (7*4,%sp), %sp
202 rts
203
| Source already longword-aligned: plain four-longword copy per row.
204.po16_0:
2051:
206 movem.l (%a1), %d3-%d6
207 add.l %a2, %a1
208 movem.l %d3-%d6, (%a0)
209 add.l %a2, %a0
210 subq.l #1, %d1
211 bne.s 1b
212 movem.l (%sp), %d2-%d7/%a2
213 lea.l (7*4,%sp), %sp
214 rts
215
216/*****************************************************************************/
217
| AVG_PW: byte-wise average of dW1 with its one-byte-left-shifted
| continuation, i.e. each byte with its right neighbour -- the MPEG-2
| half-pel "x" interpolation.  First forms b = (dW1 << 8) | (dW2 >> 24)
| in dW1, then with a = original dW1 computes, per byte and carry-free:
|   dW1 = (a & b) + (((a ^ b) & 0xfe) >> 1) + ((a ^ b) & 1)
|       = (a + b + 1) >> 1   (average rounded up, no inter-byte carry)
218.macro AVG_PW dW1, dW2 | needs %d0 == 24, clobbers %d1, %d2,
219 move.l \dW1, %d1 | changes dW1, keeps dW2
220 lsl.l #8, \dW1 | dW1 = b = (a << 8) | (dW2 >> 24)
221 move.l \dW2, %d2
222 lsr.l %d0, %d2
223 or.l %d2, \dW1
224 move.l %d1, %d2 | %d2 = a
225 eor.l \dW1, %d1 | %d1 = a ^ b
226 and.l %d2, \dW1 | dW1 = a & b
227 move.l #0xfefefefe, %d2 | mask keeps bit 0 of each byte out
228 and.l %d1, %d2 | %d2 = (a ^ b) & 0xfefefefe
229 eor.l %d2, %d1 | %d1 = (a ^ b) & 0x01010101 (round-up bits)
230 lsr.l #1, %d2 | %d2 = per-byte (a ^ b) >> 1
231 add.l %d2, \dW1
232 add.l %d1, \dW1
233.endm
234
235/*****************************************************************************/
236
|-----------------------------------------------------------------------
| MC_put_x_8 -- 8-byte-wide block copy with horizontal half-pel
| interpolation: each output byte is the rounded-up average of a source
| byte and its right neighbour (done by AVG_PW).
| Stack args: 4(%sp)=dest, 8(%sp)=source, 12(%sp)=stride, 16(%sp)=height
| (argument roles per the register-load comments below).
| 9 source bytes are needed per row, so 3 aligned longwords are read;
| dispatch on the 2 low source-address bits as in MC_put_o_8.  Height
| lives in %d6 because AVG_PW clobbers %d1/%d2 and needs %d0 == 24.
|-----------------------------------------------------------------------
237 .align 2
238 .global MC_put_x_8
239 .type MC_put_x_8, @function
240
241MC_put_x_8:
242 lea.l (-6*4,%sp), %sp
243 movem.l %d2-%d6/%a2, (%sp) | save some registers
244 movem.l (6*4+4,%sp), %a0-%a2| dest, source, stride
245 move.l (6*4+16,%sp), %d6 | height
246 move.l %a1, %d0
247 and.l #3, %d0 | %d0 = source misalignment (0..3)
248 sub.l %d0, %a1 | align source
249 jmp.l (2, %pc, %d0.l*4) | dispatch: jump table of bra.w entries
250 bra.w .px8_0
251 bra.w .px8_1
252 bra.w .px8_2
253 | last table entry coincides with target
254
| Source 3 bytes past alignment: realign left by 24 bits, then average.
255.px8_3:
256 moveq.l #24, %d0 | shift amount for LEFT24_PW/AVG_PW
2571:
258 movem.l (%a1), %d3-%d5 | fetch 12 aligned source bytes
259 add.l %a2, %a1
260 LEFT24_PW %d3, %d4
261 LEFT24_PW %d4, %d5
262 lsl.l %d0, %d5 | %d5 now holds the trailing bytes
263 AVG_PW %d3, %d4 | average each byte with its right
264 AVG_PW %d4, %d5 | neighbour (9th byte from %d5)
265 movem.l %d3-%d4, (%a0) | store 8 interpolated bytes
266 add.l %a2, %a0
267 subq.l #1, %d6
268 bne.s 1b
269 movem.l (%sp), %d2-%d6/%a2
270 lea.l (6*4,%sp), %sp
271 rts
272
| Source 2 bytes past alignment: realign left 16 bits via swap/move.w.
273.px8_2:
274 moveq.l #24, %d0 | shift amount for AVG_PW
2751:
276 movem.l (%a1), %d3-%d5 | fetch 12 aligned source bytes
277 add.l %a2, %a1
278 swap %d3 | 16-bit left shift across the
279 swap %d4 | register chain, as in MC_put_o_8
280 move.w %d4, %d3
281 swap %d5
282 move.w %d5, %d4
283 AVG_PW %d3, %d4
284 AVG_PW %d4, %d5
285 movem.l %d3-%d4, (%a0)
286 add.l %a2, %a0
287 subq.l #1, %d6
288 bne.s 1b
289 movem.l (%sp), %d2-%d6/%a2
290 lea.l (6*4,%sp), %sp
291 rts
292
| Source 1 byte past alignment: realign left by 8 bits, then average.
293.px8_1:
294 moveq.l #24, %d0 | shift amount for LEFT8_PW/AVG_PW
2951:
296 movem.l (%a1), %d3-%d5 | fetch 12 aligned source bytes
297 add.l %a2, %a1
298 LEFT8_PW %d3, %d4
299 LEFT8_PW %d4, %d5
300 lsl.l #8, %d5
301 AVG_PW %d3, %d4
302 AVG_PW %d4, %d5
303 movem.l %d3-%d4, (%a0)
304 add.l %a2, %a0
305 subq.l #1, %d6
306 bne.s 1b
307 movem.l (%sp), %d2-%d6/%a2
308 lea.l (6*4,%sp), %sp
309 rts
310
| Source already longword-aligned: average directly.
311.px8_0:
312 moveq.l #24, %d0 | shift amount for AVG_PW
3131:
314 movem.l (%a1), %d3-%d5 | %d5 supplies the 9th byte
315 add.l %a2, %a1
316 AVG_PW %d3, %d4
317 AVG_PW %d4, %d5
318 movem.l %d3-%d4, (%a0)
319 add.l %a2, %a0
320 subq.l #1, %d6
321 bne.s 1b
322 movem.l (%sp), %d2-%d6/%a2
323 lea.l (6*4,%sp), %sp
324 rts
325
326/*****************************************************************************/
327
|-----------------------------------------------------------------------
| MC_put_x_16 -- 16-byte-wide block copy with horizontal half-pel
| interpolation (rounded-up average of each byte and its right
| neighbour, via AVG_PW).
| Stack args: 4(%sp)=dest, 8(%sp)=source, 12(%sp)=stride, 16(%sp)=height
| (argument roles per the register-load comments below).
| 17 source bytes are needed per row, so 5 aligned longwords are read.
| All of %d1-%d7 are consumed by data/AVG_PW scratch, so height is kept
| in %a3; address-register arithmetic does not set the condition codes,
| hence the explicit tst.l after each subq.l.  bne.w: the loop bodies
| here exceed the short-branch displacement range.
|-----------------------------------------------------------------------
328 .align 2
329 .global MC_put_x_16
330 .type MC_put_x_16, @function
331
332MC_put_x_16:
333 lea.l (-8*4,%sp), %sp
334 movem.l %d2-%d7/%a2-%a3, (%sp) | save some registers
335 movem.l (8*4+4,%sp), %a0-%a3 | dest, source, stride, height
336 move.l %a1, %d0
337 and.l #3, %d0 | %d0 = source misalignment (0..3)
338 sub.l %d0, %a1 | align source
339 jmp.l (2, %pc, %d0.l*4) | dispatch: jump table of bra.w entries
340 bra.w .px16_0
341 bra.w .px16_1
342 bra.w .px16_2
343 | last table entry coincides with target
344
| Source 3 bytes past alignment: realign left by 24 bits, then average.
345.px16_3:
346 moveq.l #24, %d0 | shift amount for LEFT24_PW/AVG_PW
3471:
348 movem.l (%a1), %d3-%d7 | fetch 20 aligned source bytes
349 add.l %a2, %a1
350 LEFT24_PW %d3, %d4
351 LEFT24_PW %d4, %d5
352 LEFT24_PW %d5, %d6
353 LEFT24_PW %d6, %d7
354 lsl.l %d0, %d7 | %d7 now holds the trailing bytes
355 AVG_PW %d3, %d4 | average each byte with its right
356 AVG_PW %d4, %d5 | neighbour (17th byte from %d7)
357 AVG_PW %d5, %d6
358 AVG_PW %d6, %d7
359 movem.l %d3-%d6, (%a0) | store 16 interpolated bytes
360 add.l %a2, %a0
361 subq.l #1, %a3
362 tst.l %a3 | subq on an address reg sets no flags
363 bne.w 1b
364 movem.l (%sp), %d2-%d7/%a2-%a3
365 lea.l (8*4,%sp), %sp
366 rts
367
| Source 2 bytes past alignment: realign left 16 bits via swap/move.w.
368.px16_2:
369 moveq.l #24, %d0 | shift amount for AVG_PW
3701:
371 movem.l (%a1), %d3-%d7 | fetch 20 aligned source bytes
372 add.l %a2, %a1
373 swap %d3 | 16-bit left shift across the
374 swap %d4 | register chain, as in MC_put_o_16
375 move.w %d4, %d3
376 swap %d5
377 move.w %d5, %d4
378 swap %d6
379 move.w %d6, %d5
380 swap %d7
381 move.w %d7, %d6
382 AVG_PW %d3, %d4
383 AVG_PW %d4, %d5
384 AVG_PW %d5, %d6
385 AVG_PW %d6, %d7
386 movem.l %d3-%d6, (%a0)
387 add.l %a2, %a0
388 subq.l #1, %a3
389 tst.l %a3 | subq on an address reg sets no flags
390 bne.w 1b
391 movem.l (%sp), %d2-%d7/%a2-%a3
392 lea.l (8*4,%sp), %sp
393 rts
394
| Source 1 byte past alignment: realign left by 8 bits, then average.
395.px16_1:
396 moveq.l #24, %d0 | shift amount for LEFT8_PW/AVG_PW
3971:
398 movem.l (%a1), %d3-%d7 | fetch 20 aligned source bytes
399 add.l %a2, %a1
400 LEFT8_PW %d3, %d4
401 LEFT8_PW %d4, %d5
402 LEFT8_PW %d5, %d6
403 LEFT8_PW %d6, %d7
404 lsl.l #8, %d7
405 AVG_PW %d3, %d4
406 AVG_PW %d4, %d5
407 AVG_PW %d5, %d6
408 AVG_PW %d6, %d7
409 movem.l %d3-%d6, (%a0)
410 add.l %a2, %a0
411 subq.l #1, %a3
412 tst.l %a3 | subq on an address reg sets no flags
413 bne.w 1b
414 movem.l (%sp), %d2-%d7/%a2-%a3
415 lea.l (8*4,%sp), %sp
416 rts
417
| Source already longword-aligned: average directly.
418.px16_0:
419 moveq.l #24, %d0 | shift amount for AVG_PW
4201:
421 movem.l (%a1), %d3-%d7 | %d7 supplies the 17th byte
422 add.l %a2, %a1
423 AVG_PW %d3, %d4
424 AVG_PW %d4, %d5
425 AVG_PW %d5, %d6
426 AVG_PW %d6, %d7
427 movem.l %d3-%d6, (%a0)
428 add.l %a2, %a0
429 subq.l #1, %a3
430 tst.l %a3 | subq on an address reg sets no flags
431 bne.w 1b
432 movem.l (%sp), %d2-%d7/%a2-%a3
433 lea.l (8*4,%sp), %sp
434 rts