diff options
Diffstat (limited to 'apps/plugins')
-rw-r--r-- | apps/plugins/mpegplayer/SOURCES | 4 | ||||
-rw-r--r-- | apps/plugins/mpegplayer/decode.c | 12 | ||||
-rw-r--r-- | apps/plugins/mpegplayer/idct.c | 17 | ||||
-rw-r--r-- | apps/plugins/mpegplayer/idct_coldfire.S | 574 | ||||
-rw-r--r-- | apps/plugins/mpegplayer/mpeg2_internal.h | 6 |
5 files changed, 611 insertions, 2 deletions
diff --git a/apps/plugins/mpegplayer/SOURCES b/apps/plugins/mpegplayer/SOURCES index 6629cf7a4c..004c6395a2 100644 --- a/apps/plugins/mpegplayer/SOURCES +++ b/apps/plugins/mpegplayer/SOURCES | |||
@@ -13,6 +13,10 @@ idct.c | |||
13 | motion_comp_c.c | 13 | motion_comp_c.c |
14 | #endif /* CPU_* */ | 14 | #endif /* CPU_* */ |
15 | 15 | ||
16 | #ifdef CPU_COLDFIRE | ||
17 | idct_coldfire.S | ||
18 | #endif | ||
19 | |||
16 | slice.c | 20 | slice.c |
17 | video_out_rockbox.c | 21 | video_out_rockbox.c |
18 | mpeg_settings.c | 22 | mpeg_settings.c |
diff --git a/apps/plugins/mpegplayer/decode.c b/apps/plugins/mpegplayer/decode.c index 299abc9663..ca3d29a952 100644 --- a/apps/plugins/mpegplayer/decode.c +++ b/apps/plugins/mpegplayer/decode.c | |||
@@ -401,6 +401,12 @@ void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset) | |||
401 | 401 | ||
402 | } | 402 | } |
403 | 403 | ||
404 | #ifdef CPU_COLDFIRE | ||
405 | /* twice as large as on other targets because coldfire uses | ||
406 | * a secondary, transposed buffer for optimisation */ | ||
407 | static int16_t static_dct_block[128] IBSS_ATTR ATTR_ALIGN(16); | ||
408 | #endif | ||
409 | |||
404 | mpeg2dec_t * mpeg2_init (void) | 410 | mpeg2dec_t * mpeg2_init (void) |
405 | { | 411 | { |
406 | mpeg2dec_t * mpeg2dec; | 412 | mpeg2dec_t * mpeg2dec; |
@@ -410,7 +416,11 @@ mpeg2dec_t * mpeg2_init (void) | |||
410 | mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t), | 416 | mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t), |
411 | MPEG2_ALLOC_MPEG2DEC); | 417 | MPEG2_ALLOC_MPEG2DEC); |
412 | if (mpeg2dec == NULL) | 418 | if (mpeg2dec == NULL) |
413 | return NULL; | 419 | return NULL; |
420 | |||
421 | #ifdef CPU_COLDFIRE | ||
422 | mpeg2dec->decoder.DCTblock = static_dct_block; | ||
423 | #endif | ||
414 | 424 | ||
415 | rb->memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof (int16_t)); | 425 | rb->memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof (int16_t)); |
416 | rb->memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t)); | 426 | rb->memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t)); |
diff --git a/apps/plugins/mpegplayer/idct.c b/apps/plugins/mpegplayer/idct.c index bf705c6a2f..bf7097401e 100644 --- a/apps/plugins/mpegplayer/idct.c +++ b/apps/plugins/mpegplayer/idct.c | |||
@@ -76,6 +76,14 @@ uint8_t mpeg2_clip[3840 * 2 + 256] IBSS_ATTR; | |||
76 | #define CLIP(i) ((mpeg2_clip + 3840)[i]) | 76 | #define CLIP(i) ((mpeg2_clip + 3840)[i]) |
77 | #endif | 77 | #endif |
78 | 78 | ||
79 | #ifdef CPU_COLDFIRE | ||
80 | /* assembler functions */ | ||
81 | extern void mpeg2_idct_copy_coldfire(int16_t * block, uint8_t * dest, | ||
82 | const int stride); | ||
83 | extern void mpeg2_idct_add_coldfire(const int last, int16_t * block, | ||
84 | uint8_t * dest, const int stride); | ||
85 | #else /* !CPU_COLDFIRE */ | ||
86 | |||
79 | #if 0 | 87 | #if 0 |
80 | #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ | 88 | #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ |
81 | do { \ | 89 | do { \ |
@@ -258,6 +266,8 @@ static void mpeg2_idct_add_c (const int last, int16_t * block, | |||
258 | } | 266 | } |
259 | } | 267 | } |
260 | 268 | ||
269 | #endif /* !CPU_COLDFIRE */ | ||
270 | |||
261 | void mpeg2_idct_init (void) | 271 | void mpeg2_idct_init (void) |
262 | { | 272 | { |
263 | extern uint8_t default_mpeg2_scan_norm[64]; | 273 | extern uint8_t default_mpeg2_scan_norm[64]; |
@@ -266,8 +276,13 @@ void mpeg2_idct_init (void) | |||
266 | extern uint8_t mpeg2_scan_alt[64]; | 276 | extern uint8_t mpeg2_scan_alt[64]; |
267 | int i, j; | 277 | int i, j; |
268 | 278 | ||
279 | #ifdef CPU_COLDFIRE | ||
280 | mpeg2_idct_copy = mpeg2_idct_copy_coldfire; | ||
281 | mpeg2_idct_add = mpeg2_idct_add_coldfire; | ||
282 | #else | ||
269 | mpeg2_idct_copy = mpeg2_idct_copy_c; | 283 | mpeg2_idct_copy = mpeg2_idct_copy_c; |
270 | mpeg2_idct_add = mpeg2_idct_add_c; | 284 | mpeg2_idct_add = mpeg2_idct_add_c; |
285 | #endif | ||
271 | 286 | ||
272 | #if !defined(CPU_COLDFIRE) && !defined(CPU_ARM) | 287 | #if !defined(CPU_COLDFIRE) && !defined(CPU_ARM) |
273 | for (i = -3840; i < 3840 + 256; i++) | 288 | for (i = -3840; i < 3840 + 256; i++) |
diff --git a/apps/plugins/mpegplayer/idct_coldfire.S b/apps/plugins/mpegplayer/idct_coldfire.S new file mode 100644 index 0000000000..007c1a3e98 --- /dev/null +++ b/apps/plugins/mpegplayer/idct_coldfire.S | |||
@@ -0,0 +1,574 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id $ | ||
9 | * | ||
10 | * Copyright (C) 2007 Jens Arnold | ||
11 | * Based on the work of Karim Boucher and Rani Hod | ||
12 | * | ||
13 | * All files in this archive are subject to the GNU General Public License. | ||
14 | * See the file COPYING in the source tree root for full license agreement. | ||
15 | * | ||
16 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
17 | * KIND, either express or implied. | ||
18 | * | ||
19 | ****************************************************************************/ | ||
20 | |||
21 | .global mpeg2_idct_copy_coldfire | ||
22 | .type mpeg2_idct_copy_coldfire, @function | ||
23 | .global mpeg2_idct_add_coldfire | ||
24 | .type mpeg2_idct_add_coldfire, @function | ||
25 | |||
26 | /* The IDCT itself. | ||
27 | * Input: %a0: block pointer | ||
28 | * All registers are preserved. */ | ||
29 | .align 2 | ||
30 | .idct: | ||
31 | lea.l (-15*4,%sp), %sp | ||
32 | movem.l %d0-%d7/%a0-%a6, (%sp) | save all registers | ||
33 | move.l %a0, %a6 | ||
34 | |||
35 | move.l #0, %macsr | signed integer mode | ||
36 | |||
37 | move.l #((2048<<16)+2841), %a0 | W0, W1 | ||
38 | move.l #((2676<<16)+2408), %a1 | W2, W3 | ||
39 | move.l #((2048<<16)+1609), %a2 | W4, W5 | ||
40 | move.l #((1108<<16)+ 565), %a3 | W6, W7 | ||
41 | |||
42 | lea.l (128,%a6), %a4 | secondary, transposed temp buffer | ||
43 | moveq.l #8, %d3 | loop counter | ||
44 | |||
45 | .row_loop: | ||
46 | movem.l (%a6), %d0-%d2/%a5 | fetch (f0, f2, f4, f6, f1, f3, f5, f7) | ||
47 | |||
48 | mac.w %a0l, %d2u, %acc0 | %acc0 = W1 * f1 | ||
49 | mac.w %a1l, %d2l, %acc0 | + W3 * f3 | ||
50 | mac.w %a2l, %a5u, %acc0 | + W5 * f5 | ||
51 | mac.w %a3l, %a5l, %acc0 | + W7 * f7 | ||
52 | |||
53 | mac.w %a1l, %d2u, %acc1 | %acc1 = W3 * f1 | ||
54 | msac.w %a3l, %d2l, %acc1 | - W7 * f3 | ||
55 | msac.w %a0l, %a5u, %acc1 | - W1 * f5 | ||
56 | msac.w %a2l, %a5l, %acc1 | - W5 * f7 | ||
57 | |||
58 | mac.w %a2l, %d2u, %acc2 | %acc2 = W5 * f1 | ||
59 | msac.w %a0l, %d2l, %acc2 | - W1 * f3 | ||
60 | mac.w %a3l, %a5u, %acc2 | + W7 * f5 | ||
61 | mac.w %a1l, %a5l, %acc2 | + W3 * f7 | ||
62 | |||
63 | mac.w %a3l, %d2u, %acc3 | %acc3 = W7 * f1 | ||
64 | msac.w %a2l, %d2l, %acc3 | - W5 * f3 | ||
65 | mac.w %a1l, %a5u, %acc3 | + W3 * f5 | ||
66 | msac.w %a0l, %a5l, %acc3 | - W1 * f7 | ||
67 | |||
68 | lea.l (16,%a6), %a6 | Advance to next row; put here to fill EMAC latency | ||
69 | add.l #(1<<16), %d0 | f0 += 1; | ||
70 | |||
71 | movclr.l %acc0, %d4 | b0 | ||
72 | movclr.l %acc1, %d5 | b1 | ||
73 | movclr.l %acc2, %d6 | b2 | ||
74 | movclr.l %acc3, %d7 | b3 | ||
75 | |||
76 | mac.w %a0u, %d0u, %acc0 | %acc0 = W0 * f0 | ||
77 | mac.w %a2u, %d1u, %acc0 | + W4 * f4 | ||
78 | move.l %acc0, %acc3 | ||
79 | mac.w %a1u, %d0l, %acc0 | + W2 * f2 | ||
80 | mac.w %a3u, %d1l, %acc0 | + W6 * f6 | ||
81 | |||
82 | mac.w %a0u, %d0u, %acc1 | %acc1 = W0 * f0 | ||
83 | msac.w %a2u, %d1u, %acc1 | - W4 * f4 | ||
84 | move.l %acc1, %acc2 | ||
85 | mac.w %a3u, %d0l, %acc1 | + W6 * f2 | ||
86 | msac.w %a1u, %d1l, %acc1 | - W2 * f6 | ||
87 | |||
88 | | ^ move.l %acc1, %acc2 %acc2 = W0 * f0 - W4 * f4 | ||
89 | msac.w %a3u, %d0l, %acc2 | - W6 * f2 | ||
90 | mac.w %a1u, %d1l, %acc2 | + W2 * f6 | ||
91 | |||
92 | | ^ move.l %acc0, %acc3 %acc3 = W0 * f0 + W4 * f4 | ||
93 | msac.w %a1u, %d0l, %acc3 | - W2 * f2 | ||
94 | msac.w %a3u, %d1l, %acc3 | - W6 * f6 | ||
95 | |||
96 | moveq.l #12, %d1 | shift amount | ||
97 | |||
98 | move.l %acc0, %d0 | block[7] = (a0 | ||
99 | sub.l %d4,%d0 | - b0) | ||
100 | asr.l %d1, %d0 | >> 12 | ||
101 | move.w %d0, (7*16,%a4) | ||
102 | |||
103 | move.l %acc1, %d0 | block[6] = (a1 | ||
104 | sub.l %d5,%d0 | - b1) | ||
105 | asr.l %d1, %d0 | >> 12 | ||
106 | move.w %d0, (6*16,%a4) | ||
107 | |||
108 | move.l %acc2, %d0 | block[5] = (a2 | ||
109 | sub.l %d6,%d0 | - b2) | ||
110 | asr.l %d1, %d0 | >> 12 | ||
111 | move.w %d0, (5*16,%a4) | ||
112 | |||
113 | move.l %acc3, %d0 | block[4] = (a3 | ||
114 | sub.l %d7,%d0 | - b3) | ||
115 | asr.l %d1, %d0 | >> 12 | ||
116 | move.w %d0, (4*16,%a4) | ||
117 | |||
118 | movclr.l %acc3, %d0 | block[3] = (a3 | ||
119 | add.l %d7, %d0 | + b3) | ||
120 | asr.l %d1, %d0 | >> 12 | ||
121 | move.w %d0, (3*16,%a4) | ||
122 | |||
123 | movclr.l %acc2, %d0 | block[2] = (a2 | ||
124 | add.l %d6, %d0 | + b2) | ||
125 | asr.l %d1, %d0 | >> 12 | ||
126 | move.w %d0, (2*16,%a4) | ||
127 | |||
128 | movclr.l %acc1, %d0 | block[1] = (a1 | ||
129 | add.l %d5, %d0 | + b1) | ||
130 | asr.l %d1, %d0 | >> 12 | ||
131 | move.w %d0, (1*16,%a4) | ||
132 | |||
133 | movclr.l %acc0, %d0 | block[0] = (a0 | ||
134 | add.l %d4, %d0 | + b0) | ||
135 | asr.l %d1, %d0 | >> 12 | ||
136 | move.w %d0, (%a4)+ | advance to next temp column | ||
137 | |||
138 | subq.l #1, %d3 | loop 8 times | ||
139 | bne.w .row_loop | ||
140 | |||
141 | | %a6 now points to the temp buffer, where we need it. | ||
142 | lea.l (-16-128,%a4), %a4 | point %a4 back to the input block | ||
143 | moveq.l #8, %d3 | loop counter | ||
144 | |||
145 | .col_loop: | ||
146 | movem.l (%a6), %d0-%d2/%a5 | fetch (f0, f2, f4, f6, f1, f3, f5, f7) | ||
147 | |||
148 | mac.w %a0l, %d2u, %acc0 | %acc0 = W1 * f1 | ||
149 | mac.w %a1l, %d2l, %acc0 | + W3 * f3 | ||
150 | mac.w %a2l, %a5u, %acc0 | + W5 * f5 | ||
151 | mac.w %a3l, %a5l, %acc0 | + W7 * f7 | ||
152 | |||
153 | mac.w %a1l, %d2u, %acc1 | %acc1 = W3 * f1 | ||
154 | msac.w %a3l, %d2l, %acc1 | - W7 * f3 | ||
155 | msac.w %a0l, %a5u, %acc1 | - W1 * f5 | ||
156 | msac.w %a2l, %a5l, %acc1 | - W5 * f7 | ||
157 | |||
158 | mac.w %a2l, %d2u, %acc2 | %acc2 = W5 * f1 | ||
159 | msac.w %a0l, %d2l, %acc2 | - W1 * f3 | ||
160 | mac.w %a3l, %a5u, %acc2 | + W7 * f5 | ||
161 | mac.w %a1l, %a5l, %acc2 | + W3 * f7 | ||
162 | |||
163 | mac.w %a3l, %d2u, %acc3 | %acc3 = W7 * f1 | ||
164 | msac.w %a2l, %d2l, %acc3 | - W5 * f3 | ||
165 | mac.w %a1l, %a5u, %acc3 | + W3 * f5 | ||
166 | msac.w %a0l, %a5l, %acc3 | - W1 * f7 | ||
167 | |||
168 | lea.l (16,%a6), %a6 | Advance to next row; put here to fill EMAC latency | ||
169 | add.l #(32<<16), %d0 | DC offset: 0.5 | ||
170 | |||
171 | movclr.l %acc0, %d4 | b0 | ||
172 | movclr.l %acc1, %d5 | b1 | ||
173 | movclr.l %acc2, %d6 | b2 | ||
174 | movclr.l %acc3, %d7 | b3 | ||
175 | |||
176 | mac.w %a0u, %d0u, %acc0 | %acc0 = W0 * f0 | ||
177 | mac.w %a2u, %d1u, %acc0 | + W4 * f4 | ||
178 | move.l %acc0, %acc3 | ||
179 | mac.w %a1u, %d0l, %acc0 | + W2 * f2 | ||
180 | mac.w %a3u, %d1l, %acc0 | + W6 * f6 | ||
181 | |||
182 | mac.w %a0u, %d0u, %acc1 | %acc1 = W0 * f0 | ||
183 | msac.w %a2u, %d1u, %acc1 | - W4 * f4 | ||
184 | move.l %acc1, %acc2 | ||
185 | mac.w %a3u, %d0l, %acc1 | + W6 * f2 | ||
186 | msac.w %a1u, %d1l, %acc1 | - W2 * f6 | ||
187 | |||
188 | | ^ move.l %acc1, %acc2 %acc2 = W0 * f0 - W4 * f4 | ||
189 | msac.w %a3u, %d0l, %acc2 | - W6 * f2 | ||
190 | mac.w %a1u, %d1l, %acc2 | + W2 * f6 | ||
191 | |||
192 | | ^ move.l %acc0, %acc3 %acc3 = W0 * f0 + W4 * f4 | ||
193 | msac.w %a1u, %d0l, %acc3 | - W2 * f2 | ||
194 | msac.w %a3u, %d1l, %acc3 | - W6 * f6 | ||
195 | |||
196 | moveq.l #17, %d1 | shift amount | ||
197 | |||
198 | move.l %acc0, %d0 | block[7] = (a0 | ||
199 | sub.l %d4,%d0 | - b0) | ||
200 | asr.l %d1, %d0 | >> 17 | ||
201 | move.w %d0, (7*16,%a4) | ||
202 | |||
203 | move.l %acc1, %d0 | block[6] = (a1 | ||
204 | sub.l %d5,%d0 | - b1) | ||
205 | asr.l %d1, %d0 | >> 17 | ||
206 | move.w %d0, (6*16,%a4) | ||
207 | |||
208 | move.l %acc2, %d0 | block[5] = (a2 | ||
209 | sub.l %d6,%d0 | - b2) | ||
210 | asr.l %d1, %d0 | >> 17 | ||
211 | move.w %d0, (5*16,%a4) | ||
212 | |||
213 | move.l %acc3, %d0 | block[4] = (a3 | ||
214 | sub.l %d7,%d0 | - b3) | ||
215 | asr.l %d1, %d0 | >> 17 | ||
216 | move.w %d0, (4*16,%a4) | ||
217 | |||
218 | movclr.l %acc3, %d0 | block[3] = (a3 | ||
219 | add.l %d7, %d0 | + b3) | ||
220 | asr.l %d1, %d0 | >> 17 | ||
221 | move.w %d0, (3*16,%a4) | ||
222 | |||
223 | movclr.l %acc2, %d0 | block[2] = (a2 | ||
224 | add.l %d6, %d0 | + b2) | ||
225 | asr.l %d1, %d0 | >> 17 | ||
226 | move.w %d0, (2*16,%a4) | ||
227 | |||
228 | movclr.l %acc1, %d0 | block[1] = (a1 | ||
229 | add.l %d5, %d0 | + b1) | ||
230 | asr.l %d1, %d0 | >> 17 | ||
231 | move.w %d0, (1*16,%a4) | ||
232 | |||
233 | movclr.l %acc0, %d0 | block[0] = (a0 | ||
234 | add.l %d4, %d0 | + b0) | ||
235 | asr.l %d1, %d0 | >> 17 | ||
236 | move.w %d0, (%a4)+ | advance to next column | ||
237 | |||
238 | subq.l #1, %d3 | loop 8 times | ||
239 | bne.w .col_loop | ||
240 | |||
241 | movem.l (%sp), %d0-%d7/%a0-%a6 | restore all registers | ||
242 | lea.l (15*4,%sp), %sp | ||
243 | rts | ||
244 | |||
245 | .align 2 | ||
246 | |||
247 | mpeg2_idct_copy_coldfire: | ||
248 | lea.l (-4*4,%sp), %sp | ||
249 | movem.l %d2-%d4/%a2, (%sp) | save some registers | ||
250 | movem.l (4*4+4,%sp), %a0-%a2| %a0 - block pointer | ||
251 | | %a1 - destination pointer | ||
252 | | %a2 - stride | ||
253 | |||
254 | bsr.w .idct | apply idct to block | ||
255 | |||
256 | move.l #255, %d1 | preload constant for clipping | ||
257 | moveq.l #8, %d4 | loop counter | ||
258 | |||
259 | .copy_clip_loop: | ||
260 | move.w (%a0), %d0 | load block[0] | ||
261 | ext.l %d0 | sign extend | ||
262 | cmp.l %d1, %d0 | overflow? | ||
263 | bls.b 1f | ||
264 | spl.b %d0 | yes: set appropriate limit value in low byte | ||
265 | 1: | ||
266 | move.b %d0, %d2 | collect output bytes 0..3 in %d2 | ||
267 | lsl.l #8, %d2 | ||
268 | |||
269 | move.w (2,%a0), %d0 | load block[1] | ||
270 | ext.l %d0 | sign extend | ||
271 | cmp.l %d1, %d0 | overflow? | ||
272 | bls.b 1f | ||
273 | spl.b %d0 | yes: set appropriate limit value in low byte | ||
274 | 1: | ||
275 | move.b %d0, %d2 | collect output bytes 0..3 in %d2 | ||
276 | lsl.l #8, %d2 | ||
277 | clr.l (%a0)+ | clear block[0] and block[1], | ||
278 | | %a0 now pointing to block[2] | ||
279 | move.w (%a0), %d0 | do b2 and b3 | ||
280 | ext.l %d0 | ||
281 | cmp.l %d1, %d0 | ||
282 | bls.b 1f | ||
283 | spl.b %d0 | ||
284 | 1: | ||
285 | move.b %d0, %d2 | ||
286 | lsl.l #8, %d2 | ||
287 | |||
288 | move.w (2,%a0), %d0 | ||
289 | ext.l %d0 | ||
290 | cmp.l %d1, %d0 | ||
291 | bls.b 1f | ||
292 | spl.b %d0 | ||
293 | 1: | ||
294 | move.b %d0, %d2 | ||
295 | clr.l (%a0)+ | ||
296 | |||
297 | move.w (%a0), %d0 | do b4 and b5 | ||
298 | ext.l %d0 | ||
299 | cmp.l %d1, %d0 | ||
300 | bls.b 1f | ||
301 | spl.b %d0 | ||
302 | 1: | ||
303 | move.b %d0, %d3 | ||
304 | lsl.l #8, %d3 | ||
305 | |||
306 | move.w (2,%a0), %d0 | ||
307 | ext.l %d0 | ||
308 | cmp.l %d1, %d0 | ||
309 | bls.b 1f | ||
310 | spl.b %d0 | ||
311 | 1: | ||
312 | move.b %d0, %d3 | ||
313 | lsl.l #8, %d3 | ||
314 | clr.l (%a0)+ | ||
315 | |||
316 | move.w (%a0), %d0 | do b6 and b7 | ||
317 | ext.l %d0 | ||
318 | cmp.l %d1, %d0 | ||
319 | bls.b 1f | ||
320 | spl.b %d0 | ||
321 | 1: | ||
322 | move.b %d0, %d3 | ||
323 | lsl.l #8, %d3 | ||
324 | |||
325 | move.w (2,%a0), %d0 | ||
326 | ext.l %d0 | ||
327 | cmp.l %d1, %d0 | ||
328 | bls.b 1f | ||
329 | spl.b %d0 | ||
330 | 1: | ||
331 | move.b %d0, %d3 | ||
332 | clr.l (%a0)+ | ||
333 | |||
334 | movem.l %d2-%d3, (%a1) | write all 8 output bytes at once | ||
335 | lea.l (%a2,%a1), %a1 | advance output pointer | ||
336 | subq.l #1, %d4 | loop 8 times | ||
337 | bne.w .copy_clip_loop | ||
338 | |||
339 | movem.l (%sp), %d2-%d4/%a2 | restore registers | ||
340 | lea.l (4*4,%sp), %sp | ||
341 | rts | ||
342 | |||
343 | .align 2 | ||
344 | |||
345 | mpeg2_idct_add_coldfire: | ||
346 | lea.l (-7*4,%sp), %sp | ||
347 | movem.l %d2-%d7/%a2, (%sp) | save some registers | ||
348 | movem.l (7*4+4,%sp), %d0/%a0-%a2| %d0 - last value | ||
349 | | %a0 - block pointer | ||
350 | | %a1 - destination pointer | ||
351 | | %a2 - stride | ||
352 | cmp.l #129, %d0 | last == 129 ? | ||
353 | bne.b .idct_add | no: perform idct + addition | ||
354 | move.w (%a0), %d0 | ||
355 | ext.l %d0 | ((block[0] | ||
356 | asr.l #4, %d0 | >> 4) | ||
357 | and.l #7, %d0 | & 7) | ||
358 | subq.l #4, %d0 | - 4 == 0 ? | ||
359 | bne.w .dc_add | no: just perform addition | ||
360 | |||
361 | .idct_add: | ||
362 | bsr.w .idct | apply idct | ||
363 | |||
364 | move.l #255, %d2 | preload constant for clipping | ||
365 | clr.l %d3 | used for splitting input words into bytes | ||
366 | moveq.l #8, %d4 | loop counter | ||
367 | |||
368 | .add_clip_loop: | ||
369 | movem.l (%a1), %d6-%d7 | fetch (b0 b1 b2 b3) (b4 b5 b6 b7) | ||
370 | swap %d6 | (b2 b3 b0 b1) | ||
371 | swap %d7 | (b6 b7 b4 b5) | ||
372 | |||
373 | move.w (2,%a0), %d0 | load block[1] | ||
374 | ext.l %d0 | sign extend | ||
375 | move.b %d6, %d3 | copy b1 | ||
376 | lsr.l #8, %d6 | prepare 1st buffer for next byte | ||
377 | add.l %d3, %d0 | add b1 | ||
378 | cmp.l %d2, %d0 | overflow ? | ||
379 | bls.b 1f | ||
380 | spl.b %d0 | yes: set appropriate limit value in low byte | ||
381 | 1: | ||
382 | move.w (%a0), %d1 | load block[0] | ||
383 | ext.l %d1 | sign extend | ||
384 | move.b %d6, %d3 | copy b0 | ||
385 | lsr.l #8, %d6 | prepare 1st buffer for next byte | ||
386 | add.l %d3, %d1 | add b0 | ||
387 | cmp.l %d2, %d1 | overflow ? | ||
388 | bls.b 1f | ||
389 | spl.b %d1 | yes: set appropriate limit value in low byte | ||
390 | 1: | ||
391 | move.b %d1, %d5 | collect output bytes 0..3 in %d5 | ||
392 | lsl.l #8, %d5 | ||
393 | move.b %d0, %d5 | ||
394 | lsl.l #8, %d5 | ||
395 | clr.l (%a0)+ | clear block[0] and block[1] | ||
396 | | %a0 now pointing to block[2] | ||
397 | move.w (2,%a0), %d0 | do b3 and b2 | ||
398 | ext.l %d0 | ||
399 | move.b %d6, %d3 | ||
400 | lsr.l #8, %d6 | ||
401 | add.l %d3, %d0 | ||
402 | cmp.l %d2, %d0 | ||
403 | bls.b 1f | ||
404 | spl.b %d0 | ||
405 | 1: | ||
406 | move.w (%a0), %d1 | ||
407 | ext.l %d1 | ||
408 | add.l %d6, %d1 | ||
409 | cmp.l %d2, %d1 | ||
410 | bls.b 1f | ||
411 | spl.b %d1 | ||
412 | 1: | ||
413 | move.b %d1, %d5 | ||
414 | lsl.l #8, %d5 | ||
415 | move.b %d0, %d5 | ||
416 | clr.l (%a0)+ | ||
417 | |||
418 | move.w (2,%a0), %d0 | do b5 and b4 | ||
419 | ext.l %d0 | ||
420 | move.b %d7, %d3 | ||
421 | lsr.l #8, %d7 | ||
422 | add.l %d3, %d0 | ||
423 | cmp.l %d2, %d0 | ||
424 | bls.b 1f | ||
425 | spl.b %d0 | ||
426 | 1: | ||
427 | move.w (%a0), %d1 | ||
428 | ext.l %d1 | ||
429 | move.b %d7, %d3 | ||
430 | lsr.l #8, %d7 | ||
431 | add.l %d3, %d1 | ||
432 | cmp.l %d2, %d1 | ||
433 | bls.b 1f | ||
434 | spl.b %d1 | ||
435 | 1: | ||
436 | move.b %d1, %d6 | ||
437 | lsl.l #8, %d6 | ||
438 | move.b %d0, %d6 | ||
439 | lsl.l #8, %d6 | ||
440 | clr.l (%a0)+ | ||
441 | |||
442 | move.w (2,%a0), %d0 | do b7 and b6 | ||
443 | ext.l %d0 | ||
444 | move.b %d7, %d3 | ||
445 | lsr.l #8, %d7 | ||
446 | add.l %d3, %d0 | ||
447 | cmp.l %d2, %d0 | ||
448 | bls.b 1f | ||
449 | spl.b %d0 | ||
450 | 1: | ||
451 | move.w (%a0), %d1 | ||
452 | ext.l %d1 | ||
453 | add.l %d7, %d1 | ||
454 | cmp.l %d2, %d1 | ||
455 | bls.b 1f | ||
456 | spl.b %d1 | ||
457 | 1: | ||
458 | move.b %d1, %d6 | ||
459 | lsl.l #8, %d6 | ||
460 | move.b %d0, %d6 | ||
461 | clr.l (%a0)+ | ||
462 | |||
463 | movem.l %d5-%d6, (%a1) | write all 8 output bytes at once | ||
464 | lea.l (%a2,%a1), %a1 | advance output pointer | ||
465 | subq.l #1, %d4 | loop 8 times | ||
466 | bne.w .add_clip_loop | ||
467 | |||
468 | bra.w .idct_add_end | ||
469 | |||
470 | .dc_add: | ||
471 | move.w (%a0), %d0 | ||
472 | ext.l %d0 | %d0 = (block[0] | ||
473 | add.l #64, %d0 | + 64) | ||
474 | asr.l #7, %d0 | >> 7 | ||
475 | clr.w (%a0) | clear block[0] | ||
476 | clr.w (63*2,%a0) | and block[63] | ||
477 | move.l %d0, %a0 | DC value in %a0 | ||
478 | |||
479 | move.l #255, %d2 | preload constant for clipping | ||
480 | clr.l %d3 | for splitting input words into bytes | ||
481 | moveq.l #8, %d4 | loop counter | ||
482 | |||
483 | .dc_clip_loop: | ||
484 | movem.l (%a1), %d6-%d7 | (b0 b1 b2 b3) (b4 b5 b6 b7) | ||
485 | swap %d6 | (b2 b3 b0 b1) | ||
486 | swap %d7 | (b6 b7 b4 b5) | ||
487 | |||
488 | move.l %a0, %d0 | copy DC | ||
489 | move.b %d6, %d3 | copy b1 | ||
490 | lsr.l #8, %d6 | prepare 1st buffer for next byte | ||
491 | add.l %d3, %d0 | add b1 | ||
492 | cmp.l %d2, %d0 | overflow ? | ||
493 | bls.b 1f | ||
494 | spl.b %d0 | yes: set appropriate limit value in low byte | ||
495 | 1: | ||
496 | move.l %a0, %d1 | copy DC | ||
497 | move.b %d6, %d3 | copy b0 | ||
498 | lsr.l #8, %d6 | prepare 1st buffer for next byte | ||
499 | add.l %d3, %d1 | add b0 | ||
500 | cmp.l %d2, %d1 | overflow ? | ||
501 | bls.b 1f | ||
502 | spl.b %d1 | yes: set appropriate limit value in low byte | ||
503 | 1: | ||
504 | move.b %d1, %d5 | collect output bytes 0..3 in %d5 | ||
505 | lsl.l #8, %d5 | ||
506 | move.b %d0, %d5 | ||
507 | lsl.l #8, %d5 | ||
508 | |||
509 | move.l %a0, %d0 | do b3 and b2 | ||
510 | move.b %d6, %d3 | ||
511 | lsr.l #8, %d6 | ||
512 | add.l %d3, %d0 | ||
513 | cmp.l %d2, %d0 | ||
514 | bls.b 1f | ||
515 | spl.b %d0 | ||
516 | 1: | ||
517 | move.l %a0, %d1 | ||
518 | add.l %d6, %d1 | ||
519 | cmp.l %d2, %d1 | ||
520 | bls.b 1f | ||
521 | spl.b %d1 | ||
522 | 1: | ||
523 | move.b %d1, %d5 | ||
524 | lsl.l #8, %d5 | ||
525 | move.b %d0, %d5 | ||
526 | |||
527 | move.l %a0, %d0 | do b5 and b4 | ||
528 | move.b %d7, %d3 | ||
529 | lsr.l #8, %d7 | ||
530 | add.l %d3, %d0 | ||
531 | cmp.l %d2, %d0 | ||
532 | bls.b 1f | ||
533 | spl.b %d0 | ||
534 | 1: | ||
535 | move.l %a0, %d1 | ||
536 | move.b %d7, %d3 | ||
537 | lsr.l #8, %d7 | ||
538 | add.l %d3, %d1 | ||
539 | cmp.l %d2, %d1 | ||
540 | bls.b 1f | ||
541 | spl.b %d1 | ||
542 | 1: | ||
543 | move.b %d1, %d6 | collect output bytes 4..7 in %d6 | ||
544 | lsl.l #8, %d6 | ||
545 | move.b %d0, %d6 | ||
546 | lsl.l #8, %d6 | ||
547 | |||
548 | move.l %a0, %d0 | do b7 and b6 | ||
549 | move.b %d7, %d3 | ||
550 | lsr.l #8, %d7 | ||
551 | add.l %d3, %d0 | ||
552 | cmp.l %d2, %d0 | ||
553 | bls.b 1f | ||
554 | spl.b %d0 | ||
555 | 1: | ||
556 | move.l %a0, %d1 | ||
557 | add.l %d7, %d1 | ||
558 | cmp.l %d2, %d1 | ||
559 | bls.b 1f | ||
560 | spl.b %d1 | ||
561 | 1: | ||
562 | move.b %d1, %d6 | ||
563 | lsl.l #8, %d6 | ||
564 | move.b %d0, %d6 | ||
565 | |||
566 | movem.l %d5-%d6, (%a1) | write all 8 output bytes at once | ||
567 | lea.l (%a2,%a1), %a1 | advance output pointer | ||
568 | subq.l #1, %d4 | loop 8 times | ||
569 | bne.w .dc_clip_loop | ||
570 | |||
571 | .idct_add_end: | ||
572 | movem.l (%sp), %d2-%d7/%a2 | restore registers | ||
573 | lea.l (7*4,%sp), %sp | ||
574 | rts | ||
diff --git a/apps/plugins/mpegplayer/mpeg2_internal.h b/apps/plugins/mpegplayer/mpeg2_internal.h index 0c552b766f..1ec85c60f1 100644 --- a/apps/plugins/mpegplayer/mpeg2_internal.h +++ b/apps/plugins/mpegplayer/mpeg2_internal.h | |||
@@ -20,6 +20,8 @@ | |||
20 | * along with this program; if not, write to the Free Software | 20 | * along with this program; if not, write to the Free Software |
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 | */ | 22 | */ |
23 | |||
24 | #include "config.h" /* for Rockbox CPU_ #defines */ | ||
23 | 25 | ||
24 | /* macroblock modes */ | 26 | /* macroblock modes */ |
25 | #define MACROBLOCK_INTRA 1 | 27 | #define MACROBLOCK_INTRA 1 |
@@ -92,7 +94,11 @@ struct mpeg2_decoder_s { | |||
92 | int16_t dc_dct_pred[3]; | 94 | int16_t dc_dct_pred[3]; |
93 | 95 | ||
94 | /* DCT coefficients */ | 96 | /* DCT coefficients */ |
97 | #ifdef CPU_COLDFIRE | ||
98 | int16_t *DCTblock; /* put buffer separately to have it in IRAM */ | ||
99 | #else | ||
95 | int16_t DCTblock[64] ATTR_ALIGN(64); | 100 | int16_t DCTblock[64] ATTR_ALIGN(64); |
101 | #endif | ||
96 | 102 | ||
97 | uint8_t * picture_dest[3]; | 103 | uint8_t * picture_dest[3]; |
98 | void (* convert) (void * convert_id, uint8_t * const * src, | 104 | void (* convert) (void * convert_id, uint8_t * const * src, |