summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Sevakis <jethead71@rockbox.org>2007-09-25 03:14:06 +0000
committerMichael Sevakis <jethead71@rockbox.org>2007-09-25 03:14:06 +0000
commit7e891aa4d398b85c05c43eaa7792935f6df56a94 (patch)
treee7fb06781fd750c73fde02f11bc22da7819230df
parent287d6223d3e4f90043e5fd36cf49d7267b53023d (diff)
downloadrockbox-7e891aa4d398b85c05c43eaa7792935f6df56a94.tar.gz
rockbox-7e891aa4d398b85c05c43eaa7792935f6df56a94.zip
Oops. Actually add the IDCT file for ARM. :p
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14852 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/plugins/mpegplayer/idct_arm_c.c527
1 files changed, 527 insertions, 0 deletions
diff --git a/apps/plugins/mpegplayer/idct_arm_c.c b/apps/plugins/mpegplayer/idct_arm_c.c
new file mode 100644
index 0000000000..be9971f5c3
--- /dev/null
+++ b/apps/plugins/mpegplayer/idct_arm_c.c
@@ -0,0 +1,527 @@
1/*
2 * idct.c
3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5 *
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
7 * See http://libmpeg2.sourceforge.net/ for updates.
8 *
9 * mpeg2dec is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * mpeg2dec is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include "plugin.h"
25
26#include "mpeg2dec_config.h"
27
28#include "mpeg2.h"
29#include "attributes.h"
30#include "mpeg2_internal.h"
31
/*
 * Fixed-point cosine constants for the IDCT, scaled by 2048 (see the formula
 * next to each value).  The comment on the line above each #define is the
 * 12-bit binary expansion of that constant.
 * NOTE(review): none of W1..W7 is referenced by name in the visible part of
 * this file; the assembly below hard-codes the products as shift/add chains.
 */
32/* 101100011001 */
33#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
34/* 101001110100 */
35#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
36/* 100101101000 */
37#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
38/* 011001001001 */
39#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
40/* 010001010100 */
41#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
42/* 001000110101 */
43#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */
44
45/* idct main entry point */
/*
 * Global function pointers through which the IDCT is invoked.  Both are
 * assigned by mpeg2_idct_init() (to mpeg2_idct_copy_c and mpeg2_idct_add_c
 * respectively) and must not be called before that function has run.
 *   mpeg2_idct_copy: transform block and store the clamped result at dest.
 *   mpeg2_idct_add:  transform block and add the result to the bytes at dest.
 */
46void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
47void (* mpeg2_idct_add) (int last, int16_t * block,
48 uint8_t * dest, int stride);
49
50/*
51 * In legal streams, the IDCT output should be between -384 and +384.
52 * In corrupted streams, it is possible to force the IDCT output to go
53 * to +-3826 - this is the worst case for a column IDCT where the
54 * column inputs are 16-bit values.
55 */
/*
 * BUTTERFLY(t0, t1, W0, W1, d0, d1) computes the rotation
 *
 *     t0 = W0 * d0 + W1 * d1
 *     t1 = W0 * d1 - W1 * d0
 *
 * The disabled (#if 0) variant is the direct form using four multiplies.
 * The enabled variant produces the same values with three multiplies by
 * sharing the product W0 * (d0 + d1).
 *
 * Every argument and every assignment target is parenthesized so that
 * expression arguments (e.g. "a + b") expand correctly.  Arguments are
 * evaluated more than once, so they must be free of side effects.  t0 is
 * written before t1 is computed, so t0 must not alias d0.
 */
#if 0
#define BUTTERFLY(t0,t1,W0,W1,d0,d1)            \
    do {                                        \
        (t0) = (W0) * (d0) + (W1) * (d1);       \
        (t1) = (W0) * (d1) - (W1) * (d0);       \
    } while (0)
#else
#define BUTTERFLY(t0,t1,W0,W1,d0,d1)                        \
    do {                                                    \
        /* shared product; name chosen to avoid clashing */ \
        /* with a caller variable called "tmp"           */ \
        int butterfly_tmp_ = (W0) * ((d0) + (d1));          \
        (t0) = butterfly_tmp_ + ((W1) - (W0)) * (d1);       \
        (t1) = butterfly_tmp_ - ((W1) + (W0)) * (d0);       \
    } while (0)
#endif
70
71/* Custom calling convention:
72 * r0 contains block pointer and is non-volatile
73 * all non-volatile c context saved and restored on its behalf
74 */
75static void idct (int16_t * block) __attribute__((naked,used));
76static void idct (int16_t * block)
77{
78 asm volatile (
79 "add r12, r0, #128 \n"
80 "1: \n"
81 "ldrsh r1, [r0, #0] \n" /* d0 */
82 "ldrsh r2, [r0, #2] \n" /* d1 */
83 "ldrsh r3, [r0, #4] \n" /* d2 */
84 "ldrsh r4, [r0, #6] \n" /* d3 */
85 "ldrsh r5, [r0, #8] \n" /* d0 */
86 "ldrsh r6, [r0, #10] \n" /* d1 */
87 "ldrsh r7, [r0, #12] \n" /* d2 */
88 "ldrsh r8, [r0, #14] \n" /* d3 */
89 "orrs r9, r2, r3 \n"
90 "orreqs r9, r4, r5 \n"
91 "orreqs r9, r6, r7 \n"
92 "cmpeq r8, #0 \n"
93 "bne 2f \n"
94 "mov r1, r1, asl #15 \n"
95 "bic r1, r1, #0x8000 \n"
96 "orr r1, r1, r1, lsr #16 \n"
97 "str r1, [r0], #4 \n"
98 "str r1, [r0], #4 \n"
99 "str r1, [r0], #4 \n"
100 "str r1, [r0], #4 \n"
101 "cmp r0, r12 \n"
102 "blo 1b \n"
103 "b 3f \n"
104 "2: \n"
105 "mov r1, r1, asl #11 \n" /* r1 = d0 = (block[0] << 11) + 2048 */
106 "add r1, r1, #2048 \n"
107 "add r1, r1, r3, asl #11 \n" /* r1 = t0 = d0 + (block[2] << 11) */
108 "sub r3, r1, r3, asl #12 \n" /* r3 = t1 = d0 - (block[2] << 11) */
109
110 "add r9, r2, r4 \n" /* r9 = tmp = (d1+d3)*(1108/4) */
111 "add r10, r9, r9, asl #2 \n"
112 "add r10, r10, r9, asl #4 \n"
113 "add r9, r10, r9, asl #8 \n"
114
115 "add r10, r2, r2, asl #4 \n" /* r2 = t2 = tmp + (d1*(1568/32)*8) */
116 "add r2, r10, r2, asl #5 \n"
117 "add r2, r9, r2, asl #3 \n"
118
119 "add r10, r4, r4, asl #2 \n" /* r4 = t3 = tmp - (d3*(3784/8)*2) */
120 "rsb r10, r10, r4, asl #6 \n"
121 "add r4, r4, r10, asl #3 \n"
122 "sub r4, r9, r4, asl #1 \n"
123 /* t2 & t3 are 1/4 final value here */
124 "add r1, r1, r2, asl #2 \n" /* r1 = a0 = t0 + t2 */
125 "sub r2, r1, r2, asl #3 \n" /* r2 = a3 = t0 - t2 */
126 "add r3, r3, r4, asl #2 \n" /* r3 = a1 = t1 + t3 */
127 "sub r4, r3, r4, asl #3 \n" /* r4 = a2 = t1 - t3 */
128
129 "add r9, r8, r5 \n" /* r9 = tmp = 565*(d3 + d0) */
130 "add r10, r9, r9, asl #4 \n"
131 "add r10, r10, r10, asl #5 \n"
132 "add r9, r10, r9, asl #2 \n"
133
134 "add r10, r5, r5, asl #4 \n" /* r5 = t0 = tmp + (((2276/4)*d0)*4) */
135 "add r10, r10, r10, asl #5 \n"
136 "add r5, r10, r5, asl #3 \n"
137 "add r5, r9, r5, asl #2 \n"
138
139 "add r10, r8, r8, asl #2 \n" /* r8 = t1 = tmp - (((3406/2)*d3)*2) */
140 "add r10, r10, r10, asl #4 \n"
141 "add r10, r10, r8, asl #7 \n"
142 "rsb r8, r8, r10, asl #3 \n"
143 "sub r8, r9, r8, asl #1 \n"
144
145 "add r9, r6, r7 \n" /* r9 = tmp = (2408/8)*(d1 + d2) */
146 "add r10, r9, r9, asl #3 \n"
147 "add r10, r10, r10, asl #5 \n"
148 "add r9, r10, r9, asl #2 \n"
149
150 "add r10, r7, r7, asl #3 \n" /* r7 = t2 = (tmp*8) - 799*d2 */
151 "add r10, r10, r7, asl #4 \n"
152 "rsb r7, r7, r10, asl #5 \n"
153 "rsb r7, r7, r9, asl #3 \n"
154
155 "sub r10, r6, r6, asl #4 \n" /* r6 = t3 = (tmp*8) - 4017*d1 */
156 "sub r10, r10, r6, asl #6 \n"
157 "add r10, r10, r6, asl #12 \n"
158 "add r6, r10, r6 \n"
159 "rsb r6, r6, r9, asl #3 \n"
160 /* t0 = r5, t1 = r8, t2 = r7, t3 = r6*/
161 "add r9, r5, r7 \n" /* r9 = b0 = t0 + t2 */
162 "add r10, r8, r6 \n" /* r10 = b3 = t1 + t3 */
163 "sub r5, r5, r7 \n" /* t0 -= t2 */
164 "sub r8, r8, r6 \n" /* t1 -= t3 */
165 "add r6, r5, r8 \n" /* r6 = t0 + t1 */
166 "sub r7, r5, r8 \n" /* r7 = t0 - t1 */
167
168 "add r11, r6, r6, asr #2 \n" /* r6 = b1 = r6*(181/128) */
169 "add r11, r11, r11, asr #5 \n"
170 "add r6, r11, r6, asr #3 \n"
171 "add r11, r7, r7, asr #2 \n" /* r7 = b2 = r7*(181/128) */
172 "add r11, r11, r11, asr #5 \n"
173 "add r7, r11, r7, asr #3 \n"
174 /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */
175 /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */
176 "add r5, r1, r9 \n" /* block[0] = (a0 + b0) >> 12 */
177 "mov r5, r5, asr #12 \n"
178 "strh r5, [r0], #2 \n"
179 "add r8, r3, r6, asr #1 \n" /* block[1] = (a1 + b1) >> 12 */
180 "mov r8, r8, asr #12 \n"
181 "strh r8, [r0], #2 \n"
182 "add r5, r4, r7, asr #1 \n" /* block[2] = (a2 + b2) >> 12 */
183 "mov r5, r5, asr #12 \n"
184 "strh r5, [r0], #2 \n"
185 "add r8, r2, r10 \n" /* block[3] = (a3 + b3) >> 12 */
186 "mov r8, r8, asr #12 \n"
187 "strh r8, [r0], #2 \n"
188 "sub r5, r2, r10 \n" /* block[4] = (a3 - b3) >> 12 */
189 "mov r5, r5, asr #12 \n"
190 "strh r5, [r0], #2 \n"
191 "sub r8, r4, r7, asr #1 \n" /* block[5] = (a2 - b2) >> 12 */
192 "mov r8, r8, asr #12 \n"
193 "strh r8, [r0], #2 \n"
194 "sub r5, r3, r6, asr #1 \n" /* block[6] = (a1 - b1) >> 12 */
195 "mov r5, r5, asr #12 \n"
196 "strh r5, [r0], #2 \n"
197 "sub r8, r1, r9 \n" /* block[7] = (a0 - b0) >> 12 */
198 "mov r8, r8, asr #12 \n"
199 "strh r8, [r0], #2 \n"
200 "cmp r0, r12 \n"
201 "blo 1b \n"
202 "3: \n"
203 "sub r0, r0, #128 \n"
204 "add r12, r0, #16 \n"
205 "4: \n"
206 "ldrsh r1, [r0, #0*8] \n" /* d0 */
207 "ldrsh r2, [r0, #2*8] \n" /* d1 */
208 "ldrsh r3, [r0, #4*8] \n" /* d2 */
209 "ldrsh r4, [r0, #6*8] \n" /* d3 */
210 "ldrsh r5, [r0, #8*8] \n" /* d0 */
211 "ldrsh r6, [r0, #10*8] \n" /* d1 */
212 "ldrsh r7, [r0, #12*8] \n" /* d2 */
213 "ldrsh r8, [r0, #14*8] \n" /* d3 */
214
215 "mov r1, r1, asl #11 \n" /* r1 = d0 = (block[0] << 11) + 2048 */
216 "add r1, r1, #65536 \n"
217 "add r1, r1, r3, asl #11 \n" /* r1 = t0 = d0 + d2:(block[2] << 11) */
218 "sub r3, r1, r3, asl #12 \n" /* r3 = t1 = d0 - d2:(block[2] << 11) */
219
220 "add r9, r2, r4 \n" /* r9 = tmp = (d1+d3)*(1108/4) */
221 "add r10, r9, r9, asl #2 \n"
222 "add r10, r10, r9, asl #4 \n"
223 "add r9, r10, r9, asl #8 \n"
224
225 "add r11, r2, r2, asl #4 \n" /* r2 = t2 = tmp + (d1*(1568/32)*8) */
226 "add r2, r11, r2, asl #5 \n"
227 "add r2, r9, r2, asl #3 \n"
228
229 "add r10, r4, r4, asl #2 \n" /* r4 = t3 = tmp - (d3*(3784/8)*2) */
230 "rsb r10, r10, r4, asl #6 \n"
231 "add r4, r4, r10, asl #3 \n"
232 "sub r4, r9, r4, asl #1 \n"
233 /* t2 & t3 are 1/4 final value here */
234 "add r1, r1, r2, asl #2 \n" /* r1 = a0 = t0 + t2 */
235 "sub r2, r1, r2, asl #3 \n" /* r2 = a3 = t0 - t2 */
236 "add r3, r3, r4, asl #2 \n" /* r3 = a1 = t1 + t3 */
237 "sub r4, r3, r4, asl #3 \n" /* r4 = a2 = t1 - t3 */
238
239 "add r9, r8, r5 \n" /* r9 = tmp = 565*(d3 + d0) */
240 "add r10, r9, r9, asl #4 \n"
241 "add r10, r10, r10, asl #5 \n"
242 "add r9, r10, r9, asl #2 \n"
243
244 "add r10, r5, r5, asl #4 \n" /* r5 = t0 = tmp + (((2276/4)*d0)*4) */
245 "add r10, r10, r10, asl #5 \n"
246 "add r5, r10, r5, asl #3 \n"
247 "add r5, r9, r5, asl #2 \n"
248
249 "add r10, r8, r8, asl #2 \n" /* r8 = t1 = tmp - (((3406/2)*d3)*2) */
250 "add r10, r10, r10, asl #4 \n"
251 "add r10, r10, r8, asl #7 \n"
252 "rsb r8, r8, r10, asl #3 \n"
253 "sub r8, r9, r8, asl #1 \n"
254
255 "add r9, r6, r7 \n" /* r9 = tmp = (2408/8)*(d1 + d2) */
256 "add r10, r9, r9, asl #3 \n"
257 "add r10, r10, r10, asl #5 \n"
258 "add r9, r10, r9, asl #2 \n"
259
260 "add r10, r7, r7, asl #3 \n" /* r7 = t2 = (tmp*8) - 799*d2 */
261 "add r10, r10, r7, asl #4 \n"
262 "rsb r7, r7, r10, asl #5 \n"
263 "rsb r7, r7, r9, asl #3 \n"
264
265 "sub r10, r6, r6, asl #4 \n" /* r6 = t3 = (tmp*8) - 4017*d1 */
266 "sub r10, r10, r6, asl #6 \n"
267 "add r10, r10, r6, asl #12 \n"
268 "add r6, r10, r6 \n"
269 "rsb r6, r6, r9, asl #3 \n"
270 /* t0=r5, t1=r8, t2=r7, t3=r6*/
271 "add r9, r5, r7 \n" /* r9 = b0 = t0 + t2 */
272 "add r10, r8, r6 \n" /* r10 = b3 = t1 + t3 */
273 "sub r5, r5, r7 \n" /* t0 -= t2 */
274 "sub r8, r8, r6 \n" /* t1 -= t3 */
275 "add r6, r5, r8 \n" /* r6 = t0 + t1 */
276 "sub r7, r5, r8 \n" /* r7 = t0 - t1 */
277
278 "add r11, r6, r6, asr #2 \n" /* r6 = b1 = r5*(181/128) */
279 "add r11, r11, r11, asr #5 \n"
280 "add r6, r11, r6, asr #3 \n"
281 "add r11, r7, r7, asr #2 \n" /* r7 = b2 = r6*(181/128) */
282 "add r11, r11, r11, asr #5 \n"
283 "add r7, r11, r7, asr #3 \n"
284 /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */
285 /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */
286 "add r5, r1, r9 \n" /* block[0] = (a0 + b0) >> 17 */
287 "mov r5, r5, asr #17 \n"
288 "strh r5, [r0, #0*8] \n"
289 "add r8, r3, r6, asr #1 \n" /* block[1] = (a1 + b1) >> 17 */
290 "mov r8, r8, asr #17 \n"
291 "strh r8, [r0, #2*8] \n"
292 "add r5, r4, r7, asr #1 \n" /* block[2] = (a2 + b2) >> 17 */
293 "mov r5, r5, asr #17 \n"
294 "strh r5, [r0, #4*8] \n"
295 "add r8, r2, r10 \n" /* block[3] = (a3 + b3) >> 17 */
296 "mov r8, r8, asr #17 \n"
297 "strh r8, [r0, #6*8] \n"
298 "sub r5, r2, r10 \n" /* block[4] = (a3 - b3) >> 17 */
299 "mov r5, r5, asr #17 \n"
300 "strh r5, [r0, #8*8] \n"
301 "sub r8, r4, r7, asr #1 \n" /* block[5] = (a2 - b2) >> 17 */
302 "mov r8, r8, asr #17 \n"
303 "strh r8, [r0, #10*8] \n"
304 "sub r5, r3, r6, asr #1 \n" /* block[6] = (a1 - b1) >> 17 */
305 "mov r5, r5, asr #17 \n"
306 "strh r5, [r0, #12*8] \n"
307 "sub r8, r1, r9 \n" /* block[7] = (a0 - b0) >> 17 */
308 "mov r8, r8, asr #17 \n"
309 "strh r8, [r0, #14*8] \n"
310 "add r0, r0, #2 \n"
311 "cmp r0, r12 \n"
312 "blo 4b \n"
313 "sub r0, r0, #16 \n"
314 "bx lr \n"
315 );
316 (void)block;
317}
318
/*
 * Inverse-transform a coefficient block and store it as 8x8 bytes.
 *
 *   block  - 64 int16_t coefficients; transformed in place by idct() and
 *            then overwritten with zeros as the rows are consumed
 *   dest   - receives 8 rows of 8 bytes, each value clamped to 0..255
 *   stride - byte distance between consecutive rows of dest
 *
 * The function is "naked": it consists solely of the assembly below, which
 * saves and restores r4-r12 itself and returns by popping the saved lr
 * into pc.  On entry r0 = block, r1 = dest, r2 = stride.
 */
319static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
320 const int stride) __attribute__((naked));
321static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
322 const int stride)
323{
324 asm volatile(
/* Save dest/stride (r1-r2) below the callee-saved registers: idct clobbers
 * r1-r12 and only keeps r0. */
325 "stmfd sp!, { r1-r2, \
326 r4-r12, lr } \n"
327 "bl idct \n"
328 "ldmfd sp!, { r1-r2 } \n" /* r1 = dest, r2 = stride again */
329 "mov r11, #0 \n" /* r11 = 0, used to clear the block */
330 "add r12, r0, #128 \n" /* r12 = end of block */
331 "1: \n" /* one iteration per row of 8 values */
332 "ldrsh r3, [r0, #0] \n"
333 "ldrsh r4, [r0, #2] \n"
334 "ldrsh r5, [r0, #4] \n"
335 "ldrsh r6, [r0, #6] \n"
336 "ldrsh r7, [r0, #8] \n"
337 "ldrsh r8, [r0, #10] \n"
338 "ldrsh r9, [r0, #12] \n"
339 "ldrsh r10, [r0, #14] \n"
/* Clamp pattern used for every value: the unsigned "hi" condition is true
 * both for values above 255 and for negative values.  mvn of (x asr #31)
 * gives 0 for negative x and all ones (low byte 255) otherwise. */
340 "cmp r3, #255 \n"
341 "mvnhi r3, r3, asr #31 \n"
342 "strb r3, [r1, #0] \n"
343 "str r11, [r0], #4 \n" /* zero two coefficients, advance r0 */
344 "cmp r4, #255 \n"
345 "mvnhi r4, r4, asr #31 \n"
346 "strb r4, [r1, #1] \n"
347 "cmp r5, #255 \n"
348 "mvnhi r5, r5, asr #31 \n"
349 "strb r5, [r1, #2] \n"
350 "str r11, [r0], #4 \n"
351 "cmp r6, #255 \n"
352 "mvnhi r6, r6, asr #31 \n"
353 "strb r6, [r1, #3] \n"
354 "cmp r7, #255 \n"
355 "mvnhi r7, r7, asr #31 \n"
356 "strb r7, [r1, #4] \n"
357 "str r11, [r0], #4 \n"
358 "cmp r8, #255 \n"
359 "mvnhi r8, r8, asr #31 \n"
360 "strb r8, [r1, #5] \n"
361 "cmp r9, #255 \n"
362 "mvnhi r9, r9, asr #31 \n"
363 "strb r9, [r1, #6] \n"
364 "str r11, [r0], #4 \n"
365 "cmp r10, #255 \n"
366 "mvnhi r10, r10, asr #31 \n"
367 "strb r10, [r1, #7] \n"
368 "add r1, r1, r2 \n" /* next destination row */
369 "cmp r0, r12 \n"
370 "blo 1b \n"
371 "ldmfd sp!, { r4-r12, pc } \n" /* restore registers and return */
372 );
/* The parameters are only used through their registers in the asm above. */
373 (void)block; (void)dest; (void)stride;
374}
375
/*
 * Inverse-transform a coefficient block and add it to an 8x8 byte area.
 *
 *   last   - selects the code path: when last == 129 and
 *            (block[0] & 0x70) != 0x40 only the DC term is applied
 *            (label 3); in every other case the full IDCT runs (label 1)
 *   block  - 64 int16_t coefficients; zeroed as they are consumed (the
 *            DC-only path clears just block[0] and block[63])
 *   dest   - 8 rows of 8 bytes, updated in place with results clamped
 *            to 0..255
 *   stride - byte distance between consecutive rows of dest
 *
 * The function is "naked": it consists solely of the assembly below.  On
 * entry r0 = last, r1 = block, r2 = dest, r3 = stride.
 * NOTE(review): the meaning of the value 129 and of the 0x70/0x40 test is
 * defined by the caller and is not visible in this file.
 */
376static void mpeg2_idct_add_c (int last, int16_t * block,
377 uint8_t * dest, const int stride) __attribute__((naked));
378static void mpeg2_idct_add_c (int last, int16_t * block,
379 uint8_t * dest, const int stride)
380{
381 asm volatile (
382 "cmp r0, #129 \n"
383 "mov r0, r1 \n" /* r0 = block from here on */
384 "ldreqsh r1, [r0, #0] \n" /* last == 129: r1 = block[0] */
385 "bne 1f \n" /* last != 129: full IDCT */
386 "and r1, r1, #0x70 \n"
387 "cmp r1, #0x40 \n"
388 "bne 3f \n" /* DC-only path */
389 "1: \n" /* ---- full IDCT followed by add ---- */
/* dest/stride (r2-r3) are pushed lowest so they can be popped into r1-r2
 * after idct, which clobbers r1-r12 and only keeps r0. */
390 "stmfd sp!, { r2-r12, lr } \n"
391 "bl idct \n"
392 "ldmfd sp!, { r1-r2 } \n" /* r1 = dest, r2 = stride */
393 "mov r11, #0 \n" /* r11 = 0, used to clear the block */
394 "add r12, r0, #128 \n" /* r12 = end of block */
395 "2: \n" /* one iteration per row of 8 values */
396 "ldrb r3, [r1, #0] \n"
397 "ldrb r4, [r1, #1] \n"
398 "ldrb r5, [r1, #2] \n"
399 "ldrb r6, [r1, #3] \n"
400 "ldrsh r7, [r0, #0] \n"
401 "ldrsh r8, [r0, #2] \n"
402 "ldrsh r9, [r0, #4] \n"
403 "ldrsh r10, [r0, #6] \n"
/* Clamp pattern used for every sum: the unsigned "hi" condition is true
 * both for values above 255 and for negative values.  mvn of (x asr #31)
 * gives 0 for negative x and all ones (low byte 255) otherwise.  Loads for
 * the second half of the row are interleaved with the first half. */
404 "add r7, r7, r3 \n"
405 "ldrb r3, [r1, #4] \n"
406 "cmp r7, #255 \n"
407 "mvnhi r7, r7, asr #31 \n"
408 "strb r7, [r1, #0] \n"
409 "ldrsh r7, [r0, #8] \n"
410 "add r8, r8, r4 \n"
411 "ldrb r4, [r1, #5] \n"
412 "cmp r8, #255 \n"
413 "mvnhi r8, r8, asr #31 \n"
414 "strb r8, [r1, #1] \n"
415 "ldrsh r8, [r0, #10] \n"
416 "add r9, r9, r5 \n"
417 "ldrb r5, [r1, #6] \n"
418 "cmp r9, #255 \n"
419 "mvnhi r9, r9, asr #31 \n"
420 "strb r9, [r1, #2] \n"
421 "ldrsh r9, [r0, #12] \n"
422 "add r10, r10, r6 \n"
423 "ldrb r6, [r1, #7] \n"
424 "cmp r10, #255 \n"
425 "mvnhi r10, r10, asr #31 \n"
426 "strb r10, [r1, #3] \n"
427 "ldrsh r10, [r0, #14] \n"
/* All eight coefficients of the row are loaded; the row can be zeroed. */
428 "str r11, [r0], #4 \n"
429 "add r7, r7, r3 \n"
430 "cmp r7, #255 \n"
431 "mvnhi r7, r7, asr #31 \n"
432 "strb r7, [r1, #4] \n"
433 "str r11, [r0], #4 \n"
434 "add r8, r8, r4 \n"
435 "cmp r8, #255 \n"
436 "mvnhi r8, r8, asr #31 \n"
437 "strb r8, [r1, #5] \n"
438 "str r11, [r0], #4 \n"
439 "add r9, r9, r5 \n"
440 "cmp r9, #255 \n"
441 "mvnhi r9, r9, asr #31 \n"
442 "strb r9, [r1, #6] \n"
443 "add r10, r10, r6 \n"
444 "cmp r10, #255 \n"
445 "mvnhi r10, r10, asr #31 \n"
446 "strb r10, [r1, #7] \n"
447 "str r11, [r0], #4 \n"
448 "add r1, r1, r2 \n" /* next destination row */
449 "cmp r0, r12 \n"
450 "blo 2b \n"
451 "ldmfd sp!, { r4-r12, pc } \n" /* restore registers and return */
452 "3: \n" /* ---- DC-only path: r0 = block, r2 = dest, r3 = stride ---- */
453 "stmfd sp!, { r4-r11 } \n"
454 "ldrsh r1, [r0, #0] \n" /* r1 = block[0] */
455 "mov r11, #0 \n"
456 "strh r11, [r0, #0] \n" /* block[0] = 0 */
457 "strh r11, [r0, #126] \n" /* block[63] = 0 */
458 "add r1, r1, #64 \n" /* r1 = block[0] + 64; DC = r1 >> 7 is applied at each use below */
459 "add r0, r2, r3, asl #3 \n" /* r0 = dest + 8 * stride = end of area */
460 "4: \n" /* one iteration per row of 8 bytes */
461 "ldrb r4, [r2, #0] \n"
462 "ldrb r5, [r2, #1] \n"
463 "ldrb r6, [r2, #2] \n"
464 "ldrb r7, [r2, #3] \n"
465 "ldrb r8, [r2, #4] \n"
466 "ldrb r9, [r2, #5] \n"
467 "ldrb r10, [r2, #6] \n"
468 "ldrb r11, [r2, #7] \n"
469 "add r4, r4, r1, asr #7 \n" /* pixel + DC, then clamp to 0..255 */
470 "cmp r4, #255 \n"
471 "mvnhi r4, r4, asr #31 \n"
472 "strb r4, [r2, #0] \n"
473 "add r5, r5, r1, asr #7 \n"
474 "cmp r5, #255 \n"
475 "mvnhi r5, r5, asr #31 \n"
476 "strb r5, [r2, #1] \n"
477 "add r6, r6, r1, asr #7 \n"
478 "cmp r6, #255 \n"
479 "mvnhi r6, r6, asr #31 \n"
480 "strb r6, [r2, #2] \n"
481 "add r7, r7, r1, asr #7 \n"
482 "cmp r7, #255 \n"
483 "mvnhi r7, r7, asr #31 \n"
484 "strb r7, [r2, #3] \n"
485 "add r8, r8, r1, asr #7 \n"
486 "cmp r8, #255 \n"
487 "mvnhi r8, r8, asr #31 \n"
488 "strb r8, [r2, #4] \n"
489 "add r9, r9, r1, asr #7 \n"
490 "cmp r9, #255 \n"
491 "mvnhi r9, r9, asr #31 \n"
492 "strb r9, [r2, #5] \n"
493 "add r10, r10, r1, asr #7 \n"
494 "cmp r10, #255 \n"
495 "mvnhi r10, r10, asr #31 \n"
496 "strb r10, [r2, #6] \n"
497 "add r11, r11, r1, asr #7 \n"
498 "cmp r11, #255 \n"
499 "mvnhi r11, r11, asr #31 \n"
500 "strb r11, [r2, #7] \n"
501 "add r2, r2, r3 \n" /* next destination row */
502 "cmp r2, r0 \n"
503 "blo 4b \n"
504 "ldmfd sp!, { r4-r11 } \n"
505 "bx lr \n"
506 );
/* The parameters are only used through their registers in the asm above. */
507 (void)last; (void)block; (void)dest; (void)stride;
508}
509
510void mpeg2_idct_init (void)
511{
512 extern uint8_t mpeg2_scan_norm[64];
513 extern uint8_t mpeg2_scan_alt[64];
514 int i, j;
515
516 mpeg2_idct_copy = mpeg2_idct_copy_c;
517 mpeg2_idct_add = mpeg2_idct_add_c;
518
519 for (i = 0; i < 64; i++)
520 {
521 j = mpeg2_scan_norm[i];
522 mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
523
524 j = mpeg2_scan_alt[i];
525 mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
526 }
527}