summaryrefslogtreecommitdiff
path: root/apps/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'apps/plugins')
-rw-r--r--apps/plugins/mpegplayer/SOURCES14
-rw-r--r--apps/plugins/mpegplayer/idct.c67
-rw-r--r--apps/plugins/mpegplayer/idct_arm.S440
-rw-r--r--apps/plugins/mpegplayer/idct_arm_c.c529
4 files changed, 472 insertions, 578 deletions
diff --git a/apps/plugins/mpegplayer/SOURCES b/apps/plugins/mpegplayer/SOURCES
index 004c6395a2..3d5a4c2375 100644
--- a/apps/plugins/mpegplayer/SOURCES
+++ b/apps/plugins/mpegplayer/SOURCES
@@ -1,22 +1,22 @@
1alloc.c 1alloc.c
2decode.c 2decode.c
3header.c 3header.c
4 4idct.c
5motion_comp.c 5motion_comp.c
6 6
7#ifdef CPU_COLDFIRE
8idct_coldfire.S
9#elif defined CPU_ARM
10idct_arm.S
11#endif
12
7#ifdef CPU_ARM 13#ifdef CPU_ARM
8idct_arm_c.c
9motion_comp_arm_c.c 14motion_comp_arm_c.c
10motion_comp_arm_s.S 15motion_comp_arm_s.S
11#else /* other CPU or SIM */ 16#else /* other CPU or SIM */
12idct.c
13motion_comp_c.c 17motion_comp_c.c
14#endif /* CPU_* */ 18#endif /* CPU_* */
15 19
16#ifdef CPU_COLDFIRE
17idct_coldfire.S
18#endif
19
20slice.c 20slice.c
21video_out_rockbox.c 21video_out_rockbox.c
22mpeg_settings.c 22mpeg_settings.c
diff --git a/apps/plugins/mpegplayer/idct.c b/apps/plugins/mpegplayer/idct.c
index bf7097401e..de192e3549 100644
--- a/apps/plugins/mpegplayer/idct.c
+++ b/apps/plugins/mpegplayer/idct.c
@@ -29,6 +29,27 @@
29#include "attributes.h" 29#include "attributes.h"
30#include "mpeg2_internal.h" 30#include "mpeg2_internal.h"
31 31
32/* idct main entry points; assigned by mpeg2_idct_init() */
33void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
34void (* mpeg2_idct_add) (int last, int16_t * block,
35 uint8_t * dest, int stride);
36
37#ifdef CPU_COLDFIRE
38/* assembler functions (ColdFire implementation) */
39extern void mpeg2_idct_copy_coldfire(int16_t * block, uint8_t * dest,
40 const int stride);
41extern void mpeg2_idct_add_coldfire(const int last, int16_t * block,
42 uint8_t * dest, const int stride);
43
44#elif defined CPU_ARM
45/* assembler functions (ARM implementation, defined in idct_arm.S) */
46extern void mpeg2_idct_copy_arm(int16_t * block, uint8_t * dest,
47 const int stride);
48extern void mpeg2_idct_add_arm(const int last, int16_t * block,
49 uint8_t * dest, const int stride);
50
51#else /* !CPU_COLDFIRE, !CPU_ARM: generic C implementation */
52
32#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ 53#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
33#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ 54#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
34#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ 55#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
@@ -36,53 +57,14 @@
36#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ 57#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
37#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ 58#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */
38 59
39/* idct main entry point */
40void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
41void (* mpeg2_idct_add) (int last, int16_t * block,
42 uint8_t * dest, int stride);
43
44/* 60/*
45 * In legal streams, the IDCT output should be between -384 and +384. 61 * In legal streams, the IDCT output should be between -384 and +384.
46 * In corrupted streams, it is possible to force the IDCT output to go 62 * In corrupted streams, it is possible to force the IDCT output to go
47 * to +-3826 - this is the worst case for a column IDCT where the 63 * to +-3826 - this is the worst case for a column IDCT where the
48 * column inputs are 16-bit values. 64 * column inputs are 16-bit values.
49 */ 65 */
50#ifdef CPU_COLDFIRE
51static inline unsigned CLIP(int value)
52{
53 asm ( /* Note: Uses knowledge that only the low byte of the result is used */
54 "cmp.l #255,%[v] \n" /* overflow? */
55 "bls.b 1f \n" /* no: return value */
56 "spl.b %[v] \n" /* yes: set low byte to appropriate boundary */
57 "1: \n"
58 : /* outputs */
59 [v]"+d"(value)
60 );
61 return value;
62}
63#elif defined CPU_ARM
64static inline unsigned CLIP(int value)
65{
66 asm volatile ( /* Note: Uses knowledge that only the low byte of the result is used */
67 "cmp %[v], #255 \n"
68 "mvnhi %[v], %[v], asr #31 \n"
69 : /* outputs */
70 [v]"+r"(value)
71 );
72 return value;
73}
74#else
75uint8_t mpeg2_clip[3840 * 2 + 256] IBSS_ATTR; 66uint8_t mpeg2_clip[3840 * 2 + 256] IBSS_ATTR;
76#define CLIP(i) ((mpeg2_clip + 3840)[i]) 67#define CLIP(i) ((mpeg2_clip + 3840)[i])
77#endif
78
79#ifdef CPU_COLDFIRE
80/* assembler functions */
81extern void mpeg2_idct_copy_coldfire(int16_t * block, uint8_t * dest,
82 const int stride);
83extern void mpeg2_idct_add_coldfire(const int last, int16_t * block,
84 uint8_t * dest, const int stride);
85#else /* !CPU_COLDFIE */
86 68
87#if 0 69#if 0
88#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ 70#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
@@ -266,7 +248,7 @@ static void mpeg2_idct_add_c (const int last, int16_t * block,
266 } 248 }
267} 249}
268 250
269#endif /* !CPU_COLDFIRE */ 251#endif /* CPU selection */
270 252
271void mpeg2_idct_init (void) 253void mpeg2_idct_init (void)
272{ 254{
@@ -279,12 +261,13 @@ void mpeg2_idct_init (void)
279#ifdef CPU_COLDFIRE 261#ifdef CPU_COLDFIRE
280 mpeg2_idct_copy = mpeg2_idct_copy_coldfire; 262 mpeg2_idct_copy = mpeg2_idct_copy_coldfire;
281 mpeg2_idct_add = mpeg2_idct_add_coldfire; 263 mpeg2_idct_add = mpeg2_idct_add_coldfire;
264#elif defined CPU_ARM
265 mpeg2_idct_copy = mpeg2_idct_copy_arm;
266 mpeg2_idct_add = mpeg2_idct_add_arm;
282#else 267#else
283 mpeg2_idct_copy = mpeg2_idct_copy_c; 268 mpeg2_idct_copy = mpeg2_idct_copy_c;
284 mpeg2_idct_add = mpeg2_idct_add_c; 269 mpeg2_idct_add = mpeg2_idct_add_c;
285#endif
286 270
287#if !defined(CPU_COLDFIRE) && !defined(CPU_ARM)
288 for (i = -3840; i < 3840 + 256; i++) 271 for (i = -3840; i < 3840 + 256; i++)
289 CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i); 272 CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
290#endif 273#endif
diff --git a/apps/plugins/mpegplayer/idct_arm.S b/apps/plugins/mpegplayer/idct_arm.S
new file mode 100644
index 0000000000..a340f40a07
--- /dev/null
+++ b/apps/plugins/mpegplayer/idct_arm.S
@@ -0,0 +1,440 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id $
9 *
10 * Copyright (C) 2007 by Michael Sevakis
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20 .global mpeg2_idct_copy_arm
21 .type mpeg2_idct_copy_arm, %function
22 .global mpeg2_idct_add_arm
23 .type mpeg2_idct_add_arm, %function
24
25
26/* In-place 8x8 IDCT of the block at r0 (row pass, then column pass).
27 * Custom calling convention: r0 contains block pointer and is non-volatile
28 * r1-r12 are clobbered; all non-volatile c context is saved and restored by the callers on its behalf
29 */
30.idct:
31 add r12, r0, #128 /* r12 = end of block (128 bytes) */
321: /* row pass: 8 consecutive coefficients per iteration */
33 ldrsh r1, [r0, #0] /* d0 */
34 ldrsh r2, [r0, #2] /* d1 */
35 ldrsh r3, [r0, #4] /* d2 */
36 ldrsh r4, [r0, #6] /* d3 */
37 ldrsh r5, [r0, #8] /* d0 */
38 ldrsh r6, [r0, #10] /* d1 */
39 ldrsh r7, [r0, #12] /* d2 */
40 ldrsh r8, [r0, #14] /* d3 */
41 orrs r9, r2, r3 /* shortcut when coefficients 1..7 of the row are all zero */
42 orreqs r9, r4, r5
43 orreqs r9, r6, r7
44 cmpeq r8, #0
45 bne 2f /* something nonzero: do the full row transform */
46 mov r1, r1, asl #15 /* high halfword = block[0] >> 1 */
47 bic r1, r1, #0x8000 /* clear bit 15 (was bit 0 of block[0]) */
48 orr r1, r1, r1, lsr #16 /* replicate high halfword into low halfword */
49 str r1, [r0], #4 /* fill all 8 row entries, r0 ends at next row */
50 str r1, [r0], #4
51 str r1, [r0], #4
52 str r1, [r0], #4
53 cmp r0, r12
54 blo 1b
55 b 3f /* last row done: continue with the column pass */
562:
57 mov r1, r1, asl #11 /* r1 = d0 = (block[0] << 11) + 2048 */
58 add r1, r1, #2048
59 add r1, r1, r3, asl #11 /* r1 = t0 = d0 + (block[2] << 11) */
60 sub r3, r1, r3, asl #12 /* r3 = t1 = d0 - (block[2] << 11) */
61
62 add r9, r2, r4 /* r9 = tmp = (d1+d3)*(1108/4) */
63 add r10, r9, r9, asl #2
64 add r10, r10, r9, asl #4
65 add r9, r10, r9, asl #8
66
67 add r10, r2, r2, asl #4 /* r2 = t2 = tmp + (d1*(1568/32)*8) */
68 add r2, r10, r2, asl #5
69 add r2, r9, r2, asl #3
70
71 add r10, r4, r4, asl #2 /* r4 = t3 = tmp - (d3*(3784/8)*2) */
72 rsb r10, r10, r4, asl #6
73 add r4, r4, r10, asl #3
74 sub r4, r9, r4, asl #1
75 /* t2 & t3 are 1/4 final value here */
76 add r1, r1, r2, asl #2 /* r1 = a0 = t0 + t2 */
77 sub r2, r1, r2, asl #3 /* r2 = a3 = t0 - t2 */
78 add r3, r3, r4, asl #2 /* r3 = a1 = t1 + t3 */
79 sub r4, r3, r4, asl #3 /* r4 = a2 = t1 - t3 */
80
81 add r9, r8, r5 /* r9 = tmp = 565*(d3 + d0) */
82 add r10, r9, r9, asl #4
83 add r10, r10, r10, asl #5
84 add r9, r10, r9, asl #2
85
86 add r10, r5, r5, asl #4 /* r5 = t0 = tmp + (((2276/4)*d0)*4) */
87 add r10, r10, r10, asl #5
88 add r5, r10, r5, asl #3
89 add r5, r9, r5, asl #2
90
91 add r10, r8, r8, asl #2 /* r8 = t1 = tmp - (((3406/2)*d3)*2) */
92 add r10, r10, r10, asl #4
93 add r10, r10, r8, asl #7
94 rsb r8, r8, r10, asl #3
95 sub r8, r9, r8, asl #1
96
97 add r9, r6, r7 /* r9 = tmp = (2408/8)*(d1 + d2) */
98 add r10, r9, r9, asl #3
99 add r10, r10, r10, asl #5
100 add r9, r10, r9, asl #2
101
102 add r10, r7, r7, asl #3 /* r7 = t2 = (tmp*8) - 799*d2 */
103 add r10, r10, r7, asl #4
104 rsb r7, r7, r10, asl #5
105 rsb r7, r7, r9, asl #3
106
107 sub r10, r6, r6, asl #4 /* r6 = t3 = (tmp*8) - 4017*d1 */
108 sub r10, r10, r6, asl #6
109 add r10, r10, r6, asl #12
110 add r6, r10, r6 /* NOTE(review): r10 is already 4017*d1 here, so this yields 4018*d1 - confirm intended */
111 rsb r6, r6, r9, asl #3
112 /* t0 = r5, t1 = r8, t2 = r7, t3 = r6*/
113 add r9, r5, r7 /* r9 = b0 = t0 + t2 */
114 add r10, r8, r6 /* r10 = b3 = t1 + t3 */
115 sub r5, r5, r7 /* t0 -= t2 */
116 sub r8, r8, r6 /* t1 -= t3 */
117 add r6, r5, r8 /* r6 = t0 + t1 */
118 sub r7, r5, r8 /* r7 = t0 - t1 */
119
120 add r11, r6, r6, asr #2 /* r6 = b1*2 = r6*(181/128) */
121 add r11, r11, r11, asr #5
122 add r6, r11, r6, asr #3
123 add r11, r7, r7, asr #2 /* r7 = b2*2 = r7*(181/128) */
124 add r11, r11, r11, asr #5
125 add r7, r11, r7, asr #3
126 /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */
127 /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */
128 add r5, r1, r9 /* block[0] = (a0 + b0) >> 12 */
129 mov r5, r5, asr #12
130 strh r5, [r0], #2
131 add r8, r3, r6, asr #1 /* block[1] = (a1 + b1) >> 12 */
132 mov r8, r8, asr #12
133 strh r8, [r0], #2
134 add r5, r4, r7, asr #1 /* block[2] = (a2 + b2) >> 12 */
135 mov r5, r5, asr #12
136 strh r5, [r0], #2
137 add r8, r2, r10 /* block[3] = (a3 + b3) >> 12 */
138 mov r8, r8, asr #12
139 strh r8, [r0], #2
140 sub r5, r2, r10 /* block[4] = (a3 - b3) >> 12 */
141 mov r5, r5, asr #12
142 strh r5, [r0], #2
143 sub r8, r4, r7, asr #1 /* block[5] = (a2 - b2) >> 12 */
144 mov r8, r8, asr #12
145 strh r8, [r0], #2
146 sub r5, r3, r6, asr #1 /* block[6] = (a1 - b1) >> 12 */
147 mov r5, r5, asr #12
148 strh r5, [r0], #2
149 sub r8, r1, r9 /* block[7] = (a0 - b0) >> 12 */
150 mov r8, r8, asr #12
151 strh r8, [r0], #2
152 cmp r0, r12
153 blo 1b
1543: /* column pass: r0 == end of block on entry */
155 sub r0, r0, #128 /* rewind r0 to the start of the block */
156 add r12, r0, #16 /* r12 = end of first row (8 columns * 2 bytes) */
1574: /* one column per iteration, elements are 16 bytes apart */
158 ldrsh r1, [r0, #0*8] /* d0 */
159 ldrsh r2, [r0, #2*8] /* d1 */
160 ldrsh r3, [r0, #4*8] /* d2 */
161 ldrsh r4, [r0, #6*8] /* d3 */
162 ldrsh r5, [r0, #8*8] /* d0 */
163 ldrsh r6, [r0, #10*8] /* d1 */
164 ldrsh r7, [r0, #12*8] /* d2 */
165 ldrsh r8, [r0, #14*8] /* d3 */
166
167 mov r1, r1, asl #11 /* r1 = d0 = (block[8*0] << 11) + 65536 */
168 add r1, r1, #65536
169 add r1, r1, r3, asl #11 /* r1 = t0 = d0 + d2:(block[8*2] << 11) */
170 sub r3, r1, r3, asl #12 /* r3 = t1 = d0 - d2:(block[8*2] << 11) */
171
172 add r9, r2, r4 /* r9 = tmp = (d1+d3)*(1108/4) */
173 add r10, r9, r9, asl #2
174 add r10, r10, r9, asl #4
175 add r9, r10, r9, asl #8
176
177 add r11, r2, r2, asl #4 /* r2 = t2 = tmp + (d1*(1568/32)*8) */
178 add r2, r11, r2, asl #5
179 add r2, r9, r2, asl #3
180
181 add r10, r4, r4, asl #2 /* r4 = t3 = tmp - (d3*(3784/8)*2) */
182 rsb r10, r10, r4, asl #6
183 add r4, r4, r10, asl #3
184 sub r4, r9, r4, asl #1
185 /* t2 & t3 are 1/4 final value here */
186 add r1, r1, r2, asl #2 /* r1 = a0 = t0 + t2 */
187 sub r2, r1, r2, asl #3 /* r2 = a3 = t0 - t2 */
188 add r3, r3, r4, asl #2 /* r3 = a1 = t1 + t3 */
189 sub r4, r3, r4, asl #3 /* r4 = a2 = t1 - t3 */
190
191 add r9, r8, r5 /* r9 = tmp = 565*(d3 + d0) */
192 add r10, r9, r9, asl #4
193 add r10, r10, r10, asl #5
194 add r9, r10, r9, asl #2
195
196 add r10, r5, r5, asl #4 /* r5 = t0 = tmp + (((2276/4)*d0)*4) */
197 add r10, r10, r10, asl #5
198 add r5, r10, r5, asl #3
199 add r5, r9, r5, asl #2
200
201 add r10, r8, r8, asl #2 /* r8 = t1 = tmp - (((3406/2)*d3)*2) */
202 add r10, r10, r10, asl #4
203 add r10, r10, r8, asl #7
204 rsb r8, r8, r10, asl #3
205 sub r8, r9, r8, asl #1
206
207 add r9, r6, r7 /* r9 = tmp = (2408/8)*(d1 + d2) */
208 add r10, r9, r9, asl #3
209 add r10, r10, r10, asl #5
210 add r9, r10, r9, asl #2
211
212 add r10, r7, r7, asl #3 /* r7 = t2 = (tmp*8) - 799*d2 */
213 add r10, r10, r7, asl #4
214 rsb r7, r7, r10, asl #5
215 rsb r7, r7, r9, asl #3
216
217 sub r10, r6, r6, asl #4 /* r6 = t3 = (tmp*8) - 4017*d1 */
218 sub r10, r10, r6, asl #6
219 add r10, r10, r6, asl #12
220 add r6, r10, r6 /* NOTE(review): r10 is already 4017*d1 here, so this yields 4018*d1 - confirm intended */
221 rsb r6, r6, r9, asl #3
222 /* t0=r5, t1=r8, t2=r7, t3=r6*/
223 add r9, r5, r7 /* r9 = b0 = t0 + t2 */
224 add r10, r8, r6 /* r10 = b3 = t1 + t3 */
225 sub r5, r5, r7 /* t0 -= t2 */
226 sub r8, r8, r6 /* t1 -= t3 */
227 add r6, r5, r8 /* r6 = t0 + t1 */
228 sub r7, r5, r8 /* r7 = t0 - t1 */
229
230 add r11, r6, r6, asr #2 /* r6 = b1*2 = r6*(181/128) */
231 add r11, r11, r11, asr #5
232 add r6, r11, r6, asr #3
233 add r11, r7, r7, asr #2 /* r7 = b2*2 = r7*(181/128) */
234 add r11, r11, r11, asr #5
235 add r7, r11, r7, asr #3
236 /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */
237 /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */
238 add r5, r1, r9 /* block[8*0] = (a0 + b0) >> 17 */
239 mov r5, r5, asr #17
240 strh r5, [r0, #0*8]
241 add r8, r3, r6, asr #1 /* block[8*1] = (a1 + b1) >> 17 */
242 mov r8, r8, asr #17
243 strh r8, [r0, #2*8]
244 add r5, r4, r7, asr #1 /* block[8*2] = (a2 + b2) >> 17 */
245 mov r5, r5, asr #17
246 strh r5, [r0, #4*8]
247 add r8, r2, r10 /* block[8*3] = (a3 + b3) >> 17 */
248 mov r8, r8, asr #17
249 strh r8, [r0, #6*8]
250 sub r5, r2, r10 /* block[8*4] = (a3 - b3) >> 17 */
251 mov r5, r5, asr #17
252 strh r5, [r0, #8*8]
253 sub r8, r4, r7, asr #1 /* block[8*5] = (a2 - b2) >> 17 */
254 mov r8, r8, asr #17
255 strh r8, [r0, #10*8]
256 sub r5, r3, r6, asr #1 /* block[8*6] = (a1 - b1) >> 17 */
257 mov r5, r5, asr #17
258 strh r5, [r0, #12*8]
259 sub r8, r1, r9 /* block[8*7] = (a0 - b0) >> 17 */
260 mov r8, r8, asr #17
261 strh r8, [r0, #14*8]
262 add r0, r0, #2 /* next column */
263 cmp r0, r12
264 blo 4b
265 sub r0, r0, #16 /* restore the block pointer in r0 */
266 bx lr
267
268mpeg2_idct_copy_arm: /* args per the C prototype: r0 = block, r1 = dest, r2 = stride */
269 stmfd sp!, { r1-r2, r4-r12, lr } /* keep dest/stride and callee-saved regs; .idct clobbers r1-r12 */
270 bl .idct /* in-place IDCT, returns with r0 = block */
271 ldmfd sp!, { r1-r2 } /* r1 = dest, r2 = stride again */
272 mov r11, #0 /* zero word used to clear the block */
273 add r12, r0, #128 /* r12 = end of block */
2741: /* per row: clamp 8 results to bytes, store to dest, zero the row */
275 ldrsh r3, [r0, #0]
276 ldrsh r4, [r0, #2]
277 ldrsh r5, [r0, #4]
278 ldrsh r6, [r0, #6]
279 ldrsh r7, [r0, #8]
280 ldrsh r8, [r0, #10]
281 ldrsh r9, [r0, #12]
282 ldrsh r10, [r0, #14]
283 cmp r3, #255 /* clamp idiom: unsigned compare catches <0 and >255 */
284 mvnhi r3, r3, asr #31 /* negative -> 0, too large -> low byte 0xff */
285 strb r3, [r1, #0] /* only the low byte is stored */
286 str r11, [r0], #4 /* zero two coefficients, advance r0 */
287 cmp r4, #255
288 mvnhi r4, r4, asr #31
289 strb r4, [r1, #1]
290 cmp r5, #255
291 mvnhi r5, r5, asr #31
292 strb r5, [r1, #2]
293 str r11, [r0], #4
294 cmp r6, #255
295 mvnhi r6, r6, asr #31
296 strb r6, [r1, #3]
297 cmp r7, #255
298 mvnhi r7, r7, asr #31
299 strb r7, [r1, #4]
300 str r11, [r0], #4
301 cmp r8, #255
302 mvnhi r8, r8, asr #31
303 strb r8, [r1, #5]
304 cmp r9, #255
305 mvnhi r9, r9, asr #31
306 strb r9, [r1, #6]
307 str r11, [r0], #4
308 cmp r10, #255
309 mvnhi r10, r10, asr #31
310 strb r10, [r1, #7]
311 add r1, r1, r2 /* dest += stride */
312 cmp r0, r12
313 blo 1b
314 ldmfd sp!, { r4-r12, pc } /* restore callee-saved regs and return */
315
316mpeg2_idct_add_arm: /* args per the C prototype: r0 = last, r1 = block, r2 = dest, r3 = stride */
317 cmp r0, #129 /* last != 129: always do the full IDCT */
318 mov r0, r1 /* r0 = block from here on */
319 ldreqsh r1, [r0, #0] /* last == 129: inspect block[0] */
320 bne 1f
321 and r1, r1, #0x70
322 cmp r1, #0x40 /* (block[0] & 0x70) == 0x40: full IDCT as well */
323 bne 3f /* otherwise take the DC-only path */
3241:
325 stmfd sp!, { r2-r12, lr } /* keep dest (r2), stride (r3) and callee-saved regs */
326 bl .idct /* in-place IDCT, returns with r0 = block */
327 ldmfd sp!, { r1-r2 } /* r1 = dest, r2 = stride (popped from saved r2, r3) */
328 mov r11, #0 /* zero word used to clear the block */
329 add r12, r0, #128 /* r12 = end of block */
3302: /* per row: dest[i] = clamp(dest[i] + block[i]), then zero the row */
331 ldrb r3, [r1, #0]
332 ldrb r4, [r1, #1]
333 ldrb r5, [r1, #2]
334 ldrb r6, [r1, #3]
335 ldrsh r7, [r0, #0]
336 ldrsh r8, [r0, #2]
337 ldrsh r9, [r0, #4]
338 ldrsh r10, [r0, #6]
339 add r7, r7, r3
340 ldrb r3, [r1, #4] /* loads for the second half are interleaved with the first */
341 cmp r7, #255 /* clamp idiom: unsigned compare catches <0 and >255 */
342 mvnhi r7, r7, asr #31 /* negative -> 0, too large -> low byte 0xff */
343 strb r7, [r1, #0]
344 ldrsh r7, [r0, #8]
345 add r8, r8, r4
346 ldrb r4, [r1, #5]
347 cmp r8, #255
348 mvnhi r8, r8, asr #31
349 strb r8, [r1, #1]
350 ldrsh r8, [r0, #10]
351 add r9, r9, r5
352 ldrb r5, [r1, #6]
353 cmp r9, #255
354 mvnhi r9, r9, asr #31
355 strb r9, [r1, #2]
356 ldrsh r9, [r0, #12]
357 add r10, r10, r6
358 ldrb r6, [r1, #7]
359 cmp r10, #255
360 mvnhi r10, r10, asr #31
361 strb r10, [r1, #3]
362 ldrsh r10, [r0, #14] /* last coefficient of the row read before zeroing starts */
363 str r11, [r0], #4 /* zero two coefficients, advance r0 */
364 add r7, r7, r3
365 cmp r7, #255
366 mvnhi r7, r7, asr #31
367 strb r7, [r1, #4]
368 str r11, [r0], #4
369 add r8, r8, r4
370 cmp r8, #255
371 mvnhi r8, r8, asr #31
372 strb r8, [r1, #5]
373 str r11, [r0], #4
374 add r9, r9, r5
375 cmp r9, #255
376 mvnhi r9, r9, asr #31
377 strb r9, [r1, #6]
378 add r10, r10, r6
379 cmp r10, #255
380 mvnhi r10, r10, asr #31
381 strb r10, [r1, #7]
382 str r11, [r0], #4
383 add r1, r1, r2 /* dest += stride */
384 cmp r0, r12
385 blo 2b
386 ldmfd sp!, { r4-r12, pc } /* restore callee-saved regs and return */
3873: /* DC-only path: add one constant to all 8x8 dest pixels */
388 stmfd sp!, { r4-r11 }
389 ldrsh r1, [r0, #0] /* r1 = block[0] */
390 mov r11, #0
391 strh r11, [r0, #0] /* block[0] = 0 */
392 strh r11, [r0, #126] /* block[63] = 0 */
393 add r1, r1, #64 /* r1 = block[0] + 64; the added value is r1 >> 7 below */
394 add r0, r2, r3, asl #3 /* r0 = dest + 8*stride = end of the 8 rows */
3954:
396 ldrb r4, [r2, #0]
397 ldrb r5, [r2, #1]
398 ldrb r6, [r2, #2]
399 ldrb r7, [r2, #3]
400 ldrb r8, [r2, #4]
401 ldrb r9, [r2, #5]
402 ldrb r10, [r2, #6]
403 ldrb r11, [r2, #7]
404 add r4, r4, r1, asr #7 /* pixel + DC */
405 cmp r4, #255 /* same clamp idiom as above */
406 mvnhi r4, r4, asr #31
407 strb r4, [r2, #0]
408 add r5, r5, r1, asr #7
409 cmp r5, #255
410 mvnhi r5, r5, asr #31
411 strb r5, [r2, #1]
412 add r6, r6, r1, asr #7
413 cmp r6, #255
414 mvnhi r6, r6, asr #31
415 strb r6, [r2, #2]
416 add r7, r7, r1, asr #7
417 cmp r7, #255
418 mvnhi r7, r7, asr #31
419 strb r7, [r2, #3]
420 add r8, r8, r1, asr #7
421 cmp r8, #255
422 mvnhi r8, r8, asr #31
423 strb r8, [r2, #4]
424 add r9, r9, r1, asr #7
425 cmp r9, #255
426 mvnhi r9, r9, asr #31
427 strb r9, [r2, #5]
428 add r10, r10, r1, asr #7
429 cmp r10, #255
430 mvnhi r10, r10, asr #31
431 strb r10, [r2, #6]
432 add r11, r11, r1, asr #7
433 cmp r11, #255
434 mvnhi r11, r11, asr #31
435 strb r11, [r2, #7]
436 add r2, r2, r3 /* dest += stride */
437 cmp r2, r0
438 blo 4b
439 ldmfd sp!, { r4-r11 }
440 bx lr
diff --git a/apps/plugins/mpegplayer/idct_arm_c.c b/apps/plugins/mpegplayer/idct_arm_c.c
deleted file mode 100644
index 9805f421a6..0000000000
--- a/apps/plugins/mpegplayer/idct_arm_c.c
+++ /dev/null
@@ -1,529 +0,0 @@
1/*
2 * idct.c
3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5 *
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
7 * See http://libmpeg2.sourceforge.net/ for updates.
8 *
9 * mpeg2dec is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * mpeg2dec is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include "plugin.h"
25
26#include "mpeg2dec_config.h"
27
28#include "mpeg2.h"
29#include "attributes.h"
30#include "mpeg2_internal.h"
31
32/* 101100011001 */
33#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
34/* 101001110100 */
35#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
36/* 100101101000 */
37#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
38/* 011001001001 */
39#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
40/* 010001010100 */
41#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
42/* 001000110101 */
43#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */
44
45/* idct main entry point */
46void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
47void (* mpeg2_idct_add) (int last, int16_t * block,
48 uint8_t * dest, int stride);
49
50/*
51 * In legal streams, the IDCT output should be between -384 and +384.
52 * In corrupted streams, it is possible to force the IDCT output to go
53 * to +-3826 - this is the worst case for a column IDCT where the
54 * column inputs are 16-bit values.
55 */
56#if 0
57#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
58 do { \
59 t0 = W0 * d0 + W1 * d1; \
60 t1 = W0 * d1 - W1 * d0; \
61 } while (0)
62#else
63#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
64 do { \
65 int tmp = W0 * (d0 + d1); \
66 t0 = tmp + (W1 - W0) * d1; \
67 t1 = tmp - (W1 + W0) * d0; \
68 } while (0)
69#endif
70
71/* Custom calling convention:
72 * r0 contains block pointer and is non-volatile
73 * all non-volatile c context saved and restored on its behalf
74 */
75static void idct (int16_t * block) __attribute__((naked,used));
76static void idct (int16_t * block)
77{
78 asm volatile (
79 "add r12, r0, #128 \n"
80 "1: \n"
81 "ldrsh r1, [r0, #0] \n" /* d0 */
82 "ldrsh r2, [r0, #2] \n" /* d1 */
83 "ldrsh r3, [r0, #4] \n" /* d2 */
84 "ldrsh r4, [r0, #6] \n" /* d3 */
85 "ldrsh r5, [r0, #8] \n" /* d0 */
86 "ldrsh r6, [r0, #10] \n" /* d1 */
87 "ldrsh r7, [r0, #12] \n" /* d2 */
88 "ldrsh r8, [r0, #14] \n" /* d3 */
89 "orrs r9, r2, r3 \n"
90 "orreqs r9, r4, r5 \n"
91 "orreqs r9, r6, r7 \n"
92 "cmpeq r8, #0 \n"
93 "bne 2f \n"
94 "mov r1, r1, asl #15 \n"
95 "bic r1, r1, #0x8000 \n"
96 "orr r1, r1, r1, lsr #16 \n"
97 "str r1, [r0], #4 \n"
98 "str r1, [r0], #4 \n"
99 "str r1, [r0], #4 \n"
100 "str r1, [r0], #4 \n"
101 "cmp r0, r12 \n"
102 "blo 1b \n"
103 "b 3f \n"
104 "2: \n"
105 "mov r1, r1, asl #11 \n" /* r1 = d0 = (block[0] << 11) + 2048 */
106 "add r1, r1, #2048 \n"
107 "add r1, r1, r3, asl #11 \n" /* r1 = t0 = d0 + (block[2] << 11) */
108 "sub r3, r1, r3, asl #12 \n" /* r3 = t1 = d0 - (block[2] << 11) */
109
110 "add r9, r2, r4 \n" /* r9 = tmp = (d1+d3)*(1108/4) */
111 "add r10, r9, r9, asl #2 \n"
112 "add r10, r10, r9, asl #4 \n"
113 "add r9, r10, r9, asl #8 \n"
114
115 "add r10, r2, r2, asl #4 \n" /* r2 = t2 = tmp + (d1*(1568/32)*8) */
116 "add r2, r10, r2, asl #5 \n"
117 "add r2, r9, r2, asl #3 \n"
118
119 "add r10, r4, r4, asl #2 \n" /* r4 = t3 = tmp - (d3*(3784/8)*2) */
120 "rsb r10, r10, r4, asl #6 \n"
121 "add r4, r4, r10, asl #3 \n"
122 "sub r4, r9, r4, asl #1 \n"
123 /* t2 & t3 are 1/4 final value here */
124 "add r1, r1, r2, asl #2 \n" /* r1 = a0 = t0 + t2 */
125 "sub r2, r1, r2, asl #3 \n" /* r2 = a3 = t0 - t2 */
126 "add r3, r3, r4, asl #2 \n" /* r3 = a1 = t1 + t3 */
127 "sub r4, r3, r4, asl #3 \n" /* r4 = a2 = t1 - t3 */
128
129 "add r9, r8, r5 \n" /* r9 = tmp = 565*(d3 + d0) */
130 "add r10, r9, r9, asl #4 \n"
131 "add r10, r10, r10, asl #5 \n"
132 "add r9, r10, r9, asl #2 \n"
133
134 "add r10, r5, r5, asl #4 \n" /* r5 = t0 = tmp + (((2276/4)*d0)*4) */
135 "add r10, r10, r10, asl #5 \n"
136 "add r5, r10, r5, asl #3 \n"
137 "add r5, r9, r5, asl #2 \n"
138
139 "add r10, r8, r8, asl #2 \n" /* r8 = t1 = tmp - (((3406/2)*d3)*2) */
140 "add r10, r10, r10, asl #4 \n"
141 "add r10, r10, r8, asl #7 \n"
142 "rsb r8, r8, r10, asl #3 \n"
143 "sub r8, r9, r8, asl #1 \n"
144
145 "add r9, r6, r7 \n" /* r9 = tmp = (2408/8)*(d1 + d2) */
146 "add r10, r9, r9, asl #3 \n"
147 "add r10, r10, r10, asl #5 \n"
148 "add r9, r10, r9, asl #2 \n"
149
150 "add r10, r7, r7, asl #3 \n" /* r7 = t2 = (tmp*8) - 799*d2 */
151 "add r10, r10, r7, asl #4 \n"
152 "rsb r7, r7, r10, asl #5 \n"
153 "rsb r7, r7, r9, asl #3 \n"
154
155 "sub r10, r6, r6, asl #4 \n" /* r6 = t3 = (tmp*8) - 4017*d1 */
156 "sub r10, r10, r6, asl #6 \n"
157 "add r10, r10, r6, asl #12 \n"
158 "add r6, r10, r6 \n"
159 "rsb r6, r6, r9, asl #3 \n"
160 /* t0 = r5, t1 = r8, t2 = r7, t3 = r6*/
161 "add r9, r5, r7 \n" /* r9 = b0 = t0 + t2 */
162 "add r10, r8, r6 \n" /* r10 = b3 = t1 + t3 */
163 "sub r5, r5, r7 \n" /* t0 -= t2 */
164 "sub r8, r8, r6 \n" /* t1 -= t3 */
165 "add r6, r5, r8 \n" /* r6 = t0 + t1 */
166 "sub r7, r5, r8 \n" /* r7 = t0 - t1 */
167
168 "add r11, r6, r6, asr #2 \n" /* r6 = b1 = r6*(181/128) */
169 "add r11, r11, r11, asr #5 \n"
170 "add r6, r11, r6, asr #3 \n"
171 "add r11, r7, r7, asr #2 \n" /* r7 = b2 = r7*(181/128) */
172 "add r11, r11, r11, asr #5 \n"
173 "add r7, r11, r7, asr #3 \n"
174 /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */
175 /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */
176 "add r5, r1, r9 \n" /* block[0] = (a0 + b0) >> 12 */
177 "mov r5, r5, asr #12 \n"
178 "strh r5, [r0], #2 \n"
179 "add r8, r3, r6, asr #1 \n" /* block[1] = (a1 + b1) >> 12 */
180 "mov r8, r8, asr #12 \n"
181 "strh r8, [r0], #2 \n"
182 "add r5, r4, r7, asr #1 \n" /* block[2] = (a2 + b2) >> 12 */
183 "mov r5, r5, asr #12 \n"
184 "strh r5, [r0], #2 \n"
185 "add r8, r2, r10 \n" /* block[3] = (a3 + b3) >> 12 */
186 "mov r8, r8, asr #12 \n"
187 "strh r8, [r0], #2 \n"
188 "sub r5, r2, r10 \n" /* block[4] = (a3 - b3) >> 12 */
189 "mov r5, r5, asr #12 \n"
190 "strh r5, [r0], #2 \n"
191 "sub r8, r4, r7, asr #1 \n" /* block[5] = (a2 - b2) >> 12 */
192 "mov r8, r8, asr #12 \n"
193 "strh r8, [r0], #2 \n"
194 "sub r5, r3, r6, asr #1 \n" /* block[6] = (a1 - b1) >> 12 */
195 "mov r5, r5, asr #12 \n"
196 "strh r5, [r0], #2 \n"
197 "sub r8, r1, r9 \n" /* block[7] = (a0 - b0) >> 12 */
198 "mov r8, r8, asr #12 \n"
199 "strh r8, [r0], #2 \n"
200 "cmp r0, r12 \n"
201 "blo 1b \n"
202 "3: \n"
203 "sub r0, r0, #128 \n"
204 "add r12, r0, #16 \n"
205 "4: \n"
206 "ldrsh r1, [r0, #0*8] \n" /* d0 */
207 "ldrsh r2, [r0, #2*8] \n" /* d1 */
208 "ldrsh r3, [r0, #4*8] \n" /* d2 */
209 "ldrsh r4, [r0, #6*8] \n" /* d3 */
210 "ldrsh r5, [r0, #8*8] \n" /* d0 */
211 "ldrsh r6, [r0, #10*8] \n" /* d1 */
212 "ldrsh r7, [r0, #12*8] \n" /* d2 */
213 "ldrsh r8, [r0, #14*8] \n" /* d3 */
214
215 "mov r1, r1, asl #11 \n" /* r1 = d0 = (block[0] << 11) + 2048 */
216 "add r1, r1, #65536 \n"
217 "add r1, r1, r3, asl #11 \n" /* r1 = t0 = d0 + d2:(block[2] << 11) */
218 "sub r3, r1, r3, asl #12 \n" /* r3 = t1 = d0 - d2:(block[2] << 11) */
219
220 "add r9, r2, r4 \n" /* r9 = tmp = (d1+d3)*(1108/4) */
221 "add r10, r9, r9, asl #2 \n"
222 "add r10, r10, r9, asl #4 \n"
223 "add r9, r10, r9, asl #8 \n"
224
225 "add r11, r2, r2, asl #4 \n" /* r2 = t2 = tmp + (d1*(1568/32)*8) */
226 "add r2, r11, r2, asl #5 \n"
227 "add r2, r9, r2, asl #3 \n"
228
229 "add r10, r4, r4, asl #2 \n" /* r4 = t3 = tmp - (d3*(3784/8)*2) */
230 "rsb r10, r10, r4, asl #6 \n"
231 "add r4, r4, r10, asl #3 \n"
232 "sub r4, r9, r4, asl #1 \n"
233 /* t2 & t3 are 1/4 final value here */
234 "add r1, r1, r2, asl #2 \n" /* r1 = a0 = t0 + t2 */
235 "sub r2, r1, r2, asl #3 \n" /* r2 = a3 = t0 - t2 */
236 "add r3, r3, r4, asl #2 \n" /* r3 = a1 = t1 + t3 */
237 "sub r4, r3, r4, asl #3 \n" /* r4 = a2 = t1 - t3 */
238
239 "add r9, r8, r5 \n" /* r9 = tmp = 565*(d3 + d0) */
240 "add r10, r9, r9, asl #4 \n"
241 "add r10, r10, r10, asl #5 \n"
242 "add r9, r10, r9, asl #2 \n"
243
244 "add r10, r5, r5, asl #4 \n" /* r5 = t0 = tmp + (((2276/4)*d0)*4) */
245 "add r10, r10, r10, asl #5 \n"
246 "add r5, r10, r5, asl #3 \n"
247 "add r5, r9, r5, asl #2 \n"
248
249 "add r10, r8, r8, asl #2 \n" /* r8 = t1 = tmp - (((3406/2)*d3)*2) */
250 "add r10, r10, r10, asl #4 \n"
251 "add r10, r10, r8, asl #7 \n"
252 "rsb r8, r8, r10, asl #3 \n"
253 "sub r8, r9, r8, asl #1 \n"
254
255 "add r9, r6, r7 \n" /* r9 = tmp = (2408/8)*(d1 + d2) */
256 "add r10, r9, r9, asl #3 \n"
257 "add r10, r10, r10, asl #5 \n"
258 "add r9, r10, r9, asl #2 \n"
259
260 "add r10, r7, r7, asl #3 \n" /* r7 = t2 = (tmp*8) - 799*d2 */
261 "add r10, r10, r7, asl #4 \n"
262 "rsb r7, r7, r10, asl #5 \n"
263 "rsb r7, r7, r9, asl #3 \n"
264
265 "sub r10, r6, r6, asl #4 \n" /* r6 = t3 = (tmp*8) - 4017*d1 */
266 "sub r10, r10, r6, asl #6 \n"
267 "add r10, r10, r6, asl #12 \n"
268 "add r6, r10, r6 \n"
269 "rsb r6, r6, r9, asl #3 \n"
270 /* t0=r5, t1=r8, t2=r7, t3=r6*/
271 "add r9, r5, r7 \n" /* r9 = b0 = t0 + t2 */
272 "add r10, r8, r6 \n" /* r10 = b3 = t1 + t3 */
273 "sub r5, r5, r7 \n" /* t0 -= t2 */
274 "sub r8, r8, r6 \n" /* t1 -= t3 */
275 "add r6, r5, r8 \n" /* r6 = t0 + t1 */
276 "sub r7, r5, r8 \n" /* r7 = t0 - t1 */
277
278 "add r11, r6, r6, asr #2 \n" /* r6 = b1 = r5*(181/128) */
279 "add r11, r11, r11, asr #5 \n"
280 "add r6, r11, r6, asr #3 \n"
281 "add r11, r7, r7, asr #2 \n" /* r7 = b2 = r6*(181/128) */
282 "add r11, r11, r11, asr #5 \n"
283 "add r7, r11, r7, asr #3 \n"
284 /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */
285 /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */
286 "add r5, r1, r9 \n" /* block[0] = (a0 + b0) >> 17 */
287 "mov r5, r5, asr #17 \n"
288 "strh r5, [r0, #0*8] \n"
289 "add r8, r3, r6, asr #1 \n" /* block[1] = (a1 + b1) >> 17 */
290 "mov r8, r8, asr #17 \n"
291 "strh r8, [r0, #2*8] \n"
292 "add r5, r4, r7, asr #1 \n" /* block[2] = (a2 + b2) >> 17 */
293 "mov r5, r5, asr #17 \n"
294 "strh r5, [r0, #4*8] \n"
295 "add r8, r2, r10 \n" /* block[3] = (a3 + b3) >> 17 */
296 "mov r8, r8, asr #17 \n"
297 "strh r8, [r0, #6*8] \n"
298 "sub r5, r2, r10 \n" /* block[4] = (a3 - b3) >> 17 */
299 "mov r5, r5, asr #17 \n"
300 "strh r5, [r0, #8*8] \n"
301 "sub r8, r4, r7, asr #1 \n" /* block[5] = (a2 - b2) >> 17 */
302 "mov r8, r8, asr #17 \n"
303 "strh r8, [r0, #10*8] \n"
304 "sub r5, r3, r6, asr #1 \n" /* block[6] = (a1 - b1) >> 17 */
305 "mov r5, r5, asr #17 \n"
306 "strh r5, [r0, #12*8] \n"
307 "sub r8, r1, r9 \n" /* block[7] = (a0 - b0) >> 17 */
308 "mov r8, r8, asr #17 \n"
309 "strh r8, [r0, #14*8] \n"
310 "add r0, r0, #2 \n"
311 "cmp r0, r12 \n"
312 "blo 4b \n"
313 "sub r0, r0, #16 \n"
314 "bx lr \n"
315 );
316 (void)block;
317}
318
319static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
320 const int stride) __attribute__((naked));
321static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
322 const int stride)
323{
324 asm volatile(
325 "stmfd sp!, { r1-r2, \
326 r4-r12, lr } \n"
327 "bl idct \n"
328 "ldmfd sp!, { r1-r2 } \n"
329 "mov r11, #0 \n"
330 "add r12, r0, #128 \n"
331 "1: \n"
332 "ldrsh r3, [r0, #0] \n"
333 "ldrsh r4, [r0, #2] \n"
334 "ldrsh r5, [r0, #4] \n"
335 "ldrsh r6, [r0, #6] \n"
336 "ldrsh r7, [r0, #8] \n"
337 "ldrsh r8, [r0, #10] \n"
338 "ldrsh r9, [r0, #12] \n"
339 "ldrsh r10, [r0, #14] \n"
340 "cmp r3, #255 \n"
341 "mvnhi r3, r3, asr #31 \n"
342 "strb r3, [r1, #0] \n"
343 "str r11, [r0], #4 \n"
344 "cmp r4, #255 \n"
345 "mvnhi r4, r4, asr #31 \n"
346 "strb r4, [r1, #1] \n"
347 "cmp r5, #255 \n"
348 "mvnhi r5, r5, asr #31 \n"
349 "strb r5, [r1, #2] \n"
350 "str r11, [r0], #4 \n"
351 "cmp r6, #255 \n"
352 "mvnhi r6, r6, asr #31 \n"
353 "strb r6, [r1, #3] \n"
354 "cmp r7, #255 \n"
355 "mvnhi r7, r7, asr #31 \n"
356 "strb r7, [r1, #4] \n"
357 "str r11, [r0], #4 \n"
358 "cmp r8, #255 \n"
359 "mvnhi r8, r8, asr #31 \n"
360 "strb r8, [r1, #5] \n"
361 "cmp r9, #255 \n"
362 "mvnhi r9, r9, asr #31 \n"
363 "strb r9, [r1, #6] \n"
364 "str r11, [r0], #4 \n"
365 "cmp r10, #255 \n"
366 "mvnhi r10, r10, asr #31 \n"
367 "strb r10, [r1, #7] \n"
368 "add r1, r1, r2 \n"
369 "cmp r0, r12 \n"
370 "blo 1b \n"
371 "ldmfd sp!, { r4-r12, pc } \n"
372 );
373 (void)block; (void)dest; (void)stride;
374}
375
376static void mpeg2_idct_add_c (int last, int16_t * block,
377 uint8_t * dest, const int stride) __attribute__((naked));
378static void mpeg2_idct_add_c (int last, int16_t * block,
379 uint8_t * dest, const int stride)
380{
381 asm volatile (
382 "cmp r0, #129 \n"
383 "mov r0, r1 \n"
384 "ldreqsh r1, [r0, #0] \n"
385 "bne 1f \n"
386 "and r1, r1, #0x70 \n"
387 "cmp r1, #0x40 \n"
388 "bne 3f \n"
389 "1: \n"
390 "stmfd sp!, { r2-r12, lr } \n"
391 "bl idct \n"
392 "ldmfd sp!, { r1-r2 } \n"
393 "mov r11, #0 \n"
394 "add r12, r0, #128 \n"
395 "2: \n"
396 "ldrb r3, [r1, #0] \n"
397 "ldrb r4, [r1, #1] \n"
398 "ldrb r5, [r1, #2] \n"
399 "ldrb r6, [r1, #3] \n"
400 "ldrsh r7, [r0, #0] \n"
401 "ldrsh r8, [r0, #2] \n"
402 "ldrsh r9, [r0, #4] \n"
403 "ldrsh r10, [r0, #6] \n"
404 "add r7, r7, r3 \n"
405 "ldrb r3, [r1, #4] \n"
406 "cmp r7, #255 \n"
407 "mvnhi r7, r7, asr #31 \n"
408 "strb r7, [r1, #0] \n"
409 "ldrsh r7, [r0, #8] \n"
410 "add r8, r8, r4 \n"
411 "ldrb r4, [r1, #5] \n"
412 "cmp r8, #255 \n"
413 "mvnhi r8, r8, asr #31 \n"
414 "strb r8, [r1, #1] \n"
415 "ldrsh r8, [r0, #10] \n"
416 "add r9, r9, r5 \n"
417 "ldrb r5, [r1, #6] \n"
418 "cmp r9, #255 \n"
419 "mvnhi r9, r9, asr #31 \n"
420 "strb r9, [r1, #2] \n"
421 "ldrsh r9, [r0, #12] \n"
422 "add r10, r10, r6 \n"
423 "ldrb r6, [r1, #7] \n"
424 "cmp r10, #255 \n"
425 "mvnhi r10, r10, asr #31 \n"
426 "strb r10, [r1, #3] \n"
427 "ldrsh r10, [r0, #14] \n"
428 "str r11, [r0], #4 \n"
429 "add r7, r7, r3 \n"
430 "cmp r7, #255 \n"
431 "mvnhi r7, r7, asr #31 \n"
432 "strb r7, [r1, #4] \n"
433 "str r11, [r0], #4 \n"
434 "add r8, r8, r4 \n"
435 "cmp r8, #255 \n"
436 "mvnhi r8, r8, asr #31 \n"
437 "strb r8, [r1, #5] \n"
438 "str r11, [r0], #4 \n"
439 "add r9, r9, r5 \n"
440 "cmp r9, #255 \n"
441 "mvnhi r9, r9, asr #31 \n"
442 "strb r9, [r1, #6] \n"
443 "add r10, r10, r6 \n"
444 "cmp r10, #255 \n"
445 "mvnhi r10, r10, asr #31 \n"
446 "strb r10, [r1, #7] \n"
447 "str r11, [r0], #4 \n"
448 "add r1, r1, r2 \n"
449 "cmp r0, r12 \n"
450 "blo 2b \n"
451 "ldmfd sp!, { r4-r12, pc } \n"
452 "3: \n"
453 "stmfd sp!, { r4-r11 } \n"
454 "ldrsh r1, [r0, #0] \n" /* r1 = block[0] */
455 "mov r11, #0 \n"
456 "strh r11, [r0, #0] \n" /* block[0] = 0 */
457 "strh r11, [r0, #126] \n" /* block[63] = 0 */
458 "add r1, r1, #64 \n" /* r1 = DC << 7 */
459 "add r0, r2, r3, asl #3 \n"
460 "4: \n"
461 "ldrb r4, [r2, #0] \n"
462 "ldrb r5, [r2, #1] \n"
463 "ldrb r6, [r2, #2] \n"
464 "ldrb r7, [r2, #3] \n"
465 "ldrb r8, [r2, #4] \n"
466 "ldrb r9, [r2, #5] \n"
467 "ldrb r10, [r2, #6] \n"
468 "ldrb r11, [r2, #7] \n"
469 "add r4, r4, r1, asr #7 \n"
470 "cmp r4, #255 \n"
471 "mvnhi r4, r4, asr #31 \n"
472 "strb r4, [r2, #0] \n"
473 "add r5, r5, r1, asr #7 \n"
474 "cmp r5, #255 \n"
475 "mvnhi r5, r5, asr #31 \n"
476 "strb r5, [r2, #1] \n"
477 "add r6, r6, r1, asr #7 \n"
478 "cmp r6, #255 \n"
479 "mvnhi r6, r6, asr #31 \n"
480 "strb r6, [r2, #2] \n"
481 "add r7, r7, r1, asr #7 \n"
482 "cmp r7, #255 \n"
483 "mvnhi r7, r7, asr #31 \n"
484 "strb r7, [r2, #3] \n"
485 "add r8, r8, r1, asr #7 \n"
486 "cmp r8, #255 \n"
487 "mvnhi r8, r8, asr #31 \n"
488 "strb r8, [r2, #4] \n"
489 "add r9, r9, r1, asr #7 \n"
490 "cmp r9, #255 \n"
491 "mvnhi r9, r9, asr #31 \n"
492 "strb r9, [r2, #5] \n"
493 "add r10, r10, r1, asr #7 \n"
494 "cmp r10, #255 \n"
495 "mvnhi r10, r10, asr #31 \n"
496 "strb r10, [r2, #6] \n"
497 "add r11, r11, r1, asr #7 \n"
498 "cmp r11, #255 \n"
499 "mvnhi r11, r11, asr #31 \n"
500 "strb r11, [r2, #7] \n"
501 "add r2, r2, r3 \n"
502 "cmp r2, r0 \n"
503 "blo 4b \n"
504 "ldmfd sp!, { r4-r11 } \n"
505 "bx lr \n"
506 );
507 (void)last; (void)block; (void)dest; (void)stride;
508}
509
510void mpeg2_idct_init (void)
511{
512 extern uint8_t default_mpeg2_scan_norm[64];
513 extern uint8_t default_mpeg2_scan_alt[64];
514 extern uint8_t mpeg2_scan_norm[64];
515 extern uint8_t mpeg2_scan_alt[64];
516 int i, j;
517
518 mpeg2_idct_copy = mpeg2_idct_copy_c;
519 mpeg2_idct_add = mpeg2_idct_add_c;
520
521 for (i = 0; i < 64; i++)
522 {
523 j = default_mpeg2_scan_norm[i];
524 mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
525
526 j = default_mpeg2_scan_alt[i];
527 mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
528 }
529}