diff options
author | Jens Arnold <amiconn@rockbox.org> | 2007-10-17 00:29:44 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2007-10-17 00:29:44 +0000 |
commit | 45d4345405d06360370c90f2b3e4036e7df968d1 (patch) | |
tree | 981fe8735085d2af9aa4f6a0737b8fe893f7c5a2 /apps/plugins/mpegplayer/idct_arm_c.c | |
parent | 4829f7835a9f1965263438f2ee9587dc19408397 (diff) | |
download | rockbox-45d4345405d06360370c90f2b3e4036e7df968d1.tar.gz rockbox-45d4345405d06360370c90f2b3e4036e7df968d1.zip |
Mpegplayer: Convert the assembler optimised ARM IDCT into a proper assembler file. Clean up the CPU selection in idct.c.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15158 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/plugins/mpegplayer/idct_arm_c.c')
-rw-r--r-- | apps/plugins/mpegplayer/idct_arm_c.c | 529 |
1 files changed, 0 insertions, 529 deletions
diff --git a/apps/plugins/mpegplayer/idct_arm_c.c b/apps/plugins/mpegplayer/idct_arm_c.c deleted file mode 100644 index 9805f421a6..0000000000 --- a/apps/plugins/mpegplayer/idct_arm_c.c +++ /dev/null | |||
@@ -1,529 +0,0 @@ | |||
1 | /* | ||
2 | * idct.c | ||
3 | * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> | ||
4 | * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> | ||
5 | * | ||
6 | * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | ||
7 | * See http://libmpeg2.sourceforge.net/ for updates. | ||
8 | * | ||
9 | * mpeg2dec is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * mpeg2dec is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #include "plugin.h" | ||
25 | |||
26 | #include "mpeg2dec_config.h" | ||
27 | |||
28 | #include "mpeg2.h" | ||
29 | #include "attributes.h" | ||
30 | #include "mpeg2_internal.h" | ||
31 | |||
32 | /* 2841 = binary 101100011001 */ | ||
33 | #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ | ||
34 | /* 2676 = binary 101001110100 */ | ||
35 | #define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ | ||
36 | /* 2408 = binary 100101101000 */ | ||
37 | #define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ | ||
38 | /* 1609 = binary 011001001001 */ | ||
39 | #define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ | ||
40 | /* 1108 = binary 010001010100 */ | ||
41 | #define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ | ||
42 | /* 565 = binary 001000110101 */ | ||
43 | #define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ | ||
44 | |||
45 | /* idct main entry points; mpeg2_idct_init () points them at the ARM routines in this file */ | ||
46 | void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); | ||
47 | void (* mpeg2_idct_add) (int last, int16_t * block, | ||
48 | uint8_t * dest, int stride); | ||
49 | |||
50 | /* | ||
51 | * In legal streams, the IDCT output should be between -384 and +384. | ||
52 | * In corrupted streams, it is possible to force the IDCT output to go | ||
53 | * to +-3826 - this is the worst case for a column IDCT where the | ||
54 | * column inputs are 16-bit values. | ||
55 | */ | ||
56 | #if 0 /* disabled reference form: four multiplies */ | ||
57 | #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ | ||
58 | do { \ | ||
59 | t0 = W0 * d0 + W1 * d1; \ | ||
60 | t1 = W0 * d1 - W1 * d0; \ | ||
61 | } while (0) | ||
62 | #else /* algebraically equivalent form with three multiplies */ | ||
63 | #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ | ||
64 | do { \ | ||
65 | int tmp = W0 * (d0 + d1); \ | ||
66 | t0 = tmp + (W1 - W0) * d1; \ | ||
67 | t1 = tmp - (W1 + W0) * d0; \ | ||
68 | } while (0) | ||
69 | #endif /* BUTTERFLY is not referenced by the code shown in this file */ | ||
70 | |||
71 | /* In-place 8x8 IDCT (row pass, then column pass) with a custom calling convention: | ||
72 | * r0 contains the block pointer and is restored before returning; r1-r12 are clobbered | ||
73 | * nothing is saved here: callers save and restore all non-volatile C context on its behalf | ||
74 | */ | ||
75 | static void idct (int16_t * block) __attribute__((naked,used)); | ||
76 | static void idct (int16_t * block) | ||
77 | { | ||
78 | asm volatile ( | ||
79 | "add r12, r0, #128 \n" /* r12 = end of block (64 coefficients * 2 bytes) */ | ||
80 | "1: \n" /* row pass: one row of 8 coefficients per iteration */ | ||
81 | "ldrsh r1, [r0, #0] \n" /* d0 */ | ||
82 | "ldrsh r2, [r0, #2] \n" /* d1 */ | ||
83 | "ldrsh r3, [r0, #4] \n" /* d2 */ | ||
84 | "ldrsh r4, [r0, #6] \n" /* d3 */ | ||
85 | "ldrsh r5, [r0, #8] \n" /* d0 */ | ||
86 | "ldrsh r6, [r0, #10] \n" /* d1 */ | ||
87 | "ldrsh r7, [r0, #12] \n" /* d2 */ | ||
88 | "ldrsh r8, [r0, #14] \n" /* d3 */ | ||
89 | "orrs r9, r2, r3 \n" /* are all seven inputs after the first zero? */ | ||
90 | "orreqs r9, r4, r5 \n" | ||
91 | "orreqs r9, r6, r7 \n" | ||
92 | "cmpeq r8, #0 \n" | ||
93 | "bne 2f \n" /* no: do the full row transform */ | ||
94 | "mov r1, r1, asl #15 \n" /* shortcut: build a word with (uint16_t)(d0 >> 1) in both halves */ | ||
95 | "bic r1, r1, #0x8000 \n" | ||
96 | "orr r1, r1, r1, lsr #16 \n" | ||
97 | "str r1, [r0], #4 \n" /* four word stores fill all 8 outputs of the row */ | ||
98 | "str r1, [r0], #4 \n" | ||
99 | "str r1, [r0], #4 \n" | ||
100 | "str r1, [r0], #4 \n" | ||
101 | "cmp r0, r12 \n" | ||
102 | "blo 1b \n" /* next row */ | ||
103 | "b 3f \n" /* all rows done: go to the column pass */ | ||
104 | "2: \n" | ||
105 | "mov r1, r1, asl #11 \n" /* r1 = d0 = (block[0] << 11) + 2048 */ | ||
106 | "add r1, r1, #2048 \n" | ||
107 | "add r1, r1, r3, asl #11 \n" /* r1 = t0 = d0 + (block[2] << 11) */ | ||
108 | "sub r3, r1, r3, asl #12 \n" /* r3 = t1 = d0 - (block[2] << 11) */ | ||
109 | |||
110 | "add r9, r2, r4 \n" /* r9 = tmp = (d1+d3)*(1108/4) */ | ||
111 | "add r10, r9, r9, asl #2 \n" | ||
112 | "add r10, r10, r9, asl #4 \n" | ||
113 | "add r9, r10, r9, asl #8 \n" | ||
114 | |||
115 | "add r10, r2, r2, asl #4 \n" /* r2 = t2 = tmp + (d1*(1568/32)*8) */ | ||
116 | "add r2, r10, r2, asl #5 \n" | ||
117 | "add r2, r9, r2, asl #3 \n" | ||
118 | |||
119 | "add r10, r4, r4, asl #2 \n" /* r4 = t3 = tmp - (d3*(3784/8)*2) */ | ||
120 | "rsb r10, r10, r4, asl #6 \n" | ||
121 | "add r4, r4, r10, asl #3 \n" | ||
122 | "sub r4, r9, r4, asl #1 \n" | ||
123 | /* t2 & t3 are 1/4 final value here */ | ||
124 | "add r1, r1, r2, asl #2 \n" /* r1 = a0 = t0 + t2 */ | ||
125 | "sub r2, r1, r2, asl #3 \n" /* r2 = a3 = t0 - t2 */ | ||
126 | "add r3, r3, r4, asl #2 \n" /* r3 = a1 = t1 + t3 */ | ||
127 | "sub r4, r3, r4, asl #3 \n" /* r4 = a2 = t1 - t3 */ | ||
128 | |||
129 | "add r9, r8, r5 \n" /* r9 = tmp = 565*(d3 + d0) */ | ||
130 | "add r10, r9, r9, asl #4 \n" | ||
131 | "add r10, r10, r10, asl #5 \n" | ||
132 | "add r9, r10, r9, asl #2 \n" | ||
133 | |||
134 | "add r10, r5, r5, asl #4 \n" /* r5 = t0 = tmp + (((2276/4)*d0)*4) */ | ||
135 | "add r10, r10, r10, asl #5 \n" | ||
136 | "add r5, r10, r5, asl #3 \n" | ||
137 | "add r5, r9, r5, asl #2 \n" | ||
138 | |||
139 | "add r10, r8, r8, asl #2 \n" /* r8 = t1 = tmp - (((3406/2)*d3)*2) */ | ||
140 | "add r10, r10, r10, asl #4 \n" | ||
141 | "add r10, r10, r8, asl #7 \n" | ||
142 | "rsb r8, r8, r10, asl #3 \n" | ||
143 | "sub r8, r9, r8, asl #1 \n" | ||
144 | |||
145 | "add r9, r6, r7 \n" /* r9 = tmp = (2408/8)*(d1 + d2) */ | ||
146 | "add r10, r9, r9, asl #3 \n" | ||
147 | "add r10, r10, r10, asl #5 \n" | ||
148 | "add r9, r10, r9, asl #2 \n" | ||
149 | |||
150 | "add r10, r7, r7, asl #3 \n" /* r7 = t2 = (tmp*8) - 799*d2 */ | ||
151 | "add r10, r10, r7, asl #4 \n" | ||
152 | "rsb r7, r7, r10, asl #5 \n" | ||
153 | "rsb r7, r7, r9, asl #3 \n" | ||
154 | |||
155 | "sub r10, r6, r6, asl #4 \n" /* r6 = t3 = (tmp*8) - 4017*d1 */ | ||
156 | "sub r10, r10, r6, asl #6 \n" | ||
157 | "add r10, r10, r6, asl #12 \n" | ||
158 | "add r6, r10, r6 \n" | ||
159 | "rsb r6, r6, r9, asl #3 \n" | ||
160 | /* t0 = r5, t1 = r8, t2 = r7, t3 = r6*/ | ||
161 | "add r9, r5, r7 \n" /* r9 = b0 = t0 + t2 */ | ||
162 | "add r10, r8, r6 \n" /* r10 = b3 = t1 + t3 */ | ||
163 | "sub r5, r5, r7 \n" /* t0 -= t2 */ | ||
164 | "sub r8, r8, r6 \n" /* t1 -= t3 */ | ||
165 | "add r6, r5, r8 \n" /* r6 = t0 + t1 */ | ||
166 | "sub r7, r5, r8 \n" /* r7 = t0 - t1 */ | ||
167 | |||
168 | "add r11, r6, r6, asr #2 \n" /* r6 = b1 = r6*(181/128) */ | ||
169 | "add r11, r11, r11, asr #5 \n" | ||
170 | "add r6, r11, r6, asr #3 \n" | ||
171 | "add r11, r7, r7, asr #2 \n" /* r7 = b2 = r7*(181/128) */ | ||
172 | "add r11, r11, r11, asr #5 \n" | ||
173 | "add r7, r11, r7, asr #3 \n" | ||
174 | /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */ | ||
175 | /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */ | ||
176 | "add r5, r1, r9 \n" /* block[0] = (a0 + b0) >> 12 */ | ||
177 | "mov r5, r5, asr #12 \n" | ||
178 | "strh r5, [r0], #2 \n" | ||
179 | "add r8, r3, r6, asr #1 \n" /* block[1] = (a1 + b1) >> 12 */ | ||
180 | "mov r8, r8, asr #12 \n" | ||
181 | "strh r8, [r0], #2 \n" | ||
182 | "add r5, r4, r7, asr #1 \n" /* block[2] = (a2 + b2) >> 12 */ | ||
183 | "mov r5, r5, asr #12 \n" | ||
184 | "strh r5, [r0], #2 \n" | ||
185 | "add r8, r2, r10 \n" /* block[3] = (a3 + b3) >> 12 */ | ||
186 | "mov r8, r8, asr #12 \n" | ||
187 | "strh r8, [r0], #2 \n" | ||
188 | "sub r5, r2, r10 \n" /* block[4] = (a3 - b3) >> 12 */ | ||
189 | "mov r5, r5, asr #12 \n" | ||
190 | "strh r5, [r0], #2 \n" | ||
191 | "sub r8, r4, r7, asr #1 \n" /* block[5] = (a2 - b2) >> 12 */ | ||
192 | "mov r8, r8, asr #12 \n" | ||
193 | "strh r8, [r0], #2 \n" | ||
194 | "sub r5, r3, r6, asr #1 \n" /* block[6] = (a1 - b1) >> 12 */ | ||
195 | "mov r5, r5, asr #12 \n" | ||
196 | "strh r5, [r0], #2 \n" | ||
197 | "sub r8, r1, r9 \n" /* block[7] = (a0 - b0) >> 12 */ | ||
198 | "mov r8, r8, asr #12 \n" | ||
199 | "strh r8, [r0], #2 \n" | ||
200 | "cmp r0, r12 \n" | ||
201 | "blo 1b \n" /* next row */ | ||
202 | "3: \n" /* column pass */ | ||
203 | "sub r0, r0, #128 \n" /* rewind r0 to the start of the block */ | ||
204 | "add r12, r0, #16 \n" /* r12 = end marker: 8 columns * 2 bytes */ | ||
205 | "4: \n" /* one column per iteration, elements 16 bytes apart */ | ||
206 | "ldrsh r1, [r0, #0*8] \n" /* d0 */ | ||
207 | "ldrsh r2, [r0, #2*8] \n" /* d1 */ | ||
208 | "ldrsh r3, [r0, #4*8] \n" /* d2 */ | ||
209 | "ldrsh r4, [r0, #6*8] \n" /* d3 */ | ||
210 | "ldrsh r5, [r0, #8*8] \n" /* d0 */ | ||
211 | "ldrsh r6, [r0, #10*8] \n" /* d1 */ | ||
212 | "ldrsh r7, [r0, #12*8] \n" /* d2 */ | ||
213 | "ldrsh r8, [r0, #14*8] \n" /* d3 */ | ||
214 | |||
215 | "mov r1, r1, asl #11 \n" /* r1 = d0 = (block[8*0] << 11) + 65536 */ | ||
216 | "add r1, r1, #65536 \n" | ||
217 | "add r1, r1, r3, asl #11 \n" /* r1 = t0 = d0 + d2:(block[8*2] << 11) */ | ||
218 | "sub r3, r1, r3, asl #12 \n" /* r3 = t1 = d0 - d2:(block[8*2] << 11) */ | ||
219 | |||
220 | "add r9, r2, r4 \n" /* r9 = tmp = (d1+d3)*(1108/4) */ | ||
221 | "add r10, r9, r9, asl #2 \n" | ||
222 | "add r10, r10, r9, asl #4 \n" | ||
223 | "add r9, r10, r9, asl #8 \n" | ||
224 | |||
225 | "add r11, r2, r2, asl #4 \n" /* r2 = t2 = tmp + (d1*(1568/32)*8) */ | ||
226 | "add r2, r11, r2, asl #5 \n" | ||
227 | "add r2, r9, r2, asl #3 \n" | ||
228 | |||
229 | "add r10, r4, r4, asl #2 \n" /* r4 = t3 = tmp - (d3*(3784/8)*2) */ | ||
230 | "rsb r10, r10, r4, asl #6 \n" | ||
231 | "add r4, r4, r10, asl #3 \n" | ||
232 | "sub r4, r9, r4, asl #1 \n" | ||
233 | /* t2 & t3 are 1/4 final value here */ | ||
234 | "add r1, r1, r2, asl #2 \n" /* r1 = a0 = t0 + t2 */ | ||
235 | "sub r2, r1, r2, asl #3 \n" /* r2 = a3 = t0 - t2 */ | ||
236 | "add r3, r3, r4, asl #2 \n" /* r3 = a1 = t1 + t3 */ | ||
237 | "sub r4, r3, r4, asl #3 \n" /* r4 = a2 = t1 - t3 */ | ||
238 | |||
239 | "add r9, r8, r5 \n" /* r9 = tmp = 565*(d3 + d0) */ | ||
240 | "add r10, r9, r9, asl #4 \n" | ||
241 | "add r10, r10, r10, asl #5 \n" | ||
242 | "add r9, r10, r9, asl #2 \n" | ||
243 | |||
244 | "add r10, r5, r5, asl #4 \n" /* r5 = t0 = tmp + (((2276/4)*d0)*4) */ | ||
245 | "add r10, r10, r10, asl #5 \n" | ||
246 | "add r5, r10, r5, asl #3 \n" | ||
247 | "add r5, r9, r5, asl #2 \n" | ||
248 | |||
249 | "add r10, r8, r8, asl #2 \n" /* r8 = t1 = tmp - (((3406/2)*d3)*2) */ | ||
250 | "add r10, r10, r10, asl #4 \n" | ||
251 | "add r10, r10, r8, asl #7 \n" | ||
252 | "rsb r8, r8, r10, asl #3 \n" | ||
253 | "sub r8, r9, r8, asl #1 \n" | ||
254 | |||
255 | "add r9, r6, r7 \n" /* r9 = tmp = (2408/8)*(d1 + d2) */ | ||
256 | "add r10, r9, r9, asl #3 \n" | ||
257 | "add r10, r10, r10, asl #5 \n" | ||
258 | "add r9, r10, r9, asl #2 \n" | ||
259 | |||
260 | "add r10, r7, r7, asl #3 \n" /* r7 = t2 = (tmp*8) - 799*d2 */ | ||
261 | "add r10, r10, r7, asl #4 \n" | ||
262 | "rsb r7, r7, r10, asl #5 \n" | ||
263 | "rsb r7, r7, r9, asl #3 \n" | ||
264 | |||
265 | "sub r10, r6, r6, asl #4 \n" /* r6 = t3 = (tmp*8) - 4017*d1 */ | ||
266 | "sub r10, r10, r6, asl #6 \n" | ||
267 | "add r10, r10, r6, asl #12 \n" | ||
268 | "add r6, r10, r6 \n" | ||
269 | "rsb r6, r6, r9, asl #3 \n" | ||
270 | /* t0=r5, t1=r8, t2=r7, t3=r6*/ | ||
271 | "add r9, r5, r7 \n" /* r9 = b0 = t0 + t2 */ | ||
272 | "add r10, r8, r6 \n" /* r10 = b3 = t1 + t3 */ | ||
273 | "sub r5, r5, r7 \n" /* t0 -= t2 */ | ||
274 | "sub r8, r8, r6 \n" /* t1 -= t3 */ | ||
275 | "add r6, r5, r8 \n" /* r6 = t0 + t1 */ | ||
276 | "sub r7, r5, r8 \n" /* r7 = t0 - t1 */ | ||
277 | |||
278 | "add r11, r6, r6, asr #2 \n" /* r6 = b1 = r6*(181/128) */ | ||
279 | "add r11, r11, r11, asr #5 \n" | ||
280 | "add r6, r11, r6, asr #3 \n" | ||
281 | "add r11, r7, r7, asr #2 \n" /* r7 = b2 = r7*(181/128) */ | ||
282 | "add r11, r11, r11, asr #5 \n" | ||
283 | "add r7, r11, r7, asr #3 \n" | ||
284 | /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */ | ||
285 | /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */ | ||
286 | "add r5, r1, r9 \n" /* block[8*0] = (a0 + b0) >> 17 */ | ||
287 | "mov r5, r5, asr #17 \n" | ||
288 | "strh r5, [r0, #0*8] \n" | ||
289 | "add r8, r3, r6, asr #1 \n" /* block[8*1] = (a1 + b1) >> 17 */ | ||
290 | "mov r8, r8, asr #17 \n" | ||
291 | "strh r8, [r0, #2*8] \n" | ||
292 | "add r5, r4, r7, asr #1 \n" /* block[8*2] = (a2 + b2) >> 17 */ | ||
293 | "mov r5, r5, asr #17 \n" | ||
294 | "strh r5, [r0, #4*8] \n" | ||
295 | "add r8, r2, r10 \n" /* block[8*3] = (a3 + b3) >> 17 */ | ||
296 | "mov r8, r8, asr #17 \n" | ||
297 | "strh r8, [r0, #6*8] \n" | ||
298 | "sub r5, r2, r10 \n" /* block[8*4] = (a3 - b3) >> 17 */ | ||
299 | "mov r5, r5, asr #17 \n" | ||
300 | "strh r5, [r0, #8*8] \n" | ||
301 | "sub r8, r4, r7, asr #1 \n" /* block[8*5] = (a2 - b2) >> 17 */ | ||
302 | "mov r8, r8, asr #17 \n" | ||
303 | "strh r8, [r0, #10*8] \n" | ||
304 | "sub r5, r3, r6, asr #1 \n" /* block[8*6] = (a1 - b1) >> 17 */ | ||
305 | "mov r5, r5, asr #17 \n" | ||
306 | "strh r5, [r0, #12*8] \n" | ||
307 | "sub r8, r1, r9 \n" /* block[8*7] = (a0 - b0) >> 17 */ | ||
308 | "mov r8, r8, asr #17 \n" | ||
309 | "strh r8, [r0, #14*8] \n" | ||
310 | "add r0, r0, #2 \n" /* next column */ | ||
311 | "cmp r0, r12 \n" | ||
312 | "blo 4b \n" | ||
313 | "sub r0, r0, #16 \n" /* restore r0 = block pointer for the caller */ | ||
314 | "bx lr \n" | ||
315 | ); | ||
316 | (void)block; | ||
317 | } | ||
318 | |||
319 | static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, | ||
320 | const int stride) __attribute__((naked)); | ||
321 | static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, | ||
322 | const int stride) | ||
323 | { /* IDCT the block in place, store the result clamped to 0..255 at dest, and zero the block */ | ||
324 | asm volatile( | ||
325 | "stmfd sp!, { r1-r2, \ | ||
326 | r4-r12, lr } \n" | ||
327 | "bl idct \n" /* r0 = block on entry and on return */ | ||
328 | "ldmfd sp!, { r1-r2 } \n" /* reload r1 = dest, r2 = stride (idct clobbers them) */ | ||
329 | "mov r11, #0 \n" /* zero word used to clear the block */ | ||
330 | "add r12, r0, #128 \n" /* r12 = end of block */ | ||
331 | "1: \n" /* one row of 8 coefficients per iteration */ | ||
332 | "ldrsh r3, [r0, #0] \n" | ||
333 | "ldrsh r4, [r0, #2] \n" | ||
334 | "ldrsh r5, [r0, #4] \n" | ||
335 | "ldrsh r6, [r0, #6] \n" | ||
336 | "ldrsh r7, [r0, #8] \n" | ||
337 | "ldrsh r8, [r0, #10] \n" | ||
338 | "ldrsh r9, [r0, #12] \n" | ||
339 | "ldrsh r10, [r0, #14] \n" | ||
340 | "cmp r3, #255 \n" /* outside 0..255 (unsigned compare)? */ | ||
341 | "mvnhi r3, r3, asr #31 \n" /* negative -> 0, too large -> low byte 0xff */ | ||
342 | "strb r3, [r1, #0] \n" | ||
343 | "str r11, [r0], #4 \n" /* clear two coefficients, advance r0 */ | ||
344 | "cmp r4, #255 \n" | ||
345 | "mvnhi r4, r4, asr #31 \n" | ||
346 | "strb r4, [r1, #1] \n" | ||
347 | "cmp r5, #255 \n" | ||
348 | "mvnhi r5, r5, asr #31 \n" | ||
349 | "strb r5, [r1, #2] \n" | ||
350 | "str r11, [r0], #4 \n" | ||
351 | "cmp r6, #255 \n" | ||
352 | "mvnhi r6, r6, asr #31 \n" | ||
353 | "strb r6, [r1, #3] \n" | ||
354 | "cmp r7, #255 \n" | ||
355 | "mvnhi r7, r7, asr #31 \n" | ||
356 | "strb r7, [r1, #4] \n" | ||
357 | "str r11, [r0], #4 \n" | ||
358 | "cmp r8, #255 \n" | ||
359 | "mvnhi r8, r8, asr #31 \n" | ||
360 | "strb r8, [r1, #5] \n" | ||
361 | "cmp r9, #255 \n" | ||
362 | "mvnhi r9, r9, asr #31 \n" | ||
363 | "strb r9, [r1, #6] \n" | ||
364 | "str r11, [r0], #4 \n" | ||
365 | "cmp r10, #255 \n" | ||
366 | "mvnhi r10, r10, asr #31 \n" | ||
367 | "strb r10, [r1, #7] \n" | ||
368 | "add r1, r1, r2 \n" /* dest += stride */ | ||
369 | "cmp r0, r12 \n" | ||
370 | "blo 1b \n" | ||
371 | "ldmfd sp!, { r4-r12, pc } \n" /* restore saved registers and return */ | ||
372 | ); | ||
373 | (void)block; (void)dest; (void)stride; | ||
374 | } | ||
375 | |||
376 | static void mpeg2_idct_add_c (int last, int16_t * block, | ||
377 | uint8_t * dest, const int stride) __attribute__((naked)); | ||
378 | static void mpeg2_idct_add_c (int last, int16_t * block, | ||
379 | uint8_t * dest, const int stride) | ||
380 | { /* add the IDCT of block to the 8x8 pixels at dest with clamping to 0..255; full or DC-only path */ | ||
381 | asm volatile ( | ||
382 | "cmp r0, #129 \n" /* last == 129 ? */ | ||
383 | "mov r0, r1 \n" /* r0 = block */ | ||
384 | "ldreqsh r1, [r0, #0] \n" /* if so, r1 = block[0] */ | ||
385 | "bne 1f \n" /* last != 129: full IDCT */ | ||
386 | "and r1, r1, #0x70 \n" | ||
387 | "cmp r1, #0x40 \n" | ||
388 | "bne 3f \n" /* (block[0] & 0x70) != 0x40: DC-only path */ | ||
389 | "1: \n" | ||
390 | "stmfd sp!, { r2-r12, lr } \n" /* dest and stride end up on top of the stack */ | ||
391 | "bl idct \n" | ||
392 | "ldmfd sp!, { r1-r2 } \n" /* r1 = dest, r2 = stride */ | ||
393 | "mov r11, #0 \n" /* zero word used to clear the block */ | ||
394 | "add r12, r0, #128 \n" /* r12 = end of block */ | ||
395 | "2: \n" /* per row: dest[i] = clamp (dest[i] + block[i]), then clear the row */ | ||
396 | "ldrb r3, [r1, #0] \n" | ||
397 | "ldrb r4, [r1, #1] \n" | ||
398 | "ldrb r5, [r1, #2] \n" | ||
399 | "ldrb r6, [r1, #3] \n" | ||
400 | "ldrsh r7, [r0, #0] \n" | ||
401 | "ldrsh r8, [r0, #2] \n" | ||
402 | "ldrsh r9, [r0, #4] \n" | ||
403 | "ldrsh r10, [r0, #6] \n" | ||
404 | "add r7, r7, r3 \n" | ||
405 | "ldrb r3, [r1, #4] \n" | ||
406 | "cmp r7, #255 \n" /* outside 0..255 (unsigned compare)? */ | ||
407 | "mvnhi r7, r7, asr #31 \n" /* negative -> 0, too large -> low byte 0xff */ | ||
408 | "strb r7, [r1, #0] \n" | ||
409 | "ldrsh r7, [r0, #8] \n" | ||
410 | "add r8, r8, r4 \n" | ||
411 | "ldrb r4, [r1, #5] \n" | ||
412 | "cmp r8, #255 \n" | ||
413 | "mvnhi r8, r8, asr #31 \n" | ||
414 | "strb r8, [r1, #1] \n" | ||
415 | "ldrsh r8, [r0, #10] \n" | ||
416 | "add r9, r9, r5 \n" | ||
417 | "ldrb r5, [r1, #6] \n" | ||
418 | "cmp r9, #255 \n" | ||
419 | "mvnhi r9, r9, asr #31 \n" | ||
420 | "strb r9, [r1, #2] \n" | ||
421 | "ldrsh r9, [r0, #12] \n" | ||
422 | "add r10, r10, r6 \n" | ||
423 | "ldrb r6, [r1, #7] \n" | ||
424 | "cmp r10, #255 \n" | ||
425 | "mvnhi r10, r10, asr #31 \n" | ||
426 | "strb r10, [r1, #3] \n" | ||
427 | "ldrsh r10, [r0, #14] \n" | ||
428 | "str r11, [r0], #4 \n" /* all 8 coefficients are loaded: start clearing the row */ | ||
429 | "add r7, r7, r3 \n" | ||
430 | "cmp r7, #255 \n" | ||
431 | "mvnhi r7, r7, asr #31 \n" | ||
432 | "strb r7, [r1, #4] \n" | ||
433 | "str r11, [r0], #4 \n" | ||
434 | "add r8, r8, r4 \n" | ||
435 | "cmp r8, #255 \n" | ||
436 | "mvnhi r8, r8, asr #31 \n" | ||
437 | "strb r8, [r1, #5] \n" | ||
438 | "str r11, [r0], #4 \n" | ||
439 | "add r9, r9, r5 \n" | ||
440 | "cmp r9, #255 \n" | ||
441 | "mvnhi r9, r9, asr #31 \n" | ||
442 | "strb r9, [r1, #6] \n" | ||
443 | "add r10, r10, r6 \n" | ||
444 | "cmp r10, #255 \n" | ||
445 | "mvnhi r10, r10, asr #31 \n" | ||
446 | "strb r10, [r1, #7] \n" | ||
447 | "str r11, [r0], #4 \n" | ||
448 | "add r1, r1, r2 \n" /* dest += stride */ | ||
449 | "cmp r0, r12 \n" | ||
450 | "blo 2b \n" | ||
451 | "ldmfd sp!, { r4-r12, pc } \n" /* restore saved registers and return */ | ||
452 | "3: \n" /* DC-only path: idct is not called; r2 = dest, r3 = stride */ | ||
453 | "stmfd sp!, { r4-r11 } \n" | ||
454 | "ldrsh r1, [r0, #0] \n" /* r1 = block[0] */ | ||
455 | "mov r11, #0 \n" | ||
456 | "strh r11, [r0, #0] \n" /* block[0] = 0 */ | ||
457 | "strh r11, [r0, #126] \n" /* block[63] = 0 */ | ||
458 | "add r1, r1, #64 \n" /* r1 = block[0] + 64; DC = r1 >> 7 is applied below */ | ||
459 | "add r0, r2, r3, asl #3 \n" /* r0 = dest + 8 * stride = end marker */ | ||
460 | "4: \n" /* per row: dest[i] = clamp (dest[i] + DC) */ | ||
461 | "ldrb r4, [r2, #0] \n" | ||
462 | "ldrb r5, [r2, #1] \n" | ||
463 | "ldrb r6, [r2, #2] \n" | ||
464 | "ldrb r7, [r2, #3] \n" | ||
465 | "ldrb r8, [r2, #4] \n" | ||
466 | "ldrb r9, [r2, #5] \n" | ||
467 | "ldrb r10, [r2, #6] \n" | ||
468 | "ldrb r11, [r2, #7] \n" | ||
469 | "add r4, r4, r1, asr #7 \n" | ||
470 | "cmp r4, #255 \n" | ||
471 | "mvnhi r4, r4, asr #31 \n" | ||
472 | "strb r4, [r2, #0] \n" | ||
473 | "add r5, r5, r1, asr #7 \n" | ||
474 | "cmp r5, #255 \n" | ||
475 | "mvnhi r5, r5, asr #31 \n" | ||
476 | "strb r5, [r2, #1] \n" | ||
477 | "add r6, r6, r1, asr #7 \n" | ||
478 | "cmp r6, #255 \n" | ||
479 | "mvnhi r6, r6, asr #31 \n" | ||
480 | "strb r6, [r2, #2] \n" | ||
481 | "add r7, r7, r1, asr #7 \n" | ||
482 | "cmp r7, #255 \n" | ||
483 | "mvnhi r7, r7, asr #31 \n" | ||
484 | "strb r7, [r2, #3] \n" | ||
485 | "add r8, r8, r1, asr #7 \n" | ||
486 | "cmp r8, #255 \n" | ||
487 | "mvnhi r8, r8, asr #31 \n" | ||
488 | "strb r8, [r2, #4] \n" | ||
489 | "add r9, r9, r1, asr #7 \n" | ||
490 | "cmp r9, #255 \n" | ||
491 | "mvnhi r9, r9, asr #31 \n" | ||
492 | "strb r9, [r2, #5] \n" | ||
493 | "add r10, r10, r1, asr #7 \n" | ||
494 | "cmp r10, #255 \n" | ||
495 | "mvnhi r10, r10, asr #31 \n" | ||
496 | "strb r10, [r2, #6] \n" | ||
497 | "add r11, r11, r1, asr #7 \n" | ||
498 | "cmp r11, #255 \n" | ||
499 | "mvnhi r11, r11, asr #31 \n" | ||
500 | "strb r11, [r2, #7] \n" | ||
501 | "add r2, r2, r3 \n" /* dest += stride */ | ||
502 | "cmp r2, r0 \n" | ||
503 | "blo 4b \n" | ||
504 | "ldmfd sp!, { r4-r11 } \n" | ||
505 | "bx lr \n" | ||
506 | ); | ||
507 | (void)last; (void)block; (void)dest; (void)stride; | ||
508 | } | ||
509 | |||
510 | void mpeg2_idct_init (void) | ||
511 | { /* install the ARM IDCT routines and derive the permuted scan tables from the defaults */ | ||
512 | extern uint8_t default_mpeg2_scan_norm[64]; | ||
513 | extern uint8_t default_mpeg2_scan_alt[64]; | ||
514 | extern uint8_t mpeg2_scan_norm[64]; | ||
515 | extern uint8_t mpeg2_scan_alt[64]; | ||
516 | int i, j; | ||
517 | |||
518 | mpeg2_idct_copy = mpeg2_idct_copy_c; | ||
519 | mpeg2_idct_add = mpeg2_idct_add_c; | ||
520 | |||
521 | for (i = 0; i < 64; i++) | ||
522 | { /* NOTE(review): presumably this yields the coefficient order the assembler idct expects - confirm */ | ||
523 | j = default_mpeg2_scan_norm[i]; | ||
524 | mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); /* rotate each 3-bit field of j (bits 0-2, bits 3-5) right by one */ | ||
525 | |||
526 | j = default_mpeg2_scan_alt[i]; | ||
527 | mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); /* same permutation for the alternate scan */ | ||
528 | } | ||
529 | } | ||