diff options
author | Jens Arnold <amiconn@rockbox.org> | 2007-10-17 00:29:44 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2007-10-17 00:29:44 +0000 |
commit | 45d4345405d06360370c90f2b3e4036e7df968d1 (patch) | |
tree | 981fe8735085d2af9aa4f6a0737b8fe893f7c5a2 | |
parent | 4829f7835a9f1965263438f2ee9587dc19408397 (diff) | |
download | rockbox-45d4345405d06360370c90f2b3e4036e7df968d1.tar.gz rockbox-45d4345405d06360370c90f2b3e4036e7df968d1.zip |
Mpegplayer: Convert the assembler optimised ARM IDCT into a proper assembler file. Clean up the CPU selection in idct.c.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15158 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- | apps/plugins/mpegplayer/SOURCES | 14 | ||||
-rw-r--r-- | apps/plugins/mpegplayer/idct.c | 67 | ||||
-rw-r--r-- | apps/plugins/mpegplayer/idct_arm.S | 440 | ||||
-rw-r--r-- | apps/plugins/mpegplayer/idct_arm_c.c | 529 |
4 files changed, 472 insertions, 578 deletions
diff --git a/apps/plugins/mpegplayer/SOURCES b/apps/plugins/mpegplayer/SOURCES index 004c6395a2..3d5a4c2375 100644 --- a/apps/plugins/mpegplayer/SOURCES +++ b/apps/plugins/mpegplayer/SOURCES | |||
@@ -1,22 +1,22 @@ | |||
1 | alloc.c | 1 | alloc.c |
2 | decode.c | 2 | decode.c |
3 | header.c | 3 | header.c |
4 | 4 | idct.c | |
5 | motion_comp.c | 5 | motion_comp.c |
6 | 6 | ||
7 | #ifdef CPU_COLDFIRE | ||
8 | idct_coldfire.S | ||
9 | #elif defined CPU_ARM | ||
10 | idct_arm.S | ||
11 | #endif | ||
12 | |||
7 | #ifdef CPU_ARM | 13 | #ifdef CPU_ARM |
8 | idct_arm_c.c | ||
9 | motion_comp_arm_c.c | 14 | motion_comp_arm_c.c |
10 | motion_comp_arm_s.S | 15 | motion_comp_arm_s.S |
11 | #else /* other CPU or SIM */ | 16 | #else /* other CPU or SIM */ |
12 | idct.c | ||
13 | motion_comp_c.c | 17 | motion_comp_c.c |
14 | #endif /* CPU_* */ | 18 | #endif /* CPU_* */ |
15 | 19 | ||
16 | #ifdef CPU_COLDFIRE | ||
17 | idct_coldfire.S | ||
18 | #endif | ||
19 | |||
20 | slice.c | 20 | slice.c |
21 | video_out_rockbox.c | 21 | video_out_rockbox.c |
22 | mpeg_settings.c | 22 | mpeg_settings.c |
diff --git a/apps/plugins/mpegplayer/idct.c b/apps/plugins/mpegplayer/idct.c index bf7097401e..de192e3549 100644 --- a/apps/plugins/mpegplayer/idct.c +++ b/apps/plugins/mpegplayer/idct.c | |||
@@ -29,6 +29,27 @@ | |||
29 | #include "attributes.h" | 29 | #include "attributes.h" |
30 | #include "mpeg2_internal.h" | 30 | #include "mpeg2_internal.h" |
31 | 31 | ||
32 | /* idct main entry point */ | ||
33 | void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); | ||
34 | void (* mpeg2_idct_add) (int last, int16_t * block, | ||
35 | uint8_t * dest, int stride); | ||
36 | |||
37 | #ifdef CPU_COLDFIRE | ||
38 | /* assembler functions */ | ||
39 | extern void mpeg2_idct_copy_coldfire(int16_t * block, uint8_t * dest, | ||
40 | const int stride); | ||
41 | extern void mpeg2_idct_add_coldfire(const int last, int16_t * block, | ||
42 | uint8_t * dest, const int stride); | ||
43 | |||
44 | #elif defined CPU_ARM | ||
45 | /* assembler functions */ | ||
46 | extern void mpeg2_idct_copy_arm(int16_t * block, uint8_t * dest, | ||
47 | const int stride); | ||
48 | extern void mpeg2_idct_add_arm(const int last, int16_t * block, | ||
49 | uint8_t * dest, const int stride); | ||
50 | |||
51 | #else /* !CPU_COLDFIRE, !CPU_ARM */ | ||
52 | |||
32 | #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ | 53 | #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ |
33 | #define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ | 54 | #define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ |
34 | #define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ | 55 | #define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ |
@@ -36,53 +57,14 @@ | |||
36 | #define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ | 57 | #define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ |
37 | #define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ | 58 | #define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ |
38 | 59 | ||
39 | /* idct main entry point */ | ||
40 | void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); | ||
41 | void (* mpeg2_idct_add) (int last, int16_t * block, | ||
42 | uint8_t * dest, int stride); | ||
43 | |||
44 | /* | 60 | /* |
45 | * In legal streams, the IDCT output should be between -384 and +384. | 61 | * In legal streams, the IDCT output should be between -384 and +384. |
46 | * In corrupted streams, it is possible to force the IDCT output to go | 62 | * In corrupted streams, it is possible to force the IDCT output to go |
47 | * to +-3826 - this is the worst case for a column IDCT where the | 63 | * to +-3826 - this is the worst case for a column IDCT where the |
48 | * column inputs are 16-bit values. | 64 | * column inputs are 16-bit values. |
49 | */ | 65 | */ |
50 | #ifdef CPU_COLDFIRE | ||
51 | static inline unsigned CLIP(int value) | ||
52 | { | ||
53 | asm ( /* Note: Uses knowledge that only the low byte of the result is used */ | ||
54 | "cmp.l #255,%[v] \n" /* overflow? */ | ||
55 | "bls.b 1f \n" /* no: return value */ | ||
56 | "spl.b %[v] \n" /* yes: set low byte to appropriate boundary */ | ||
57 | "1: \n" | ||
58 | : /* outputs */ | ||
59 | [v]"+d"(value) | ||
60 | ); | ||
61 | return value; | ||
62 | } | ||
63 | #elif defined CPU_ARM | ||
64 | static inline unsigned CLIP(int value) | ||
65 | { | ||
66 | asm volatile ( /* Note: Uses knowledge that only the low byte of the result is used */ | ||
67 | "cmp %[v], #255 \n" | ||
68 | "mvnhi %[v], %[v], asr #31 \n" | ||
69 | : /* outputs */ | ||
70 | [v]"+r"(value) | ||
71 | ); | ||
72 | return value; | ||
73 | } | ||
74 | #else | ||
75 | uint8_t mpeg2_clip[3840 * 2 + 256] IBSS_ATTR; | 66 | uint8_t mpeg2_clip[3840 * 2 + 256] IBSS_ATTR; |
76 | #define CLIP(i) ((mpeg2_clip + 3840)[i]) | 67 | #define CLIP(i) ((mpeg2_clip + 3840)[i]) |
77 | #endif | ||
78 | |||
79 | #ifdef CPU_COLDFIRE | ||
80 | /* assembler functions */ | ||
81 | extern void mpeg2_idct_copy_coldfire(int16_t * block, uint8_t * dest, | ||
82 | const int stride); | ||
83 | extern void mpeg2_idct_add_coldfire(const int last, int16_t * block, | ||
84 | uint8_t * dest, const int stride); | ||
85 | #else /* !CPU_COLDFIE */ | ||
86 | 68 | ||
87 | #if 0 | 69 | #if 0 |
88 | #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ | 70 | #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ |
@@ -266,7 +248,7 @@ static void mpeg2_idct_add_c (const int last, int16_t * block, | |||
266 | } | 248 | } |
267 | } | 249 | } |
268 | 250 | ||
269 | #endif /* !CPU_COLDFIRE */ | 251 | #endif /* CPU selection */ |
270 | 252 | ||
271 | void mpeg2_idct_init (void) | 253 | void mpeg2_idct_init (void) |
272 | { | 254 | { |
@@ -279,12 +261,13 @@ void mpeg2_idct_init (void) | |||
279 | #ifdef CPU_COLDFIRE | 261 | #ifdef CPU_COLDFIRE |
280 | mpeg2_idct_copy = mpeg2_idct_copy_coldfire; | 262 | mpeg2_idct_copy = mpeg2_idct_copy_coldfire; |
281 | mpeg2_idct_add = mpeg2_idct_add_coldfire; | 263 | mpeg2_idct_add = mpeg2_idct_add_coldfire; |
264 | #elif defined CPU_ARM | ||
265 | mpeg2_idct_copy = mpeg2_idct_copy_arm; | ||
266 | mpeg2_idct_add = mpeg2_idct_add_arm; | ||
282 | #else | 267 | #else |
283 | mpeg2_idct_copy = mpeg2_idct_copy_c; | 268 | mpeg2_idct_copy = mpeg2_idct_copy_c; |
284 | mpeg2_idct_add = mpeg2_idct_add_c; | 269 | mpeg2_idct_add = mpeg2_idct_add_c; |
285 | #endif | ||
286 | 270 | ||
287 | #if !defined(CPU_COLDFIRE) && !defined(CPU_ARM) | ||
288 | for (i = -3840; i < 3840 + 256; i++) | 271 | for (i = -3840; i < 3840 + 256; i++) |
289 | CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i); | 272 | CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i); |
290 | #endif | 273 | #endif |
diff --git a/apps/plugins/mpegplayer/idct_arm.S b/apps/plugins/mpegplayer/idct_arm.S new file mode 100644 index 0000000000..a340f40a07 --- /dev/null +++ b/apps/plugins/mpegplayer/idct_arm.S | |||
@@ -0,0 +1,440 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2007 by Michael Sevakis | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | .global mpeg2_idct_copy_arm | ||
21 | .type mpeg2_idct_copy_arm, %function | ||
22 | .global mpeg2_idct_add_arm | ||
23 | .type mpeg2_idct_add_arm, %function | ||
24 | |||
25 | |||
26 | /* Custom calling convention: | ||
27 | * r0 contains the block pointer and is preserved (restored before return) | ||
28 | * r1-r12 are used as scratch; the callers save and restore all non-volatile C context on its behalf | ||
29 | */ | ||
30 | .idct: | ||
31 | add r12, r0, #128 | ||
32 | 1: | ||
33 | ldrsh r1, [r0, #0] /* d0 */ | ||
34 | ldrsh r2, [r0, #2] /* d1 */ | ||
35 | ldrsh r3, [r0, #4] /* d2 */ | ||
36 | ldrsh r4, [r0, #6] /* d3 */ | ||
37 | ldrsh r5, [r0, #8] /* d0 */ | ||
38 | ldrsh r6, [r0, #10] /* d1 */ | ||
39 | ldrsh r7, [r0, #12] /* d2 */ | ||
40 | ldrsh r8, [r0, #14] /* d3 */ | ||
41 | orrs r9, r2, r3 | ||
42 | orreqs r9, r4, r5 | ||
43 | orreqs r9, r6, r7 | ||
44 | cmpeq r8, #0 | ||
45 | bne 2f | ||
46 | mov r1, r1, asl #15 | ||
47 | bic r1, r1, #0x8000 | ||
48 | orr r1, r1, r1, lsr #16 | ||
49 | str r1, [r0], #4 | ||
50 | str r1, [r0], #4 | ||
51 | str r1, [r0], #4 | ||
52 | str r1, [r0], #4 | ||
53 | cmp r0, r12 | ||
54 | blo 1b | ||
55 | b 3f | ||
56 | 2: | ||
57 | mov r1, r1, asl #11 /* r1 = d0 = (block[0] << 11) + 2048 */ | ||
58 | add r1, r1, #2048 | ||
59 | add r1, r1, r3, asl #11 /* r1 = t0 = d0 + (block[2] << 11) */ | ||
60 | sub r3, r1, r3, asl #12 /* r3 = t1 = d0 - (block[2] << 11) */ | ||
61 | |||
62 | add r9, r2, r4 /* r9 = tmp = (d1+d3)*(1108/4) */ | ||
63 | add r10, r9, r9, asl #2 | ||
64 | add r10, r10, r9, asl #4 | ||
65 | add r9, r10, r9, asl #8 | ||
66 | |||
67 | add r10, r2, r2, asl #4 /* r2 = t2 = tmp + (d1*(1568/32)*8) */ | ||
68 | add r2, r10, r2, asl #5 | ||
69 | add r2, r9, r2, asl #3 | ||
70 | |||
71 | add r10, r4, r4, asl #2 /* r4 = t3 = tmp - (d3*(3784/8)*2) */ | ||
72 | rsb r10, r10, r4, asl #6 | ||
73 | add r4, r4, r10, asl #3 | ||
74 | sub r4, r9, r4, asl #1 | ||
75 | /* t2 & t3 are 1/4 final value here */ | ||
76 | add r1, r1, r2, asl #2 /* r1 = a0 = t0 + t2 */ | ||
77 | sub r2, r1, r2, asl #3 /* r2 = a3 = t0 - t2 */ | ||
78 | add r3, r3, r4, asl #2 /* r3 = a1 = t1 + t3 */ | ||
79 | sub r4, r3, r4, asl #3 /* r4 = a2 = t1 - t3 */ | ||
80 | |||
81 | add r9, r8, r5 /* r9 = tmp = 565*(d3 + d0) */ | ||
82 | add r10, r9, r9, asl #4 | ||
83 | add r10, r10, r10, asl #5 | ||
84 | add r9, r10, r9, asl #2 | ||
85 | |||
86 | add r10, r5, r5, asl #4 /* r5 = t0 = tmp + (((2276/4)*d0)*4) */ | ||
87 | add r10, r10, r10, asl #5 | ||
88 | add r5, r10, r5, asl #3 | ||
89 | add r5, r9, r5, asl #2 | ||
90 | |||
91 | add r10, r8, r8, asl #2 /* r8 = t1 = tmp - (((3406/2)*d3)*2) */ | ||
92 | add r10, r10, r10, asl #4 | ||
93 | add r10, r10, r8, asl #7 | ||
94 | rsb r8, r8, r10, asl #3 | ||
95 | sub r8, r9, r8, asl #1 | ||
96 | |||
97 | add r9, r6, r7 /* r9 = tmp = (2408/8)*(d1 + d2) */ | ||
98 | add r10, r9, r9, asl #3 | ||
99 | add r10, r10, r10, asl #5 | ||
100 | add r9, r10, r9, asl #2 | ||
101 | |||
102 | add r10, r7, r7, asl #3 /* r7 = t2 = (tmp*8) - 799*d2 */ | ||
103 | add r10, r10, r7, asl #4 | ||
104 | rsb r7, r7, r10, asl #5 | ||
105 | rsb r7, r7, r9, asl #3 | ||
106 | |||
107 | sub r10, r6, r6, asl #4 /* r6 = t3 = (tmp*8) - 4017*d1 */ | ||
108 | sub r10, r10, r6, asl #6 | ||
109 | add r10, r10, r6, asl #12 | ||
110 | add r6, r10, r6 | ||
111 | rsb r6, r6, r9, asl #3 | ||
112 | /* t0 = r5, t1 = r8, t2 = r7, t3 = r6*/ | ||
113 | add r9, r5, r7 /* r9 = b0 = t0 + t2 */ | ||
114 | add r10, r8, r6 /* r10 = b3 = t1 + t3 */ | ||
115 | sub r5, r5, r7 /* t0 -= t2 */ | ||
116 | sub r8, r8, r6 /* t1 -= t3 */ | ||
117 | add r6, r5, r8 /* r6 = t0 + t1 */ | ||
118 | sub r7, r5, r8 /* r7 = t0 - t1 */ | ||
119 | |||
120 | add r11, r6, r6, asr #2 /* r6 = b1 = r6*(181/128) */ | ||
121 | add r11, r11, r11, asr #5 | ||
122 | add r6, r11, r6, asr #3 | ||
123 | add r11, r7, r7, asr #2 /* r7 = b2 = r7*(181/128) */ | ||
124 | add r11, r11, r11, asr #5 | ||
125 | add r7, r11, r7, asr #3 | ||
126 | /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */ | ||
127 | /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */ | ||
128 | add r5, r1, r9 /* block[0] = (a0 + b0) >> 12 */ | ||
129 | mov r5, r5, asr #12 | ||
130 | strh r5, [r0], #2 | ||
131 | add r8, r3, r6, asr #1 /* block[1] = (a1 + b1) >> 12 */ | ||
132 | mov r8, r8, asr #12 | ||
133 | strh r8, [r0], #2 | ||
134 | add r5, r4, r7, asr #1 /* block[2] = (a2 + b2) >> 12 */ | ||
135 | mov r5, r5, asr #12 | ||
136 | strh r5, [r0], #2 | ||
137 | add r8, r2, r10 /* block[3] = (a3 + b3) >> 12 */ | ||
138 | mov r8, r8, asr #12 | ||
139 | strh r8, [r0], #2 | ||
140 | sub r5, r2, r10 /* block[4] = (a3 - b3) >> 12 */ | ||
141 | mov r5, r5, asr #12 | ||
142 | strh r5, [r0], #2 | ||
143 | sub r8, r4, r7, asr #1 /* block[5] = (a2 - b2) >> 12 */ | ||
144 | mov r8, r8, asr #12 | ||
145 | strh r8, [r0], #2 | ||
146 | sub r5, r3, r6, asr #1 /* block[6] = (a1 - b1) >> 12 */ | ||
147 | mov r5, r5, asr #12 | ||
148 | strh r5, [r0], #2 | ||
149 | sub r8, r1, r9 /* block[7] = (a0 - b0) >> 12 */ | ||
150 | mov r8, r8, asr #12 | ||
151 | strh r8, [r0], #2 | ||
152 | cmp r0, r12 | ||
153 | blo 1b | ||
154 | 3: | ||
155 | sub r0, r0, #128 | ||
156 | add r12, r0, #16 | ||
157 | 4: | ||
158 | ldrsh r1, [r0, #0*8] /* d0 */ | ||
159 | ldrsh r2, [r0, #2*8] /* d1 */ | ||
160 | ldrsh r3, [r0, #4*8] /* d2 */ | ||
161 | ldrsh r4, [r0, #6*8] /* d3 */ | ||
162 | ldrsh r5, [r0, #8*8] /* d0 */ | ||
163 | ldrsh r6, [r0, #10*8] /* d1 */ | ||
164 | ldrsh r7, [r0, #12*8] /* d2 */ | ||
165 | ldrsh r8, [r0, #14*8] /* d3 */ | ||
166 | |||
167 | mov r1, r1, asl #11 /* r1 = d0 = (block[0] << 11) + 65536 */ | ||
168 | add r1, r1, #65536 | ||
169 | add r1, r1, r3, asl #11 /* r1 = t0 = d0 + d2:(block[2] << 11) */ | ||
170 | sub r3, r1, r3, asl #12 /* r3 = t1 = d0 - d2:(block[2] << 11) */ | ||
171 | |||
172 | add r9, r2, r4 /* r9 = tmp = (d1+d3)*(1108/4) */ | ||
173 | add r10, r9, r9, asl #2 | ||
174 | add r10, r10, r9, asl #4 | ||
175 | add r9, r10, r9, asl #8 | ||
176 | |||
177 | add r11, r2, r2, asl #4 /* r2 = t2 = tmp + (d1*(1568/32)*8) */ | ||
178 | add r2, r11, r2, asl #5 | ||
179 | add r2, r9, r2, asl #3 | ||
180 | |||
181 | add r10, r4, r4, asl #2 /* r4 = t3 = tmp - (d3*(3784/8)*2) */ | ||
182 | rsb r10, r10, r4, asl #6 | ||
183 | add r4, r4, r10, asl #3 | ||
184 | sub r4, r9, r4, asl #1 | ||
185 | /* t2 & t3 are 1/4 final value here */ | ||
186 | add r1, r1, r2, asl #2 /* r1 = a0 = t0 + t2 */ | ||
187 | sub r2, r1, r2, asl #3 /* r2 = a3 = t0 - t2 */ | ||
188 | add r3, r3, r4, asl #2 /* r3 = a1 = t1 + t3 */ | ||
189 | sub r4, r3, r4, asl #3 /* r4 = a2 = t1 - t3 */ | ||
190 | |||
191 | add r9, r8, r5 /* r9 = tmp = 565*(d3 + d0) */ | ||
192 | add r10, r9, r9, asl #4 | ||
193 | add r10, r10, r10, asl #5 | ||
194 | add r9, r10, r9, asl #2 | ||
195 | |||
196 | add r10, r5, r5, asl #4 /* r5 = t0 = tmp + (((2276/4)*d0)*4) */ | ||
197 | add r10, r10, r10, asl #5 | ||
198 | add r5, r10, r5, asl #3 | ||
199 | add r5, r9, r5, asl #2 | ||
200 | |||
201 | add r10, r8, r8, asl #2 /* r8 = t1 = tmp - (((3406/2)*d3)*2) */ | ||
202 | add r10, r10, r10, asl #4 | ||
203 | add r10, r10, r8, asl #7 | ||
204 | rsb r8, r8, r10, asl #3 | ||
205 | sub r8, r9, r8, asl #1 | ||
206 | |||
207 | add r9, r6, r7 /* r9 = tmp = (2408/8)*(d1 + d2) */ | ||
208 | add r10, r9, r9, asl #3 | ||
209 | add r10, r10, r10, asl #5 | ||
210 | add r9, r10, r9, asl #2 | ||
211 | |||
212 | add r10, r7, r7, asl #3 /* r7 = t2 = (tmp*8) - 799*d2 */ | ||
213 | add r10, r10, r7, asl #4 | ||
214 | rsb r7, r7, r10, asl #5 | ||
215 | rsb r7, r7, r9, asl #3 | ||
216 | |||
217 | sub r10, r6, r6, asl #4 /* r6 = t3 = (tmp*8) - 4017*d1 */ | ||
218 | sub r10, r10, r6, asl #6 | ||
219 | add r10, r10, r6, asl #12 | ||
220 | add r6, r10, r6 | ||
221 | rsb r6, r6, r9, asl #3 | ||
222 | /* t0=r5, t1=r8, t2=r7, t3=r6*/ | ||
223 | add r9, r5, r7 /* r9 = b0 = t0 + t2 */ | ||
224 | add r10, r8, r6 /* r10 = b3 = t1 + t3 */ | ||
225 | sub r5, r5, r7 /* t0 -= t2 */ | ||
226 | sub r8, r8, r6 /* t1 -= t3 */ | ||
227 | add r6, r5, r8 /* r6 = t0 + t1 */ | ||
228 | sub r7, r5, r8 /* r7 = t0 - t1 */ | ||
229 | |||
230 | add r11, r6, r6, asr #2 /* r6 = b1 = r6*(181/128) */ | ||
231 | add r11, r11, r11, asr #5 | ||
232 | add r6, r11, r6, asr #3 | ||
233 | add r11, r7, r7, asr #2 /* r7 = b2 = r7*(181/128) */ | ||
234 | add r11, r11, r11, asr #5 | ||
235 | add r7, r11, r7, asr #3 | ||
236 | /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */ | ||
237 | /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */ | ||
238 | add r5, r1, r9 /* block[0] = (a0 + b0) >> 17 */ | ||
239 | mov r5, r5, asr #17 | ||
240 | strh r5, [r0, #0*8] | ||
241 | add r8, r3, r6, asr #1 /* block[1] = (a1 + b1) >> 17 */ | ||
242 | mov r8, r8, asr #17 | ||
243 | strh r8, [r0, #2*8] | ||
244 | add r5, r4, r7, asr #1 /* block[2] = (a2 + b2) >> 17 */ | ||
245 | mov r5, r5, asr #17 | ||
246 | strh r5, [r0, #4*8] | ||
247 | add r8, r2, r10 /* block[3] = (a3 + b3) >> 17 */ | ||
248 | mov r8, r8, asr #17 | ||
249 | strh r8, [r0, #6*8] | ||
250 | sub r5, r2, r10 /* block[4] = (a3 - b3) >> 17 */ | ||
251 | mov r5, r5, asr #17 | ||
252 | strh r5, [r0, #8*8] | ||
253 | sub r8, r4, r7, asr #1 /* block[5] = (a2 - b2) >> 17 */ | ||
254 | mov r8, r8, asr #17 | ||
255 | strh r8, [r0, #10*8] | ||
256 | sub r5, r3, r6, asr #1 /* block[6] = (a1 - b1) >> 17 */ | ||
257 | mov r5, r5, asr #17 | ||
258 | strh r5, [r0, #12*8] | ||
259 | sub r8, r1, r9 /* block[7] = (a0 - b0) >> 17 */ | ||
260 | mov r8, r8, asr #17 | ||
261 | strh r8, [r0, #14*8] | ||
262 | add r0, r0, #2 | ||
263 | cmp r0, r12 | ||
264 | blo 4b | ||
265 | sub r0, r0, #16 | ||
266 | bx lr | ||
267 | |||
268 | mpeg2_idct_copy_arm: | ||
269 | stmfd sp!, { r1-r2, r4-r12, lr } | ||
270 | bl .idct | ||
271 | ldmfd sp!, { r1-r2 } | ||
272 | mov r11, #0 | ||
273 | add r12, r0, #128 | ||
274 | 1: | ||
275 | ldrsh r3, [r0, #0] | ||
276 | ldrsh r4, [r0, #2] | ||
277 | ldrsh r5, [r0, #4] | ||
278 | ldrsh r6, [r0, #6] | ||
279 | ldrsh r7, [r0, #8] | ||
280 | ldrsh r8, [r0, #10] | ||
281 | ldrsh r9, [r0, #12] | ||
282 | ldrsh r10, [r0, #14] | ||
283 | cmp r3, #255 | ||
284 | mvnhi r3, r3, asr #31 | ||
285 | strb r3, [r1, #0] | ||
286 | str r11, [r0], #4 | ||
287 | cmp r4, #255 | ||
288 | mvnhi r4, r4, asr #31 | ||
289 | strb r4, [r1, #1] | ||
290 | cmp r5, #255 | ||
291 | mvnhi r5, r5, asr #31 | ||
292 | strb r5, [r1, #2] | ||
293 | str r11, [r0], #4 | ||
294 | cmp r6, #255 | ||
295 | mvnhi r6, r6, asr #31 | ||
296 | strb r6, [r1, #3] | ||
297 | cmp r7, #255 | ||
298 | mvnhi r7, r7, asr #31 | ||
299 | strb r7, [r1, #4] | ||
300 | str r11, [r0], #4 | ||
301 | cmp r8, #255 | ||
302 | mvnhi r8, r8, asr #31 | ||
303 | strb r8, [r1, #5] | ||
304 | cmp r9, #255 | ||
305 | mvnhi r9, r9, asr #31 | ||
306 | strb r9, [r1, #6] | ||
307 | str r11, [r0], #4 | ||
308 | cmp r10, #255 | ||
309 | mvnhi r10, r10, asr #31 | ||
310 | strb r10, [r1, #7] | ||
311 | add r1, r1, r2 | ||
312 | cmp r0, r12 | ||
313 | blo 1b | ||
314 | ldmfd sp!, { r4-r12, pc } | ||
315 | |||
316 | mpeg2_idct_add_arm: | ||
317 | cmp r0, #129 | ||
318 | mov r0, r1 | ||
319 | ldreqsh r1, [r0, #0] | ||
320 | bne 1f | ||
321 | and r1, r1, #0x70 | ||
322 | cmp r1, #0x40 | ||
323 | bne 3f | ||
324 | 1: | ||
325 | stmfd sp!, { r2-r12, lr } | ||
326 | bl .idct | ||
327 | ldmfd sp!, { r1-r2 } | ||
328 | mov r11, #0 | ||
329 | add r12, r0, #128 | ||
330 | 2: | ||
331 | ldrb r3, [r1, #0] | ||
332 | ldrb r4, [r1, #1] | ||
333 | ldrb r5, [r1, #2] | ||
334 | ldrb r6, [r1, #3] | ||
335 | ldrsh r7, [r0, #0] | ||
336 | ldrsh r8, [r0, #2] | ||
337 | ldrsh r9, [r0, #4] | ||
338 | ldrsh r10, [r0, #6] | ||
339 | add r7, r7, r3 | ||
340 | ldrb r3, [r1, #4] | ||
341 | cmp r7, #255 | ||
342 | mvnhi r7, r7, asr #31 | ||
343 | strb r7, [r1, #0] | ||
344 | ldrsh r7, [r0, #8] | ||
345 | add r8, r8, r4 | ||
346 | ldrb r4, [r1, #5] | ||
347 | cmp r8, #255 | ||
348 | mvnhi r8, r8, asr #31 | ||
349 | strb r8, [r1, #1] | ||
350 | ldrsh r8, [r0, #10] | ||
351 | add r9, r9, r5 | ||
352 | ldrb r5, [r1, #6] | ||
353 | cmp r9, #255 | ||
354 | mvnhi r9, r9, asr #31 | ||
355 | strb r9, [r1, #2] | ||
356 | ldrsh r9, [r0, #12] | ||
357 | add r10, r10, r6 | ||
358 | ldrb r6, [r1, #7] | ||
359 | cmp r10, #255 | ||
360 | mvnhi r10, r10, asr #31 | ||
361 | strb r10, [r1, #3] | ||
362 | ldrsh r10, [r0, #14] | ||
363 | str r11, [r0], #4 | ||
364 | add r7, r7, r3 | ||
365 | cmp r7, #255 | ||
366 | mvnhi r7, r7, asr #31 | ||
367 | strb r7, [r1, #4] | ||
368 | str r11, [r0], #4 | ||
369 | add r8, r8, r4 | ||
370 | cmp r8, #255 | ||
371 | mvnhi r8, r8, asr #31 | ||
372 | strb r8, [r1, #5] | ||
373 | str r11, [r0], #4 | ||
374 | add r9, r9, r5 | ||
375 | cmp r9, #255 | ||
376 | mvnhi r9, r9, asr #31 | ||
377 | strb r9, [r1, #6] | ||
378 | add r10, r10, r6 | ||
379 | cmp r10, #255 | ||
380 | mvnhi r10, r10, asr #31 | ||
381 | strb r10, [r1, #7] | ||
382 | str r11, [r0], #4 | ||
383 | add r1, r1, r2 | ||
384 | cmp r0, r12 | ||
385 | blo 2b | ||
386 | ldmfd sp!, { r4-r12, pc } | ||
387 | 3: | ||
388 | stmfd sp!, { r4-r11 } | ||
389 | ldrsh r1, [r0, #0] /* r1 = block[0] */ | ||
390 | mov r11, #0 | ||
391 | strh r11, [r0, #0] /* block[0] = 0 */ | ||
392 | strh r11, [r0, #126] /* block[63] = 0 */ | ||
393 | add r1, r1, #64 /* r1 = block[0] + 64 (rounding); DC = r1 >> 7, applied below */ | ||
394 | add r0, r2, r3, asl #3 | ||
395 | 4: | ||
396 | ldrb r4, [r2, #0] | ||
397 | ldrb r5, [r2, #1] | ||
398 | ldrb r6, [r2, #2] | ||
399 | ldrb r7, [r2, #3] | ||
400 | ldrb r8, [r2, #4] | ||
401 | ldrb r9, [r2, #5] | ||
402 | ldrb r10, [r2, #6] | ||
403 | ldrb r11, [r2, #7] | ||
404 | add r4, r4, r1, asr #7 | ||
405 | cmp r4, #255 | ||
406 | mvnhi r4, r4, asr #31 | ||
407 | strb r4, [r2, #0] | ||
408 | add r5, r5, r1, asr #7 | ||
409 | cmp r5, #255 | ||
410 | mvnhi r5, r5, asr #31 | ||
411 | strb r5, [r2, #1] | ||
412 | add r6, r6, r1, asr #7 | ||
413 | cmp r6, #255 | ||
414 | mvnhi r6, r6, asr #31 | ||
415 | strb r6, [r2, #2] | ||
416 | add r7, r7, r1, asr #7 | ||
417 | cmp r7, #255 | ||
418 | mvnhi r7, r7, asr #31 | ||
419 | strb r7, [r2, #3] | ||
420 | add r8, r8, r1, asr #7 | ||
421 | cmp r8, #255 | ||
422 | mvnhi r8, r8, asr #31 | ||
423 | strb r8, [r2, #4] | ||
424 | add r9, r9, r1, asr #7 | ||
425 | cmp r9, #255 | ||
426 | mvnhi r9, r9, asr #31 | ||
427 | strb r9, [r2, #5] | ||
428 | add r10, r10, r1, asr #7 | ||
429 | cmp r10, #255 | ||
430 | mvnhi r10, r10, asr #31 | ||
431 | strb r10, [r2, #6] | ||
432 | add r11, r11, r1, asr #7 | ||
433 | cmp r11, #255 | ||
434 | mvnhi r11, r11, asr #31 | ||
435 | strb r11, [r2, #7] | ||
436 | add r2, r2, r3 | ||
437 | cmp r2, r0 | ||
438 | blo 4b | ||
439 | ldmfd sp!, { r4-r11 } | ||
440 | bx lr | ||
diff --git a/apps/plugins/mpegplayer/idct_arm_c.c b/apps/plugins/mpegplayer/idct_arm_c.c deleted file mode 100644 index 9805f421a6..0000000000 --- a/apps/plugins/mpegplayer/idct_arm_c.c +++ /dev/null | |||
@@ -1,529 +0,0 @@ | |||
1 | /* | ||
2 | * idct.c | ||
3 | * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> | ||
4 | * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> | ||
5 | * | ||
6 | * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | ||
7 | * See http://libmpeg2.sourceforge.net/ for updates. | ||
8 | * | ||
9 | * mpeg2dec is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * mpeg2dec is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 | */ | ||
23 | |||
24 | #include "plugin.h" | ||
25 | |||
26 | #include "mpeg2dec_config.h" | ||
27 | |||
28 | #include "mpeg2.h" | ||
29 | #include "attributes.h" | ||
30 | #include "mpeg2_internal.h" | ||
31 | |||
32 | /* 101100011001 */ | ||
33 | #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ | ||
34 | /* 101001110100 */ | ||
35 | #define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ | ||
36 | /* 100101101000 */ | ||
37 | #define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ | ||
38 | /* 011001001001 */ | ||
39 | #define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ | ||
40 | /* 010001010100 */ | ||
41 | #define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ | ||
42 | /* 001000110101 */ | ||
43 | #define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ | ||
44 | |||
45 | /* idct main entry point */ | ||
46 | void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); | ||
47 | void (* mpeg2_idct_add) (int last, int16_t * block, | ||
48 | uint8_t * dest, int stride); | ||
49 | |||
50 | /* | ||
51 | * In legal streams, the IDCT output should be between -384 and +384. | ||
52 | * In corrupted streams, it is possible to force the IDCT output to go | ||
53 | * to +-3826 - this is the worst case for a column IDCT where the | ||
54 | * column inputs are 16-bit values. | ||
55 | */ | ||
56 | #if 0 | ||
57 | #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ | ||
58 | do { \ | ||
59 | t0 = W0 * d0 + W1 * d1; \ | ||
60 | t1 = W0 * d1 - W1 * d0; \ | ||
61 | } while (0) | ||
62 | #else | ||
63 | #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ | ||
64 | do { \ | ||
65 | int tmp = W0 * (d0 + d1); \ | ||
66 | t0 = tmp + (W1 - W0) * d1; \ | ||
67 | t1 = tmp - (W1 + W0) * d0; \ | ||
68 | } while (0) | ||
69 | #endif | ||
70 | |||
71 | /* Custom calling convention: | ||
72 | * r0 contains block pointer and is non-volatile | ||
73 | * all non-volatile c context saved and restored on its behalf | ||
74 | */ | ||
75 | static void idct (int16_t * block) __attribute__((naked,used)); | ||
76 | static void idct (int16_t * block) | ||
77 | { | ||
78 | asm volatile ( | ||
79 | "add r12, r0, #128 \n" | ||
80 | "1: \n" | ||
81 | "ldrsh r1, [r0, #0] \n" /* d0 */ | ||
82 | "ldrsh r2, [r0, #2] \n" /* d1 */ | ||
83 | "ldrsh r3, [r0, #4] \n" /* d2 */ | ||
84 | "ldrsh r4, [r0, #6] \n" /* d3 */ | ||
85 | "ldrsh r5, [r0, #8] \n" /* d0 */ | ||
86 | "ldrsh r6, [r0, #10] \n" /* d1 */ | ||
87 | "ldrsh r7, [r0, #12] \n" /* d2 */ | ||
88 | "ldrsh r8, [r0, #14] \n" /* d3 */ | ||
89 | "orrs r9, r2, r3 \n" | ||
90 | "orreqs r9, r4, r5 \n" | ||
91 | "orreqs r9, r6, r7 \n" | ||
92 | "cmpeq r8, #0 \n" | ||
93 | "bne 2f \n" | ||
94 | "mov r1, r1, asl #15 \n" | ||
95 | "bic r1, r1, #0x8000 \n" | ||
96 | "orr r1, r1, r1, lsr #16 \n" | ||
97 | "str r1, [r0], #4 \n" | ||
98 | "str r1, [r0], #4 \n" | ||
99 | "str r1, [r0], #4 \n" | ||
100 | "str r1, [r0], #4 \n" | ||
101 | "cmp r0, r12 \n" | ||
102 | "blo 1b \n" | ||
103 | "b 3f \n" | ||
104 | "2: \n" | ||
105 | "mov r1, r1, asl #11 \n" /* r1 = d0 = (block[0] << 11) + 2048 */ | ||
106 | "add r1, r1, #2048 \n" | ||
107 | "add r1, r1, r3, asl #11 \n" /* r1 = t0 = d0 + (block[2] << 11) */ | ||
108 | "sub r3, r1, r3, asl #12 \n" /* r3 = t1 = d0 - (block[2] << 11) */ | ||
109 | |||
110 | "add r9, r2, r4 \n" /* r9 = tmp = (d1+d3)*(1108/4) */ | ||
111 | "add r10, r9, r9, asl #2 \n" | ||
112 | "add r10, r10, r9, asl #4 \n" | ||
113 | "add r9, r10, r9, asl #8 \n" | ||
114 | |||
115 | "add r10, r2, r2, asl #4 \n" /* r2 = t2 = tmp + (d1*(1568/32)*8) */ | ||
116 | "add r2, r10, r2, asl #5 \n" | ||
117 | "add r2, r9, r2, asl #3 \n" | ||
118 | |||
119 | "add r10, r4, r4, asl #2 \n" /* r4 = t3 = tmp - (d3*(3784/8)*2) */ | ||
120 | "rsb r10, r10, r4, asl #6 \n" | ||
121 | "add r4, r4, r10, asl #3 \n" | ||
122 | "sub r4, r9, r4, asl #1 \n" | ||
123 | /* t2 & t3 are 1/4 final value here */ | ||
124 | "add r1, r1, r2, asl #2 \n" /* r1 = a0 = t0 + t2 */ | ||
125 | "sub r2, r1, r2, asl #3 \n" /* r2 = a3 = t0 - t2 */ | ||
126 | "add r3, r3, r4, asl #2 \n" /* r3 = a1 = t1 + t3 */ | ||
127 | "sub r4, r3, r4, asl #3 \n" /* r4 = a2 = t1 - t3 */ | ||
128 | |||
129 | "add r9, r8, r5 \n" /* r9 = tmp = 565*(d3 + d0) */ | ||
130 | "add r10, r9, r9, asl #4 \n" | ||
131 | "add r10, r10, r10, asl #5 \n" | ||
132 | "add r9, r10, r9, asl #2 \n" | ||
133 | |||
134 | "add r10, r5, r5, asl #4 \n" /* r5 = t0 = tmp + (((2276/4)*d0)*4) */ | ||
135 | "add r10, r10, r10, asl #5 \n" | ||
136 | "add r5, r10, r5, asl #3 \n" | ||
137 | "add r5, r9, r5, asl #2 \n" | ||
138 | |||
139 | "add r10, r8, r8, asl #2 \n" /* r8 = t1 = tmp - (((3406/2)*d3)*2) */ | ||
140 | "add r10, r10, r10, asl #4 \n" | ||
141 | "add r10, r10, r8, asl #7 \n" | ||
142 | "rsb r8, r8, r10, asl #3 \n" | ||
143 | "sub r8, r9, r8, asl #1 \n" | ||
144 | |||
145 | "add r9, r6, r7 \n" /* r9 = tmp = (2408/8)*(d1 + d2) */ | ||
146 | "add r10, r9, r9, asl #3 \n" | ||
147 | "add r10, r10, r10, asl #5 \n" | ||
148 | "add r9, r10, r9, asl #2 \n" | ||
149 | |||
150 | "add r10, r7, r7, asl #3 \n" /* r7 = t2 = (tmp*8) - 799*d2 */ | ||
151 | "add r10, r10, r7, asl #4 \n" | ||
152 | "rsb r7, r7, r10, asl #5 \n" | ||
153 | "rsb r7, r7, r9, asl #3 \n" | ||
154 | |||
155 | "sub r10, r6, r6, asl #4 \n" /* r6 = t3 = (tmp*8) - 4017*d1 */ | ||
156 | "sub r10, r10, r6, asl #6 \n" | ||
157 | "add r10, r10, r6, asl #12 \n" | ||
158 | "add r6, r10, r6 \n" | ||
159 | "rsb r6, r6, r9, asl #3 \n" | ||
160 | /* t0 = r5, t1 = r8, t2 = r7, t3 = r6*/ | ||
161 | "add r9, r5, r7 \n" /* r9 = b0 = t0 + t2 */ | ||
162 | "add r10, r8, r6 \n" /* r10 = b3 = t1 + t3 */ | ||
163 | "sub r5, r5, r7 \n" /* t0 -= t2 */ | ||
164 | "sub r8, r8, r6 \n" /* t1 -= t3 */ | ||
165 | "add r6, r5, r8 \n" /* r6 = t0 + t1 */ | ||
166 | "sub r7, r5, r8 \n" /* r7 = t0 - t1 */ | ||
167 | |||
168 | "add r11, r6, r6, asr #2 \n" /* r6 = b1 = r6*(181/128) */ | ||
169 | "add r11, r11, r11, asr #5 \n" | ||
170 | "add r6, r11, r6, asr #3 \n" | ||
171 | "add r11, r7, r7, asr #2 \n" /* r7 = b2 = r7*(181/128) */ | ||
172 | "add r11, r11, r11, asr #5 \n" | ||
173 | "add r7, r11, r7, asr #3 \n" | ||
174 | /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */ | ||
175 | /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */ | ||
176 | "add r5, r1, r9 \n" /* block[0] = (a0 + b0) >> 12 */ | ||
177 | "mov r5, r5, asr #12 \n" | ||
178 | "strh r5, [r0], #2 \n" | ||
179 | "add r8, r3, r6, asr #1 \n" /* block[1] = (a1 + b1) >> 12 */ | ||
180 | "mov r8, r8, asr #12 \n" | ||
181 | "strh r8, [r0], #2 \n" | ||
182 | "add r5, r4, r7, asr #1 \n" /* block[2] = (a2 + b2) >> 12 */ | ||
183 | "mov r5, r5, asr #12 \n" | ||
184 | "strh r5, [r0], #2 \n" | ||
185 | "add r8, r2, r10 \n" /* block[3] = (a3 + b3) >> 12 */ | ||
186 | "mov r8, r8, asr #12 \n" | ||
187 | "strh r8, [r0], #2 \n" | ||
188 | "sub r5, r2, r10 \n" /* block[4] = (a3 - b3) >> 12 */ | ||
189 | "mov r5, r5, asr #12 \n" | ||
190 | "strh r5, [r0], #2 \n" | ||
191 | "sub r8, r4, r7, asr #1 \n" /* block[5] = (a2 - b2) >> 12 */ | ||
192 | "mov r8, r8, asr #12 \n" | ||
193 | "strh r8, [r0], #2 \n" | ||
194 | "sub r5, r3, r6, asr #1 \n" /* block[6] = (a1 - b1) >> 12 */ | ||
195 | "mov r5, r5, asr #12 \n" | ||
196 | "strh r5, [r0], #2 \n" | ||
197 | "sub r8, r1, r9 \n" /* block[7] = (a0 - b0) >> 12 */ | ||
198 | "mov r8, r8, asr #12 \n" | ||
199 | "strh r8, [r0], #2 \n" | ||
200 | "cmp r0, r12 \n" | ||
201 | "blo 1b \n" | ||
202 | "3: \n" | ||
203 | "sub r0, r0, #128 \n" | ||
204 | "add r12, r0, #16 \n" | ||
205 | "4: \n" | ||
206 | "ldrsh r1, [r0, #0*8] \n" /* d0 */ | ||
207 | "ldrsh r2, [r0, #2*8] \n" /* d1 */ | ||
208 | "ldrsh r3, [r0, #4*8] \n" /* d2 */ | ||
209 | "ldrsh r4, [r0, #6*8] \n" /* d3 */ | ||
210 | "ldrsh r5, [r0, #8*8] \n" /* d0 */ | ||
211 | "ldrsh r6, [r0, #10*8] \n" /* d1 */ | ||
212 | "ldrsh r7, [r0, #12*8] \n" /* d2 */ | ||
213 | "ldrsh r8, [r0, #14*8] \n" /* d3 */ | ||
214 | |||
215 | "mov r1, r1, asl #11 \n" /* r1 = d0 = (block[0] << 11) + 2048 */ | ||
216 | "add r1, r1, #65536 \n" | ||
217 | "add r1, r1, r3, asl #11 \n" /* r1 = t0 = d0 + d2:(block[2] << 11) */ | ||
218 | "sub r3, r1, r3, asl #12 \n" /* r3 = t1 = d0 - d2:(block[2] << 11) */ | ||
219 | |||
220 | "add r9, r2, r4 \n" /* r9 = tmp = (d1+d3)*(1108/4) */ | ||
221 | "add r10, r9, r9, asl #2 \n" | ||
222 | "add r10, r10, r9, asl #4 \n" | ||
223 | "add r9, r10, r9, asl #8 \n" | ||
224 | |||
225 | "add r11, r2, r2, asl #4 \n" /* r2 = t2 = tmp + (d1*(1568/32)*8) */ | ||
226 | "add r2, r11, r2, asl #5 \n" | ||
227 | "add r2, r9, r2, asl #3 \n" | ||
228 | |||
229 | "add r10, r4, r4, asl #2 \n" /* r4 = t3 = tmp - (d3*(3784/8)*2) */ | ||
230 | "rsb r10, r10, r4, asl #6 \n" | ||
231 | "add r4, r4, r10, asl #3 \n" | ||
232 | "sub r4, r9, r4, asl #1 \n" | ||
233 | /* t2 & t3 are 1/4 final value here */ | ||
234 | "add r1, r1, r2, asl #2 \n" /* r1 = a0 = t0 + t2 */ | ||
235 | "sub r2, r1, r2, asl #3 \n" /* r2 = a3 = t0 - t2 */ | ||
236 | "add r3, r3, r4, asl #2 \n" /* r3 = a1 = t1 + t3 */ | ||
237 | "sub r4, r3, r4, asl #3 \n" /* r4 = a2 = t1 - t3 */ | ||
238 | |||
239 | "add r9, r8, r5 \n" /* r9 = tmp = 565*(d3 + d0) */ | ||
240 | "add r10, r9, r9, asl #4 \n" | ||
241 | "add r10, r10, r10, asl #5 \n" | ||
242 | "add r9, r10, r9, asl #2 \n" | ||
243 | |||
244 | "add r10, r5, r5, asl #4 \n" /* r5 = t0 = tmp + (((2276/4)*d0)*4) */ | ||
245 | "add r10, r10, r10, asl #5 \n" | ||
246 | "add r5, r10, r5, asl #3 \n" | ||
247 | "add r5, r9, r5, asl #2 \n" | ||
248 | |||
249 | "add r10, r8, r8, asl #2 \n" /* r8 = t1 = tmp - (((3406/2)*d3)*2) */ | ||
250 | "add r10, r10, r10, asl #4 \n" | ||
251 | "add r10, r10, r8, asl #7 \n" | ||
252 | "rsb r8, r8, r10, asl #3 \n" | ||
253 | "sub r8, r9, r8, asl #1 \n" | ||
254 | |||
255 | "add r9, r6, r7 \n" /* r9 = tmp = (2408/8)*(d1 + d2) */ | ||
256 | "add r10, r9, r9, asl #3 \n" | ||
257 | "add r10, r10, r10, asl #5 \n" | ||
258 | "add r9, r10, r9, asl #2 \n" | ||
259 | |||
260 | "add r10, r7, r7, asl #3 \n" /* r7 = t2 = (tmp*8) - 799*d2 */ | ||
261 | "add r10, r10, r7, asl #4 \n" | ||
262 | "rsb r7, r7, r10, asl #5 \n" | ||
263 | "rsb r7, r7, r9, asl #3 \n" | ||
264 | |||
265 | "sub r10, r6, r6, asl #4 \n" /* r6 = t3 = (tmp*8) - 4017*d1 */ | ||
266 | "sub r10, r10, r6, asl #6 \n" | ||
267 | "add r10, r10, r6, asl #12 \n" | ||
268 | "add r6, r10, r6 \n" | ||
269 | "rsb r6, r6, r9, asl #3 \n" | ||
270 | /* t0=r5, t1=r8, t2=r7, t3=r6*/ | ||
271 | "add r9, r5, r7 \n" /* r9 = b0 = t0 + t2 */ | ||
272 | "add r10, r8, r6 \n" /* r10 = b3 = t1 + t3 */ | ||
273 | "sub r5, r5, r7 \n" /* t0 -= t2 */ | ||
274 | "sub r8, r8, r6 \n" /* t1 -= t3 */ | ||
275 | "add r6, r5, r8 \n" /* r6 = t0 + t1 */ | ||
276 | "sub r7, r5, r8 \n" /* r7 = t0 - t1 */ | ||
277 | |||
278 | "add r11, r6, r6, asr #2 \n" /* r6 = b1 = r5*(181/128) */ | ||
279 | "add r11, r11, r11, asr #5 \n" | ||
280 | "add r6, r11, r6, asr #3 \n" | ||
281 | "add r11, r7, r7, asr #2 \n" /* r7 = b2 = r6*(181/128) */ | ||
282 | "add r11, r11, r11, asr #5 \n" | ||
283 | "add r7, r11, r7, asr #3 \n" | ||
284 | /* r1 = a0, r3 = a1, r4 = a2, r2 = a3 */ | ||
285 | /* r9 = b0, r6 = b1*2, r7 = b2*2, r10 = b3 */ | ||
286 | "add r5, r1, r9 \n" /* block[0] = (a0 + b0) >> 17 */ | ||
287 | "mov r5, r5, asr #17 \n" | ||
288 | "strh r5, [r0, #0*8] \n" | ||
289 | "add r8, r3, r6, asr #1 \n" /* block[1] = (a1 + b1) >> 17 */ | ||
290 | "mov r8, r8, asr #17 \n" | ||
291 | "strh r8, [r0, #2*8] \n" | ||
292 | "add r5, r4, r7, asr #1 \n" /* block[2] = (a2 + b2) >> 17 */ | ||
293 | "mov r5, r5, asr #17 \n" | ||
294 | "strh r5, [r0, #4*8] \n" | ||
295 | "add r8, r2, r10 \n" /* block[3] = (a3 + b3) >> 17 */ | ||
296 | "mov r8, r8, asr #17 \n" | ||
297 | "strh r8, [r0, #6*8] \n" | ||
298 | "sub r5, r2, r10 \n" /* block[4] = (a3 - b3) >> 17 */ | ||
299 | "mov r5, r5, asr #17 \n" | ||
300 | "strh r5, [r0, #8*8] \n" | ||
301 | "sub r8, r4, r7, asr #1 \n" /* block[5] = (a2 - b2) >> 17 */ | ||
302 | "mov r8, r8, asr #17 \n" | ||
303 | "strh r8, [r0, #10*8] \n" | ||
304 | "sub r5, r3, r6, asr #1 \n" /* block[6] = (a1 - b1) >> 17 */ | ||
305 | "mov r5, r5, asr #17 \n" | ||
306 | "strh r5, [r0, #12*8] \n" | ||
307 | "sub r8, r1, r9 \n" /* block[7] = (a0 - b0) >> 17 */ | ||
308 | "mov r8, r8, asr #17 \n" | ||
309 | "strh r8, [r0, #14*8] \n" | ||
310 | "add r0, r0, #2 \n" | ||
311 | "cmp r0, r12 \n" | ||
312 | "blo 4b \n" | ||
313 | "sub r0, r0, #16 \n" | ||
314 | "bx lr \n" | ||
315 | ); | ||
316 | (void)block; | ||
317 | } | ||
318 | |||
/*
 * mpeg2_idct_copy_c: run idct on a 64-coefficient block, write the result
 * to dest as 8 rows of 8 bytes, each value clamped to 0..255, and clear the
 * block to zero while doing so.
 *
 * The function is naked, so the asm supplies its own prologue and epilogue.
 * It relies on the arguments arriving in registers: r0 = block, r1 = dest,
 * r2 = stride (byte distance between destination rows).
 */
319 | static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, | ||
320 | const int stride) __attribute__((naked)); | ||
321 | static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, | ||
322 | const int stride) | ||
323 | { | ||
324 | asm volatile( | ||
325 | "stmfd sp!, { r1-r2, \ | ||
326 | r4-r12, lr } \n" /* save dest/stride, r4-r12 and the return address */ | ||
327 | "bl idct \n" /* transform in place; idct hands back r0 = block */ | ||
328 | "ldmfd sp!, { r1-r2 } \n" /* r1 = dest, r2 = stride again */ | ||
329 | "mov r11, #0 \n" /* r11 = 0, used to clear the block */ | ||
330 | "add r12, r0, #128 \n" /* r12 = end of block (64 * 2 bytes) */ | ||
331 | "1: \n" /* one iteration per row of 8 coefficients */ | ||
332 | "ldrsh r3, [r0, #0] \n" /* load the whole row before it gets cleared */ | ||
333 | "ldrsh r4, [r0, #2] \n" | ||
334 | "ldrsh r5, [r0, #4] \n" | ||
335 | "ldrsh r6, [r0, #6] \n" | ||
336 | "ldrsh r7, [r0, #8] \n" | ||
337 | "ldrsh r8, [r0, #10] \n" | ||
338 | "ldrsh r9, [r0, #12] \n" | ||
339 | "ldrsh r10, [r0, #14] \n" | ||
340 | "cmp r3, #255 \n" /* unsigned compare: 'hi' if r3 < 0 or r3 > 255 */ | ||
341 | "mvnhi r3, r3, asr #31 \n" /* negative -> 0, too large -> low byte 0xff */ | ||
342 | "strb r3, [r1, #0] \n" /* dest[0] */ | ||
343 | "str r11, [r0], #4 \n" /* clear coefficients 0-1, advance r0 by 4 */ | ||
344 | "cmp r4, #255 \n" /* same clamp for every following value */ | ||
345 | "mvnhi r4, r4, asr #31 \n" | ||
346 | "strb r4, [r1, #1] \n" /* dest[1] */ | ||
347 | "cmp r5, #255 \n" | ||
348 | "mvnhi r5, r5, asr #31 \n" | ||
349 | "strb r5, [r1, #2] \n" /* dest[2] */ | ||
350 | "str r11, [r0], #4 \n" /* clear coefficients 2-3 */ | ||
351 | "cmp r6, #255 \n" | ||
352 | "mvnhi r6, r6, asr #31 \n" | ||
353 | "strb r6, [r1, #3] \n" /* dest[3] */ | ||
354 | "cmp r7, #255 \n" | ||
355 | "mvnhi r7, r7, asr #31 \n" | ||
356 | "strb r7, [r1, #4] \n" /* dest[4] */ | ||
357 | "str r11, [r0], #4 \n" /* clear coefficients 4-5 */ | ||
358 | "cmp r8, #255 \n" | ||
359 | "mvnhi r8, r8, asr #31 \n" | ||
360 | "strb r8, [r1, #5] \n" /* dest[5] */ | ||
361 | "cmp r9, #255 \n" | ||
362 | "mvnhi r9, r9, asr #31 \n" | ||
363 | "strb r9, [r1, #6] \n" /* dest[6] */ | ||
364 | "str r11, [r0], #4 \n" /* clear coefficients 6-7; r0 is now at the next row */ | ||
365 | "cmp r10, #255 \n" | ||
366 | "mvnhi r10, r10, asr #31 \n" | ||
367 | "strb r10, [r1, #7] \n" /* dest[7] */ | ||
368 | "add r1, r1, r2 \n" /* dest += stride */ | ||
369 | "cmp r0, r12 \n" | ||
370 | "blo 1b \n" /* loop until all 8 rows are done */ | ||
371 | "ldmfd sp!, { r4-r12, pc } \n" /* restore r4-r12 and return */ | ||
372 | ); | ||
373 | (void)block; (void)dest; (void)stride; /* parameters are only used through registers; presumably this silences unused-parameter warnings */ | ||
374 | } | ||
375 | |||
/*
 * mpeg2_idct_add_c: add the inverse transform of block to the 8x8 byte area
 * at dest, clamping every sum to 0..255, and clear the block.
 *
 * Two paths:
 *  - full path (label 1): taken when last != 129, or when
 *    (block[0] & 0x70) == 0x40; runs idct and adds all 64 results.
 *  - DC-only path (label 3): otherwise; adds (block[0] + 64) >> 7 to every
 *    pixel and zeroes only block[0] and block[63].
 *
 * The function is naked, so the asm supplies its own prologue and epilogue.
 * It relies on the arguments arriving in registers: r0 = last, r1 = block,
 * r2 = dest, r3 = stride (byte distance between destination rows).
 */
376 | static void mpeg2_idct_add_c (int last, int16_t * block, | ||
377 | uint8_t * dest, const int stride) __attribute__((naked)); | ||
378 | static void mpeg2_idct_add_c (int last, int16_t * block, | ||
379 | uint8_t * dest, const int stride) | ||
380 | { | ||
381 | asm volatile ( | ||
382 | "cmp r0, #129 \n" /* last == 129 ? */ | ||
383 | "mov r0, r1 \n" /* r0 = block (flags are left untouched) */ | ||
384 | "ldreqsh r1, [r0, #0] \n" /* only if last == 129: r1 = block[0] */ | ||
385 | "bne 1f \n" /* last != 129 -> full idct */ | ||
386 | "and r1, r1, #0x70 \n" | ||
387 | "cmp r1, #0x40 \n" | ||
388 | "bne 3f \n" /* (block[0] & 0x70) != 0x40 -> DC-only path */ | ||
389 | "1: \n" /* ---- full idct path ---- */ | ||
390 | "stmfd sp!, { r2-r12, lr } \n" /* save dest/stride, r4-r12 and the return address */ | ||
391 | "bl idct \n" /* transform in place; idct hands back r0 = block */ | ||
392 | "ldmfd sp!, { r1-r2 } \n" /* pop saved r2/r3: r1 = dest, r2 = stride */ | ||
393 | "mov r11, #0 \n" /* r11 = 0, used to clear the block */ | ||
394 | "add r12, r0, #128 \n" /* r12 = end of block (64 * 2 bytes) */ | ||
395 | "2: \n" /* one iteration per row of 8 */ | ||
396 | "ldrb r3, [r1, #0] \n" /* r3-r6 = dest[0..3] */ | ||
397 | "ldrb r4, [r1, #1] \n" | ||
398 | "ldrb r5, [r1, #2] \n" | ||
399 | "ldrb r6, [r1, #3] \n" | ||
400 | "ldrsh r7, [r0, #0] \n" /* r7-r10 = coefficients 0..3 of this row */ | ||
401 | "ldrsh r8, [r0, #2] \n" | ||
402 | "ldrsh r9, [r0, #4] \n" | ||
403 | "ldrsh r10, [r0, #6] \n" | ||
404 | "add r7, r7, r3 \n" /* coefficient 0 + dest[0] */ | ||
405 | "ldrb r3, [r1, #4] \n" /* fetch dest[4] for the second half */ | ||
406 | "cmp r7, #255 \n" /* unsigned compare: 'hi' if r7 < 0 or r7 > 255 */ | ||
407 | "mvnhi r7, r7, asr #31 \n" /* negative -> 0, too large -> low byte 0xff */ | ||
408 | "strb r7, [r1, #0] \n" /* dest[0] */ | ||
409 | "ldrsh r7, [r0, #8] \n" /* fetch coefficient 4 */ | ||
410 | "add r8, r8, r4 \n" /* coefficient 1 + dest[1] */ | ||
411 | "ldrb r4, [r1, #5] \n" /* fetch dest[5] */ | ||
412 | "cmp r8, #255 \n" /* same clamp for every following value */ | ||
413 | "mvnhi r8, r8, asr #31 \n" | ||
414 | "strb r8, [r1, #1] \n" /* dest[1] */ | ||
415 | "ldrsh r8, [r0, #10] \n" /* fetch coefficient 5 */ | ||
416 | "add r9, r9, r5 \n" /* coefficient 2 + dest[2] */ | ||
417 | "ldrb r5, [r1, #6] \n" /* fetch dest[6] */ | ||
418 | "cmp r9, #255 \n" | ||
419 | "mvnhi r9, r9, asr #31 \n" | ||
420 | "strb r9, [r1, #2] \n" /* dest[2] */ | ||
421 | "ldrsh r9, [r0, #12] \n" /* fetch coefficient 6 */ | ||
422 | "add r10, r10, r6 \n" /* coefficient 3 + dest[3] */ | ||
423 | "ldrb r6, [r1, #7] \n" /* fetch dest[7] */ | ||
424 | "cmp r10, #255 \n" | ||
425 | "mvnhi r10, r10, asr #31 \n" | ||
426 | "strb r10, [r1, #3] \n" /* dest[3] */ | ||
427 | "ldrsh r10, [r0, #14] \n" /* fetch coefficient 7; the whole row is now loaded */ | ||
428 | "str r11, [r0], #4 \n" /* clear coefficients 0-1, advance r0 by 4 */ | ||
429 | "add r7, r7, r3 \n" /* coefficient 4 + dest[4] */ | ||
430 | "cmp r7, #255 \n" | ||
431 | "mvnhi r7, r7, asr #31 \n" | ||
432 | "strb r7, [r1, #4] \n" /* dest[4] */ | ||
433 | "str r11, [r0], #4 \n" /* clear coefficients 2-3 */ | ||
434 | "add r8, r8, r4 \n" /* coefficient 5 + dest[5] */ | ||
435 | "cmp r8, #255 \n" | ||
436 | "mvnhi r8, r8, asr #31 \n" | ||
437 | "strb r8, [r1, #5] \n" /* dest[5] */ | ||
438 | "str r11, [r0], #4 \n" /* clear coefficients 4-5 */ | ||
439 | "add r9, r9, r5 \n" /* coefficient 6 + dest[6] */ | ||
440 | "cmp r9, #255 \n" | ||
441 | "mvnhi r9, r9, asr #31 \n" | ||
442 | "strb r9, [r1, #6] \n" /* dest[6] */ | ||
443 | "add r10, r10, r6 \n" /* coefficient 7 + dest[7] */ | ||
444 | "cmp r10, #255 \n" | ||
445 | "mvnhi r10, r10, asr #31 \n" | ||
446 | "strb r10, [r1, #7] \n" /* dest[7] */ | ||
447 | "str r11, [r0], #4 \n" /* clear coefficients 6-7; r0 is now at the next row */ | ||
448 | "add r1, r1, r2 \n" /* dest += stride */ | ||
449 | "cmp r0, r12 \n" | ||
450 | "blo 2b \n" /* loop until all 8 rows are done */ | ||
451 | "ldmfd sp!, { r4-r12, pc } \n" /* restore r4-r12 and return */ | ||
452 | "3: \n" /* ---- DC-only path ---- */ | ||
453 | "stmfd sp!, { r4-r11 } \n" /* no call is made here, so lr is not saved */ | ||
454 | "ldrsh r1, [r0, #0] \n" /* r1 = block[0] */ | ||
455 | "mov r11, #0 \n" | ||
456 | "strh r11, [r0, #0] \n" /* block[0] = 0 */ | ||
457 | "strh r11, [r0, #126] \n" /* block[63] = 0 */ | ||
458 | "add r1, r1, #64 \n" /* r1 = block[0] + 64: rounding bias for the 'asr #7' below */ | ||
459 | "add r0, r2, r3, asl #3 \n" /* r0 = dest + 8 * stride = end pointer */ | ||
460 | "4: \n" /* one iteration per destination row */ | ||
461 | "ldrb r4, [r2, #0] \n" /* r4-r11 = dest[0..7] */ | ||
462 | "ldrb r5, [r2, #1] \n" | ||
463 | "ldrb r6, [r2, #2] \n" | ||
464 | "ldrb r7, [r2, #3] \n" | ||
465 | "ldrb r8, [r2, #4] \n" | ||
466 | "ldrb r9, [r2, #5] \n" | ||
467 | "ldrb r10, [r2, #6] \n" | ||
468 | "ldrb r11, [r2, #7] \n" | ||
469 | "add r4, r4, r1, asr #7 \n" /* dest[0] + ((block[0] + 64) >> 7) */ | ||
470 | "cmp r4, #255 \n" /* clamp to 0..255 as in the full path */ | ||
471 | "mvnhi r4, r4, asr #31 \n" | ||
472 | "strb r4, [r2, #0] \n" | ||
473 | "add r5, r5, r1, asr #7 \n" /* same for dest[1] .. dest[7] */ | ||
474 | "cmp r5, #255 \n" | ||
475 | "mvnhi r5, r5, asr #31 \n" | ||
476 | "strb r5, [r2, #1] \n" | ||
477 | "add r6, r6, r1, asr #7 \n" | ||
478 | "cmp r6, #255 \n" | ||
479 | "mvnhi r6, r6, asr #31 \n" | ||
480 | "strb r6, [r2, #2] \n" | ||
481 | "add r7, r7, r1, asr #7 \n" | ||
482 | "cmp r7, #255 \n" | ||
483 | "mvnhi r7, r7, asr #31 \n" | ||
484 | "strb r7, [r2, #3] \n" | ||
485 | "add r8, r8, r1, asr #7 \n" | ||
486 | "cmp r8, #255 \n" | ||
487 | "mvnhi r8, r8, asr #31 \n" | ||
488 | "strb r8, [r2, #4] \n" | ||
489 | "add r9, r9, r1, asr #7 \n" | ||
490 | "cmp r9, #255 \n" | ||
491 | "mvnhi r9, r9, asr #31 \n" | ||
492 | "strb r9, [r2, #5] \n" | ||
493 | "add r10, r10, r1, asr #7 \n" | ||
494 | "cmp r10, #255 \n" | ||
495 | "mvnhi r10, r10, asr #31 \n" | ||
496 | "strb r10, [r2, #6] \n" | ||
497 | "add r11, r11, r1, asr #7 \n" | ||
498 | "cmp r11, #255 \n" | ||
499 | "mvnhi r11, r11, asr #31 \n" | ||
500 | "strb r11, [r2, #7] \n" | ||
501 | "add r2, r2, r3 \n" /* dest += stride */ | ||
502 | "cmp r2, r0 \n" | ||
503 | "blo 4b \n" /* loop until 8 rows have been updated */ | ||
504 | "ldmfd sp!, { r4-r11 } \n" /* restore callee-saved registers */ | ||
505 | "bx lr \n" /* return */ | ||
506 | ); | ||
507 | (void)last; (void)block; (void)dest; (void)stride; /* parameters are only used through registers; presumably this silences unused-parameter warnings */ | ||
508 | } | ||
509 | |||
510 | void mpeg2_idct_init (void) | ||
511 | { | ||
512 | extern uint8_t default_mpeg2_scan_norm[64]; | ||
513 | extern uint8_t default_mpeg2_scan_alt[64]; | ||
514 | extern uint8_t mpeg2_scan_norm[64]; | ||
515 | extern uint8_t mpeg2_scan_alt[64]; | ||
516 | int i, j; | ||
517 | |||
518 | mpeg2_idct_copy = mpeg2_idct_copy_c; | ||
519 | mpeg2_idct_add = mpeg2_idct_add_c; | ||
520 | |||
521 | for (i = 0; i < 64; i++) | ||
522 | { | ||
523 | j = default_mpeg2_scan_norm[i]; | ||
524 | mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); | ||
525 | |||
526 | j = default_mpeg2_scan_alt[i]; | ||
527 | mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); | ||
528 | } | ||
529 | } | ||