diff options
Diffstat (limited to 'apps/recorder/jpeg_idct_arm.S')
-rw-r--r-- | apps/recorder/jpeg_idct_arm.S | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/apps/recorder/jpeg_idct_arm.S b/apps/recorder/jpeg_idct_arm.S new file mode 100644 index 0000000000..2ef868e753 --- /dev/null +++ b/apps/recorder/jpeg_idct_arm.S | |||
@@ -0,0 +1,287 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * JPEG assembly IDCT | ||
11 | * | ||
12 | * Copyright (C) 2009 Andrew Mahone asm versions of the C IDCT algorithms used | ||
13 | * jpeg_load.c with | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or | ||
16 | * modify it under the terms of the GNU General Public License | ||
17 | * as published by the Free Software Foundation; either version 2 | ||
18 | * of the License, or (at your option) any later version. | ||
19 | * | ||
20 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
21 | * KIND, either express or implied. | ||
22 | * | ||
23 | ****************************************************************************/ | ||
24 | #include "config.h" | ||
25 | |||
26 | .section .text | ||
27 | .align 2 | ||
28 | .global jpeg_idct4v | ||
29 | .type jpeg_idct4v, %function | ||
30 | .global jpeg_idct4h | ||
31 | .type jpeg_idct4h, %function | ||
32 | |||
/*
 * jpeg_idct4v
 *
 * 4-point inverse DCT applied in place down the columns of a block of
 * signed 16-bit coefficients.
 *
 * On entry:
 *   r0 = address of the first column; advanced as columns are processed
 *   r1 = address at which processing stops (compared against r0)
 * NOTE(review): the C prototype is not visible in this file - confirm the
 * argument types against the caller.
 *
 * For each column the inputs d0..d3 are read from [r0], [r0, #16],
 * [r0, #32] and [r0, #48], and the results are written back to the same
 * four locations:
 *
 *   z1   = (d1 + d3) * 4433 + 1024
 *   tmp2 = z1 + d1 * 6270
 *   tmp0 = z1 - d3 * 15137
 *   o0   = ((d0 + d2) << 2) + (tmp2 >> 11)      -> [r0]
 *   o1   = ((d0 - d2) << 2) + (tmp0 >> 11)      -> [r0, #16]
 *   o2   = ((d0 - d2) << 2) - (tmp0 >> 11)      -> [r0, #32]
 *   o3   = ((d0 + d2) << 2) - (tmp2 >> 11)      -> [r0, #48]
 *
 * r2, r3 and ip (r12) are overwritten. Every other register used,
 * including lr (r14) which serves as a scratch register, is pushed in the
 * prologue and restored by the return.
 *
 * Three variants are selected by ARM_ARCH: plain multiplies (< 5), 16x16
 * multiply-accumulate (< 6), and a packed variant that transforms two
 * adjacent columns per iteration (otherwise).
 */
jpeg_idct4v:
#if ARM_ARCH < 5
    stmfd  sp!, { r4-r7, lr }
    ldr    lr,  =-15137
    ldr    ip,  =6270

.Lidct4v_v4_column:
    ldrsh  r4,  [r0, #32]          /* r4 = d2 */
    ldrsh  r2,  [r0]               /* r2 = d0 */
    ldrsh  r5,  [r0, #48]          /* r5 = d3 */
    ldrsh  r3,  [r0, #16]          /* r3 = d1 */

    add    r6,  r2,  r4            /* r6 = d0 + d2 (tmp10 before scaling) */
    sub    r2,  r2,  r4            /* r2 = d0 - d2 (tmp12 before scaling) */
    add    r4,  r3,  r5            /* r4 = d1 + d3 */

    /* multiply r4 by 4433 = 31 * 143 using shifts only */
    add    r7,  r4,  r4,  lsl #3   /* r7 = 9 * r4 */
    rsb    r4,  r4,  r7,  lsl #4   /* r4 = 144 * r4 - r4 = 143 * r4 */
    rsb    r4,  r4,  r4,  lsl #5   /* r4 = 32 * r4 - r4 = 4433 * (d1 + d3) */
    add    r4,  r4,  #1024         /* z1: add half of 1 << 11 for rounding */

    mla    r3,  ip,  r3,  r4       /* r3 = tmp2 = z1 + d1 * 6270 */
    mla    r5,  lr,  r5,  r4       /* r5 = tmp0 = z1 - d3 * 15137 */

    mov    r6,  r6,  lsl #2        /* r6 = tmp10 = (d0 + d2) << 2 */
    mov    r2,  r2,  lsl #2        /* r2 = tmp12 = (d0 - d2) << 2 */

    add    r7,  r6,  r3,  asr #11  /* r7 = o0 */
    sub    r3,  r6,  r3,  asr #11  /* r3 = o3 */
    add    r6,  r2,  r5,  asr #11  /* r6 = o1 */
    sub    r2,  r2,  r5,  asr #11  /* r2 = o2 */

    strh   r7,  [r0]
    strh   r3,  [r0, #48]
    strh   r6,  [r0, #16]
    strh   r2,  [r0, #32]

    add    r0,  r0,  #2            /* step to the next 16-bit column */
    teq    r0,  r1
    bne    .Lidct4v_v4_column
    ldmfd  sp!, { r4-r7, pc }
#elif ARM_ARCH < 6
    stmfd  sp!, { r4-r8, lr }
    ldr    r8,  =1024              /* rounding term folded into z1 */
    ldr    lr,  =4433
    ldr    ip,  =3302955134        /* 0xC4DF187E: top = -15137, bottom = 6270 */

.Lidct4v_v5_column:
    ldrsh  r5,  [r0, #48]          /* r5 = d3 */
    ldrsh  r3,  [r0, #16]          /* r3 = d1 */
    ldrsh  r4,  [r0, #32]          /* r4 = d2 */
    ldrsh  r2,  [r0]               /* r2 = d0 */

    add    r6,  r3,  r5            /* r6 = d1 + d3 */
    add    r7,  r2,  r4            /* r7 = d0 + d2 (tmp10 before scaling) */
    smlabb r6,  lr,  r6,  r8       /* r6 = z1 = (d1 + d3) * 4433 + 1024 */
    sub    r2,  r2,  r4            /* r2 = d0 - d2 (tmp12 before scaling) */
    smlabb r3,  ip,  r3,  r6       /* r3 = tmp2 = z1 + d1 * 6270 */
    smlatb r5,  ip,  r5,  r6       /* r5 = tmp0 = z1 - d3 * 15137 */

    mov    r7,  r7,  lsl #2        /* r7 = tmp10 = (d0 + d2) << 2 */
    mov    r2,  r2,  lsl #2        /* r2 = tmp12 = (d0 - d2) << 2 */

    add    r4,  r7,  r3,  asr #11  /* r4 = o0 */
    sub    r7,  r7,  r3,  asr #11  /* r7 = o3 */
    add    r3,  r2,  r5,  asr #11  /* r3 = o1 */
    sub    r2,  r2,  r5,  asr #11  /* r2 = o2 */

    strh   r4,  [r0]
    strh   r7,  [r0, #48]
    strh   r3,  [r0, #16]
    strh   r2,  [r0, #32]

    add    r0,  r0,  #2            /* step to the next 16-bit column */
    teq    r0,  r1
    bne    .Lidct4v_v5_column
    ldmfd  sp!, { r4-r8, pc }
#else
    stmfd  sp!, { r4-r10, lr }
    ldr    r2,  =1024              /* rounding term folded into z1 */
    ldr    r3,  =4433
    ldr    ip,  =3302955134        /* 0xC4DF187E: top = -15137, bottom = 6270 */

.Lidct4v_v6_column_pair:
    /* each word load fetches the same row of two adjacent columns:
       column 0 in the bottom halfword, column 1 in the top halfword */
    ldr    r6,  [r0, #32]          /* r6 = d2 pair */
    ldr    r4,  [r0]               /* r4 = d0 pair */
    ldr    r7,  [r0, #48]          /* r7 = d3 pair */
    ldr    r5,  [r0, #16]          /* r5 = d1 pair */

    /* halfword-parallel sums and differences for both columns */
    sadd16 r8,  r4,  r6            /* r8 = d0 + d2 */
    ssub16 r4,  r4,  r6            /* r4 = d0 - d2 */
    sadd16 r6,  r5,  r7            /* r6 = d1 + d3 */

    /* There is no halfword-parallel shift. Clearing bits 14-15 of the
       bottom halfword first lets the plain lsl #2 further down shift both
       halves without the bottom half spilling into the top half. */
    bic    r8,  r8,  #0xc000
    bic    r4,  r4,  #0xc000

    /* The products need more than 16 bits, so each column is multiplied
       separately; the results are repacked into halfword pairs below. */
    smlabb r9,  r3,  r6,  r2       /* r9  = z1[0] = (d1 + d3)[0] * 4433 + 1024 */
    smlabt r6,  r3,  r6,  r2       /* r6  = z1[1] = (d1 + d3)[1] * 4433 + 1024 */
    smlabb r10, ip,  r5,  r9       /* r10 = tmp2[0] = z1[0] + d1[0] * 6270 */
    smlatb lr,  ip,  r7,  r9       /* lr  = tmp0[0] = z1[0] - d3[0] * 15137 */
    smlabt r5,  ip,  r5,  r6       /* r5  = tmp2[1] = z1[1] + d1[1] * 6270 */
    smlatt r6,  ip,  r7,  r6       /* r6  = tmp0[1] = z1[1] - d3[1] * 15137 */

    mov    r8,  r8,  lsl #2        /* r8 = tmp10 pair = (d0 + d2) << 2 */
    mov    r4,  r4,  lsl #2        /* r4 = tmp12 pair = (d0 - d2) << 2 */

    /* Column 0 results are shifted down explicitly. For column 1, lsl #5
       moves bits 11..26 into the top halfword, which is the same
       16-bit value as an arithmetic shift right by 11. */
    mov    r10, r10, asr #11
    mov    lr,  lr,  asr #11
    pkhbt  r5,  r10, r5,  lsl #5   /* r5 = (tmp2 >> 11) pair */
    pkhbt  r6,  lr,  r6,  lsl #5   /* r6 = (tmp0 >> 11) pair */

    sadd16 r10, r8,  r5            /* r10 = o0 pair */
    ssub16 r5,  r8,  r5            /* r5  = o3 pair */
    sadd16 lr,  r4,  r6            /* lr  = o1 pair */
    ssub16 r6,  r4,  r6            /* r6  = o2 pair */

    str    r10, [r0]
    str    r5,  [r0, #48]
    str    lr,  [r0, #16]
    str    r6,  [r0, #32]

    add    r0,  r0,  #4            /* step over the two columns just done */
    cmp    r0,  r1
    bcc    .Lidct4v_v6_column_pair /* continue while r0 < r1 (unsigned) */
    ldmfd  sp!, { r4-r10, pc }
#endif
    .size jpeg_idct4v, .-jpeg_idct4v
145 | |||
/*
 * jpeg_idct4h
 *
 * 4-point inverse DCT along rows of signed 16-bit values, producing four
 * bytes per row, each saturated to the range 0..255.
 *
 * On entry:
 *   r0 = address of the first input row; advanced by 16 bytes per row
 *   r1 = address of the first output byte; advanced by r3 per row
 *   r2 = input address at which processing stops (compared against r0)
 *   r3 = amount added to r1 after each row
 * NOTE(review): the C prototype is not visible in this file - confirm the
 * argument types against the caller.
 *
 * For each row the inputs d0..d3 are read from [r0], [r0, #2], [r0, #4]
 * and [r0, #6]:
 *
 *   d0  += 4112      (4112 << 13 == (128 << 18) + (1 << 17), i.e. an
 *                     offset of 128 plus one half after the final >> 18)
 *   z1   = (d1 + d3) * 4433
 *   tmp2 = z1 + d1 * 6270
 *   tmp0 = z1 - d3 * 15137
 *   o0   = (((d0 + d2) << 13) + tmp2) >> 18
 *   o1   = (((d0 - d2) << 13) + tmp0) >> 18
 *   o2   = (((d0 - d2) << 13) - tmp0) >> 18
 *   o3   = (((d0 + d2) << 13) - tmp2) >> 18
 *
 * o0..o3 are stored as bytes at [r1], [r1, #4], [r1, #8], [r1, #12] when
 * HAVE_LCD_COLOR is defined, otherwise at [r1], [r1, #1], [r1, #2],
 * [r1, #3].
 *
 * r12 is overwritten. Every other register used, including r14 (lr) which
 * serves as a scratch register, is pushed in the prologue and restored by
 * the return.
 */
jpeg_idct4h:
#if ARM_ARCH < 5
    stmdb  sp!, { r4-r10, lr }
    ldr    r10, =-15137
    ldr    r14, =4112
    ldr    r12, =6270
1:
    ldrsh  r4,  [r0]               /* r4 = d0 */
    ldrsh  r6,  [r0, #4]           /* r6 = d2 */
    ldrsh  r7,  [r0, #6]           /* r7 = d3 */
    ldrsh  r5,  [r0, #2]           /* r5 = d1 */
    add    r4,  r4,  r14           /* bias d0 by 4112 */
    add    r8,  r4,  r6            /* r8 = tmp10 >> 13 = d0 + d2 */
    sub    r4,  r4,  r6            /* r4 = tmp12 >> 13 = d0 - d2 */
    add    r6,  r5,  r7            /* r6 = z1 = d1 + d3 */
    /* multiply r6 by 4433 = 31 * 143 using shifts only */
    add    r9,  r6,  r6,  lsl #3   /* r9 = 9 * r6 */
    rsb    r6,  r6,  r9,  lsl #4   /* r6 = 144 * r6 - r6 = 143 * r6 */
    rsb    r6,  r6,  r6,  lsl #5   /* z1 *= 4433 */
    mla    r7,  r10, r7,  r6       /* r7 = tmp0 = z1 - d3 * 15137 */
    mla    r5,  r12, r5,  r6       /* r5 = tmp2 = z1 + d1 * 6270 */
    add    r9,  r5,  r8,  lsl #13  /* r9 = o0 */
    rsb    r5,  r5,  r8,  lsl #13  /* r5 = o3 */
    add    r8,  r7,  r4,  lsl #13  /* r8 = o1 */
    rsb    r4,  r7,  r4,  lsl #13  /* r4 = o2 */
    mov    r9,  r9,  asr #18
    mov    r8,  r8,  asr #18
    mov    r4,  r4,  asr #18
    mov    r5,  r5,  asr #18
    /* Saturate to 0..255. The unsigned "hi" test catches both negative
       values and values above 255; ~(x asr #31) is then 0 for a negative x
       and all ones (stored as 0xff by strb) for a positive x. */
    cmp    r9,  #255
    mvnhi  r9,  r9,  asr #31
    cmp    r8,  #255
    mvnhi  r8,  r8,  asr #31
    cmp    r4,  #255
    mvnhi  r4,  r4,  asr #31
    cmp    r5,  #255
    mvnhi  r5,  r5,  asr #31
#ifdef HAVE_LCD_COLOR
    strb   r9,  [r1]
    strb   r8,  [r1, #4]
    strb   r4,  [r1, #8]
    strb   r5,  [r1, #12]
#else
    strb   r9,  [r1]
    strb   r8,  [r1, #1]
    strb   r4,  [r1, #2]
    strb   r5,  [r1, #3]
#endif
    add    r0,  r0,  #16           /* next input row */
    add    r1,  r1,  r3            /* next output row */
    teq    r0,  r2
    bne    1b
    ldmia  sp!, { r4-r10, pc }
#elif ARM_ARCH < 6
    stmdb  sp!, { r4-r10, lr }
    ldr    r10, =4433
    ldr    r14, =4112
    ldr    r12, =3302955134        /* 0xC4DF187E: top = -15137, bottom = 6270 */
1:
    ldrsh  r7,  [r0, #6]           /* r7 = d3 */
    ldrsh  r5,  [r0, #2]           /* r5 = d1 */
    ldrsh  r4,  [r0]               /* r4 = d0 */
    ldrsh  r6,  [r0, #4]           /* r6 = d2 */
    add    r8,  r5,  r7            /* r8 = z1 = d1 + d3 */
    add    r4,  r4,  r14           /* bias d0 by 4112 */
    smulbb r8,  r10, r8            /* z1 *= 4433 */
    add    r9,  r4,  r6            /* r9 = tmp10 >> 13 = d0 + d2 */
    smlabb r5,  r12, r5,  r8       /* r5 = tmp2 = z1 + d1 * 6270 */
    smlatb r7,  r12, r7,  r8       /* r7 = tmp0 = z1 - d3 * 15137 */
    /* r5 already holds tmp2 at this point, so the difference has to be
       taken from the biased d0 that is still in r4 */
    sub    r4,  r4,  r6            /* r4 = tmp12 >> 13 = d0 - d2 */
    add    r6,  r5,  r9,  lsl #13  /* r6 = o0 */
    rsb    r9,  r5,  r9,  lsl #13  /* r9 = o3 */
    add    r5,  r7,  r4,  lsl #13  /* r5 = o1 */
    rsb    r4,  r7,  r4,  lsl #13  /* r4 = o2 */
    mov    r6,  r6,  asr #18
    mov    r5,  r5,  asr #18
    mov    r4,  r4,  asr #18
    mov    r9,  r9,  asr #18
    /* Saturate to 0..255. The unsigned "hi" test catches both negative
       values and values above 255; ~(x asr #31) is then 0 for a negative x
       and all ones (stored as 0xff by strb) for a positive x. */
    cmp    r6,  #255
    mvnhi  r6,  r6,  asr #31
    cmp    r5,  #255
    mvnhi  r5,  r5,  asr #31
    cmp    r4,  #255
    mvnhi  r4,  r4,  asr #31
    cmp    r9,  #255
    mvnhi  r9,  r9,  asr #31
#ifdef HAVE_LCD_COLOR
    strb   r6,  [r1]
    strb   r5,  [r1, #4]
    strb   r4,  [r1, #8]
    strb   r9,  [r1, #12]
#else
    strb   r6,  [r1]
    strb   r5,  [r1, #1]
    strb   r4,  [r1, #2]
    strb   r9,  [r1, #3]
#endif
    add    r0,  r0,  #16           /* next input row */
    add    r1,  r1,  r3            /* next output row */
    teq    r0,  r2
    bne    1b
    ldmia  sp!, { r4-r10, pc }
#else
    stmdb  sp!, { r4-r9, lr }
    ldr    r9,  =4433
    ldr    r14, =4112              /* top halfword is 0, so only d0 is biased */
    ldr    r12, =3302955134        /* 0xC4DF187E: top = -15137, bottom = 6270 */
1:
    ldmia  r0,  { r4-r5 }          /* r4 = d1:d0, r5 = d3:d2 (top:bottom) */
    sadd16 r4,  r4,  r14           /* bias d0 by 4112, d1 unchanged */
    sadd16 r6,  r4,  r5            /* r6lo = d0 + d2, r6hi = d1 + d3 */
    ssub16 r7,  r4,  r5            /* r7lo = d0 - d2 */
    smulbt r8,  r9,  r6            /* r8 = z1 = (d1 + d3) * 4433 */
    sxth   r6,  r6                 /* r6 = tmp10 >> 13 = d0 + d2 */
    smlabt r4,  r12, r4,  r8       /* r4 = tmp2 = z1 + d1 * 6270 */
    smlatt r5,  r12, r5,  r8       /* r5 = tmp0 = z1 - d3 * 15137 */
    sxth   r7,  r7                 /* r7 = tmp12 >> 13 = d0 - d2 */
    add    r8,  r4,  r6,  lsl #13  /* r8 = o0 */
    rsb    r6,  r4,  r6,  lsl #13  /* r6 = o3 */
    add    r4,  r5,  r7,  lsl #13  /* r4 = o1 */
    rsb    r5,  r5,  r7,  lsl #13  /* r5 = o2 */
    /* shift down by 18 and saturate to 0..255 in a single instruction */
    usat   r8,  #8,  r8,  asr #18
    usat   r6,  #8,  r6,  asr #18
    usat   r4,  #8,  r4,  asr #18
    usat   r5,  #8,  r5,  asr #18
#ifdef HAVE_LCD_COLOR
    strb   r8,  [r1]
    strb   r6,  [r1, #12]
    strb   r4,  [r1, #4]
    strb   r5,  [r1, #8]
#else
    strb   r8,  [r1]
    strb   r6,  [r1, #3]
    strb   r4,  [r1, #1]
    strb   r5,  [r1, #2]
#endif
    add    r0,  r0,  #16           /* next input row */
    add    r1,  r1,  r3            /* next output row */
    teq    r0,  r2
    bne    1b
    ldmia  sp!, { r4-r9, pc }
#endif
    .size jpeg_idct4h, .-jpeg_idct4h