summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- apps/recorder/jpeg_idct_arm.S 162
1 files changed, 86 insertions, 76 deletions
diff --git a/apps/recorder/jpeg_idct_arm.S b/apps/recorder/jpeg_idct_arm.S
index 01b08c4b5a..d84e5e7962 100644
--- a/apps/recorder/jpeg_idct_arm.S
+++ b/apps/recorder/jpeg_idct_arm.S
@@ -113,7 +113,11 @@ jpeg_idct2h:
113 results can not be stored merged. 113 results can not be stored merged.
114*/ 114*/
115 stmdb sp!, { r4-r5, lr } 115 stmdb sp!, { r4-r5, lr }
116#if ARM_ARCH < 5
116 ldr r14, =4112 117 ldr r14, =4112
118#else
119 ldrsh r14, .Lpool4+2
120#endif
1171: 1211:
118 ldrsh r12, [r0] 122 ldrsh r12, [r0]
119 ldrsh r4, [r0, #2] 123 ldrsh r4, [r0, #2]
@@ -140,7 +144,7 @@ jpeg_idct2h:
140 ldmia sp!, { r4-r5, pc } 144 ldmia sp!, { r4-r5, pc }
141#else 145#else
142 stmdb sp!, { r4, lr } 146 stmdb sp!, { r4, lr }
143 ldr r14, =4112 147 ldrsh r14, .Lpool4+2
1441: 1481:
145 ldr r12, [r0] 149 ldr r12, [r0]
146 sadd16 r12, r12, r14 150 sadd16 r12, r12, r14
@@ -198,27 +202,26 @@ jpeg_idct4v:
198 ldmia sp!, { r4-r7, pc } 202 ldmia sp!, { r4-r7, pc }
199#elif ARM_ARCH < 6 203#elif ARM_ARCH < 6
200 stmdb sp!, { r4-r8, lr } 204 stmdb sp!, { r4-r8, lr }
201 ldr r8, =1024 205 mov r8, #1024
202 ldr r14, =4433 206 ldrd r4, .Lpool4
203 ldr r12, =3302955134
2041: 2071:
205 ldrsh r5, [r0, #48] 208 ldrsh r14, [r0, #48]
206 ldrsh r3, [r0, #16] 209 ldrsh r3, [r0, #16]
207 ldrsh r4, [r0, #32] 210 ldrsh r12, [r0, #32]
208 ldrsh r2, [r0] 211 ldrsh r2, [r0]
209 add r6, r3, r5 /* r6 = z1 = d1 + d3 */ 212 add r6, r3, r14 /* r6 = z1 = d1 + d3 */
210 add r7, r2, r4 /* r7 = tmp10 >> 2 = d0 + d2 */ 213 add r7, r2, r12 /* r7 = tmp10 >> 2 = d0 + d2 */
211 smlabb r6, r14, r6, r8 /* z1 *= 4433 */ 214 smlabb r6, r5, r6, r8 /* z1 *= 4433 */
212 sub r2, r2, r4 /* r2 = tmp12 >> 2= d0 - d2 */ 215 sub r2, r2, r12 /* r2 = tmp12 >> 2= d0 - d2 */
213 smlabb r3, r12, r3, r6 /* r3 = tmp2 = z1 + z2 * 6270 */ 216 smlatb r3, r5, r3, r6 /* r3 = tmp2 = z1 + z2 * 6270 */
214 smlatb r5, r12, r5, r6 /* r5 = tmp0 = z1 - z3 * 15137 */ 217 smlabb r14, r4, r14, r6 /* r14 = tmp0 = z1 - z3 * 15137 */
215 mov r7, r7, lsl #2 218 mov r7, r7, lsl #2
216 mov r2, r2, lsl #2 219 mov r2, r2, lsl #2
217 add r4, r7, r3, asr #11 /* r4 = o0 */ 220 add r12, r7, r3, asr #11 /* r12 = o0 */
218 sub r7, r7, r3, asr #11 /* r7 = o3 */ 221 sub r7, r7, r3, asr #11 /* r7 = o3 */
219 add r3, r2, r5, asr #11 /* r3 = o1 */ 222 add r3, r2, r14, asr #11 /* r3 = o1 */
220 sub r2, r2, r5, asr #11 /* r2 = o2 */ 223 sub r2, r2, r14, asr #11 /* r2 = o2 */
221 strh r4, [r0] 224 strh r12, [r0]
222 strh r7, [r0, #48] 225 strh r7, [r0, #48]
223 strh r3, [r0, #16] 226 strh r3, [r0, #16]
224 strh r2, [r0, #32] 227 strh r2, [r0, #32]
@@ -228,9 +231,8 @@ jpeg_idct4v:
228 ldmia sp!, { r4-r8, pc } 231 ldmia sp!, { r4-r8, pc }
229#else 232#else
230 stmdb sp!, { r4-r10, lr } 233 stmdb sp!, { r4-r10, lr }
231 ldr r2, =1024 234 ldrd r2, .Lpool4
232 ldr r3, =4433 235 mov r12, #1024
233 ldr r12, =3302955134
2341: 2361:
235 ldr r6, [r0, #32] 237 ldr r6, [r0, #32]
236 ldr r4, [r0] 238 ldr r4, [r0]
@@ -247,12 +249,12 @@ jpeg_idct4v:
247 /* multiplication expands values beyond 16 bits, so this part needs to be 249 /* multiplication expands values beyond 16 bits, so this part needs to be
248 split. the values will be merged below so that the rest of the addition 250 split. the values will be merged below so that the rest of the addition
249 can be done in parallel */ 251 can be done in parallel */
250 smlabb r9, r3, r6, r2 /* r9 = z1[0] = (d1 + d3) * 4433 + 1024 */ 252 smlabb r9, r3, r6, r12 /* r9 = z1[0] = (d1 + d3) * 4433 + 1024 */
251 smlabt r6, r3, r6, r2 /* r6 = z1[1] = (d1 + d3) * 4433 + 1024 */ 253 smlabt r6, r3, r6, r12 /* r6 = z1[1] = (d1 + d3) * 4433 + 1024 */
252 smlabb r10, r12, r5, r9 /* r10 = tmp2[0] = z1 + d1 * 6270 */ 254 smlatb r10, r3, r5, r9 /* r10 = tmp2[0] = z1 + d1 * 6270 */
253 smlatb r14, r12, r7, r9 /* r14 = tmp0[0] = z1 - d3 * 15137 */ 255 smlabb r14, r2, r7, r9 /* r14 = tmp0[0] = z1 - d3 * 15137 */
254 smlabt r5, r12, r5, r6 /* r5 = tmp2[1] */ 256 smlatt r5, r3, r5, r6 /* r5 = tmp2[1] */
255 smlatt r6, r12, r7, r6 /* r6 = tmp0[1] */ 257 smlabt r6, r2, r7, r6 /* r6 = tmp0[1] */
256 mov r8, r8, lsl #2 /* complete the parallel shift started */ 258 mov r8, r8, lsl #2 /* complete the parallel shift started */
257 mov r4, r4, lsl #2 /* with the earlier bic instructions */ 259 mov r4, r4, lsl #2 /* with the earlier bic instructions */
258 /* tmp2 are in r10, r5; tmp0 are in r14, r6 */ 260 /* tmp2 are in r10, r5; tmp0 are in r14, r6 */
@@ -276,6 +278,17 @@ jpeg_idct4v:
276#endif 278#endif
277 .size jpeg_idct4v, .-jpeg_idct4v 279 .size jpeg_idct4v, .-jpeg_idct4v
278 280
281#if ARM_ARCH > 4 /* the visible ARM_ARCH < 5 code paths keep using "ldr rX, =const" and never reference this pool */
282 .align 4 /* GNU as on ARM treats the operand as a power of two: 16-byte alignment; the pool is read with ldrd */
283.Lpool4: /* four 16-bit IDCT constants, fetched as a register pair with ldrd or individually with ldrsh; the order below is relied upon by the loads */
284 .short -15137 /* offset 0: multiplier used for "z1 - z3 * 15137" (low halfword of the first ldrd register, assuming little-endian) */
285 .short 4112 /* offset 2: bias added to d0; read via "ldrsh .Lpool4+2" or as the first ldrd register shifted right by 16 */
286 .short 4433 /* offset 4: multiplier applied to z1 = d1 + d3 (low halfword of the second ldrd register) */
287 .short 6270 /* offset 6: multiplier used for "z1 + z2 * 6270" (high halfword of the second ldrd register) */
288
289 .align 2 /* 4-byte alignment for the function that follows */
290#endif
291
279jpeg_idct4h: 292jpeg_idct4h:
280#if ARM_ARCH < 5 293#if ARM_ARCH < 5
281 stmdb sp!, { r4-r10, lr } 294 stmdb sp!, { r4-r10, lr }
@@ -328,88 +341,85 @@ jpeg_idct4h:
328 cmp r0, r2 341 cmp r0, r2
329 bcc 1b 342 bcc 1b
330 ldmia sp!, { r4-r10, pc } 343 ldmia sp!, { r4-r10, pc }
331#elif ARM_ARCH < 6 344#elif ARM_ARCH < 6 /* ARM_ARCH == 5 only; ARMv6 and later take the #else branch */
332 stmdb sp!, { r4-r10, lr } 345 stmdb sp!, { r4-r9, lr } /* r10 is no longer used: the constants live in r4/r5 */
333 ldr r10, =4433 346 ldrd r4, .Lpool4 /* r4 = 4112:-15137, r5 = 6270:4433 (high:low halfwords, assuming little-endian) */
334 ldr r14, =4112
335 ldr r12, =3302955134
3361: 3471:
337 ldrsh r7, [r0, #6] 348 ldrsh r7, [r0, #6]
338 ldrsh r5, [r0, #2] 349 ldrsh r14, [r0, #2]
339 ldrsh r4, [r0] 350 ldrsh r12, [r0]
340 ldrsh r6, [r0, #4] 351 ldrsh r6, [r0, #4]
341 add r8, r5, r7 /* r8 = z1 = d1 + d3 */ 352 add r8, r14, r7 /* r8 = z1 = d1 + d3 */
342 add r4, r4, r14 353 add r12, r12, r4, lsr #16 /* d0 += 4112 (high halfword of r4) */
343 smulbb r8, r10, r8 /* z1 *= 4433 */ 354 smulbb r8, r5, r8 /* z1 *= 4433 */
344 add r9, r4, r6 /* r9 = tmp10 >> 13 = d0 + d2 */ 355 add r9, r12, r6 /* r9 = tmp10 >> 13 = d0 + d2 */
345 smlabb r5, r12, r5, r8 /* r5 = tmp2 = z1 + z2 * 6270 */ 356 smlatb r14, r5, r14, r8 /* r14= tmp2 = z1 + z2 * 6270 */
346 smlatb r7, r12, r7, r8 /* r7 = tmp0 = z1 - z3 * 15137 */ 357 smlabb r7, r4, r7, r8 /* r7 = tmp0 = z1 - z3 * 15137 */
347 sub r4, r4, r6 /* r4 = tmp12 >> 13 = d0 - d2 */ 358 sub r12, r12, r6 /* r12= tmp12 >> 13 = d0 - d2 */
348 add r6, r5, r9, lsl #13 /* r6 = o0 */ 359 add r6, r14, r9, lsl #13 /* r6 = o0 */
349 rsb r9, r5, r9, lsl #13 /* r9 = o3 */ 360 rsb r9, r14, r9, lsl #13 /* r9 = o3 */
350 add r5, r7, r4, lsl #13 /* r5 = o1 */ 361 add r14, r7, r12, lsl #13 /* r14= o1 */
351 rsb r4, r7, r4, lsl #13 /* r4 = o2 */ 362 rsb r12, r7, r12, lsl #13 /* r12= o2 */
352 mov r6, r6, asr #18 363 mov r6, r6, asr #18
353 mov r5, r5, asr #18 364 mov r14, r14, asr #18
354 mov r4, r4, asr #18 365 mov r12, r12, asr #18
355 mov r9, r9, asr #18 366 mov r9, r9, asr #18
356 cmp r6, #255 367 cmp r6, #255
357 mvnhi r6, r6, asr #31 368 mvnhi r6, r6, asr #31 /* clamp for the byte store: negative -> 0, above 255 -> low byte 0xff */
358 cmp r5, #255 369 cmp r14, #255
359 mvnhi r5, r5, asr #31 370 mvnhi r14, r14, asr #31
360 cmp r4, #255 371 cmp r12, #255
361 mvnhi r4, r4, asr #31 372 mvnhi r12, r12, asr #31
362 cmp r9, #255 373 cmp r9, #255
363 mvnhi r9, r9, asr #31 374 mvnhi r9, r9, asr #31
364#ifdef HAVE_LCD_COLOR 375#ifdef HAVE_LCD_COLOR
365 strb r6, [r1] 376 strb r6, [r1]
366 strb r5, [r1, #4] 377 strb r14, [r1, #4]
367 strb r4, [r1, #8] 378 strb r12, [r1, #8]
368 strb r9, [r1, #12] 379 strb r9, [r1, #12]
369#else 380#else
370 strb r6, [r1] 381 strb r6, [r1]
371 strb r5, [r1, #1] 382 strb r14, [r1, #1]
372 strb r4, [r1, #2] 383 strb r12, [r1, #2]
373 strb r9, [r1, #3] 384 strb r9, [r1, #3]
374#endif 385#endif
375 add r0, r0, #16 386 add r0, r0, #16 /* advance the input pointer by 16 bytes */
376 add r1, r1, r3 387 add r1, r1, r3 /* advance the output pointer by r3 (presumably the output stride - confirm against the caller) */
377 cmp r0, r2 388 cmp r0, r2
378 bcc 1b 389 bcc 1b /* loop while r0 is below the end address in r2 */
379 ldmia sp!, { r4-r10, pc } 390 ldmia sp!, { r4-r9, pc }
380#else 391#else
381 stmdb sp!, { r4-r9, lr } 392 stmdb sp!, { r4-r9, lr }
382 ldr r9, =4433 393 ldrd r4, .Lpool4
383 ldr r14, =4112 394 mov r9, r4, lsr #16
384 ldr r12, =3302955134
3851: 3951:
386 ldmia r0, { r4-r5 } 396 ldmia r0, { r12, r14 }
387 sadd16 r4, r4, r14 397 sadd16 r12, r12, r9
388 sadd16 r6, r4, r5 /* r6lo = d0 + d2, r6hi = d1 + d3 */ 398 sadd16 r6, r12, r14 /* r6lo = d0 + d2, r6hi = d1 + d3 */
389 ssub16 r7, r4, r5 /* r7lo = d0 - d2 */ 399 ssub16 r7, r12, r14 /* r7lo = d0 - d2 */
390 smulbt r8, r9, r6 400 smulbt r8, r5, r6
391 sxth r6, r6 401 sxth r6, r6
392 smlabt r4, r12, r4, r8 /* r4 = tmp2 = z1 + z2 * 6270 */ 402 smlatt r12, r5, r12, r8 /* r12= tmp2 = z1 + z2 * 6270 */
393 smlatt r5, r12, r5, r8 /* r5 = tmp0 = z1 - z3 * 15137 */ 403 smlabt r14, r4, r14, r8 /* r14= tmp0 = z1 - z3 * 15137 */
394 sxth r7, r7 404 sxth r7, r7
395 add r8, r4, r6, lsl #13 /* r8 = o0 */ 405 add r8, r12, r6, lsl #13 /* r8 = o0 */
396 rsb r6, r4, r6, lsl #13 /* r6 = o3 */ 406 rsb r6, r12, r6, lsl #13 /* r6 = o3 */
397 add r4, r5, r7, lsl #13 /* r4 = o1 */ 407 add r12, r14, r7, lsl #13 /* r12= o1 */
398 rsb r5, r5, r7, lsl #13 /* r5 = o2 */ 408 rsb r14, r14, r7, lsl #13 /* r14= o2 */
399 usat r8, #8, r8, asr #18 409 usat r8, #8, r8, asr #18
400 usat r6, #8, r6, asr #18 410 usat r6, #8, r6, asr #18
401 usat r4, #8, r4, asr #18 411 usat r12, #8, r12, asr #18
402 usat r5, #8, r5, asr #18 412 usat r14, #8, r14, asr #18
403#ifdef HAVE_LCD_COLOR 413#ifdef HAVE_LCD_COLOR
404 strb r8, [r1] 414 strb r8, [r1]
405 strb r6, [r1, #12] 415 strb r6, [r1, #12]
406 strb r4, [r1, #4] 416 strb r12, [r1, #4]
407 strb r5, [r1, #8] 417 strb r14, [r1, #8]
408#else 418#else
409 strb r8, [r1] 419 strb r8, [r1]
410 strb r6, [r1, #3] 420 strb r6, [r1, #3]
411 strb r4, [r1, #1] 421 strb r12, [r1, #1]
412 strb r5, [r1, #2] 422 strb r14, [r1, #2]
413#endif 423#endif
414 add r0, r0, #16 424 add r0, r0, #16
415 add r1, r1, r3 425 add r1, r1, r3
@@ -450,7 +460,7 @@ jpeg_idct8v:
450 mov r11, r11, asr #16 /* r11 = z3 = d6 */ 460 mov r11, r11, asr #16 /* r11 = z3 = d6 */
451 add r8, r8, #8192 461 add r8, r8, #8192
452 add r9, r10, r11 462 add r9, r10, r11
453 mov r8, r8, asr #3 /* r8 = z4 = (d0 + 4112) << 13 */ 463 mov r8, r8, asr #3 /* r8 = z4 = (d0 << 13) + 1024 */
454 mul r9, r14, r9 /* r9 = z1 = (z2 + z3) * 4433 */ 464 mul r9, r14, r9 /* r9 = z1 = (z2 + z3) * 4433 */
455 ldr r14, =6270 465 ldr r14, =6270
456 mla r11, r12, r11, r9 /* r11 = tmp2 = z1 - z3 * 15137 */ 466 mla r11, r12, r11, r9 /* r11 = tmp2 = z1 - z3 * 15137 */