-rw-r--r--  apps/recorder/jpeg_load.c | 144
1 file changed, 141 insertions(+), 3 deletions(-)
diff --git a/apps/recorder/jpeg_load.c b/apps/recorder/jpeg_load.c
index de5c95eab7..f5e0ea2cf3 100644
--- a/apps/recorder/jpeg_load.c
+++ b/apps/recorder/jpeg_load.c
@@ -253,6 +253,11 @@ INLINE unsigned range_limit(int value)
 #define BUFAC 227
 #define COMPONENT_SHIFT 15
 
+/* Some of the below have inline ASM optimizations of the loop contents. To
+   make comparison with the C versions easier, the C variable names are used
+   in comments whenever intermediate values are labeled.
+*/
+
 /* horizontal-pass 1-point IDCT */
 static void idct1h(int16_t *ws, unsigned char *out, int rows, int rowstep)
 {
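For reading the ASM in the hunks below, it helps to have the fixed-point constants in view. Their definitions are not part of this diff; the following is a sketch assuming the usual libjpeg scaled-integer values (round(x * 2^13), consistent with the #1024 rounding terms and the asr #11 / asr #18 descales used below) and a hypothetical MULTIPLY16 definition:

    /* Assumed values -- standard libjpeg 13-bit scaled constants; the real
       definitions live elsewhere in jpeg_load.c and are not shown here. */
    #define CONST_BITS 13
    #define FIX_0_541196100  4433   /* round(0.541196100 * (1 << 13)) */
    #define FIX_0_765366865  6270   /* round(0.765366865 * (1 << 13)) */
    #define FIX_1_847759065 15137   /* round(1.847759065 * (1 << 13)) */

    /* Hypothetical MULTIPLY16: a 16x16->32 signed multiply; all operands
       fit in a signed halfword, which is why the ARMv5 path below can use
       smulbb/smlabb/smlatb. */
    #define MULTIPLY16(v, c) ((int)(v) * (int)(c))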
@@ -303,6 +308,64 @@ static void idct4v(int16_t *ws, int cols)
     int col;
     for (col = 0; col < cols; col++, ws++)
     {
+#if defined(CPU_ARM)
+        int t0, t1, t2, t3, t4;
+#if ARM_ARCH <= 4
+        int t5;
+#endif
+        asm volatile(
+            "ldrsh %[t4], [%[ws]]\n\t"      /* t4 = tmp0 (ws[8*0]) */
+            "ldrsh %[t1], [%[ws], #32]\n\t" /* t1 = tmp2 (ws[8*2]) */
+            "ldrsh %[t2], [%[ws], #16]\n\t" /* t2 = z2 (ws[8*1]) */
+            "add %[t0], %[t4], %[t1]\n\t"   /* t0 = tmp10 >> 2
+                                               (tmp0 + tmp2) */
+            "sub %[t1], %[t4], %[t1]\n\t"   /* t1 = tmp12 >> 2
+                                               (tmp0 - tmp2) */
324 "ldrsh %[t3], [%[ws], #48]\n\t" /* t3 = z3 (ws[8*3] */
325 "add %[t4], %[t2], %[t3]\n\t" /* t4 = z2 + z3 */
326#if ARM_ARCH > 4
327 "smulbb %[t4], %[c1], %[t4]\n\t"
328 "add %[t4], %[t4], #1024\n\t" /* t4 = z1 */
329 "smlatb %[t3], %[c2c3], %[t3], %[t4]\n\t"
330 "smlabb %[t2], %[c2c3], %[t2], %[t4]\n\t"
331 "mov %[t3], %[t3], asr #11\n\t" /* t3 = tmp0 */
332 "mov %[t2], %[t2], asr #11\n\t" /* t2 = tmp2 */
333#else
334 "add %[t5], %[t4], %[t4], lsl #3\n\t"
335 "rsb %[t4], %[t4], %[t5], lsl #4\n\t"
336 "rsb %[t4], %[t4], %[t4], lsl #5\n\t"
337 "add %[t4], %[t4], #1024\n\t" /*z1*/
338 "mla %[t3], %[c2], %[t3], %[t4]\n\t"
339 "mla %[t2], %[c3], %[t2], %[t4]\n\t"
340 "mov %[t3], %[t3], asr #11\n\t" /* t3 = tmp0 */
341 "mov %[t2], %[t2], asr #11\n\t" /* t2 = tmp2 */
342#endif
343 "add %[t4], %[t2], %[t0], lsl #2\n\t" /* t4 = tmp10 + tmp2 */
344 "rsb %[t0], %[t2], %[t0], lsl #2\n\t" /* t0 = tmp10 - tmp2 */
345 "add %[t2], %[t3], %[t1], lsl #2\n\t" /* t2 = tmp12 + tmp0 */
346 "rsb %[t3], %[t3], %[t1], lsl #2\n\t" /* t3 = tmp12 - tmp0 */
347 "strh %[t4], [%[ws]]\n\t"
348 "strh %[t0], [%[ws], #48]\n\t"
349 "strh %[t2], [%[ws], #16]\n\t"
350 "strh %[t3], [%[ws], #32]\n\t"
351 : [t0] "=&r" (t0),
352 [t1] "=&r" (t1),
353 [t2] "=&r" (t2),
354 [t3] "=&r" (t3),
355 [t4] "=&r" (t4)
356#if ARM_ARCH <= 4
357 ,[t5] "=&r" (t5)
358#endif
359 : [ws] "r" (ws),
360#if ARM_ARCH > 4
361 [c1] "r" (FIX_0_541196100),
362 [c2c3] "r" (((-FIX_1_847759065)<<16)|FIX_0_765366865)
363#else
364 [c2] "r" (-FIX_1_847759065),
365 [c3] "r" (FIX_0_765366865)
366#endif
367 );
368#else
306 int tmp0, tmp2, tmp10, tmp12; 369 int tmp0, tmp2, tmp10, tmp12;
307 int z1, z2, z3; 370 int z1, z2, z3;
308 /* Even part */ 371 /* Even part */
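The ARM_ARCH <= 4 branch cannot use smulbb, so the single multiply by FIX_0_541196100 (4433 under the scaling assumed above) becomes three shift-and-accumulate instructions. A standalone sketch verifying that decomposition:

    #include <assert.h>

    /* Mirrors the ARM_ARCH <= 4 sequence:
         add t5, t4, t4, lsl #3   ; t5 = 9*x
         rsb t4, t4, t5, lsl #4   ; t4 = 144*x - x = 143*x
         rsb t4, t4, t4, lsl #5   ; t4 = 32*143*x - 143*x = 31*143*x */
    static int mul_4433(int x)
    {
        int t5 = x + (x << 3);    /* 9x   */
        int t4 = (t5 << 4) - x;   /* 143x */
        t4 = (t4 << 5) - t4;      /* 4433x, since 31 * 143 = 4433 */
        return t4;
    }

    int main(void)
    {
        for (int x = -1024; x <= 1024; x++)
            assert(mul_4433(x) == 4433 * x);
        return 0;
    }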
@@ -332,17 +395,91 @@ static void idct4v(int16_t *ws, int cols)
         ws[8*3] = (int) (tmp10 - tmp2);
         ws[8*1] = (int) (tmp12 + tmp0);
         ws[8*2] = (int) (tmp12 - tmp0);
+#endif
     }
 }
 
 /* horizontal-pass 4-point IDCT */
 static void idct4h(int16_t *ws, unsigned char *out, int rows, int rowstep)
 {
-    int tmp0, tmp2, tmp10, tmp12;
-    int z1, z2, z3;
     int row;
     for (row = 0; row < rows; row++, out += rowstep, ws += 8)
     {
+#if defined(CPU_ARM)
+        int t0, t1, t2, t3, t4;
+#if ARM_ARCH <= 4
+        int t5;
+#endif
+        asm volatile(
+            "ldrsh %[t4], [%[ws]]\n\t"      /* t4 = tmp0 (ws[0]) */
+            "ldrsh %[t1], [%[ws], #4]\n\t"  /* t1 = tmp2 (ws[2]) */
+            "add %[t4], %[t4], #16\n\t"     /* add rounding to DC */
+            "add %[t4], %[t4], #4096\n\t"   /* pre-add offset */
+            "ldrsh %[t2], [%[ws], #2]\n\t"  /* t2 = z2 (ws[1]) */
+            "add %[t0], %[t4], %[t1]\n\t"   /* t0 = tmp10 >> 13
+                                               (tmp0 + tmp2) */
+            "sub %[t1], %[t4], %[t1]\n\t"   /* t1 = tmp12 >> 13
+                                               (tmp0 - tmp2) */
423 "ldrsh %[t3], [%[ws], #6]\n\t" /* t3 = z3 (ws[3] */
424 "add %[t4], %[t2], %[t3]\n\t" /* t4 = z2 + z3 */
425#if ARM_ARCH > 4
426 "smulbb %[t4], %[c1], %[t4]\n\t"
427 "smlatb %[t3], %[c2c3], %[t3], %[t4]\n\t"
428 "smlabb %[t2], %[c2c3], %[t2], %[t4]\n\t"
429#else
430 "add %[t5], %[t4], %[t4], lsl #3\n\t"
431 "rsb %[t4], %[t4], %[t5], lsl #4\n\t"
432 "rsb %[t4], %[t4], %[t4], lsl #5\n\t" /* t4 = z1 */
433 "mla %[t3], %[c2], %[t3], %[t4]\n\t"
434 "mla %[t2], %[c3], %[t2], %[t4]\n\t"
435#endif
436 "add %[t4], %[t2], %[t0], lsl #13\n\t" /* t4 = tmp10 + tmp2 */
437 "rsb %[t0], %[t2], %[t0], lsl #13\n\t" /* t0 = tmp10 - tmp2 */
438 "add %[t2], %[t3], %[t1], lsl #13\n\t" /* t2 = tmp12 + tmp0 */
439 "rsb %[t3], %[t3], %[t1], lsl #13\n\t" /* t3 = tmp12 - tmp0 */
440 "mov %[t4], %[t4], asr #18\n\t" /* descale results */
441 "mov %[t0], %[t0], asr #18\n\t"
442 "mov %[t2], %[t2], asr #18\n\t"
443 "mov %[t3], %[t3], asr #18\n\t"
444 "cmp %[t4], #255\n\t" /* range limit results */
445 "mvnhi %[t4], %[t4], asr #31\n\t"
446 "cmp %[t0], #255\n\t"
447 "mvnhi %[t0], %[t0], asr #31\n\t"
448 "cmp %[t2], #255\n\t"
449 "mvnhi %[t2], %[t2], asr #31\n\t"
450 "cmp %[t3], #255\n\t"
451 "mvnhi %[t3], %[t3], asr #31\n\t"
452 "cmp %[t4], #255\n\t"
453 "mvnhi %[t4], %[t4], asr #31\n\t"
454 "strb %[t4], [%[out]]\n\t"
455 "strb %[t0], [%[out], %[o3]]\n\t"
456 "strb %[t2], [%[out], %[o1]]\n\t"
457 "strb %[t3], [%[out], %[o2]]\n\t"
458 : [t0] "=&r" (t0),
459 [t1] "=&r" (t1),
460 [t2] "=&r" (t2),
461 [t3] "=&r" (t3),
462 [t4] "=&r" (t4)
463#if ARM_ARCH <= 4
464
465 ,[t5] "=&r" (t5)
466#endif
467 : [ws] "r" (ws),
468 [out] "r" (out),
469 [o1] "i" (JPEG_PIX_SZ),
470 [o2] "i" (JPEG_PIX_SZ*2),
471 [o3] "i" (JPEG_PIX_SZ*3),
472#if ARM_ARCH > 4
473 [c1] "r" (FIX_0_541196100),
474 [c2c3] "r" (((-FIX_1_847759065)<<16)|FIX_0_765366865)
475#else
476 [c2] "r" (-FIX_1_847759065),
477 [c3] "r" (FIX_0_765366865)
478#endif
479 );
480#else
481 int tmp0, tmp2, tmp10, tmp12;
482 int z1, z2, z3;
346 /* Even part */ 483 /* Even part */
347 484
348 tmp0 = (int) ws[0] + (ONE << (PASS1_BITS + 2) 485 tmp0 = (int) ws[0] + (ONE << (PASS1_BITS + 2)
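The cmp/mvnhi pairs above implement range_limit() without branches: an unsigned compare against 255 is true exactly when the value is negative or above 255, and mvnhi then writes ~(x >> 31), which strb truncates to 0 or 255. A C sketch of the same trick:

    #include <assert.h>
    #include <stdint.h>

    /* Branch-free clamp matching "cmp x, #255; mvnhi x, x, asr #31":
       ~(x >> 31) is 0 for negative x and all-ones (255 after byte
       truncation) for overflow; arithmetic >> on negative values is
       assumed here, as on ARM. */
    static uint8_t clamp_u8(int32_t x)
    {
        if ((uint32_t)x > 255)
            x = ~(x >> 31);  /* 0 if negative, 0xFFFFFFFF if too large */
        return (uint8_t)x;   /* strb keeps only the low byte */
    }

    int main(void)
    {
        assert(clamp_u8(-5)  == 0);
        assert(clamp_u8(0)   == 0);
        assert(clamp_u8(128) == 128);
        assert(clamp_u8(255) == 255);
        assert(clamp_u8(300) == 255);
        return 0;
    }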
@@ -359,7 +496,7 @@ static void idct4h(int16_t *ws, unsigned char *out, int rows, int rowstep)
         z3 = (int) ws[3];
 
         z1 = MULTIPLY16(z2 + z3, FIX_0_541196100);
-        tmp0 = z1 + MULTIPLY16(z3, - FIX_1_847759065);
+        tmp0 = z1 - MULTIPLY16(z3, FIX_1_847759065);
         tmp2 = z1 + MULTIPLY16(z2, FIX_0_765366865);
 
         /* Final output stage */
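The rewrite in this hunk is value-preserving: z1 + MULTIPLY16(z3, -FIX_1_847759065) equals z1 - MULTIPLY16(z3, FIX_1_847759065). The negative factor is the same one the ARM path carries in the top halfword of the packed [c2c3] operand read by smlatb. A sketch of that packing, reusing the assumed constant values from the first sketch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* Assumed 13-bit-scaled values, as in the first sketch. */
        const int FIX_0_765366865 = 6270;
        const int FIX_1_847759065 = 15137;

        /* The packed operand handed to the asm as [c2c3]. */
        uint32_t c2c3 = ((uint32_t)(-FIX_1_847759065) << 16)
                        | (uint16_t)FIX_0_765366865;

        /* smlatb reads the top halfword as signed 16-bit,
           smlabb the bottom halfword. */
        int16_t top    = (int16_t)(c2c3 >> 16);
        int16_t bottom = (int16_t)(c2c3 & 0xffff);

        assert(top    == -FIX_1_847759065); /* multiplies z3 -> tmp0 */
        assert(bottom ==  FIX_0_765366865); /* multiplies z2 -> tmp2 */
        return 0;
    }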
@@ -372,6 +509,7 @@ static void idct4h(int16_t *ws, unsigned char *out, int rows, int rowstep)
                                                            DS_OUT));
         out[JPEG_PIX_SZ*2] = range_limit((int) RIGHT_SHIFT(tmp12 - tmp0,
                                                            DS_OUT));
+#endif
     }
 }
 