diff options
author | Andrew Mahone <andrew.mahone@gmail.com> | 2009-07-02 09:57:03 +0000 |
---|---|---|
committer | Andrew Mahone <andrew.mahone@gmail.com> | 2009-07-02 09:57:03 +0000 |
commit | 017c1a1027627e601cc5c22e43e42e1735835259 (patch) | |
tree | 1025d3aed96f33b48751bd276a5cc506e11856e0 /apps | |
parent | 293b499093baef544f7148a0fcfa18d28ed3d1ea (diff) | |
download | rockbox-017c1a1027627e601cc5c22e43e42e1735835259.tar.gz rockbox-017c1a1027627e601cc5c22e43e42e1735835259.zip |
Core JPEG IDCT8 optimizations for ARMv5+, small optimizations for ARMv4.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21612 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/recorder/jpeg_idct_arm.S | 247 |
1 files changed, 233 insertions, 14 deletions
diff --git a/apps/recorder/jpeg_idct_arm.S b/apps/recorder/jpeg_idct_arm.S index d84e5e7962..46ac479caa 100644 --- a/apps/recorder/jpeg_idct_arm.S +++ b/apps/recorder/jpeg_idct_arm.S | |||
@@ -434,6 +434,7 @@ jpeg_idct8v: | |||
434 | add r2, r0, #128 | 434 | add r2, r0, #128 |
435 | 1: | 435 | 1: |
436 | ldmia r0!, { r4-r7 } | 436 | ldmia r0!, { r4-r7 } |
437 | #if ARM_ARCH < 5 | ||
437 | mov r8, r4, lsl #16 | 438 | mov r8, r4, lsl #16 |
438 | orrs r9, r6, r7 | 439 | orrs r9, r6, r7 |
439 | orreqs r9, r5, r4, lsr #16 | 440 | orreqs r9, r5, r4, lsr #16 |
@@ -528,25 +529,125 @@ jpeg_idct8v: | |||
528 | strh r11, [r2, #80] | 529 | strh r11, [r2, #80] |
529 | strh r5, [r2, #96] | 530 | strh r5, [r2, #96] |
530 | strh r14, [r2, #112] | 531 | strh r14, [r2, #112] |
532 | #else /* ARMv5+ */ | ||
533 | mov r12, r4, lsl #16 | ||
534 | orrs r9, r6, r7 | ||
535 | orreqs r9, r5, r4, lsr #16 | ||
536 | bne 2f | ||
537 | mov r12, r12, asr #14 | ||
538 | strh r12, [r2] | ||
539 | strh r12, [r2, #16] | ||
540 | strh r12, [r2, #32] | ||
541 | strh r12, [r2, #48] | ||
542 | strh r12, [r2, #64] | ||
543 | strh r12, [r2, #80] | ||
544 | strh r12, [r2, #96] | ||
545 | strh r12, [r2, #112] | ||
546 | add r2, r2, #2 | ||
547 | cmp r0, r1 | ||
548 | bcc 1b | ||
549 | ldmia sp!, { r4-r11, pc } | ||
550 | 2: | ||
551 | ldrd r8, .Lpool8 | ||
552 | add r12, r12, #8192 | ||
553 | add r10, r5, r7 /* r10[15:0] = d2 + d6 */ | ||
554 | sub r14, r12, r6, lsl #16 /* r14 = tmp1 << 3 = (d0 - d4) << 16 */ | ||
555 | smulbb r10, r8, r10 /* r10 = z1 = (d2 + d6) * 4433 */ | ||
556 | add r12, r12, r6, lsl #16 /* r12 = tmp0 << 3= (d0 + d4) << 16 */ | ||
557 | smlatb r11, r8, r7, r10 /* r11 = tmp2 = z1 - d6 * 15137 */ | ||
558 | smlabb r10, r9, r5, r10 /* r10 = tmp3 = z1 + d2 * 6270 */ | ||
559 | add r8, r11, r14, asr #3 /* r8 = tmp11 */ | ||
560 | rsb r11, r11, r14, asr #3 /* r11 = tmp12 */ | ||
561 | add r14, r10, r12, asr #3 /* r14 = tmp10 */ | ||
562 | rsb r12, r10, r12, asr #3 /* r12 = tmp13 */ | ||
563 | stmdb sp, { r8, r11, r12, r14 }/* tmp11 tmp12 tmp13 tmp10 */ | ||
564 | mov r6, r6, asr #16 /* r6 = tmp1 = d5 */ | ||
565 | mov r7, r7, asr #16 /* r7 = tmp0 = d7 */ | ||
566 | add r12, r6, r4, asr #16 /* r12 = z4 = tmp1 + tmp3 */ | ||
567 | add r14, r7, r5, asr #16 /* r14 = z3 = tmp0 + tmp2 */ | ||
568 | add r8, r12, r14 /* r8 = z3 + z4 */ | ||
569 | ldrd r10, .Lpool8+8 | ||
570 | smultb r8, r9, r8 /* r8 = z5 = (z3 + z4) * 9633 */ | ||
571 | add r9, r7, r4, asr #16 /* r9 = z1 = tmp0 + tmp3 */ | ||
572 | smlabb r14, r10, r14, r8 /* r14 = z3 = z5 - z3 * 16069 */ | ||
573 | smlatb r12, r10, r12, r8 /* r12 = z4 = z5 - z4 * 3196 */ | ||
574 | smlabb r8, r11, r9, r14 /* r8 = z3 - z1 * 7373 */ | ||
575 | smlabb r9, r11, r9, r12 /* r9 = z4 - z1 * 7373 */ | ||
576 | add r10, r6, r5, asr #16 /* r10 = z2 = tmp1 + tmp2 */ | ||
577 | smlatb r12, r11, r10, r12 /* r12 = z4 - z2 * 20995 */ | ||
578 | smlatb r14, r11, r10, r14 /* r14 = z3 - z2 * 20995 */ | ||
579 | ldrd r10, .Lpool8+16 | ||
580 | smlabb r7, r10, r7, r8 /* r7 = tmp0 */ | ||
581 | smlatt r4, r10, r4, r9 /* r4 = tmp3 */ | ||
582 | smlabb r6, r11, r6, r12 /* r6 = tmp1 */ | ||
583 | smlatt r5, r11, r5, r14 /* r5 = tmp2 */ | ||
584 | ldmdb sp, { r8-r11 } /* tmp11 tmp12 tmp13 tmp10 */ | ||
585 | add r12, r8, r5 /* o1 */ | ||
586 | sub r14, r8, r5 /* o6 */ | ||
587 | add r8, r9, r6 /* o2 */ | ||
588 | sub r9, r9, r6 /* o5 */ | ||
589 | add r6, r10, r7 /* o3 */ | ||
590 | sub r7, r10, r7 /* o4 */ | ||
591 | add r10, r11, r4 /* o0 */ | ||
592 | sub r11, r11, r4 /* o7 */ | ||
593 | mov r12, r12, asr #11 | ||
594 | mov r14, r14, asr #11 | ||
595 | mov r8, r8, asr #11 | ||
596 | mov r9, r9, asr #11 | ||
597 | mov r6, r6, asr #11 | ||
598 | mov r7, r7, asr #11 | ||
599 | mov r10, r10, asr #11 | ||
600 | mov r11, r11, asr #11 | ||
601 | strh r10, [r2] | ||
602 | strh r12, [r2, #16] | ||
603 | strh r8, [r2, #32] | ||
604 | strh r6, [r2, #48] | ||
605 | strh r7, [r2, #64] | ||
606 | strh r9, [r2, #80] | ||
607 | strh r14, [r2, #96] | ||
608 | strh r11, [r2, #112] | ||
609 | #endif | ||
531 | cmp r0, r1 | 610 | cmp r0, r1 |
532 | add r2, r2, #2 | 611 | add r2, r2, #2 |
533 | bcc 1b | 612 | bcc 1b |
534 | ldmia sp!, { r4-r11, pc } | 613 | ldmia sp!, { r4-r11, pc } |
535 | .size jpeg_idct8v, .-jpeg_idct8v | 614 | .size jpeg_idct8v, .-jpeg_idct8v |
536 | 615 | ||
616 | #if ARM_ARCH > 4 /* pool is only read by the ARMv5+ ldrd/smulxy/smlaxy paths */ | ||
617 | .align 4 /* NOTE(review): presumably 2^4 = 16-byte alignment under ARM gas; the ldrd loads from .Lpool8 need doubleword alignment - confirm */ | ||
618 | .Lpool8: /* 16-bit IDCT multipliers, fetched pairwise with ldrd at .Lpool8, .Lpool8+8 and .Lpool8+16; order must match the b/t halfword selectors in the multiplies */ | ||
619 | .short 4433 /* z1 = (d2 + d6) * 4433 */ | ||
620 | .short -15137 /* tmp2 = z1 - d6 * 15137 */ | ||
621 | .short 6270 /* tmp3 = z1 + d2 * 6270 */ | ||
622 | .short 9633 /* z5 = (z3 + z4) * 9633 */ | ||
623 | .short -16069 /* z3 = z5 - z3 * 16069 */ | ||
624 | .short -3196 /* z4 = z5 - z4 * 3196 */ | ||
625 | .short -7373 /* z1 term: z3 - z1 * 7373 and z4 - z1 * 7373 */ | ||
626 | .short -20995 /* z2 term: z4 - z2 * 20995 and z3 - z2 * 20995 */ | ||
627 | .short 2446 /* scales tmp0 (d7) in the final odd-part accumulate */ | ||
628 | .short 12299 /* scales tmp3 (top half of r4) in the final odd-part accumulate */ | ||
629 | .short 16819 /* scales tmp1 (d5) in the final odd-part accumulate */ | ||
630 | .short 25172 /* scales tmp2 (top half of r5) in the final odd-part accumulate */ | ||
631 | .align 2 /* NOTE(review): presumably re-establishes word alignment for the code that follows - confirm */ | ||
632 | #endif | ||
633 | |||
537 | jpeg_idct8h: | 634 | jpeg_idct8h: |
538 | stmdb sp!, { r4-r11, lr } | 635 | stmdb sp!, { r4-r11, lr } |
539 | 1: | 636 | 1: |
540 | ldmia r0!, { r4-r7 } | 637 | ldmia r0!, { r4-r7 } |
541 | ldr r14, =4112 | 638 | ldr r14, =(4112<<16) |
542 | mov r8, r4, lsl #16 | 639 | #if ARM_ARCH < 5 |
543 | add r8, r8, r14, lsl #16 | 640 | add r8, r14, r4, lsl #16 |
544 | orrs r9, r6, r7 | 641 | orrs r9, r6, r7 |
545 | orreqs r9, r5, r4, lsr #16 | 642 | orreqs r9, r5, r4, lsr #16 |
546 | bne 2f | 643 | bne 2f |
644 | #if ARM_ARCH < 6 | ||
547 | mov r8, r8, asr #21 | 645 | mov r8, r8, asr #21 |
548 | cmp r8, #255 | 646 | cmp r8, #255 |
549 | mvnhi r8, r8, asr #31 | 647 | mvnhi r8, r8, asr #31 |
648 | #else | ||
649 | usat r8, #8, r8, asr #21 | ||
650 | #endif | ||
550 | #ifdef HAVE_LCD_COLOR | 651 | #ifdef HAVE_LCD_COLOR |
551 | strb r8, [r1] | 652 | strb r8, [r1] |
552 | strb r8, [r1, #4] | 653 | strb r8, [r1, #4] |
@@ -630,7 +731,6 @@ jpeg_idct8h: | |||
630 | add r10, r11, r6 /* o2 */ | 731 | add r10, r11, r6 /* o2 */ |
631 | sub r11, r11, r6 /* o5 */ | 732 | sub r11, r11, r6 /* o5 */ |
632 | /* output in order: r12 r4 r10 r8 r9 r11 r5 r14 */ | 733 | /* output in order: r12 r4 r10 r8 r9 r11 r5 r14 */ |
633 | #if ARM_ARCH < 6 | ||
634 | mov r12, r12, asr #18 | 734 | mov r12, r12, asr #18 |
635 | cmp r12, #255 | 735 | cmp r12, #255 |
636 | mvnhi r12, r12, asr #31 | 736 | mvnhi r12, r12, asr #31 |
@@ -655,16 +755,6 @@ jpeg_idct8h: | |||
655 | mov r14, r14, asr #18 | 755 | mov r14, r14, asr #18 |
656 | cmp r14, #255 | 756 | cmp r14, #255 |
657 | mvnhi r14, r14, asr #31 | 757 | mvnhi r14, r14, asr #31 |
658 | #else | ||
659 | usat r12, #8, r12, asr #18 | ||
660 | usat r4, #8, r4, asr #18 | ||
661 | usat r10, #8, r10, asr #18 | ||
662 | usat r8, #8, r8, asr #18 | ||
663 | usat r9, #8, r9, asr #18 | ||
664 | usat r11, #8, r11, asr #18 | ||
665 | usat r5, #8, r5, asr #18 | ||
666 | usat r14, #8, r14, asr #18 | ||
667 | #endif | ||
668 | #ifdef HAVE_LCD_COLOR | 758 | #ifdef HAVE_LCD_COLOR |
669 | strb r12, [r1] | 759 | strb r12, [r1] |
670 | strb r4, [r1, #4] | 760 | strb r4, [r1, #4] |
@@ -684,6 +774,135 @@ jpeg_idct8h: | |||
684 | strb r5, [r1, #6] | 774 | strb r5, [r1, #6] |
685 | strb r14, [r1, #7] | 775 | strb r14, [r1, #7] |
686 | #endif | 776 | #endif |
777 | #else /* ARMv5+ */ | ||
778 | add r12, r14, r4, lsl #16 | ||
779 | orrs r9, r6, r7 | ||
780 | orreqs r9, r5, r4, lsr #16 | ||
781 | bne 2f | ||
782 | mov r12, r12, asr #21 | ||
783 | cmp r12, #255 | ||
784 | mvnhi r12, r12, asr #31 | ||
785 | #ifdef HAVE_LCD_COLOR | ||
786 | strb r12, [r1] | ||
787 | strb r12, [r1, #4] | ||
788 | strb r12, [r1, #8] | ||
789 | strb r12, [r1, #12] | ||
790 | strb r12, [r1, #16] | ||
791 | strb r12, [r1, #20] | ||
792 | strb r12, [r1, #24] | ||
793 | strb r12, [r1, #28] | ||
794 | #else | ||
795 | strb r12, [r1] | ||
796 | strb r12, [r1, #1] | ||
797 | strb r12, [r1, #2] | ||
798 | strb r12, [r1, #3] | ||
799 | strb r12, [r1, #4] | ||
800 | strb r12, [r1, #5] | ||
801 | strb r12, [r1, #6] | ||
802 | strb r12, [r1, #7] | ||
803 | #endif | ||
804 | add r1, r1, r3 | ||
805 | cmp r0, r2 | ||
806 | bcc 1b | ||
807 | ldmia sp!, { r4-r11, pc } | ||
808 | 2: | ||
809 | ldrd r8, .Lpool8 | ||
810 | add r10, r5, r7 /* r10[15:0] = d2 + d6 */ | ||
811 | sub r14, r12, r6, lsl #16 /* r14 = tmp1 << 3 = (d0 - d4) << 16 */ | ||
812 | smulbb r10, r8, r10 /* r10 = z1 = (d2 + d6) * 4433 */ | ||
813 | add r12, r12, r6, lsl #16 /* r12 = tmp0 << 3= (d0 + d4) << 16 */ | ||
814 | smlatb r11, r8, r7, r10 /* r11 = tmp2 = z1 - d6 * 15137 */ | ||
815 | smlabb r10, r9, r5, r10 /* r10 = tmp3 = z1 + d2 * 6270 */ | ||
816 | add r8, r11, r14, asr #3 /* r8 = tmp11 */ | ||
817 | rsb r11, r11, r14, asr #3 /* r11 = tmp12 */ | ||
818 | add r14, r10, r12, asr #3 /* r14 = tmp10 */ | ||
819 | rsb r12, r10, r12, asr #3 /* r12 = tmp13 */ | ||
820 | stmdb sp, { r8, r11, r12, r14 }/* tmp11 tmp12 tmp13 tmp10 */ | ||
821 | mov r6, r6, asr #16 /* r6 = tmp1 = d5 */ | ||
822 | mov r7, r7, asr #16 /* r7 = tmp0 = d7 */ | ||
823 | add r12, r6, r4, asr #16 /* r12 = z4 = tmp1 + tmp3 */ | ||
824 | add r14, r7, r5, asr #16 /* r14 = z3 = tmp0 + tmp2 */ | ||
825 | add r8, r12, r14 /* r8 = z3 + z4 */ | ||
826 | ldrd r10, .Lpool8+8 | ||
827 | smultb r8, r9, r8 /* r8 = z5 = (z3 + z4) * 9633 */ | ||
828 | add r9, r7, r4, asr #16 /* r9 = z1 = tmp0 + tmp3 */ | ||
829 | smlabb r14, r10, r14, r8 /* r14 = z3 = z5 - z3 * 16069 */ | ||
830 | smlatb r12, r10, r12, r8 /* r12 = z4 = z5 - z4 * 3196 */ | ||
831 | smlabb r8, r11, r9, r14 /* r8 = z3 - z1 * 7373 */ | ||
832 | smlabb r9, r11, r9, r12 /* r9 = z4 - z1 * 7373 */ | ||
833 | add r10, r6, r5, asr #16 /* r10 = z2 = tmp1 + tmp2 */ | ||
834 | smlatb r12, r11, r10, r12 /* r12 = z4 - z2 * 20995 */ | ||
835 | smlatb r14, r11, r10, r14 /* r14 = z3 - z2 * 20995 */ | ||
836 | ldrd r10, .Lpool8+16 | ||
837 | smlabb r7, r10, r7, r8 /* r7 = tmp0 */ | ||
838 | smlatt r4, r10, r4, r9 /* r4 = tmp3 */ | ||
839 | smlabb r6, r11, r6, r12 /* r6 = tmp1 */ | ||
840 | smlatt r5, r11, r5, r14 /* r5 = tmp2 */ | ||
841 | ldmdb sp, { r8-r11 } /* tmp11 tmp12 tmp13 tmp10 */ | ||
842 | add r12, r8, r5 /* o1 */ | ||
843 | sub r14, r8, r5 /* o6 */ | ||
844 | add r8, r9, r6 /* o2 */ | ||
845 | sub r9, r9, r6 /* o5 */ | ||
846 | add r6, r10, r7 /* o3 */ | ||
847 | sub r7, r10, r7 /* o4 */ | ||
848 | add r10, r11, r4 /* o0 */ | ||
849 | sub r11, r11, r4 /* o7 */ | ||
850 | /* output in order: r10 r12 r8 r6 r7 r9 r14 r11 */ | ||
851 | #if ARM_ARCH < 6 | ||
852 | mov r10, r10, asr #18 | ||
853 | cmp r10, #255 | ||
854 | mvnhi r10, r10, asr #31 | ||
855 | mov r12, r12, asr #18 | ||
856 | cmp r12, #255 | ||
857 | mvnhi r12, r12, asr #31 | ||
858 | mov r8, r8, asr #18 | ||
859 | cmp r8, #255 | ||
860 | mvnhi r8, r8, asr #31 | ||
861 | mov r6, r6, asr #18 | ||
862 | cmp r6, #255 | ||
863 | mvnhi r6, r6, asr #31 | ||
864 | mov r7, r7, asr #18 | ||
865 | cmp r7, #255 | ||
866 | mvnhi r7, r7, asr #31 | ||
867 | mov r9, r9, asr #18 | ||
868 | cmp r9, #255 | ||
869 | mvnhi r9, r9, asr #31 | ||
870 | mov r14, r14, asr #18 | ||
871 | cmp r14, #255 | ||
872 | mvnhi r14, r14, asr #31 | ||
873 | mov r11, r11, asr #18 | ||
874 | cmp r11, #255 | ||
875 | mvnhi r11, r11, asr #31 | ||
876 | #else | ||
877 | usat r10, #8, r10, asr #18 | ||
878 | usat r12, #8, r12, asr #18 | ||
879 | usat r8, #8, r8, asr #18 | ||
880 | usat r6, #8, r6, asr #18 | ||
881 | usat r7, #8, r7, asr #18 | ||
882 | usat r9, #8, r9, asr #18 | ||
883 | usat r14, #8, r14, asr #18 | ||
884 | usat r11, #8, r11, asr #18 | ||
885 | #endif | ||
886 | #ifdef HAVE_LCD_COLOR | ||
887 | strb r10, [r1] | ||
888 | strb r12, [r1, #4] | ||
889 | strb r8, [r1, #8] | ||
890 | strb r6, [r1, #12] | ||
891 | strb r7, [r1, #16] | ||
892 | strb r9, [r1, #20] | ||
893 | strb r14, [r1, #24] | ||
894 | strb r11, [r1, #28] | ||
895 | #else | ||
896 | strb r10, [r1] | ||
897 | strb r12, [r1, #1] | ||
898 | strb r8, [r1, #2] | ||
899 | strb r6, [r1, #3] | ||
900 | strb r7, [r1, #4] | ||
901 | strb r9, [r1, #5] | ||
902 | strb r14, [r1, #6] | ||
903 | strb r11, [r1, #7] | ||
904 | #endif | ||
905 | #endif | ||
687 | add r1, r1, r3 | 906 | add r1, r1, r3 |
688 | cmp r0, r2 | 907 | cmp r0, r2 |
689 | bcc 1b | 908 | bcc 1b |