diff options
Diffstat (limited to 'apps/plugins')
-rw-r--r-- | apps/plugins/lib/gray_core.c | 536 | ||||
-rw-r--r-- | apps/plugins/lib/gray_draw.c | 496 | ||||
-rw-r--r-- | apps/plugins/lib/gray_scroll.c | 83 |
3 files changed, 692 insertions, 423 deletions
diff --git a/apps/plugins/lib/gray_core.c b/apps/plugins/lib/gray_core.c index c253a7112e..c162349f76 100644 --- a/apps/plugins/lib/gray_core.c +++ b/apps/plugins/lib/gray_core.c | |||
@@ -648,14 +648,165 @@ void gray_update_rect(int x, int y, int width, int height) | |||
648 | cbuf = _gray_info.cur_buffer + srcofs_row; | 648 | cbuf = _gray_info.cur_buffer + srcofs_row; |
649 | bbuf = _gray_info.back_buffer + srcofs_row; | 649 | bbuf = _gray_info.back_buffer + srcofs_row; |
650 | 650 | ||
651 | #if 0 /* CPU specific asm versions will go here */ | 651 | #ifdef CPU_ARM |
652 | asm volatile ( | ||
653 | "ldr r0, [%[cbuf]] \n" | ||
654 | "ldr r1, [%[bbuf]] \n" | ||
655 | "eor r1, r0, r1 \n" | ||
656 | "ldr r0, [%[cbuf], #4] \n" | ||
657 | "ldr %[chg], [%[bbuf], #4] \n" | ||
658 | "eor %[chg], r0, %[chg] \n" | ||
659 | "orr %[chg], %[chg], r1 \n" | ||
660 | : /* outputs */ | ||
661 | [chg] "=&r"(change) | ||
662 | : /* inputs */ | ||
663 | [cbuf]"r"(cbuf), | ||
664 | [bbuf]"r"(bbuf) | ||
665 | : /* clobbers */ | ||
666 | "r0", "r1" | ||
667 | ); | ||
668 | |||
669 | if (change != 0) | ||
670 | { | ||
671 | unsigned char *addr, *end; | ||
672 | unsigned mask, trash; | ||
673 | |||
674 | pat_ptr = &pat_stack[8]; | ||
675 | |||
676 | /* precalculate the bit patterns with random shifts | ||
677 | * for all 8 pixels and put them on an extra "stack" */ | ||
678 | asm volatile ( | ||
679 | "mov r3, #8 \n" /* loop count */ | ||
680 | "mov %[mask], #0 \n" | ||
681 | |||
682 | ".ur_pre_loop: \n" | ||
683 | "mov %[mask], %[mask], lsl #1 \n" /* shift mask */ | ||
684 | "ldrb r0, [%[cbuf]], #1 \n" /* read current buffer */ | ||
685 | "ldrb r1, [%[bbuf]] \n" /* read back buffer */ | ||
686 | "strb r0, [%[bbuf]], #1 \n" /* update back buffer */ | ||
687 | "mov r2, #0 \n" /* preset for skipped pixel */ | ||
688 | "cmp r0, r1 \n" /* no change? */ | ||
689 | "beq .ur_skip \n" /* -> skip */ | ||
690 | |||
691 | "ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */ | ||
692 | |||
693 | "add r0, %[rnd], %[rnd], lsl #3 \n" /* multiply by 75 */ | ||
694 | "add %[rnd], %[rnd], %[rnd], lsl #1 \n" | ||
695 | "add %[rnd], %[rnd], r0, lsl #3 \n" | ||
696 | "add %[rnd], %[rnd], #74 \n" /* add another 74 */ | ||
697 | /* Since the lower bits are not very random: get bits 8..15 (need max. 5) */ | ||
698 | "and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */ | ||
699 | |||
700 | "cmp r1, %[dpth] \n" /* random >= depth ? */ | ||
701 | "subhs r1, r1, %[dpth] \n" /* yes: random -= depth */ | ||
702 | |||
703 | "mov r0, r2, lsl r1 \n" /** rotate pattern **/ | ||
704 | "sub r1, %[dpth], r1 \n" | ||
705 | "orr r2, r0, r2, lsr r1 \n" | ||
706 | |||
707 | "orr %[mask], %[mask], #1 \n" /* set mask bit */ | ||
708 | |||
709 | ".ur_skip: \n" | ||
710 | "str r2, [%[patp], #-4]! \n" /* push on pattern stack */ | ||
711 | |||
712 | "subs r3, r3, #1 \n" /* loop 8 times (pixel block) */ | ||
713 | "bne .ur_pre_loop \n" | ||
714 | : /* outputs */ | ||
715 | [cbuf]"+r"(cbuf), | ||
716 | [bbuf]"+r"(bbuf), | ||
717 | [patp]"+r"(pat_ptr), | ||
718 | [rnd] "+r"(_gray_random_buffer), | ||
719 | [mask]"=&r"(mask) | ||
720 | : /* inputs */ | ||
721 | [bpat]"r"(_gray_info.bitpattern), | ||
722 | [dpth]"r"(_gray_info.depth), | ||
723 | [rmsk]"r"(_gray_info.randmask) | ||
724 | : /* clobbers */ | ||
725 | "r0", "r1", "r2", "r3" | ||
726 | ); | ||
727 | |||
728 | addr = dst_row; | ||
729 | end = addr + MULU16(_gray_info.depth, _gray_info.plane_size); | ||
730 | |||
731 | /* set the bits for all 8 pixels in all bytes according to the | ||
732 | * precalculated patterns on the pattern stack */ | ||
733 | asm volatile ( | ||
734 | "ldmia %[patp], {r2 - r8, %[rx]} \n" /* pop all 8 patterns */ | ||
735 | |||
736 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ | ||
737 | "ands %[mask], %[mask], #0xff \n" | ||
738 | "beq .ur_sloop \n" /* short loop if nothing to keep */ | ||
739 | |||
740 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | ||
741 | "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */ | ||
742 | "adc r0, r0, r0 \n" /* put bit into LSB for byte */ | ||
743 | "movs r8, r8, lsr #1 \n" | ||
744 | "adc r0, r0, r0 \n" | ||
745 | "movs r7, r7, lsr #1 \n" | ||
746 | "adc r0, r0, r0 \n" | ||
747 | "movs r6, r6, lsr #1 \n" | ||
748 | "adc r0, r0, r0 \n" | ||
749 | "movs r5, r5, lsr #1 \n" | ||
750 | "adc r0, r0, r0 \n" | ||
751 | "movs r4, r4, lsr #1 \n" | ||
752 | "adc r0, r0, r0 \n" | ||
753 | "movs r3, r3, lsr #1 \n" | ||
754 | "adc r0, r0, r0 \n" | ||
755 | "movs r2, r2, lsr #1 \n" | ||
756 | "adc r0, r0, r0 \n" | ||
757 | |||
758 | "ldrb r1, [%[addr]] \n" /* read old value */ | ||
759 | "and r1, r1, %[mask] \n" /* mask out replaced bits */ | ||
760 | "orr r1, r1, r0 \n" /* set new bits */ | ||
761 | "strb r1, [%[addr]], %[psiz] \n" /* store value, advance to next bpl */ | ||
762 | |||
763 | "cmp %[end], %[addr] \n" /* loop for all bitplanes */ | ||
764 | "bne .ur_floop \n" | ||
765 | |||
766 | "b .ur_end \n" | ||
767 | |||
768 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | ||
769 | "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */ | ||
770 | "adc r0, r0, r0 \n" /* put bit into LSB for byte */ | ||
771 | "movs r8, r8, lsr #1 \n" | ||
772 | "adc r0, r0, r0 \n" | ||
773 | "movs r7, r7, lsr #1 \n" | ||
774 | "adc r0, r0, r0 \n" | ||
775 | "movs r6, r6, lsr #1 \n" | ||
776 | "adc r0, r0, r0 \n" | ||
777 | "movs r5, r5, lsr #1 \n" | ||
778 | "adc r0, r0, r0 \n" | ||
779 | "movs r4, r4, lsr #1 \n" | ||
780 | "adc r0, r0, r0 \n" | ||
781 | "movs r3, r3, lsr #1 \n" | ||
782 | "adc r0, r0, r0 \n" | ||
783 | "movs r2, r2, lsr #1 \n" | ||
784 | "adc r0, r0, r0 \n" | ||
785 | |||
786 | "strb r0, [%[addr]], %[psiz] \n" /* store byte, advance to next bpl */ | ||
787 | |||
788 | "cmp %[end], %[addr] \n" /* loop for all bitplanes */ | ||
789 | "bne .ur_sloop \n" | ||
790 | |||
791 | ".ur_end: \n" | ||
792 | : /* outputs */ | ||
793 | [addr]"+r"(addr), | ||
794 | [mask]"+r"(mask), | ||
795 | [rx] "=&r"(trash) | ||
796 | : /* inputs */ | ||
797 | [psiz]"r"(_gray_info.plane_size), | ||
798 | [end] "r"(end), | ||
799 | [patp]"[rx]"(pat_ptr) | ||
800 | : /* clobbers */ | ||
801 | "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8" | ||
802 | ); | ||
803 | } | ||
652 | #else /* C version, for reference*/ | 804 | #else /* C version, for reference*/ |
805 | #warning C version of gray_update_rect() used | ||
653 | (void)pat_ptr; | 806 | (void)pat_ptr; |
654 | /* check whether anything changed in the 8-pixel block */ | 807 | /* check whether anything changed in the 8-pixel block */ |
655 | change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; | 808 | change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; |
656 | cbuf += sizeof(uint32_t); | 809 | change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4); |
657 | bbuf += sizeof(uint32_t); | ||
658 | change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; | ||
659 | 810 | ||
660 | if (change != 0) | 811 | if (change != 0) |
661 | { | 812 | { |
@@ -664,9 +815,6 @@ void gray_update_rect(int x, int y, int width, int height) | |||
664 | unsigned test = 1; | 815 | unsigned test = 1; |
665 | int i; | 816 | int i; |
666 | 817 | ||
667 | cbuf = _gray_info.cur_buffer + srcofs_row; | ||
668 | bbuf = _gray_info.back_buffer + srcofs_row; | ||
669 | |||
670 | /* precalculate the bit patterns with random shifts | 818 | /* precalculate the bit patterns with random shifts |
671 | * for all 8 pixels and put them on an extra "stack" */ | 819 | * for all 8 pixels and put them on an extra "stack" */ |
672 | for (i = 7; i >= 0; i--) | 820 | for (i = 7; i >= 0; i--) |
@@ -711,7 +859,7 @@ void gray_update_rect(int x, int y, int width, int height) | |||
711 | 859 | ||
712 | for (i = 7; i >= 0; i--) | 860 | for (i = 7; i >= 0; i--) |
713 | data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0); | 861 | data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0); |
714 | 862 | ||
715 | *addr = data; | 863 | *addr = data; |
716 | addr += _gray_info.plane_size; | 864 | addr += _gray_info.plane_size; |
717 | test <<= 1; | 865 | test <<= 1; |
@@ -788,18 +936,18 @@ void gray_update_rect(int x, int y, int width, int height) | |||
788 | 936 | ||
789 | #if CONFIG_CPU == SH7034 | 937 | #if CONFIG_CPU == SH7034 |
790 | asm volatile ( | 938 | asm volatile ( |
791 | "mov.l @%[cbuf]+,r1 \n" | 939 | "mov.l @%[cbuf],r1 \n" |
792 | "mov.l @%[bbuf]+,r2 \n" | 940 | "mov.l @%[bbuf],r2 \n" |
793 | "xor r1,r2 \n" | 941 | "xor r1,r2 \n" |
794 | "mov.l @%[cbuf],r1 \n" | 942 | "mov.l @(4,%[cbuf]),r1 \n" |
795 | "mov.l @%[bbuf],%[chg] \n" | 943 | "mov.l @(4,%[bbuf]),%[chg] \n" |
796 | "xor r1,%[chg] \n" | 944 | "xor r1,%[chg] \n" |
797 | "or r2,%[chg] \n" | 945 | "or r2,%[chg] \n" |
798 | : /* outputs */ | 946 | : /* outputs */ |
799 | [cbuf]"+r"(cbuf), | ||
800 | [bbuf]"+r"(bbuf), | ||
801 | [chg] "=r"(change) | 947 | [chg] "=r"(change) |
802 | : /* inputs */ | 948 | : /* inputs */ |
949 | [cbuf]"r"(cbuf), | ||
950 | [bbuf]"r"(bbuf) | ||
803 | : /* clobbers */ | 951 | : /* clobbers */ |
804 | "r1", "r2" | 952 | "r1", "r2" |
805 | ); | 953 | ); |
@@ -810,13 +958,11 @@ void gray_update_rect(int x, int y, int width, int height) | |||
810 | unsigned mask, trash; | 958 | unsigned mask, trash; |
811 | 959 | ||
812 | pat_ptr = &pat_stack[8]; | 960 | pat_ptr = &pat_stack[8]; |
813 | cbuf = _gray_info.cur_buffer + srcofs_row; | ||
814 | bbuf = _gray_info.back_buffer + srcofs_row; | ||
815 | 961 | ||
816 | /* precalculate the bit patterns with random shifts | 962 | /* precalculate the bit patterns with random shifts |
817 | * for all 8 pixels and put them on an extra "stack" */ | 963 | * for all 8 pixels and put them on an extra "stack" */ |
818 | asm volatile ( | 964 | asm volatile ( |
819 | "mov #8,r3 \n" /* loop count in r3: 8 pixels */ | 965 | "mov #8,r3 \n" /* loop count */ |
820 | 966 | ||
821 | ".ur_pre_loop: \n" | 967 | ".ur_pre_loop: \n" |
822 | "mov.b @%[cbuf]+,r0\n" /* read current buffer */ | 968 | "mov.b @%[cbuf]+,r0\n" /* read current buffer */ |
@@ -860,10 +1006,11 @@ void gray_update_rect(int x, int y, int width, int height) | |||
860 | "rotcr %[mask] \n" /* get mask bit */ | 1006 | "rotcr %[mask] \n" /* get mask bit */ |
861 | "mov.l r2,@-%[patp]\n" /* push on pattern stack */ | 1007 | "mov.l r2,@-%[patp]\n" /* push on pattern stack */ |
862 | 1008 | ||
863 | "add #-1,r3 \n" /* decrease loop count */ | 1009 | "add #-1,r3 \n" /* loop 8 times (pixel block) */ |
864 | "cmp/pl r3 \n" /* loop count > 0? */ | 1010 | "cmp/pl r3 \n" |
865 | "bt .ur_pre_loop\n" /* yes: loop */ | 1011 | "bt .ur_pre_loop\n" |
866 | "shlr8 %[mask] \n" | 1012 | |
1013 | "shlr8 %[mask] \n" /* shift mask to low byte */ | ||
867 | "shlr16 %[mask] \n" | 1014 | "shlr16 %[mask] \n" |
868 | : /* outputs */ | 1015 | : /* outputs */ |
869 | [cbuf]"+r"(cbuf), | 1016 | [cbuf]"+r"(cbuf), |
@@ -885,77 +1032,77 @@ void gray_update_rect(int x, int y, int width, int height) | |||
885 | /* set the bits for all 8 pixels in all bytes according to the | 1032 | /* set the bits for all 8 pixels in all bytes according to the |
886 | * precalculated patterns on the pattern stack */ | 1033 | * precalculated patterns on the pattern stack */ |
887 | asm volatile ( | 1034 | asm volatile ( |
888 | "mov.l @%[patp]+,r1\n" /* pop all 8 patterns */ | 1035 | "mov.l @%[patp]+,r1 \n" /* pop all 8 patterns */ |
889 | "mov.l @%[patp]+,r2\n" | 1036 | "mov.l @%[patp]+,r2 \n" |
890 | "mov.l @%[patp]+,r3\n" | 1037 | "mov.l @%[patp]+,r3 \n" |
891 | "mov.l @%[patp]+,r6\n" | 1038 | "mov.l @%[patp]+,r6 \n" |
892 | "mov.l @%[patp]+,r7\n" | 1039 | "mov.l @%[patp]+,r7 \n" |
893 | "mov.l @%[patp]+,r8\n" | 1040 | "mov.l @%[patp]+,r8 \n" |
894 | "mov.l @%[patp]+,r9\n" | 1041 | "mov.l @%[patp]+,r9 \n" |
895 | "mov.l @%[patp],r10\n" | 1042 | "mov.l @%[patp],r10 \n" |
896 | 1043 | ||
897 | "tst %[mask],%[mask] \n" /* nothing to keep? */ | 1044 | "tst %[mask],%[mask] \n" |
898 | "bt .ur_sloop \n" /* yes: jump to short loop */ | 1045 | "bt .ur_sloop \n" /* short loop if nothing to keep */ |
899 | 1046 | ||
900 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | 1047 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ |
901 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ | 1048 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ |
902 | "rotcl r0 \n" /* rotate t bit into r0 */ | 1049 | "rotcl r0 \n" /* rotate t bit into r0 */ |
903 | "shlr r2 \n" | 1050 | "shlr r2 \n" |
904 | "rotcl r0 \n" | 1051 | "rotcl r0 \n" |
905 | "shlr r3 \n" | 1052 | "shlr r3 \n" |
906 | "rotcl r0 \n" | 1053 | "rotcl r0 \n" |
907 | "shlr r6 \n" | 1054 | "shlr r6 \n" |
908 | "rotcl r0 \n" | 1055 | "rotcl r0 \n" |
909 | "shlr r7 \n" | 1056 | "shlr r7 \n" |
910 | "rotcl r0 \n" | 1057 | "rotcl r0 \n" |
911 | "shlr r8 \n" | 1058 | "shlr r8 \n" |
912 | "rotcl r0 \n" | 1059 | "rotcl r0 \n" |
913 | "shlr r9 \n" | 1060 | "shlr r9 \n" |
914 | "rotcl r0 \n" | 1061 | "rotcl r0 \n" |
915 | "shlr r10 \n" | 1062 | "shlr r10 \n" |
916 | "mov.b @%[addr],%[rx] \n" /* read old value */ | 1063 | "mov.b @%[addr],%[rx] \n" /* read old value */ |
917 | "rotcl r0 \n" | 1064 | "rotcl r0 \n" |
918 | "and %[mask],%[rx] \n" /* mask out unneeded bits */ | 1065 | "and %[mask],%[rx] \n" /* mask out replaced bits */ |
919 | "or %[rx],r0 \n" /* set new bits */ | 1066 | "or %[rx],r0 \n" /* set new bits */ |
920 | "mov.b r0,@%[addr] \n" /* store value to bitplane */ | 1067 | "mov.b r0,@%[addr] \n" /* store value to bitplane */ |
921 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ | 1068 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ |
922 | "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ | 1069 | "cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */ |
923 | "bt .ur_floop \n" /* no: loop */ | 1070 | "bt .ur_floop \n" |
924 | 1071 | ||
925 | "bra .ur_end \n" | 1072 | "bra .ur_end \n" |
926 | "nop \n" | 1073 | "nop \n" |
927 | 1074 | ||
928 | /* References to C library routines used in the precalc block */ | 1075 | /* References to C library routines used in the precalc block */ |
929 | ".align 2 \n" | 1076 | ".align 2 \n" |
930 | ".ashlsi3: \n" /* C library routine: */ | 1077 | ".ashlsi3: \n" /* C library routine: */ |
931 | ".long ___ashlsi3 \n" /* shift r4 left by r5, res. in r0 */ | 1078 | ".long ___ashlsi3 \n" /* shift r4 left by r5, res. in r0 */ |
932 | ".lshrsi3: \n" /* C library routine: */ | 1079 | ".lshrsi3: \n" /* C library routine: */ |
933 | ".long ___lshrsi3 \n" /* shift r4 right by r5, res. in r0 */ | 1080 | ".long ___lshrsi3 \n" /* shift r4 right by r5, res. in r0 */ |
934 | /* both routines preserve r4, destroy r5 and take ~16 cycles */ | 1081 | /* both routines preserve r4, destroy r5 and take ~16 cycles */ |
935 | 1082 | ||
936 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 1083 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ |
937 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ | 1084 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ |
938 | "rotcl r0 \n" /* rotate t bit into r0 */ | 1085 | "rotcl r0 \n" /* rotate t bit into r0 */ |
939 | "shlr r2 \n" | 1086 | "shlr r2 \n" |
940 | "rotcl r0 \n" | 1087 | "rotcl r0 \n" |
941 | "shlr r3 \n" | 1088 | "shlr r3 \n" |
942 | "rotcl r0 \n" | 1089 | "rotcl r0 \n" |
943 | "shlr r6 \n" | 1090 | "shlr r6 \n" |
944 | "rotcl r0 \n" | 1091 | "rotcl r0 \n" |
945 | "shlr r7 \n" | 1092 | "shlr r7 \n" |
946 | "rotcl r0 \n" | 1093 | "rotcl r0 \n" |
947 | "shlr r8 \n" | 1094 | "shlr r8 \n" |
948 | "rotcl r0 \n" | 1095 | "rotcl r0 \n" |
949 | "shlr r9 \n" | 1096 | "shlr r9 \n" |
950 | "rotcl r0 \n" | 1097 | "rotcl r0 \n" |
951 | "shlr r10 \n" | 1098 | "shlr r10 \n" |
952 | "rotcl r0 \n" | 1099 | "rotcl r0 \n" |
953 | "mov.b r0,@%[addr] \n" /* store byte to bitplane */ | 1100 | "mov.b r0,@%[addr] \n" /* store byte to bitplane */ |
954 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ | 1101 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ |
955 | "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ | 1102 | "cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */ |
956 | "bt .ur_sloop \n" /* no: loop */ | 1103 | "bt .ur_sloop \n" |
957 | 1104 | ||
958 | ".ur_end: \n" | 1105 | ".ur_end: \n" |
959 | : /* outputs */ | 1106 | : /* outputs */ |
960 | [addr]"+r"(addr), | 1107 | [addr]"+r"(addr), |
961 | [mask]"+r"(mask), | 1108 | [mask]"+r"(mask), |
@@ -970,18 +1117,18 @@ void gray_update_rect(int x, int y, int width, int height) | |||
970 | } | 1117 | } |
971 | #elif defined(CPU_COLDFIRE) | 1118 | #elif defined(CPU_COLDFIRE) |
972 | asm volatile ( | 1119 | asm volatile ( |
973 | "move.l (%[cbuf])+,%%d0 \n" | 1120 | "move.l (%[cbuf]),%%d0 \n" |
974 | "move.l (%[bbuf])+,%%d1 \n" | 1121 | "move.l (%[bbuf]),%%d1 \n" |
975 | "eor.l %%d0,%%d1 \n" | 1122 | "eor.l %%d0,%%d1 \n" |
976 | "move.l (%[cbuf]),%%d0 \n" | 1123 | "move.l (4,%[cbuf]),%%d0 \n" |
977 | "move.l (%[bbuf]),%[chg]\n" | 1124 | "move.l (4,%[bbuf]),%[chg] \n" |
978 | "eor.l %%d0,%[chg] \n" | 1125 | "eor.l %%d0,%[chg] \n" |
979 | "or.l %%d1,%[chg] \n" | 1126 | "or.l %%d1,%[chg] \n" |
980 | : /* outputs */ | 1127 | : /* outputs */ |
981 | [cbuf]"+a"(cbuf), | ||
982 | [bbuf]"+a"(bbuf), | ||
983 | [chg] "=&d"(change) | 1128 | [chg] "=&d"(change) |
984 | : /* inputs */ | 1129 | : /* inputs */ |
1130 | [cbuf]"a"(cbuf), | ||
1131 | [bbuf]"a"(bbuf) | ||
985 | : /* clobbers */ | 1132 | : /* clobbers */ |
986 | "d0", "d1" | 1133 | "d0", "d1" |
987 | ); | 1134 | ); |
@@ -992,54 +1139,52 @@ void gray_update_rect(int x, int y, int width, int height) | |||
992 | unsigned mask, trash; | 1139 | unsigned mask, trash; |
993 | 1140 | ||
994 | pat_ptr = &pat_stack[8]; | 1141 | pat_ptr = &pat_stack[8]; |
995 | cbuf = _gray_info.cur_buffer + srcofs_row; | ||
996 | bbuf = _gray_info.back_buffer + srcofs_row; | ||
997 | 1142 | ||
998 | /* precalculate the bit patterns with random shifts | 1143 | /* precalculate the bit patterns with random shifts |
999 | * for all 8 pixels and put them on an extra "stack" */ | 1144 | * for all 8 pixels and put them on an extra "stack" */ |
1000 | asm volatile ( | 1145 | asm volatile ( |
1001 | "moveq.l #8,%%d3 \n" /* loop count in d3: 8 pixels */ | 1146 | "moveq.l #8,%%d3 \n" /* loop count */ |
1002 | "clr.l %[mask] \n" | 1147 | "clr.l %[mask] \n" |
1003 | 1148 | ||
1004 | ".ur_pre_loop: \n" | 1149 | ".ur_pre_loop: \n" |
1005 | "clr.l %%d0 \n" | 1150 | "clr.l %%d0 \n" |
1006 | "move.b (%[cbuf])+,%%d0 \n" /* read current buffer */ | 1151 | "move.b (%[cbuf])+,%%d0 \n" /* read current buffer */ |
1007 | "clr.l %%d1 \n" | 1152 | "clr.l %%d1 \n" |
1008 | "move.b (%[bbuf]),%%d1 \n" /* read back buffer */ | 1153 | "move.b (%[bbuf]),%%d1 \n" /* read back buffer */ |
1009 | "move.b %%d0,(%[bbuf])+ \n" /* update back buffer */ | 1154 | "move.b %%d0,(%[bbuf])+ \n" /* update back buffer */ |
1010 | "clr.l %%d2 \n" /* preset for skipped pixel */ | 1155 | "clr.l %%d2 \n" /* preset for skipped pixel */ |
1011 | "cmp.l %%d0,%%d1 \n" /* no change? */ | 1156 | "cmp.l %%d0,%%d1 \n" /* no change? */ |
1012 | "beq.b .ur_skip \n" /* -> skip */ | 1157 | "beq.b .ur_skip \n" /* -> skip */ |
1013 | 1158 | ||
1014 | "move.l (%%d0:l:4,%[bpat]),%%d2 \n" /* d2 = bitpattern[byte]; */ | 1159 | "move.l (%%d0:l:4,%[bpat]),%%d2 \n" /* d2 = bitpattern[byte]; */ |
1015 | 1160 | ||
1016 | "mulu.w #75,%[rnd] \n" /* multiply by 75 */ | 1161 | "mulu.w #75,%[rnd] \n" /* multiply by 75 */ |
1017 | "add.l #74,%[rnd] \n" /* add another 74 */ | 1162 | "add.l #74,%[rnd] \n" /* add another 74 */ |
1018 | /* Since the lower bits are not very random: */ | 1163 | /* Since the lower bits are not very random: */ |
1019 | "move.l %[rnd],%%d1 \n" | 1164 | "move.l %[rnd],%%d1 \n" |
1020 | "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */ | 1165 | "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */ |
1021 | "and.l %[rmsk],%%d1\n" /* mask out unneeded bits */ | 1166 | "and.l %[rmsk],%%d1 \n" /* mask out unneeded bits */ |
1022 | 1167 | ||
1023 | "cmp.l %[dpth],%%d1\n" /* random >= depth ? */ | 1168 | "cmp.l %[dpth],%%d1 \n" /* random >= depth ? */ |
1024 | "blo.b .ur_ntrim \n" | 1169 | "blo.b .ur_ntrim \n" |
1025 | "sub.l %[dpth],%%d1\n" /* yes: random -= depth; */ | 1170 | "sub.l %[dpth],%%d1 \n" /* yes: random -= depth; */ |
1026 | ".ur_ntrim: \n" | 1171 | ".ur_ntrim: \n" |
1027 | 1172 | ||
1028 | "move.l %%d2,%%d0 \n" | 1173 | "move.l %%d2,%%d0 \n" /** rotate pattern **/ |
1029 | "lsl.l %%d1,%%d0 \n" | 1174 | "lsl.l %%d1,%%d0 \n" |
1030 | "sub.l %[dpth],%%d1\n" | 1175 | "sub.l %[dpth],%%d1 \n" |
1031 | "neg.l %%d1 \n" /* d1 = depth - d1 */ | 1176 | "neg.l %%d1 \n" /* d1 = depth - d1 */ |
1032 | "lsr.l %%d1,%%d2 \n" | 1177 | "lsr.l %%d1,%%d2 \n" |
1033 | "or.l %%d0,%%d2 \n" /* rotated_pattern = d2 | d0 */ | 1178 | "or.l %%d0,%%d2 \n" /* rotated_pattern = d2 | d0 */ |
1034 | 1179 | ||
1035 | "or.l #0x0100,%[mask] \n" /* set mask bit */ | 1180 | "or.l #0x0100,%[mask] \n" /* set mask bit */ |
1036 | 1181 | ||
1037 | ".ur_skip: \n" | 1182 | ".ur_skip: \n" |
1038 | "lsr.l #1,%[mask] \n" /* shift mask */ | 1183 | "lsr.l #1,%[mask] \n" /* shift mask */ |
1039 | "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */ | 1184 | "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */ |
1040 | 1185 | ||
1041 | "subq.l #1,%%d3 \n" /* decrease loop count */ | 1186 | "subq.l #1,%%d3 \n" /* loop 8 times (pixel block) */ |
1042 | "bne.b .ur_pre_loop\n" /* yes: loop */ | 1187 | "bne.b .ur_pre_loop \n" |
1043 | : /* outputs */ | 1188 | : /* outputs */ |
1044 | [cbuf]"+a"(cbuf), | 1189 | [cbuf]"+a"(cbuf), |
1045 | [bbuf]"+a"(bbuf), | 1190 | [bbuf]"+a"(bbuf), |
@@ -1061,79 +1206,79 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1061 | * precalculated patterns on the pattern stack */ | 1206 | * precalculated patterns on the pattern stack */ |
1062 | asm volatile ( | 1207 | asm volatile ( |
1063 | "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n" | 1208 | "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n" |
1064 | /* pop all 8 patterns */ | 1209 | /* pop all 8 patterns */ |
1065 | "not.l %[mask] \n" /* set mask -> keep mask */ | 1210 | "not.l %[mask] \n" /* "set" mask -> "keep" mask */ |
1066 | "and.l #0xFF,%[mask] \n" | 1211 | "and.l #0xFF,%[mask] \n" |
1067 | "beq.b .ur_sstart \n" /* yes: jump to short loop */ | 1212 | "beq.b .ur_sstart \n" /* short loop if nothing to keep */ |
1068 | 1213 | ||
1069 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | 1214 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ |
1070 | "clr.l %%d0 \n" | 1215 | "clr.l %%d0 \n" |
1071 | "lsr.l #1,%%d2 \n" /* shift out mask bit */ | 1216 | "lsr.l #1,%%d2 \n" /* shift out pattern bit */ |
1072 | "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ | 1217 | "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */ |
1073 | "lsr.l #1,%%d3 \n" | 1218 | "lsr.l #1,%%d3 \n" |
1074 | "addx.l %%d0,%%d0 \n" | 1219 | "addx.l %%d0,%%d0 \n" |
1075 | "lsr.l #1,%%d4 \n" | 1220 | "lsr.l #1,%%d4 \n" |
1076 | "addx.l %%d0,%%d0 \n" | 1221 | "addx.l %%d0,%%d0 \n" |
1077 | "lsr.l #1,%%d5 \n" | 1222 | "lsr.l #1,%%d5 \n" |
1078 | "addx.l %%d0,%%d0 \n" | 1223 | "addx.l %%d0,%%d0 \n" |
1079 | "lsr.l #1,%%d6 \n" | 1224 | "lsr.l #1,%%d6 \n" |
1080 | "addx.l %%d0,%%d0 \n" | 1225 | "addx.l %%d0,%%d0 \n" |
1081 | "move.l %%a0,%%d1 \n" | 1226 | "move.l %%a0,%%d1 \n" |
1082 | "lsr.l #1,%%d1 \n" | 1227 | "lsr.l #1,%%d1 \n" |
1083 | "addx.l %%d0,%%d0 \n" | 1228 | "addx.l %%d0,%%d0 \n" |
1084 | "move.l %%d1,%%a0 \n" | 1229 | "move.l %%d1,%%a0 \n" |
1085 | "move.l %%a1,%%d1 \n" | 1230 | "move.l %%a1,%%d1 \n" |
1086 | "lsr.l #1,%%d1 \n" | 1231 | "lsr.l #1,%%d1 \n" |
1087 | "addx.l %%d0,%%d0 \n" | 1232 | "addx.l %%d0,%%d0 \n" |
1088 | "move.l %%d1,%%a1 \n" | 1233 | "move.l %%d1,%%a1 \n" |
1089 | "move.l %[ax],%%d1 \n" | 1234 | "move.l %[ax],%%d1 \n" |
1090 | "lsr.l #1,%%d1 \n" | 1235 | "lsr.l #1,%%d1 \n" |
1091 | "addx.l %%d0,%%d0 \n" | 1236 | "addx.l %%d0,%%d0 \n" |
1092 | "move.l %%d1,%[ax] \n" | 1237 | "move.l %%d1,%[ax] \n" |
1093 | 1238 | ||
1094 | "move.b (%[addr]),%%d1 \n" /* read old value */ | 1239 | "move.b (%[addr]),%%d1 \n" /* read old value */ |
1095 | "and.l %[mask],%%d1 \n" /* mask out unneeded bits */ | 1240 | "and.l %[mask],%%d1 \n" /* mask out replaced bits */ |
1096 | "or.l %%d0,%%d1 \n" /* set new bits */ | 1241 | "or.l %%d0,%%d1 \n" /* set new bits */ |
1097 | "move.b %%d1,(%[addr]) \n" /* store value to bitplane */ | 1242 | "move.b %%d1,(%[addr]) \n" /* store value to bitplane */ |
1098 | 1243 | ||
1099 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ | 1244 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ |
1100 | "cmp.l %[addr],%[end] \n" /* last bitplane done? */ | 1245 | "cmp.l %[addr],%[end] \n" /* loop through all bitplanes */ |
1101 | "bhi.b .ur_floop \n" /* no: loop */ | 1246 | "bhi.b .ur_floop \n" |
1102 | 1247 | ||
1103 | "bra.b .ur_end \n" | 1248 | "bra.b .ur_end \n" |
1104 | 1249 | ||
1105 | ".ur_sstart: \n" | 1250 | ".ur_sstart: \n" |
1106 | "move.l %%a0,%[mask]\n" /* mask isn't needed here, reuse reg */ | 1251 | "move.l %%a0,%[mask] \n" /* mask isn't needed here, reuse reg */ |
1107 | 1252 | ||
1108 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 1253 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ |
1109 | "clr.l %%d0 \n" | 1254 | "clr.l %%d0 \n" |
1110 | "lsr.l #1,%%d2 \n" /* shift out mask bit */ | 1255 | "lsr.l #1,%%d2 \n" /* shift out pattern bit */ |
1111 | "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ | 1256 | "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */ |
1112 | "lsr.l #1,%%d3 \n" | 1257 | "lsr.l #1,%%d3 \n" |
1113 | "addx.l %%d0,%%d0 \n" | 1258 | "addx.l %%d0,%%d0 \n" |
1114 | "lsr.l #1,%%d4 \n" | 1259 | "lsr.l #1,%%d4 \n" |
1115 | "addx.l %%d0,%%d0 \n" | 1260 | "addx.l %%d0,%%d0 \n" |
1116 | "lsr.l #1,%%d5 \n" | 1261 | "lsr.l #1,%%d5 \n" |
1117 | "addx.l %%d0,%%d0 \n" | 1262 | "addx.l %%d0,%%d0 \n" |
1118 | "lsr.l #1,%%d6 \n" | 1263 | "lsr.l #1,%%d6 \n" |
1119 | "addx.l %%d0,%%d0 \n" | 1264 | "addx.l %%d0,%%d0 \n" |
1120 | "lsr.l #1,%[mask] \n" | 1265 | "lsr.l #1,%[mask] \n" |
1121 | "addx.l %%d0,%%d0 \n" | 1266 | "addx.l %%d0,%%d0 \n" |
1122 | "move.l %%a1,%%d1 \n" | 1267 | "move.l %%a1,%%d1 \n" |
1123 | "lsr.l #1,%%d1 \n" | 1268 | "lsr.l #1,%%d1 \n" |
1124 | "addx.l %%d0,%%d0 \n" | 1269 | "addx.l %%d0,%%d0 \n" |
1125 | "move.l %%d1,%%a1 \n" | 1270 | "move.l %%d1,%%a1 \n" |
1126 | "move.l %[ax],%%d1 \n" | 1271 | "move.l %[ax],%%d1 \n" |
1127 | "lsr.l #1,%%d1 \n" | 1272 | "lsr.l #1,%%d1 \n" |
1128 | "addx.l %%d0,%%d0 \n" | 1273 | "addx.l %%d0,%%d0 \n" |
1129 | "move.l %%d1,%[ax] \n" | 1274 | "move.l %%d1,%[ax] \n" |
1130 | 1275 | ||
1131 | "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */ | 1276 | "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */ |
1132 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ | 1277 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ |
1133 | "cmp.l %[addr],%[end] \n" /* last bitplane done? */ | 1278 | "cmp.l %[addr],%[end] \n" /* loop through all bitplanes */ |
1134 | "bhi.b .ur_sloop \n" /* no: loop */ | 1279 | "bhi.b .ur_sloop \n" |
1135 | 1280 | ||
1136 | ".ur_end: \n" | 1281 | ".ur_end: \n" |
1137 | : /* outputs */ | 1282 | : /* outputs */ |
1138 | [addr]"+a"(addr), | 1283 | [addr]"+a"(addr), |
1139 | [mask]"+d"(mask), | 1284 | [mask]"+d"(mask), |
@@ -1151,9 +1296,7 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1151 | (void)pat_ptr; | 1296 | (void)pat_ptr; |
1152 | /* check whether anything changed in the 8-pixel block */ | 1297 | /* check whether anything changed in the 8-pixel block */ |
1153 | change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; | 1298 | change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; |
1154 | cbuf += sizeof(uint32_t); | 1299 | change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4); |
1155 | bbuf += sizeof(uint32_t); | ||
1156 | change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; | ||
1157 | 1300 | ||
1158 | if (change != 0) | 1301 | if (change != 0) |
1159 | { | 1302 | { |
@@ -1162,9 +1305,6 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1162 | unsigned test = 1; | 1305 | unsigned test = 1; |
1163 | int i; | 1306 | int i; |
1164 | 1307 | ||
1165 | cbuf = _gray_info.cur_buffer + srcofs_row; | ||
1166 | bbuf = _gray_info.back_buffer + srcofs_row; | ||
1167 | |||
1168 | /* precalculate the bit patterns with random shifts | 1308 | /* precalculate the bit patterns with random shifts |
1169 | * for all 8 pixels and put them on an extra "stack" */ | 1309 | * for all 8 pixels and put them on an extra "stack" */ |
1170 | for (i = 0; i < 8; i++) | 1310 | for (i = 0; i < 8; i++) |
diff --git a/apps/plugins/lib/gray_draw.c b/apps/plugins/lib/gray_draw.c index 396046d1e6..7df3e13c56 100644 --- a/apps/plugins/lib/gray_draw.c +++ b/apps/plugins/lib/gray_draw.c | |||
@@ -876,8 +876,140 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
876 | unsigned long pat_stack[8]; | 876 | unsigned long pat_stack[8]; |
877 | unsigned long *pat_ptr = &pat_stack[8]; | 877 | unsigned long *pat_ptr = &pat_stack[8]; |
878 | unsigned char *addr, *end; | 878 | unsigned char *addr, *end; |
879 | #if 0 /* CPU specific asm versions will go here */ | 879 | #ifdef CPU_ARM |
880 | const unsigned char *_src; | ||
881 | unsigned _mask, trash; | ||
882 | |||
883 | _mask = mask; | ||
884 | _src = src; | ||
885 | |||
886 | /* precalculate the bit patterns with random shifts | ||
887 | for all 8 pixels and put them on an extra "stack" */ | ||
888 | asm volatile ( | ||
889 | "mov %[mask], %[mask], lsl #24 \n" /* shift mask to upper byte */ | ||
890 | "mov r3, #8 \n" /* loop count */ | ||
891 | |||
892 | ".wa_loop: \n" /** load pattern for pixel **/ | ||
893 | "mov r2, #0 \n" /* pattern for skipped pixel must be 0 */ | ||
894 | "movs %[mask], %[mask], lsl #1 \n" /* shift out msb of mask */ | ||
895 | "bcc .wa_skip \n" /* skip this pixel */ | ||
896 | |||
897 | "ldrb r0, [%[src]] \n" /* load src byte */ | ||
898 | "ldrb r0, [%[trns], r0] \n" /* idxtable into pattern index */ | ||
899 | "ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */ | ||
900 | |||
901 | "add r0, %[rnd], %[rnd], lsl #3 \n" /* multiply by 75 */ | ||
902 | "add %[rnd], %[rnd], %[rnd], lsl #1 \n" | ||
903 | "add %[rnd], %[rnd], r0, lsl #3 \n" | ||
904 | "add %[rnd], %[rnd], #74 \n" /* add another 74 */ | ||
905 | /* Since the lower bits are not very random: get bits 8..15 (need max. 5) */ | ||
906 | "and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */ | ||
907 | |||
908 | "cmp r1, %[dpth] \n" /* random >= depth ? */ | ||
909 | "subhs r1, r1, %[dpth] \n" /* yes: random -= depth */ | ||
910 | |||
911 | "mov r0, r2, lsl r1 \n" /** rotate pattern **/ | ||
912 | "sub r1, %[dpth], r1 \n" | ||
913 | "orr r2, r0, r2, lsr r1 \n" | ||
914 | |||
915 | ".wa_skip: \n" | ||
916 | "str r2, [%[patp], #-4]! \n" /* push on pattern stack */ | ||
917 | |||
918 | "add %[src], %[src], #1 \n" /* src++; */ | ||
919 | "subs r3, r3, #1 \n" /* loop 8 times (pixel block) */ | ||
920 | "bne .wa_loop \n" | ||
921 | : /* outputs */ | ||
922 | [src] "+r"(_src), | ||
923 | [patp]"+r"(pat_ptr), | ||
924 | [rnd] "+r"(_gray_random_buffer), | ||
925 | [mask]"+r"(_mask) | ||
926 | : /* inputs */ | ||
927 | [bpat]"r"(_gray_info.bitpattern), | ||
928 | [trns]"r"(_gray_info.idxtable), | ||
929 | [dpth]"r"(_gray_info.depth), | ||
930 | [rmsk]"r"(_gray_info.randmask) | ||
931 | : /* clobbers */ | ||
932 | "r0", "r1", "r2", "r3" | ||
933 | ); | ||
934 | |||
935 | addr = address; | ||
936 | end = addr + MULU16(_gray_info.depth, _gray_info.plane_size); | ||
937 | _mask = mask; | ||
938 | |||
939 | /* set the bits for all 8 pixels in all bytes according to the | ||
940 | * precalculated patterns on the pattern stack */ | ||
941 | asm volatile ( | ||
942 | "ldmia %[patp], {r2 - r8, %[rx]} \n" /* pop all 8 patterns */ | ||
943 | |||
944 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ | ||
945 | "ands %[mask], %[mask], #0xff \n" | ||
946 | "beq .wa_sloop \n" /* short loop if nothing to keep */ | ||
947 | |||
948 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | ||
949 | "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */ | ||
950 | "adc r0, r0, r0 \n" /* put bit into LSB of byte */ | ||
951 | "movs r8, r8, lsr #1 \n" | ||
952 | "adc r0, r0, r0 \n" | ||
953 | "movs r7, r7, lsr #1 \n" | ||
954 | "adc r0, r0, r0 \n" | ||
955 | "movs r6, r6, lsr #1 \n" | ||
956 | "adc r0, r0, r0 \n" | ||
957 | "movs r5, r5, lsr #1 \n" | ||
958 | "adc r0, r0, r0 \n" | ||
959 | "movs r4, r4, lsr #1 \n" | ||
960 | "adc r0, r0, r0 \n" | ||
961 | "movs r3, r3, lsr #1 \n" | ||
962 | "adc r0, r0, r0 \n" | ||
963 | "movs r2, r2, lsr #1 \n" | ||
964 | "adc r0, r0, r0 \n" | ||
965 | |||
966 | "ldrb r1, [%[addr]] \n" /* read old value */ | ||
967 | "and r1, r1, %[mask] \n" /* mask out replaced bits */ | ||
968 | "orr r1, r1, r0 \n" /* set new bits */ | ||
969 | "strb r1, [%[addr]], %[psiz] \n" /* store value, advance to next bpl */ | ||
970 | |||
971 | "cmp %[end], %[addr] \n" /* loop through all bitplanes */ | ||
972 | "bne .wa_floop \n" | ||
973 | |||
974 | "b .wa_end \n" | ||
975 | |||
976 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | ||
977 | "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */ | ||
978 | "adc r0, r0, r0 \n" /* put bit into LSB of byte */ | ||
979 | "movs r8, r8, lsr #1 \n" | ||
980 | "adc r0, r0, r0 \n" | ||
981 | "movs r7, r7, lsr #1 \n" | ||
982 | "adc r0, r0, r0 \n" | ||
983 | "movs r6, r6, lsr #1 \n" | ||
984 | "adc r0, r0, r0 \n" | ||
985 | "movs r5, r5, lsr #1 \n" | ||
986 | "adc r0, r0, r0 \n" | ||
987 | "movs r4, r4, lsr #1 \n" | ||
988 | "adc r0, r0, r0 \n" | ||
989 | "movs r3, r3, lsr #1 \n" | ||
990 | "adc r0, r0, r0 \n" | ||
991 | "movs r2, r2, lsr #1 \n" | ||
992 | "adc r0, r0, r0 \n" | ||
993 | |||
994 | "strb r0, [%[addr]], %[psiz] \n" /* store byte, advance to next bpl */ | ||
995 | |||
996 | "cmp %[end], %[addr] \n" /* loop through all bitplanes */ | ||
997 | "bne .wa_sloop \n" | ||
998 | |||
999 | ".wa_end: \n" | ||
1000 | : /* outputs */ | ||
1001 | [addr]"+r"(addr), | ||
1002 | [mask]"+r"(_mask), | ||
1003 | [rx] "=&r"(trash) | ||
1004 | : /* inputs */ | ||
1005 | [psiz]"r"(_gray_info.plane_size), | ||
1006 | [end] "r"(end), | ||
1007 | [patp]"[rx]"(pat_ptr) | ||
1008 | : /* clobbers */ | ||
1009 | "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8" | ||
1010 | ); | ||
880 | #else /* C version, for reference*/ | 1011 | #else /* C version, for reference*/ |
1012 | #warning C version of _writearray() used | ||
881 | unsigned test = 0x80; | 1013 | unsigned test = 0x80; |
882 | int i; | 1014 | int i; |
883 | 1015 | ||
@@ -1027,52 +1159,52 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1027 | /* precalculate the bit patterns with random shifts | 1159 | /* precalculate the bit patterns with random shifts |
1028 | for all 8 pixels and put them on an extra "stack" */ | 1160 | for all 8 pixels and put them on an extra "stack" */ |
1029 | asm volatile ( | 1161 | asm volatile ( |
1030 | "mov #8,r3 \n" /* loop count in r3: 8 pixels */ | 1162 | "mov #8,r3 \n" /* loop count */ |
1031 | 1163 | ||
1032 | ".wa_loop: \n" /** load pattern for pixel **/ | 1164 | ".wa_loop: \n" /** load pattern for pixel **/ |
1033 | "mov #0,r0 \n" /* pattern for skipped pixel must be 0 */ | 1165 | "mov #0,r0 \n" /* pattern for skipped pixel must be 0 */ |
1034 | "shlr %[mask] \n" /* shift out lsb of mask */ | 1166 | "shlr %[mask] \n" /* shift out lsb of mask */ |
1035 | "bf .wa_skip \n" /* skip this pixel */ | 1167 | "bf .wa_skip \n" /* skip this pixel */ |
1036 | 1168 | ||
1037 | "mov.b @%[src],r0 \n" /* load src byte */ | 1169 | "mov.b @%[src],r0 \n" /* load src byte */ |
1038 | "extu.b r0,r0 \n" /* extend unsigned */ | 1170 | "extu.b r0,r0 \n" /* extend unsigned */ |
1039 | "mov.b @(r0,%[trns]),r0\n" /* idxtable into pattern index */ | 1171 | "mov.b @(r0,%[trns]),r0\n" /* idxtable into pattern index */ |
1040 | "extu.b r0,r0 \n" /* extend unsigned */ | 1172 | "extu.b r0,r0 \n" /* extend unsigned */ |
1041 | "shll2 r0 \n" | 1173 | "shll2 r0 \n" |
1042 | "mov.l @(r0,%[bpat]),r4\n" /* r4 = bitpattern[byte]; */ | 1174 | "mov.l @(r0,%[bpat]),r4\n" /* r4 = bitpattern[byte]; */ |
1043 | 1175 | ||
1044 | "mov #75,r0 \n" | 1176 | "mov #75,r0 \n" |
1045 | "mulu r0,%[rnd] \n" /* multiply by 75 */ | 1177 | "mulu r0,%[rnd] \n" /* multiply by 75 */ |
1046 | "sts macl,%[rnd] \n" | 1178 | "sts macl,%[rnd] \n" |
1047 | "add #74,%[rnd] \n" /* add another 74 */ | 1179 | "add #74,%[rnd] \n" /* add another 74 */ |
1048 | /* Since the lower bits are not very random: */ | 1180 | /* Since the lower bits are not very random: */ |
1049 | "swap.b %[rnd],r1 \n" /* get bits 8..15 (need max. 5) */ | 1181 | "swap.b %[rnd],r1 \n" /* get bits 8..15 (need max. 5) */ |
1050 | "and %[rmsk],r1 \n" /* mask out unneeded bits */ | 1182 | "and %[rmsk],r1 \n" /* mask out unneeded bits */ |
1051 | 1183 | ||
1052 | "cmp/hs %[dpth],r1 \n" /* random >= depth ? */ | 1184 | "cmp/hs %[dpth],r1 \n" /* random >= depth ? */ |
1053 | "bf .wa_ntrim \n" | 1185 | "bf .wa_ntrim \n" |
1054 | "sub %[dpth],r1 \n" /* yes: random -= depth; */ | 1186 | "sub %[dpth],r1 \n" /* yes: random -= depth; */ |
1055 | ".wa_ntrim: \n" | 1187 | ".wa_ntrim: \n" |
1056 | 1188 | ||
1057 | "mov.l .ashlsi3,r0 \n" /** rotate pattern **/ | 1189 | "mov.l .ashlsi3,r0 \n" /** rotate pattern **/ |
1058 | "jsr @r0 \n" /* r4 -> r0, shift left by r5 */ | 1190 | "jsr @r0 \n" /* r4 -> r0, shift left by r5 */ |
1059 | "mov r1,r5 \n" | 1191 | "mov r1,r5 \n" |
1060 | 1192 | ||
1061 | "mov %[dpth],r5 \n" | 1193 | "mov %[dpth],r5 \n" |
1062 | "sub r1,r5 \n" /* r5 = depth - r1 */ | 1194 | "sub r1,r5 \n" /* r5 = depth - r1 */ |
1063 | "mov.l .lshrsi3,r1 \n" | 1195 | "mov.l .lshrsi3,r1 \n" |
1064 | "jsr @r1 \n" /* r4 -> r0, shift right by r5 */ | 1196 | "jsr @r1 \n" /* r4 -> r0, shift right by r5 */ |
1065 | "mov r0,r1 \n" /* store previous result in r1 */ | 1197 | "mov r0,r1 \n" /* store previous result in r1 */ |
1066 | 1198 | ||
1067 | "or r1,r0 \n" /* rotated_pattern = r0 | r1 */ | 1199 | "or r1,r0 \n" /* rotated_pattern = r0 | r1 */ |
1068 | 1200 | ||
1069 | ".wa_skip: \n" | 1201 | ".wa_skip: \n" |
1070 | "mov.l r0,@-%[patp]\n" /* push on pattern stack */ | 1202 | "mov.l r0,@-%[patp] \n" /* push on pattern stack */ |
1071 | 1203 | ||
1072 | "add %[stri],%[src] \n" /* src += stride; */ | 1204 | "add %[stri],%[src] \n" /* src += stride; */ |
1073 | "add #-1,r3 \n" /* decrease loop count */ | 1205 | "add #-1,r3 \n" /* loop 8 times (pixel block) */ |
1074 | "cmp/pl r3 \n" /* loop count > 0? */ | 1206 | "cmp/pl r3 \n" |
1075 | "bt .wa_loop \n" /* yes: loop */ | 1207 | "bt .wa_loop \n" |
1076 | : /* outputs */ | 1208 | : /* outputs */ |
1077 | [src] "+r"(_src), | 1209 | [src] "+r"(_src), |
1078 | [rnd] "+r"(_gray_random_buffer), | 1210 | [rnd] "+r"(_gray_random_buffer), |
@@ -1095,79 +1227,79 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1095 | /* set the bits for all 8 pixels in all bytes according to the | 1227 | /* set the bits for all 8 pixels in all bytes according to the |
1096 | * precalculated patterns on the pattern stack */ | 1228 | * precalculated patterns on the pattern stack */ |
1097 | asm volatile ( | 1229 | asm volatile ( |
1098 | "mov.l @%[patp]+,r1\n" /* pop all 8 patterns */ | 1230 | "mov.l @%[patp]+,r1 \n" /* pop all 8 patterns */ |
1099 | "mov.l @%[patp]+,r2\n" | 1231 | "mov.l @%[patp]+,r2 \n" |
1100 | "mov.l @%[patp]+,r3\n" | 1232 | "mov.l @%[patp]+,r3 \n" |
1101 | "mov.l @%[patp]+,r6\n" | 1233 | "mov.l @%[patp]+,r6 \n" |
1102 | "mov.l @%[patp]+,r7\n" | 1234 | "mov.l @%[patp]+,r7 \n" |
1103 | "mov.l @%[patp]+,r8\n" | 1235 | "mov.l @%[patp]+,r8 \n" |
1104 | "mov.l @%[patp]+,r9\n" | 1236 | "mov.l @%[patp]+,r9 \n" |
1105 | "mov.l @%[patp],r10\n" | 1237 | "mov.l @%[patp],r10 \n" |
1106 | 1238 | ||
1107 | "not %[mask],%[mask] \n" /* "set" mask -> "keep" mask */ | 1239 | "not %[mask],%[mask] \n" /* "set" mask -> "keep" mask */ |
1108 | "extu.b %[mask],%[mask] \n" /* mask out high bits */ | 1240 | "extu.b %[mask],%[mask] \n" /* mask out high bits */ |
1109 | "tst %[mask],%[mask] \n" /* nothing to keep? */ | 1241 | "tst %[mask],%[mask] \n" |
1110 | "bt .wa_sloop \n" /* yes: jump to short loop */ | 1242 | "bt .wa_sloop \n" /* short loop if nothing to keep */ |
1111 | 1243 | ||
1112 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | 1244 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ |
1113 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ | 1245 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ |
1114 | "rotcl r0 \n" /* rotate t bit into r0 */ | 1246 | "rotcl r0 \n" /* rotate t bit into r0 */ |
1115 | "shlr r2 \n" | 1247 | "shlr r2 \n" |
1116 | "rotcl r0 \n" | 1248 | "rotcl r0 \n" |
1117 | "shlr r3 \n" | 1249 | "shlr r3 \n" |
1118 | "rotcl r0 \n" | 1250 | "rotcl r0 \n" |
1119 | "shlr r6 \n" | 1251 | "shlr r6 \n" |
1120 | "rotcl r0 \n" | 1252 | "rotcl r0 \n" |
1121 | "shlr r7 \n" | 1253 | "shlr r7 \n" |
1122 | "rotcl r0 \n" | 1254 | "rotcl r0 \n" |
1123 | "shlr r8 \n" | 1255 | "shlr r8 \n" |
1124 | "rotcl r0 \n" | 1256 | "rotcl r0 \n" |
1125 | "shlr r9 \n" | 1257 | "shlr r9 \n" |
1126 | "rotcl r0 \n" | 1258 | "rotcl r0 \n" |
1127 | "shlr r10 \n" | 1259 | "shlr r10 \n" |
1128 | "mov.b @%[addr],%[rx] \n" /* read old value */ | 1260 | "mov.b @%[addr],%[rx] \n" /* read old value */ |
1129 | "rotcl r0 \n" | 1261 | "rotcl r0 \n" |
1130 | "and %[mask],%[rx] \n" /* mask out unneeded bits */ | 1262 | "and %[mask],%[rx] \n" /* mask out replaced bits */ |
1131 | "or %[rx],r0 \n" /* set new bits */ | 1263 | "or %[rx],r0 \n" /* set new bits */ |
1132 | "mov.b r0,@%[addr] \n" /* store value to bitplane */ | 1264 | "mov.b r0,@%[addr] \n" /* store value to bitplane */ |
1133 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ | 1265 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ |
1134 | "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ | 1266 | "cmp/hi %[addr],%[end] \n" /* loop for all bitplanes */ |
1135 | "bt .wa_floop \n" /* no: loop */ | 1267 | "bt .wa_floop \n" |
1136 | 1268 | ||
1137 | "bra .wa_end \n" | 1269 | "bra .wa_end \n" |
1138 | "nop \n" | 1270 | "nop \n" |
1139 | 1271 | ||
1140 | /* References to C library routines used in the precalc block */ | 1272 | /* References to C library routines used in the precalc block */ |
1141 | ".align 2 \n" | 1273 | ".align 2 \n" |
1142 | ".ashlsi3: \n" /* C library routine: */ | 1274 | ".ashlsi3: \n" /* C library routine: */ |
1143 | ".long ___ashlsi3 \n" /* shift r4 left by r5, result in r0 */ | 1275 | ".long ___ashlsi3 \n" /* shift r4 left by r5, result in r0 */ |
1144 | ".lshrsi3: \n" /* C library routine: */ | 1276 | ".lshrsi3: \n" /* C library routine: */ |
1145 | ".long ___lshrsi3 \n" /* shift r4 right by r5, result in r0 */ | 1277 | ".long ___lshrsi3 \n" /* shift r4 right by r5, result in r0 */ |
1146 | /* both routines preserve r4, destroy r5 and take ~16 cycles */ | 1278 | /* both routines preserve r4, destroy r5 and take ~16 cycles */ |
1147 | 1279 | ||
1148 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | 1280 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ |
1149 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ | 1281 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ |
1150 | "rotcl r0 \n" /* rotate t bit into r0 */ | 1282 | "rotcl r0 \n" /* rotate t bit into r0 */ |
1151 | "shlr r2 \n" | 1283 | "shlr r2 \n" |
1152 | "rotcl r0 \n" | 1284 | "rotcl r0 \n" |
1153 | "shlr r3 \n" | 1285 | "shlr r3 \n" |
1154 | "rotcl r0 \n" | 1286 | "rotcl r0 \n" |
1155 | "shlr r6 \n" | 1287 | "shlr r6 \n" |
1156 | "rotcl r0 \n" | 1288 | "rotcl r0 \n" |
1157 | "shlr r7 \n" | 1289 | "shlr r7 \n" |
1158 | "rotcl r0 \n" | 1290 | "rotcl r0 \n" |
1159 | "shlr r8 \n" | 1291 | "shlr r8 \n" |
1160 | "rotcl r0 \n" | 1292 | "rotcl r0 \n" |
1161 | "shlr r9 \n" | 1293 | "shlr r9 \n" |
1162 | "rotcl r0 \n" | 1294 | "rotcl r0 \n" |
1163 | "shlr r10 \n" | 1295 | "shlr r10 \n" |
1164 | "rotcl r0 \n" | 1296 | "rotcl r0 \n" |
1165 | "mov.b r0,@%[addr] \n" /* store byte to bitplane */ | 1297 | "mov.b r0,@%[addr] \n" /* store byte to bitplane */ |
1166 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ | 1298 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ |
1167 | "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ | 1299 | "cmp/hi %[addr],%[end] \n" /* loop for all bitplanes */ |
1168 | "bt .wa_sloop \n" /* no: loop */ | 1300 | "bt .wa_sloop \n" |
1169 | 1301 | ||
1170 | ".wa_end: \n" | 1302 | ".wa_end: \n" |
1171 | : /* outputs */ | 1303 | : /* outputs */ |
1172 | [addr]"+r"(addr), | 1304 | [addr]"+r"(addr), |
1173 | [mask]"+r"(_mask), | 1305 | [mask]"+r"(_mask), |
@@ -1189,43 +1321,43 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1189 | /* precalculate the bit patterns with random shifts | 1321 | /* precalculate the bit patterns with random shifts |
1190 | for all 8 pixels and put them on an extra "stack" */ | 1322 | for all 8 pixels and put them on an extra "stack" */ |
1191 | asm volatile ( | 1323 | asm volatile ( |
1192 | "moveq.l #8,%%d3 \n" /* loop count in d3: 8 pixels */ | 1324 | "moveq.l #8,%%d3 \n" /* loop count */ |
1193 | 1325 | ||
1194 | ".wa_loop: \n" /** load pattern for pixel **/ | 1326 | ".wa_loop: \n" /** load pattern for pixel **/ |
1195 | "clr.l %%d2 \n" /* pattern for skipped pixel must be 0 */ | 1327 | "clr.l %%d2 \n" /* pattern for skipped pixel must be 0 */ |
1196 | "lsr.l #1,%[mask] \n" /* shift out lsb of mask */ | 1328 | "lsr.l #1,%[mask] \n" /* shift out lsb of mask */ |
1197 | "bcc.b .wa_skip \n" /* skip this pixel */ | 1329 | "bcc.b .wa_skip \n" /* skip this pixel */ |
1198 | 1330 | ||
1199 | "clr.l %%d0 \n" | 1331 | "clr.l %%d0 \n" |
1200 | "move.b (%[src]),%%d0 \n" /* load src byte */ | 1332 | "move.b (%[src]),%%d0 \n" /* load src byte */ |
1201 | "move.b (%%d0:l:1,%[trns]),%%d0\n" /* idxtable into pattern index */ | 1333 | "move.b (%%d0:l:1,%[trns]),%%d0\n" /* idxtable into pattern index */ |
1202 | "move.l (%%d0:l:4,%[bpat]),%%d2\n" /* d2 = bitpattern[byte]; */ | 1334 | "move.l (%%d0:l:4,%[bpat]),%%d2\n" /* d2 = bitpattern[byte]; */ |
1203 | 1335 | ||
1204 | "mulu.w #75,%[rnd] \n" /* multiply by 75 */ | 1336 | "mulu.w #75,%[rnd] \n" /* multiply by 75 */ |
1205 | "add.l #74,%[rnd] \n" /* add another 74 */ | 1337 | "add.l #74,%[rnd] \n" /* add another 74 */ |
1206 | /* Since the lower bits are not very random: */ | 1338 | /* Since the lower bits are not very random: */ |
1207 | "move.l %[rnd],%%d1 \n" | 1339 | "move.l %[rnd],%%d1 \n" |
1208 | "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */ | 1340 | "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */ |
1209 | "and.l %[rmsk],%%d1\n" /* mask out unneeded bits */ | 1341 | "and.l %[rmsk],%%d1 \n" /* mask out unneeded bits */ |
1210 | 1342 | ||
1211 | "cmp.l %[dpth],%%d1\n" /* random >= depth ? */ | 1343 | "cmp.l %[dpth],%%d1 \n" /* random >= depth ? */ |
1212 | "blo.b .wa_ntrim \n" | 1344 | "blo.b .wa_ntrim \n" |
1213 | "sub.l %[dpth],%%d1\n" /* yes: random -= depth; */ | 1345 | "sub.l %[dpth],%%d1 \n" /* yes: random -= depth; */ |
1214 | ".wa_ntrim: \n" | 1346 | ".wa_ntrim: \n" |
1215 | 1347 | ||
1216 | "move.l %%d2,%%d0 \n" | 1348 | "move.l %%d2,%%d0 \n" /** rotate pattern **/ |
1217 | "lsl.l %%d1,%%d0 \n" | 1349 | "lsl.l %%d1,%%d0 \n" |
1218 | "sub.l %[dpth],%%d1\n" | 1350 | "sub.l %[dpth],%%d1 \n" |
1219 | "neg.l %%d1 \n" /* d1 = depth - d1 */ | 1351 | "neg.l %%d1 \n" /* d1 = depth - d1 */ |
1220 | "lsr.l %%d1,%%d2 \n" | 1352 | "lsr.l %%d1,%%d2 \n" |
1221 | "or.l %%d0,%%d2 \n" | 1353 | "or.l %%d0,%%d2 \n" |
1222 | 1354 | ||
1223 | ".wa_skip: \n" | 1355 | ".wa_skip: \n" |
1224 | "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */ | 1356 | "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */ |
1225 | 1357 | ||
1226 | "add.l %[stri],%[src] \n" /* src += stride; */ | 1358 | "add.l %[stri],%[src] \n" /* src += stride; */ |
1227 | "subq.l #1,%%d3 \n" /* decrease loop count */ | 1359 | "subq.l #1,%%d3 \n" /* loop 8 times (pixel block) */ |
1228 | "bne.b .wa_loop \n" /* yes: loop */ | 1360 | "bne.b .wa_loop \n" |
1229 | : /* outputs */ | 1361 | : /* outputs */ |
1230 | [src] "+a"(_src), | 1362 | [src] "+a"(_src), |
1231 | [patp]"+a"(pat_ptr), | 1363 | [patp]"+a"(pat_ptr), |
@@ -1250,78 +1382,76 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1250 | asm volatile ( | 1382 | asm volatile ( |
1251 | "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n" | 1383 | "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n" |
1252 | /* pop all 8 patterns */ | 1384 | /* pop all 8 patterns */ |
1253 | "not.l %[mask] \n" /* "set" mask -> "keep" mask */ | 1385 | "not.l %[mask] \n" /* "set" mask -> "keep" mask */ |
1254 | "and.l #0xFF,%[mask] \n" | 1386 | "and.l #0xFF,%[mask] \n" |
1255 | "beq.b .wa_sstart \n" /* yes: jump to short loop */ | 1387 | "beq.b .wa_sstart \n" /* short loop if nothing to keep */ |
1256 | 1388 | ||
1257 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | 1389 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ |
1258 | "clr.l %%d0 \n" | 1390 | "lsr.l #1,%%d2 \n" /* shift out pattern bit */ |
1259 | "lsr.l #1,%%d2 \n" /* shift out mask bit */ | 1391 | "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */ |
1260 | "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ | 1392 | "lsr.l #1,%%d3 \n" |
1261 | "lsr.l #1,%%d3 \n" | 1393 | "addx.l %%d0,%%d0 \n" |
1262 | "addx.l %%d0,%%d0 \n" | 1394 | "lsr.l #1,%%d4 \n" |
1263 | "lsr.l #1,%%d4 \n" | 1395 | "addx.l %%d0,%%d0 \n" |
1264 | "addx.l %%d0,%%d0 \n" | 1396 | "lsr.l #1,%%d5 \n" |
1265 | "lsr.l #1,%%d5 \n" | 1397 | "addx.l %%d0,%%d0 \n" |
1266 | "addx.l %%d0,%%d0 \n" | 1398 | "lsr.l #1,%%d6 \n" |
1267 | "lsr.l #1,%%d6 \n" | 1399 | "addx.l %%d0,%%d0 \n" |
1268 | "addx.l %%d0,%%d0 \n" | 1400 | "move.l %%a0,%%d1 \n" |
1269 | "move.l %%a0,%%d1 \n" | 1401 | "lsr.l #1,%%d1 \n" |
1270 | "lsr.l #1,%%d1 \n" | 1402 | "addx.l %%d0,%%d0 \n" |
1271 | "addx.l %%d0,%%d0 \n" | 1403 | "move.l %%d1,%%a0 \n" |
1272 | "move.l %%d1,%%a0 \n" | 1404 | "move.l %%a1,%%d1 \n" |
1273 | "move.l %%a1,%%d1 \n" | 1405 | "lsr.l #1,%%d1 \n" |
1274 | "lsr.l #1,%%d1 \n" | 1406 | "addx.l %%d0,%%d0 \n" |
1275 | "addx.l %%d0,%%d0 \n" | 1407 | "move.l %%d1,%%a1 \n" |
1276 | "move.l %%d1,%%a1 \n" | 1408 | "move.l %[ax],%%d1 \n" |
1277 | "move.l %[ax],%%d1 \n" | 1409 | "lsr.l #1,%%d1 \n" |
1278 | "lsr.l #1,%%d1 \n" | 1410 | "addx.l %%d0,%%d0 \n" |
1279 | "addx.l %%d0,%%d0 \n" | 1411 | "move.l %%d1,%[ax] \n" |
1280 | "move.l %%d1,%[ax] \n" | ||
1281 | 1412 | ||
1282 | "move.b (%[addr]),%%d1 \n" /* read old value */ | 1413 | "move.b (%[addr]),%%d1 \n" /* read old value */ |
1283 | "and.l %[mask],%%d1 \n" /* mask out unneeded bits */ | 1414 | "and.l %[mask],%%d1 \n" /* mask out replaced bits */ |
1284 | "or.l %%d0,%%d1 \n" /* set new bits */ | 1415 | "or.l %%d0,%%d1 \n" /* set new bits */ |
1285 | "move.b %%d1,(%[addr]) \n" /* store value to bitplane */ | 1416 | "move.b %%d1,(%[addr]) \n" /* store value to bitplane */ |
1286 | 1417 | ||
1287 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ | 1418 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ |
1288 | "cmp.l %[addr],%[end] \n" /* last bitplane done? */ | 1419 | "cmp.l %[addr],%[end] \n" /* loop for all bitplanes */ |
1289 | "bhi.b .wa_floop \n" /* no: loop */ | 1420 | "bhi.b .wa_floop \n" |
1290 | 1421 | ||
1291 | "bra.b .wa_end \n" | 1422 | "bra.b .wa_end \n" |
1292 | 1423 | ||
1293 | ".wa_sstart: \n" | 1424 | ".wa_sstart: \n" |
1294 | "move.l %%a0,%[mask]\n" /* mask isn't needed here, reuse reg */ | 1425 | "move.l %%a0,%[mask] \n" /* mask isn't needed here, reuse reg */ |
1295 | 1426 | ||
1296 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | 1427 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ |
1297 | "clr.l %%d0 \n" | 1428 | "lsr.l #1,%%d2 \n" /* shift out pattern bit */ |
1298 | "lsr.l #1,%%d2 \n" /* shift out mask bit */ | 1429 | "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */ |
1299 | "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ | 1430 | "lsr.l #1,%%d3 \n" |
1300 | "lsr.l #1,%%d3 \n" | 1431 | "addx.l %%d0,%%d0 \n" |
1301 | "addx.l %%d0,%%d0 \n" | 1432 | "lsr.l #1,%%d4 \n" |
1302 | "lsr.l #1,%%d4 \n" | 1433 | "addx.l %%d0,%%d0 \n" |
1303 | "addx.l %%d0,%%d0 \n" | 1434 | "lsr.l #1,%%d5 \n" |
1304 | "lsr.l #1,%%d5 \n" | 1435 | "addx.l %%d0,%%d0 \n" |
1305 | "addx.l %%d0,%%d0 \n" | 1436 | "lsr.l #1,%%d6 \n" |
1306 | "lsr.l #1,%%d6 \n" | 1437 | "addx.l %%d0,%%d0 \n" |
1307 | "addx.l %%d0,%%d0 \n" | 1438 | "lsr.l #1,%[mask] \n" |
1308 | "lsr.l #1,%[mask] \n" | 1439 | "addx.l %%d0,%%d0 \n" |
1309 | "addx.l %%d0,%%d0 \n" | 1440 | "move.l %%a1,%%d1 \n" |
1310 | "move.l %%a1,%%d1 \n" | 1441 | "lsr.l #1,%%d1 \n" |
1311 | "lsr.l #1,%%d1 \n" | 1442 | "addx.l %%d0,%%d0 \n" |
1312 | "addx.l %%d0,%%d0 \n" | 1443 | "move.l %%d1,%%a1 \n" |
1313 | "move.l %%d1,%%a1 \n" | 1444 | "move.l %[ax],%%d1 \n" |
1314 | "move.l %[ax],%%d1 \n" | 1445 | "lsr.l #1,%%d1 \n" |
1315 | "lsr.l #1,%%d1 \n" | 1446 | "addx.l %%d0,%%d0 \n" |
1316 | "addx.l %%d0,%%d0 \n" | 1447 | "move.l %%d1,%[ax] \n" |
1317 | "move.l %%d1,%[ax] \n" | ||
1318 | 1448 | ||
1319 | "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */ | 1449 | "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */ |
1320 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ | 1450 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ |
1321 | "cmp.l %[addr],%[end] \n" /* last bitplane done? */ | 1451 | "cmp.l %[addr],%[end] \n" /* loop for all bitplanes */ |
1322 | "bhi.b .wa_sloop \n" /* no: loop */ | 1452 | "bhi.b .wa_sloop \n" |
1323 | 1453 | ||
1324 | ".wa_end: \n" | 1454 | ".wa_end: \n" |
1325 | : /* outputs */ | 1455 | : /* outputs */ |
1326 | [addr]"+a"(addr), | 1456 | [addr]"+a"(addr), |
1327 | [mask]"+d"(_mask), | 1457 | [mask]"+d"(_mask), |
diff --git a/apps/plugins/lib/gray_scroll.c b/apps/plugins/lib/gray_scroll.c index df5dc57044..8f60e7cef1 100644 --- a/apps/plugins/lib/gray_scroll.c +++ b/apps/plugins/lib/gray_scroll.c | |||
@@ -283,32 +283,32 @@ void gray_ub_scroll_left(int count) | |||
283 | if (count) | 283 | if (count) |
284 | { | 284 | { |
285 | asm ( | 285 | asm ( |
286 | "mov r4, %[high] \n" | 286 | "mov r4, %[high] \n" /* rows = height */ |
287 | 287 | ||
288 | ".sl_rloop: \n" | 288 | ".sl_rloop: \n" /* repeat for every row */ |
289 | "mov r5, %[addr] \n" | 289 | "mov r5, %[addr] \n" /* get start address */ |
290 | "mov r2, %[dpth] \n" | 290 | "mov r2, %[dpth] \n" /* planes = depth */ |
291 | 291 | ||
292 | ".sl_oloop: \n" | 292 | ".sl_oloop: \n" /* repeat for every bitplane */ |
293 | "mov r6, r5 \n" | 293 | "mov r6, r5 \n" /* get start address */ |
294 | "mov r3, %[cols] \n" | 294 | "mov r3, %[cols] \n" /* cols = col_count */ |
295 | "mov r1, #0 \n" | 295 | "mov r1, #0 \n" /* fill with zero */ |
296 | 296 | ||
297 | ".sl_iloop: \n" | 297 | ".sl_iloop: \n" /* repeat for all cols */ |
298 | "mov r1, r1, lsr #8 \n" | 298 | "mov r1, r1, lsr #8 \n" /* shift right to get residue */ |
299 | "ldrb r0, [r6, #-1]! \n" | 299 | "ldrb r0, [r6, #-1]! \n" /* decrement addr & get data byte */ |
300 | "orr r1, r1, r0, lsl %[cnt] \n" | 300 | "orr r1, r1, r0, lsl %[cnt] \n" /* combine with last residue */ |
301 | "strb r1, [r6] \n" | 301 | "strb r1, [r6] \n" /* store data */ |
302 | 302 | ||
303 | "subs r3, r3, #1 \n" | 303 | "subs r3, r3, #1 \n" /* cols-- */ |
304 | "bne .sl_iloop \n" | 304 | "bne .sl_iloop \n" |
305 | 305 | ||
306 | "add r5, r5, %[psiz] \n" | 306 | "add r5, r5, %[psiz] \n" /* start_address += plane_size */ |
307 | "subs r2, r2, #1 \n" | 307 | "subs r2, r2, #1 \n" /* planes-- */ |
308 | "bne .sl_oloop \n" | 308 | "bne .sl_oloop \n" |
309 | 309 | ||
310 | "add %[addr],%[addr],%[bwid] \n" | 310 | "add %[addr],%[addr],%[bwid] \n" /* start_address += bwidth */ |
311 | "subs r4, r4, #1 \n" | 311 | "subs r4, r4, #1 \n" /* rows-- */ |
312 | "bne .sl_rloop \n" | 312 | "bne .sl_rloop \n" |
313 | : /* outputs */ | 313 | : /* outputs */ |
314 | : /* inputs */ | 314 | : /* inputs */ |
@@ -364,32 +364,32 @@ void gray_ub_scroll_right(int count) | |||
364 | if (count) | 364 | if (count) |
365 | { | 365 | { |
366 | asm ( | 366 | asm ( |
367 | "mov r4, %[high] \n" | 367 | "mov r4, %[high] \n" /* rows = height */ |
368 | 368 | ||
369 | ".sr_rloop: \n" | 369 | ".sr_rloop: \n" /* repeat for every row */ |
370 | "mov r5, %[addr] \n" | 370 | "mov r5, %[addr] \n" /* get start address */ |
371 | "mov r2, %[dpth] \n" | 371 | "mov r2, %[dpth] \n" /* planes = depth */ |
372 | 372 | ||
373 | ".sr_oloop: \n" | 373 | ".sr_oloop: \n" /* repeat for every bitplane */ |
374 | "mov r6, r5 \n" | 374 | "mov r6, r5 \n" /* get start address */ |
375 | "mov r3, %[cols] \n" | 375 | "mov r3, %[cols] \n" /* cols = col_count */ |
376 | "mov r1, #0 \n" | 376 | "mov r1, #0 \n" /* fill with zero */ |
377 | 377 | ||
378 | ".sr_iloop: \n" | 378 | ".sr_iloop: \n" /* repeat for all cols */ |
379 | "ldrb r0, [r6] \n" | 379 | "ldrb r0, [r6] \n" /* get data byte */ |
380 | "orr r1, r0, r1, lsl #8 \n" | 380 | "orr r1, r0, r1, lsl #8 \n" /* combine w/ old data shifted to 2nd byte */ |
381 | "mov r0, r1, lsr %[cnt] \n" | 381 | "mov r0, r1, lsr %[cnt] \n" /* shift right */ |
382 | "strb r0, [r6], #1 \n" | 382 | "strb r0, [r6], #1 \n" /* store data, increment addr */ |
383 | 383 | ||
384 | "subs r3, r3, #1 \n" | 384 | "subs r3, r3, #1 \n" /* cols-- */ |
385 | "bne .sr_iloop \n" | 385 | "bne .sr_iloop \n" |
386 | 386 | ||
387 | "add r5, r5, %[psiz] \n" | 387 | "add r5, r5, %[psiz] \n" /* start_address += plane_size */ |
388 | "subs r2, r2, #1 \n" | 388 | "subs r2, r2, #1 \n" /* planes-- */ |
389 | "bne .sr_oloop \n" | 389 | "bne .sr_oloop \n" |
390 | 390 | ||
391 | "add %[addr],%[addr],%[bwid] \n" | 391 | "add %[addr],%[addr],%[bwid] \n" /* start_address += bwidth */ |
392 | "subs r4, r4, #1 \n" | 392 | "subs r4, r4, #1 \n" /* rows-- */ |
393 | "bne .sr_rloop \n" | 393 | "bne .sr_rloop \n" |
394 | : /* outputs */ | 394 | : /* outputs */ |
395 | : /* inputs */ | 395 | : /* inputs */ |
@@ -714,8 +714,7 @@ void gray_ub_scroll_up(int count) | |||
714 | "move.b (%%a1),%%d0 \n" /* get data byte */ | 714 | "move.b (%%a1),%%d0 \n" /* get data byte */ |
715 | "lsl.l #8,%%d1 \n" /* old data to 2nd byte */ | 715 | "lsl.l #8,%%d1 \n" /* old data to 2nd byte */ |
716 | "or.l %%d1,%%d0 \n" /* combine old data */ | 716 | "or.l %%d1,%%d0 \n" /* combine old data */ |
717 | "clr.l %%d1 \n" | 717 | "move.l %%d0,%%d1 \n" /* keep data for next round */ |
718 | "move.b %%d0,%%d1 \n" /* keep data for next round */ | ||
719 | "lsr.l %[cnt],%%d0 \n" /* shift right */ | 718 | "lsr.l %[cnt],%%d0 \n" /* shift right */ |
720 | "move.b %%d0,(%%a1) \n" /* store data */ | 719 | "move.b %%d0,(%%a1) \n" /* store data */ |
721 | 720 | ||