diff options
Diffstat (limited to 'apps/plugins/lib/gray_core.c')
-rw-r--r-- | apps/plugins/lib/gray_core.c | 536 |
1 file changed, 338 insertions, 198 deletions
diff --git a/apps/plugins/lib/gray_core.c b/apps/plugins/lib/gray_core.c index c253a7112e..c162349f76 100644 --- a/apps/plugins/lib/gray_core.c +++ b/apps/plugins/lib/gray_core.c | |||
@@ -648,14 +648,165 @@ void gray_update_rect(int x, int y, int width, int height) | |||
648 | cbuf = _gray_info.cur_buffer + srcofs_row; | 648 | cbuf = _gray_info.cur_buffer + srcofs_row; |
649 | bbuf = _gray_info.back_buffer + srcofs_row; | 649 | bbuf = _gray_info.back_buffer + srcofs_row; |
650 | 650 | ||
651 | #if 0 /* CPU specific asm versions will go here */ | 651 | #ifdef CPU_ARM |
652 | asm volatile ( | ||
653 | "ldr r0, [%[cbuf]] \n" | ||
654 | "ldr r1, [%[bbuf]] \n" | ||
655 | "eor r1, r0, r1 \n" | ||
656 | "ldr r0, [%[cbuf], #4] \n" | ||
657 | "ldr %[chg], [%[bbuf], #4] \n" | ||
658 | "eor %[chg], r0, %[chg] \n" | ||
659 | "orr %[chg], %[chg], r1 \n" | ||
660 | : /* outputs */ | ||
661 | [chg] "=&r"(change) | ||
662 | : /* inputs */ | ||
663 | [cbuf]"r"(cbuf), | ||
664 | [bbuf]"r"(bbuf) | ||
665 | : /* clobbers */ | ||
666 | "r0", "r1" | ||
667 | ); | ||
668 | |||
669 | if (change != 0) | ||
670 | { | ||
671 | unsigned char *addr, *end; | ||
672 | unsigned mask, trash; | ||
673 | |||
674 | pat_ptr = &pat_stack[8]; | ||
675 | |||
676 | /* precalculate the bit patterns with random shifts | ||
677 | * for all 8 pixels and put them on an extra "stack" */ | ||
678 | asm volatile ( | ||
679 | "mov r3, #8 \n" /* loop count */ | ||
680 | "mov %[mask], #0 \n" | ||
681 | |||
682 | ".ur_pre_loop: \n" | ||
683 | "mov %[mask], %[mask], lsl #1 \n" /* shift mask */ | ||
684 | "ldrb r0, [%[cbuf]], #1 \n" /* read current buffer */ | ||
685 | "ldrb r1, [%[bbuf]] \n" /* read back buffer */ | ||
686 | "strb r0, [%[bbuf]], #1 \n" /* update back buffer */ | ||
687 | "mov r2, #0 \n" /* preset for skipped pixel */ | ||
688 | "cmp r0, r1 \n" /* no change? */ | ||
689 | "beq .ur_skip \n" /* -> skip */ | ||
690 | |||
691 | "ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */ | ||
692 | |||
693 | "add r0, %[rnd], %[rnd], lsl #3 \n" /* multiply by 75 */ | ||
694 | "add %[rnd], %[rnd], %[rnd], lsl #1 \n" | ||
695 | "add %[rnd], %[rnd], r0, lsl #3 \n" | ||
696 | "add %[rnd], %[rnd], #74 \n" /* add another 74 */ | ||
697 | /* Since the lower bits are not very random: get bits 8..15 (need max. 5) */ | ||
698 | "and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */ | ||
699 | |||
700 | "cmp r1, %[dpth] \n" /* random >= depth ? */ | ||
701 | "subhs r1, r1, %[dpth] \n" /* yes: random -= depth */ | ||
702 | |||
703 | "mov r0, r2, lsl r1 \n" /** rotate pattern **/ | ||
704 | "sub r1, %[dpth], r1 \n" | ||
705 | "orr r2, r0, r2, lsr r1 \n" | ||
706 | |||
707 | "orr %[mask], %[mask], #1 \n" /* set mask bit */ | ||
708 | |||
709 | ".ur_skip: \n" | ||
710 | "str r2, [%[patp], #-4]! \n" /* push on pattern stack */ | ||
711 | |||
712 | "subs r3, r3, #1 \n" /* loop 8 times (pixel block) */ | ||
713 | "bne .ur_pre_loop \n" | ||
714 | : /* outputs */ | ||
715 | [cbuf]"+r"(cbuf), | ||
716 | [bbuf]"+r"(bbuf), | ||
717 | [patp]"+r"(pat_ptr), | ||
718 | [rnd] "+r"(_gray_random_buffer), | ||
719 | [mask]"=&r"(mask) | ||
720 | : /* inputs */ | ||
721 | [bpat]"r"(_gray_info.bitpattern), | ||
722 | [dpth]"r"(_gray_info.depth), | ||
723 | [rmsk]"r"(_gray_info.randmask) | ||
724 | : /* clobbers */ | ||
725 | "r0", "r1", "r2", "r3" | ||
726 | ); | ||
727 | |||
728 | addr = dst_row; | ||
729 | end = addr + MULU16(_gray_info.depth, _gray_info.plane_size); | ||
730 | |||
731 | /* set the bits for all 8 pixels in all bytes according to the | ||
732 | * precalculated patterns on the pattern stack */ | ||
733 | asm volatile ( | ||
734 | "ldmia %[patp], {r2 - r8, %[rx]} \n" /* pop all 8 patterns */ | ||
735 | |||
736 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ | ||
737 | "ands %[mask], %[mask], #0xff \n" | ||
738 | "beq .ur_sloop \n" /* short loop if nothing to keep */ | ||
739 | |||
740 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | ||
741 | "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */ | ||
742 | "adc r0, r0, r0 \n" /* put bit into LSB for byte */ | ||
743 | "movs r8, r8, lsr #1 \n" | ||
744 | "adc r0, r0, r0 \n" | ||
745 | "movs r7, r7, lsr #1 \n" | ||
746 | "adc r0, r0, r0 \n" | ||
747 | "movs r6, r6, lsr #1 \n" | ||
748 | "adc r0, r0, r0 \n" | ||
749 | "movs r5, r5, lsr #1 \n" | ||
750 | "adc r0, r0, r0 \n" | ||
751 | "movs r4, r4, lsr #1 \n" | ||
752 | "adc r0, r0, r0 \n" | ||
753 | "movs r3, r3, lsr #1 \n" | ||
754 | "adc r0, r0, r0 \n" | ||
755 | "movs r2, r2, lsr #1 \n" | ||
756 | "adc r0, r0, r0 \n" | ||
757 | |||
758 | "ldrb r1, [%[addr]] \n" /* read old value */ | ||
759 | "and r1, r1, %[mask] \n" /* mask out replaced bits */ | ||
760 | "orr r1, r1, r0 \n" /* set new bits */ | ||
761 | "strb r1, [%[addr]], %[psiz] \n" /* store value, advance to next bpl */ | ||
762 | |||
763 | "cmp %[end], %[addr] \n" /* loop for all bitplanes */ | ||
764 | "bne .ur_floop \n" | ||
765 | |||
766 | "b .ur_end \n" | ||
767 | |||
768 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | ||
769 | "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */ | ||
770 | "adc r0, r0, r0 \n" /* put bit into LSB for byte */ | ||
771 | "movs r8, r8, lsr #1 \n" | ||
772 | "adc r0, r0, r0 \n" | ||
773 | "movs r7, r7, lsr #1 \n" | ||
774 | "adc r0, r0, r0 \n" | ||
775 | "movs r6, r6, lsr #1 \n" | ||
776 | "adc r0, r0, r0 \n" | ||
777 | "movs r5, r5, lsr #1 \n" | ||
778 | "adc r0, r0, r0 \n" | ||
779 | "movs r4, r4, lsr #1 \n" | ||
780 | "adc r0, r0, r0 \n" | ||
781 | "movs r3, r3, lsr #1 \n" | ||
782 | "adc r0, r0, r0 \n" | ||
783 | "movs r2, r2, lsr #1 \n" | ||
784 | "adc r0, r0, r0 \n" | ||
785 | |||
786 | "strb r0, [%[addr]], %[psiz] \n" /* store byte, advance to next bpl */ | ||
787 | |||
788 | "cmp %[end], %[addr] \n" /* loop for all bitplanes */ | ||
789 | "bne .ur_sloop \n" | ||
790 | |||
791 | ".ur_end: \n" | ||
792 | : /* outputs */ | ||
793 | [addr]"+r"(addr), | ||
794 | [mask]"+r"(mask), | ||
795 | [rx] "=&r"(trash) | ||
796 | : /* inputs */ | ||
797 | [psiz]"r"(_gray_info.plane_size), | ||
798 | [end] "r"(end), | ||
799 | [patp]"[rx]"(pat_ptr) | ||
800 | : /* clobbers */ | ||
801 | "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8" | ||
802 | ); | ||
803 | } | ||
652 | #else /* C version, for reference*/ | 804 | #else /* C version, for reference*/ |
805 | #warning C version of gray_update_rect() used | ||
653 | (void)pat_ptr; | 806 | (void)pat_ptr; |
654 | /* check whether anything changed in the 8-pixel block */ | 807 | /* check whether anything changed in the 8-pixel block */ |
655 | change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; | 808 | change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; |
656 | cbuf += sizeof(uint32_t); | 809 | change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4); |
657 | bbuf += sizeof(uint32_t); | ||
658 | change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; | ||
659 | 810 | ||
660 | if (change != 0) | 811 | if (change != 0) |
661 | { | 812 | { |
@@ -664,9 +815,6 @@ void gray_update_rect(int x, int y, int width, int height) | |||
664 | unsigned test = 1; | 815 | unsigned test = 1; |
665 | int i; | 816 | int i; |
666 | 817 | ||
667 | cbuf = _gray_info.cur_buffer + srcofs_row; | ||
668 | bbuf = _gray_info.back_buffer + srcofs_row; | ||
669 | |||
670 | /* precalculate the bit patterns with random shifts | 818 | /* precalculate the bit patterns with random shifts |
671 | * for all 8 pixels and put them on an extra "stack" */ | 819 | * for all 8 pixels and put them on an extra "stack" */ |
672 | for (i = 7; i >= 0; i--) | 820 | for (i = 7; i >= 0; i--) |
@@ -711,7 +859,7 @@ void gray_update_rect(int x, int y, int width, int height) | |||
711 | 859 | ||
712 | for (i = 7; i >= 0; i--) | 860 | for (i = 7; i >= 0; i--) |
713 | data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0); | 861 | data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0); |
714 | 862 | ||
715 | *addr = data; | 863 | *addr = data; |
716 | addr += _gray_info.plane_size; | 864 | addr += _gray_info.plane_size; |
717 | test <<= 1; | 865 | test <<= 1; |
@@ -788,18 +936,18 @@ void gray_update_rect(int x, int y, int width, int height) | |||
788 | 936 | ||
789 | #if CONFIG_CPU == SH7034 | 937 | #if CONFIG_CPU == SH7034 |
790 | asm volatile ( | 938 | asm volatile ( |
791 | "mov.l @%[cbuf]+,r1 \n" | 939 | "mov.l @%[cbuf],r1 \n" |
792 | "mov.l @%[bbuf]+,r2 \n" | 940 | "mov.l @%[bbuf],r2 \n" |
793 | "xor r1,r2 \n" | 941 | "xor r1,r2 \n" |
794 | "mov.l @%[cbuf],r1 \n" | 942 | "mov.l @(4,%[cbuf]),r1 \n" |
795 | "mov.l @%[bbuf],%[chg] \n" | 943 | "mov.l @(4,%[bbuf]),%[chg] \n" |
796 | "xor r1,%[chg] \n" | 944 | "xor r1,%[chg] \n" |
797 | "or r2,%[chg] \n" | 945 | "or r2,%[chg] \n" |
798 | : /* outputs */ | 946 | : /* outputs */ |
799 | [cbuf]"+r"(cbuf), | ||
800 | [bbuf]"+r"(bbuf), | ||
801 | [chg] "=r"(change) | 947 | [chg] "=r"(change) |
802 | : /* inputs */ | 948 | : /* inputs */ |
949 | [cbuf]"r"(cbuf), | ||
950 | [bbuf]"r"(bbuf) | ||
803 | : /* clobbers */ | 951 | : /* clobbers */ |
804 | "r1", "r2" | 952 | "r1", "r2" |
805 | ); | 953 | ); |
@@ -810,13 +958,11 @@ void gray_update_rect(int x, int y, int width, int height) | |||
810 | unsigned mask, trash; | 958 | unsigned mask, trash; |
811 | 959 | ||
812 | pat_ptr = &pat_stack[8]; | 960 | pat_ptr = &pat_stack[8]; |
813 | cbuf = _gray_info.cur_buffer + srcofs_row; | ||
814 | bbuf = _gray_info.back_buffer + srcofs_row; | ||
815 | 961 | ||
816 | /* precalculate the bit patterns with random shifts | 962 | /* precalculate the bit patterns with random shifts |
817 | * for all 8 pixels and put them on an extra "stack" */ | 963 | * for all 8 pixels and put them on an extra "stack" */ |
818 | asm volatile ( | 964 | asm volatile ( |
819 | "mov #8,r3 \n" /* loop count in r3: 8 pixels */ | 965 | "mov #8,r3 \n" /* loop count */ |
820 | 966 | ||
821 | ".ur_pre_loop: \n" | 967 | ".ur_pre_loop: \n" |
822 | "mov.b @%[cbuf]+,r0\n" /* read current buffer */ | 968 | "mov.b @%[cbuf]+,r0\n" /* read current buffer */ |
@@ -860,10 +1006,11 @@ void gray_update_rect(int x, int y, int width, int height) | |||
860 | "rotcr %[mask] \n" /* get mask bit */ | 1006 | "rotcr %[mask] \n" /* get mask bit */ |
861 | "mov.l r2,@-%[patp]\n" /* push on pattern stack */ | 1007 | "mov.l r2,@-%[patp]\n" /* push on pattern stack */ |
862 | 1008 | ||
863 | "add #-1,r3 \n" /* decrease loop count */ | 1009 | "add #-1,r3 \n" /* loop 8 times (pixel block) */ |
864 | "cmp/pl r3 \n" /* loop count > 0? */ | 1010 | "cmp/pl r3 \n" |
865 | "bt .ur_pre_loop\n" /* yes: loop */ | 1011 | "bt .ur_pre_loop\n" |
866 | "shlr8 %[mask] \n" | 1012 | |
1013 | "shlr8 %[mask] \n" /* shift mask to low byte */ | ||
867 | "shlr16 %[mask] \n" | 1014 | "shlr16 %[mask] \n" |
868 | : /* outputs */ | 1015 | : /* outputs */ |
869 | [cbuf]"+r"(cbuf), | 1016 | [cbuf]"+r"(cbuf), |
@@ -885,77 +1032,77 @@ void gray_update_rect(int x, int y, int width, int height) | |||
885 | /* set the bits for all 8 pixels in all bytes according to the | 1032 | /* set the bits for all 8 pixels in all bytes according to the |
886 | * precalculated patterns on the pattern stack */ | 1033 | * precalculated patterns on the pattern stack */ |
887 | asm volatile ( | 1034 | asm volatile ( |
888 | "mov.l @%[patp]+,r1\n" /* pop all 8 patterns */ | 1035 | "mov.l @%[patp]+,r1 \n" /* pop all 8 patterns */ |
889 | "mov.l @%[patp]+,r2\n" | 1036 | "mov.l @%[patp]+,r2 \n" |
890 | "mov.l @%[patp]+,r3\n" | 1037 | "mov.l @%[patp]+,r3 \n" |
891 | "mov.l @%[patp]+,r6\n" | 1038 | "mov.l @%[patp]+,r6 \n" |
892 | "mov.l @%[patp]+,r7\n" | 1039 | "mov.l @%[patp]+,r7 \n" |
893 | "mov.l @%[patp]+,r8\n" | 1040 | "mov.l @%[patp]+,r8 \n" |
894 | "mov.l @%[patp]+,r9\n" | 1041 | "mov.l @%[patp]+,r9 \n" |
895 | "mov.l @%[patp],r10\n" | 1042 | "mov.l @%[patp],r10 \n" |
896 | 1043 | ||
897 | "tst %[mask],%[mask] \n" /* nothing to keep? */ | 1044 | "tst %[mask],%[mask] \n" |
898 | "bt .ur_sloop \n" /* yes: jump to short loop */ | 1045 | "bt .ur_sloop \n" /* short loop if nothing to keep */ |
899 | 1046 | ||
900 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | 1047 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ |
901 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ | 1048 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ |
902 | "rotcl r0 \n" /* rotate t bit into r0 */ | 1049 | "rotcl r0 \n" /* rotate t bit into r0 */ |
903 | "shlr r2 \n" | 1050 | "shlr r2 \n" |
904 | "rotcl r0 \n" | 1051 | "rotcl r0 \n" |
905 | "shlr r3 \n" | 1052 | "shlr r3 \n" |
906 | "rotcl r0 \n" | 1053 | "rotcl r0 \n" |
907 | "shlr r6 \n" | 1054 | "shlr r6 \n" |
908 | "rotcl r0 \n" | 1055 | "rotcl r0 \n" |
909 | "shlr r7 \n" | 1056 | "shlr r7 \n" |
910 | "rotcl r0 \n" | 1057 | "rotcl r0 \n" |
911 | "shlr r8 \n" | 1058 | "shlr r8 \n" |
912 | "rotcl r0 \n" | 1059 | "rotcl r0 \n" |
913 | "shlr r9 \n" | 1060 | "shlr r9 \n" |
914 | "rotcl r0 \n" | 1061 | "rotcl r0 \n" |
915 | "shlr r10 \n" | 1062 | "shlr r10 \n" |
916 | "mov.b @%[addr],%[rx] \n" /* read old value */ | 1063 | "mov.b @%[addr],%[rx] \n" /* read old value */ |
917 | "rotcl r0 \n" | 1064 | "rotcl r0 \n" |
918 | "and %[mask],%[rx] \n" /* mask out unneeded bits */ | 1065 | "and %[mask],%[rx] \n" /* mask out replaced bits */ |
919 | "or %[rx],r0 \n" /* set new bits */ | 1066 | "or %[rx],r0 \n" /* set new bits */ |
920 | "mov.b r0,@%[addr] \n" /* store value to bitplane */ | 1067 | "mov.b r0,@%[addr] \n" /* store value to bitplane */ |
921 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ | 1068 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ |
922 | "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ | 1069 | "cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */ |
923 | "bt .ur_floop \n" /* no: loop */ | 1070 | "bt .ur_floop \n" |
924 | 1071 | ||
925 | "bra .ur_end \n" | 1072 | "bra .ur_end \n" |
926 | "nop \n" | 1073 | "nop \n" |
927 | 1074 | ||
928 | /* References to C library routines used in the precalc block */ | 1075 | /* References to C library routines used in the precalc block */ |
929 | ".align 2 \n" | 1076 | ".align 2 \n" |
930 | ".ashlsi3: \n" /* C library routine: */ | 1077 | ".ashlsi3: \n" /* C library routine: */ |
931 | ".long ___ashlsi3 \n" /* shift r4 left by r5, res. in r0 */ | 1078 | ".long ___ashlsi3 \n" /* shift r4 left by r5, res. in r0 */ |
932 | ".lshrsi3: \n" /* C library routine: */ | 1079 | ".lshrsi3: \n" /* C library routine: */ |
933 | ".long ___lshrsi3 \n" /* shift r4 right by r5, res. in r0 */ | 1080 | ".long ___lshrsi3 \n" /* shift r4 right by r5, res. in r0 */ |
934 | /* both routines preserve r4, destroy r5 and take ~16 cycles */ | 1081 | /* both routines preserve r4, destroy r5 and take ~16 cycles */ |
935 | 1082 | ||
936 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 1083 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ |
937 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ | 1084 | "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ |
938 | "rotcl r0 \n" /* rotate t bit into r0 */ | 1085 | "rotcl r0 \n" /* rotate t bit into r0 */ |
939 | "shlr r2 \n" | 1086 | "shlr r2 \n" |
940 | "rotcl r0 \n" | 1087 | "rotcl r0 \n" |
941 | "shlr r3 \n" | 1088 | "shlr r3 \n" |
942 | "rotcl r0 \n" | 1089 | "rotcl r0 \n" |
943 | "shlr r6 \n" | 1090 | "shlr r6 \n" |
944 | "rotcl r0 \n" | 1091 | "rotcl r0 \n" |
945 | "shlr r7 \n" | 1092 | "shlr r7 \n" |
946 | "rotcl r0 \n" | 1093 | "rotcl r0 \n" |
947 | "shlr r8 \n" | 1094 | "shlr r8 \n" |
948 | "rotcl r0 \n" | 1095 | "rotcl r0 \n" |
949 | "shlr r9 \n" | 1096 | "shlr r9 \n" |
950 | "rotcl r0 \n" | 1097 | "rotcl r0 \n" |
951 | "shlr r10 \n" | 1098 | "shlr r10 \n" |
952 | "rotcl r0 \n" | 1099 | "rotcl r0 \n" |
953 | "mov.b r0,@%[addr] \n" /* store byte to bitplane */ | 1100 | "mov.b r0,@%[addr] \n" /* store byte to bitplane */ |
954 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ | 1101 | "add %[psiz],%[addr] \n" /* advance to next bitplane */ |
955 | "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ | 1102 | "cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */ |
956 | "bt .ur_sloop \n" /* no: loop */ | 1103 | "bt .ur_sloop \n" |
957 | 1104 | ||
958 | ".ur_end: \n" | 1105 | ".ur_end: \n" |
959 | : /* outputs */ | 1106 | : /* outputs */ |
960 | [addr]"+r"(addr), | 1107 | [addr]"+r"(addr), |
961 | [mask]"+r"(mask), | 1108 | [mask]"+r"(mask), |
@@ -970,18 +1117,18 @@ void gray_update_rect(int x, int y, int width, int height) | |||
970 | } | 1117 | } |
971 | #elif defined(CPU_COLDFIRE) | 1118 | #elif defined(CPU_COLDFIRE) |
972 | asm volatile ( | 1119 | asm volatile ( |
973 | "move.l (%[cbuf])+,%%d0 \n" | 1120 | "move.l (%[cbuf]),%%d0 \n" |
974 | "move.l (%[bbuf])+,%%d1 \n" | 1121 | "move.l (%[bbuf]),%%d1 \n" |
975 | "eor.l %%d0,%%d1 \n" | 1122 | "eor.l %%d0,%%d1 \n" |
976 | "move.l (%[cbuf]),%%d0 \n" | 1123 | "move.l (4,%[cbuf]),%%d0 \n" |
977 | "move.l (%[bbuf]),%[chg]\n" | 1124 | "move.l (4,%[bbuf]),%[chg] \n" |
978 | "eor.l %%d0,%[chg] \n" | 1125 | "eor.l %%d0,%[chg] \n" |
979 | "or.l %%d1,%[chg] \n" | 1126 | "or.l %%d1,%[chg] \n" |
980 | : /* outputs */ | 1127 | : /* outputs */ |
981 | [cbuf]"+a"(cbuf), | ||
982 | [bbuf]"+a"(bbuf), | ||
983 | [chg] "=&d"(change) | 1128 | [chg] "=&d"(change) |
984 | : /* inputs */ | 1129 | : /* inputs */ |
1130 | [cbuf]"a"(cbuf), | ||
1131 | [bbuf]"a"(bbuf) | ||
985 | : /* clobbers */ | 1132 | : /* clobbers */ |
986 | "d0", "d1" | 1133 | "d0", "d1" |
987 | ); | 1134 | ); |
@@ -992,54 +1139,52 @@ void gray_update_rect(int x, int y, int width, int height) | |||
992 | unsigned mask, trash; | 1139 | unsigned mask, trash; |
993 | 1140 | ||
994 | pat_ptr = &pat_stack[8]; | 1141 | pat_ptr = &pat_stack[8]; |
995 | cbuf = _gray_info.cur_buffer + srcofs_row; | ||
996 | bbuf = _gray_info.back_buffer + srcofs_row; | ||
997 | 1142 | ||
998 | /* precalculate the bit patterns with random shifts | 1143 | /* precalculate the bit patterns with random shifts |
999 | * for all 8 pixels and put them on an extra "stack" */ | 1144 | * for all 8 pixels and put them on an extra "stack" */ |
1000 | asm volatile ( | 1145 | asm volatile ( |
1001 | "moveq.l #8,%%d3 \n" /* loop count in d3: 8 pixels */ | 1146 | "moveq.l #8,%%d3 \n" /* loop count */ |
1002 | "clr.l %[mask] \n" | 1147 | "clr.l %[mask] \n" |
1003 | 1148 | ||
1004 | ".ur_pre_loop: \n" | 1149 | ".ur_pre_loop: \n" |
1005 | "clr.l %%d0 \n" | 1150 | "clr.l %%d0 \n" |
1006 | "move.b (%[cbuf])+,%%d0 \n" /* read current buffer */ | 1151 | "move.b (%[cbuf])+,%%d0 \n" /* read current buffer */ |
1007 | "clr.l %%d1 \n" | 1152 | "clr.l %%d1 \n" |
1008 | "move.b (%[bbuf]),%%d1 \n" /* read back buffer */ | 1153 | "move.b (%[bbuf]),%%d1 \n" /* read back buffer */ |
1009 | "move.b %%d0,(%[bbuf])+ \n" /* update back buffer */ | 1154 | "move.b %%d0,(%[bbuf])+ \n" /* update back buffer */ |
1010 | "clr.l %%d2 \n" /* preset for skipped pixel */ | 1155 | "clr.l %%d2 \n" /* preset for skipped pixel */ |
1011 | "cmp.l %%d0,%%d1 \n" /* no change? */ | 1156 | "cmp.l %%d0,%%d1 \n" /* no change? */ |
1012 | "beq.b .ur_skip \n" /* -> skip */ | 1157 | "beq.b .ur_skip \n" /* -> skip */ |
1013 | 1158 | ||
1014 | "move.l (%%d0:l:4,%[bpat]),%%d2 \n" /* d2 = bitpattern[byte]; */ | 1159 | "move.l (%%d0:l:4,%[bpat]),%%d2 \n" /* d2 = bitpattern[byte]; */ |
1015 | 1160 | ||
1016 | "mulu.w #75,%[rnd] \n" /* multiply by 75 */ | 1161 | "mulu.w #75,%[rnd] \n" /* multiply by 75 */ |
1017 | "add.l #74,%[rnd] \n" /* add another 74 */ | 1162 | "add.l #74,%[rnd] \n" /* add another 74 */ |
1018 | /* Since the lower bits are not very random: */ | 1163 | /* Since the lower bits are not very random: */ |
1019 | "move.l %[rnd],%%d1 \n" | 1164 | "move.l %[rnd],%%d1 \n" |
1020 | "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */ | 1165 | "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */ |
1021 | "and.l %[rmsk],%%d1\n" /* mask out unneeded bits */ | 1166 | "and.l %[rmsk],%%d1 \n" /* mask out unneeded bits */ |
1022 | 1167 | ||
1023 | "cmp.l %[dpth],%%d1\n" /* random >= depth ? */ | 1168 | "cmp.l %[dpth],%%d1 \n" /* random >= depth ? */ |
1024 | "blo.b .ur_ntrim \n" | 1169 | "blo.b .ur_ntrim \n" |
1025 | "sub.l %[dpth],%%d1\n" /* yes: random -= depth; */ | 1170 | "sub.l %[dpth],%%d1 \n" /* yes: random -= depth; */ |
1026 | ".ur_ntrim: \n" | 1171 | ".ur_ntrim: \n" |
1027 | 1172 | ||
1028 | "move.l %%d2,%%d0 \n" | 1173 | "move.l %%d2,%%d0 \n" /** rotate pattern **/ |
1029 | "lsl.l %%d1,%%d0 \n" | 1174 | "lsl.l %%d1,%%d0 \n" |
1030 | "sub.l %[dpth],%%d1\n" | 1175 | "sub.l %[dpth],%%d1 \n" |
1031 | "neg.l %%d1 \n" /* d1 = depth - d1 */ | 1176 | "neg.l %%d1 \n" /* d1 = depth - d1 */ |
1032 | "lsr.l %%d1,%%d2 \n" | 1177 | "lsr.l %%d1,%%d2 \n" |
1033 | "or.l %%d0,%%d2 \n" /* rotated_pattern = d2 | d0 */ | 1178 | "or.l %%d0,%%d2 \n" /* rotated_pattern = d2 | d0 */ |
1034 | 1179 | ||
1035 | "or.l #0x0100,%[mask] \n" /* set mask bit */ | 1180 | "or.l #0x0100,%[mask] \n" /* set mask bit */ |
1036 | 1181 | ||
1037 | ".ur_skip: \n" | 1182 | ".ur_skip: \n" |
1038 | "lsr.l #1,%[mask] \n" /* shift mask */ | 1183 | "lsr.l #1,%[mask] \n" /* shift mask */ |
1039 | "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */ | 1184 | "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */ |
1040 | 1185 | ||
1041 | "subq.l #1,%%d3 \n" /* decrease loop count */ | 1186 | "subq.l #1,%%d3 \n" /* loop 8 times (pixel block) */ |
1042 | "bne.b .ur_pre_loop\n" /* yes: loop */ | 1187 | "bne.b .ur_pre_loop \n" |
1043 | : /* outputs */ | 1188 | : /* outputs */ |
1044 | [cbuf]"+a"(cbuf), | 1189 | [cbuf]"+a"(cbuf), |
1045 | [bbuf]"+a"(bbuf), | 1190 | [bbuf]"+a"(bbuf), |
@@ -1061,79 +1206,79 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1061 | * precalculated patterns on the pattern stack */ | 1206 | * precalculated patterns on the pattern stack */ |
1062 | asm volatile ( | 1207 | asm volatile ( |
1063 | "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n" | 1208 | "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n" |
1064 | /* pop all 8 patterns */ | 1209 | /* pop all 8 patterns */ |
1065 | "not.l %[mask] \n" /* set mask -> keep mask */ | 1210 | "not.l %[mask] \n" /* "set" mask -> "keep" mask */ |
1066 | "and.l #0xFF,%[mask] \n" | 1211 | "and.l #0xFF,%[mask] \n" |
1067 | "beq.b .ur_sstart \n" /* yes: jump to short loop */ | 1212 | "beq.b .ur_sstart \n" /* short loop if nothing to keep */ |
1068 | 1213 | ||
1069 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ | 1214 | ".ur_floop: \n" /** full loop (there are bits to keep)**/ |
1070 | "clr.l %%d0 \n" | 1215 | "clr.l %%d0 \n" |
1071 | "lsr.l #1,%%d2 \n" /* shift out mask bit */ | 1216 | "lsr.l #1,%%d2 \n" /* shift out pattern bit */ |
1072 | "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ | 1217 | "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */ |
1073 | "lsr.l #1,%%d3 \n" | 1218 | "lsr.l #1,%%d3 \n" |
1074 | "addx.l %%d0,%%d0 \n" | 1219 | "addx.l %%d0,%%d0 \n" |
1075 | "lsr.l #1,%%d4 \n" | 1220 | "lsr.l #1,%%d4 \n" |
1076 | "addx.l %%d0,%%d0 \n" | 1221 | "addx.l %%d0,%%d0 \n" |
1077 | "lsr.l #1,%%d5 \n" | 1222 | "lsr.l #1,%%d5 \n" |
1078 | "addx.l %%d0,%%d0 \n" | 1223 | "addx.l %%d0,%%d0 \n" |
1079 | "lsr.l #1,%%d6 \n" | 1224 | "lsr.l #1,%%d6 \n" |
1080 | "addx.l %%d0,%%d0 \n" | 1225 | "addx.l %%d0,%%d0 \n" |
1081 | "move.l %%a0,%%d1 \n" | 1226 | "move.l %%a0,%%d1 \n" |
1082 | "lsr.l #1,%%d1 \n" | 1227 | "lsr.l #1,%%d1 \n" |
1083 | "addx.l %%d0,%%d0 \n" | 1228 | "addx.l %%d0,%%d0 \n" |
1084 | "move.l %%d1,%%a0 \n" | 1229 | "move.l %%d1,%%a0 \n" |
1085 | "move.l %%a1,%%d1 \n" | 1230 | "move.l %%a1,%%d1 \n" |
1086 | "lsr.l #1,%%d1 \n" | 1231 | "lsr.l #1,%%d1 \n" |
1087 | "addx.l %%d0,%%d0 \n" | 1232 | "addx.l %%d0,%%d0 \n" |
1088 | "move.l %%d1,%%a1 \n" | 1233 | "move.l %%d1,%%a1 \n" |
1089 | "move.l %[ax],%%d1 \n" | 1234 | "move.l %[ax],%%d1 \n" |
1090 | "lsr.l #1,%%d1 \n" | 1235 | "lsr.l #1,%%d1 \n" |
1091 | "addx.l %%d0,%%d0 \n" | 1236 | "addx.l %%d0,%%d0 \n" |
1092 | "move.l %%d1,%[ax] \n" | 1237 | "move.l %%d1,%[ax] \n" |
1093 | 1238 | ||
1094 | "move.b (%[addr]),%%d1 \n" /* read old value */ | 1239 | "move.b (%[addr]),%%d1 \n" /* read old value */ |
1095 | "and.l %[mask],%%d1 \n" /* mask out unneeded bits */ | 1240 | "and.l %[mask],%%d1 \n" /* mask out replaced bits */ |
1096 | "or.l %%d0,%%d1 \n" /* set new bits */ | 1241 | "or.l %%d0,%%d1 \n" /* set new bits */ |
1097 | "move.b %%d1,(%[addr]) \n" /* store value to bitplane */ | 1242 | "move.b %%d1,(%[addr]) \n" /* store value to bitplane */ |
1098 | 1243 | ||
1099 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ | 1244 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ |
1100 | "cmp.l %[addr],%[end] \n" /* last bitplane done? */ | 1245 | "cmp.l %[addr],%[end] \n" /* loop through all bitplanes */ |
1101 | "bhi.b .ur_floop \n" /* no: loop */ | 1246 | "bhi.b .ur_floop \n" |
1102 | 1247 | ||
1103 | "bra.b .ur_end \n" | 1248 | "bra.b .ur_end \n" |
1104 | 1249 | ||
1105 | ".ur_sstart: \n" | 1250 | ".ur_sstart: \n" |
1106 | "move.l %%a0,%[mask]\n" /* mask isn't needed here, reuse reg */ | 1251 | "move.l %%a0,%[mask] \n" /* mask isn't needed here, reuse reg */ |
1107 | 1252 | ||
1108 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ | 1253 | ".ur_sloop: \n" /** short loop (nothing to keep) **/ |
1109 | "clr.l %%d0 \n" | 1254 | "clr.l %%d0 \n" |
1110 | "lsr.l #1,%%d2 \n" /* shift out mask bit */ | 1255 | "lsr.l #1,%%d2 \n" /* shift out pattern bit */ |
1111 | "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ | 1256 | "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */ |
1112 | "lsr.l #1,%%d3 \n" | 1257 | "lsr.l #1,%%d3 \n" |
1113 | "addx.l %%d0,%%d0 \n" | 1258 | "addx.l %%d0,%%d0 \n" |
1114 | "lsr.l #1,%%d4 \n" | 1259 | "lsr.l #1,%%d4 \n" |
1115 | "addx.l %%d0,%%d0 \n" | 1260 | "addx.l %%d0,%%d0 \n" |
1116 | "lsr.l #1,%%d5 \n" | 1261 | "lsr.l #1,%%d5 \n" |
1117 | "addx.l %%d0,%%d0 \n" | 1262 | "addx.l %%d0,%%d0 \n" |
1118 | "lsr.l #1,%%d6 \n" | 1263 | "lsr.l #1,%%d6 \n" |
1119 | "addx.l %%d0,%%d0 \n" | 1264 | "addx.l %%d0,%%d0 \n" |
1120 | "lsr.l #1,%[mask] \n" | 1265 | "lsr.l #1,%[mask] \n" |
1121 | "addx.l %%d0,%%d0 \n" | 1266 | "addx.l %%d0,%%d0 \n" |
1122 | "move.l %%a1,%%d1 \n" | 1267 | "move.l %%a1,%%d1 \n" |
1123 | "lsr.l #1,%%d1 \n" | 1268 | "lsr.l #1,%%d1 \n" |
1124 | "addx.l %%d0,%%d0 \n" | 1269 | "addx.l %%d0,%%d0 \n" |
1125 | "move.l %%d1,%%a1 \n" | 1270 | "move.l %%d1,%%a1 \n" |
1126 | "move.l %[ax],%%d1 \n" | 1271 | "move.l %[ax],%%d1 \n" |
1127 | "lsr.l #1,%%d1 \n" | 1272 | "lsr.l #1,%%d1 \n" |
1128 | "addx.l %%d0,%%d0 \n" | 1273 | "addx.l %%d0,%%d0 \n" |
1129 | "move.l %%d1,%[ax] \n" | 1274 | "move.l %%d1,%[ax] \n" |
1130 | 1275 | ||
1131 | "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */ | 1276 | "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */ |
1132 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ | 1277 | "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ |
1133 | "cmp.l %[addr],%[end] \n" /* last bitplane done? */ | 1278 | "cmp.l %[addr],%[end] \n" /* loop through all bitplanes */ |
1134 | "bhi.b .ur_sloop \n" /* no: loop */ | 1279 | "bhi.b .ur_sloop \n" |
1135 | 1280 | ||
1136 | ".ur_end: \n" | 1281 | ".ur_end: \n" |
1137 | : /* outputs */ | 1282 | : /* outputs */ |
1138 | [addr]"+a"(addr), | 1283 | [addr]"+a"(addr), |
1139 | [mask]"+d"(mask), | 1284 | [mask]"+d"(mask), |
@@ -1151,9 +1296,7 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1151 | (void)pat_ptr; | 1296 | (void)pat_ptr; |
1152 | /* check whether anything changed in the 8-pixel block */ | 1297 | /* check whether anything changed in the 8-pixel block */ |
1153 | change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; | 1298 | change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; |
1154 | cbuf += sizeof(uint32_t); | 1299 | change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4); |
1155 | bbuf += sizeof(uint32_t); | ||
1156 | change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; | ||
1157 | 1300 | ||
1158 | if (change != 0) | 1301 | if (change != 0) |
1159 | { | 1302 | { |
@@ -1162,9 +1305,6 @@ void gray_update_rect(int x, int y, int width, int height) | |||
1162 | unsigned test = 1; | 1305 | unsigned test = 1; |
1163 | int i; | 1306 | int i; |
1164 | 1307 | ||
1165 | cbuf = _gray_info.cur_buffer + srcofs_row; | ||
1166 | bbuf = _gray_info.back_buffer + srcofs_row; | ||
1167 | |||
1168 | /* precalculate the bit patterns with random shifts | 1308 | /* precalculate the bit patterns with random shifts |
1169 | * for all 8 pixels and put them on an extra "stack" */ | 1309 | * for all 8 pixels and put them on an extra "stack" */ |
1170 | for (i = 0; i < 8; i++) | 1310 | for (i = 0; i < 8; i++) |