summary refs log tree commit diff
path: root/apps/plugins/lib/gray_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'apps/plugins/lib/gray_core.c')
-rw-r--r-- apps/plugins/lib/gray_core.c 536
1 files changed, 338 insertions, 198 deletions
diff --git a/apps/plugins/lib/gray_core.c b/apps/plugins/lib/gray_core.c
index c253a7112e..c162349f76 100644
--- a/apps/plugins/lib/gray_core.c
+++ b/apps/plugins/lib/gray_core.c
@@ -648,14 +648,165 @@ void gray_update_rect(int x, int y, int width, int height)
648 cbuf = _gray_info.cur_buffer + srcofs_row; 648 cbuf = _gray_info.cur_buffer + srcofs_row;
649 bbuf = _gray_info.back_buffer + srcofs_row; 649 bbuf = _gray_info.back_buffer + srcofs_row;
650 650
651#if 0 /* CPU specific asm versions will go here */ 651#ifdef CPU_ARM
652 asm volatile (
653 "ldr r0, [%[cbuf]] \n"
654 "ldr r1, [%[bbuf]] \n"
655 "eor r1, r0, r1 \n"
656 "ldr r0, [%[cbuf], #4] \n"
657 "ldr %[chg], [%[bbuf], #4] \n"
658 "eor %[chg], r0, %[chg] \n"
659 "orr %[chg], %[chg], r1 \n"
660 : /* outputs */
661 [chg] "=&r"(change)
662 : /* inputs */
663 [cbuf]"r"(cbuf),
664 [bbuf]"r"(bbuf)
665 : /* clobbers */
666 "r0", "r1"
667 );
668
669 if (change != 0)
670 {
671 unsigned char *addr, *end;
672 unsigned mask, trash;
673
674 pat_ptr = &pat_stack[8];
675
676 /* precalculate the bit patterns with random shifts
677 * for all 8 pixels and put them on an extra "stack" */
678 asm volatile (
679 "mov r3, #8 \n" /* loop count */
680 "mov %[mask], #0 \n"
681
682 ".ur_pre_loop: \n"
683 "mov %[mask], %[mask], lsl #1 \n" /* shift mask */
684 "ldrb r0, [%[cbuf]], #1 \n" /* read current buffer */
685 "ldrb r1, [%[bbuf]] \n" /* read back buffer */
686 "strb r0, [%[bbuf]], #1 \n" /* update back buffer */
687 "mov r2, #0 \n" /* preset for skipped pixel */
688 "cmp r0, r1 \n" /* no change? */
689 "beq .ur_skip \n" /* -> skip */
690
691 "ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */
692
693 "add r0, %[rnd], %[rnd], lsl #3 \n" /* multiply by 75 */
694 "add %[rnd], %[rnd], %[rnd], lsl #1 \n"
695 "add %[rnd], %[rnd], r0, lsl #3 \n"
696 "add %[rnd], %[rnd], #74 \n" /* add another 74 */
697 /* Since the lower bits are not very random: get bits 8..15 (need max. 5) */
698 "and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */
699
700 "cmp r1, %[dpth] \n" /* random >= depth ? */
701 "subhs r1, r1, %[dpth] \n" /* yes: random -= depth */
702
703 "mov r0, r2, lsl r1 \n" /** rotate pattern **/
704 "sub r1, %[dpth], r1 \n"
705 "orr r2, r0, r2, lsr r1 \n"
706
707 "orr %[mask], %[mask], #1 \n" /* set mask bit */
708
709 ".ur_skip: \n"
710 "str r2, [%[patp], #-4]! \n" /* push on pattern stack */
711
712 "subs r3, r3, #1 \n" /* loop 8 times (pixel block) */
713 "bne .ur_pre_loop \n"
714 : /* outputs */
715 [cbuf]"+r"(cbuf),
716 [bbuf]"+r"(bbuf),
717 [patp]"+r"(pat_ptr),
718 [rnd] "+r"(_gray_random_buffer),
719 [mask]"=&r"(mask)
720 : /* inputs */
721 [bpat]"r"(_gray_info.bitpattern),
722 [dpth]"r"(_gray_info.depth),
723 [rmsk]"r"(_gray_info.randmask)
724 : /* clobbers */
725 "r0", "r1", "r2", "r3"
726 );
727
728 addr = dst_row;
729 end = addr + MULU16(_gray_info.depth, _gray_info.plane_size);
730
731 /* set the bits for all 8 pixels in all bytes according to the
732 * precalculated patterns on the pattern stack */
733 asm volatile (
734 "ldmia %[patp], {r2 - r8, %[rx]} \n" /* pop all 8 patterns */
735
736 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
737 "ands %[mask], %[mask], #0xff \n"
738 "beq .ur_sloop \n" /* short loop if nothing to keep */
739
740 ".ur_floop: \n" /** full loop (there are bits to keep)**/
741 "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
742 "adc r0, r0, r0 \n" /* put bit into LSB for byte */
743 "movs r8, r8, lsr #1 \n"
744 "adc r0, r0, r0 \n"
745 "movs r7, r7, lsr #1 \n"
746 "adc r0, r0, r0 \n"
747 "movs r6, r6, lsr #1 \n"
748 "adc r0, r0, r0 \n"
749 "movs r5, r5, lsr #1 \n"
750 "adc r0, r0, r0 \n"
751 "movs r4, r4, lsr #1 \n"
752 "adc r0, r0, r0 \n"
753 "movs r3, r3, lsr #1 \n"
754 "adc r0, r0, r0 \n"
755 "movs r2, r2, lsr #1 \n"
756 "adc r0, r0, r0 \n"
757
758 "ldrb r1, [%[addr]] \n" /* read old value */
759 "and r1, r1, %[mask] \n" /* mask out replaced bits */
760 "orr r1, r1, r0 \n" /* set new bits */
761 "strb r1, [%[addr]], %[psiz] \n" /* store value, advance to next bpl */
762
763 "cmp %[end], %[addr] \n" /* loop for all bitplanes */
764 "bne .ur_floop \n"
765
766 "b .ur_end \n"
767
768 ".ur_sloop: \n" /** short loop (nothing to keep) **/
769 "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
770 "adc r0, r0, r0 \n" /* put bit into LSB for byte */
771 "movs r8, r8, lsr #1 \n"
772 "adc r0, r0, r0 \n"
773 "movs r7, r7, lsr #1 \n"
774 "adc r0, r0, r0 \n"
775 "movs r6, r6, lsr #1 \n"
776 "adc r0, r0, r0 \n"
777 "movs r5, r5, lsr #1 \n"
778 "adc r0, r0, r0 \n"
779 "movs r4, r4, lsr #1 \n"
780 "adc r0, r0, r0 \n"
781 "movs r3, r3, lsr #1 \n"
782 "adc r0, r0, r0 \n"
783 "movs r2, r2, lsr #1 \n"
784 "adc r0, r0, r0 \n"
785
786 "strb r0, [%[addr]], %[psiz] \n" /* store byte, advance to next bpl */
787
788 "cmp %[end], %[addr] \n" /* loop for all bitplanes */
789 "bne .ur_sloop \n"
790
791 ".ur_end: \n"
792 : /* outputs */
793 [addr]"+r"(addr),
794 [mask]"+r"(mask),
795 [rx] "=&r"(trash)
796 : /* inputs */
797 [psiz]"r"(_gray_info.plane_size),
798 [end] "r"(end),
799 [patp]"[rx]"(pat_ptr)
800 : /* clobbers */
801 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
802 );
803 }
652#else /* C version, for reference*/ 804#else /* C version, for reference*/
805#warning C version of gray_update_rect() used
653 (void)pat_ptr; 806 (void)pat_ptr;
654 /* check whether anything changed in the 8-pixel block */ 807 /* check whether anything changed in the 8-pixel block */
655 change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; 808 change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
656 cbuf += sizeof(uint32_t); 809 change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4);
657 bbuf += sizeof(uint32_t);
658 change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
659 810
660 if (change != 0) 811 if (change != 0)
661 { 812 {
@@ -664,9 +815,6 @@ void gray_update_rect(int x, int y, int width, int height)
664 unsigned test = 1; 815 unsigned test = 1;
665 int i; 816 int i;
666 817
667 cbuf = _gray_info.cur_buffer + srcofs_row;
668 bbuf = _gray_info.back_buffer + srcofs_row;
669
670 /* precalculate the bit patterns with random shifts 818 /* precalculate the bit patterns with random shifts
671 * for all 8 pixels and put them on an extra "stack" */ 819 * for all 8 pixels and put them on an extra "stack" */
672 for (i = 7; i >= 0; i--) 820 for (i = 7; i >= 0; i--)
@@ -711,7 +859,7 @@ void gray_update_rect(int x, int y, int width, int height)
711 859
712 for (i = 7; i >= 0; i--) 860 for (i = 7; i >= 0; i--)
713 data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0); 861 data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0);
714 862
715 *addr = data; 863 *addr = data;
716 addr += _gray_info.plane_size; 864 addr += _gray_info.plane_size;
717 test <<= 1; 865 test <<= 1;
@@ -788,18 +936,18 @@ void gray_update_rect(int x, int y, int width, int height)
788 936
789#if CONFIG_CPU == SH7034 937#if CONFIG_CPU == SH7034
790 asm volatile ( 938 asm volatile (
791 "mov.l @%[cbuf]+,r1 \n" 939 "mov.l @%[cbuf],r1 \n"
792 "mov.l @%[bbuf]+,r2 \n" 940 "mov.l @%[bbuf],r2 \n"
793 "xor r1,r2 \n" 941 "xor r1,r2 \n"
794 "mov.l @%[cbuf],r1 \n" 942 "mov.l @(4,%[cbuf]),r1 \n"
795 "mov.l @%[bbuf],%[chg] \n" 943 "mov.l @(4,%[bbuf]),%[chg] \n"
796 "xor r1,%[chg] \n" 944 "xor r1,%[chg] \n"
797 "or r2,%[chg] \n" 945 "or r2,%[chg] \n"
798 : /* outputs */ 946 : /* outputs */
799 [cbuf]"+r"(cbuf),
800 [bbuf]"+r"(bbuf),
801 [chg] "=r"(change) 947 [chg] "=r"(change)
802 : /* inputs */ 948 : /* inputs */
949 [cbuf]"r"(cbuf),
950 [bbuf]"r"(bbuf)
803 : /* clobbers */ 951 : /* clobbers */
804 "r1", "r2" 952 "r1", "r2"
805 ); 953 );
@@ -810,13 +958,11 @@ void gray_update_rect(int x, int y, int width, int height)
810 unsigned mask, trash; 958 unsigned mask, trash;
811 959
812 pat_ptr = &pat_stack[8]; 960 pat_ptr = &pat_stack[8];
813 cbuf = _gray_info.cur_buffer + srcofs_row;
814 bbuf = _gray_info.back_buffer + srcofs_row;
815 961
816 /* precalculate the bit patterns with random shifts 962 /* precalculate the bit patterns with random shifts
817 * for all 8 pixels and put them on an extra "stack" */ 963 * for all 8 pixels and put them on an extra "stack" */
818 asm volatile ( 964 asm volatile (
819 "mov #8,r3 \n" /* loop count in r3: 8 pixels */ 965 "mov #8,r3 \n" /* loop count */
820 966
821 ".ur_pre_loop: \n" 967 ".ur_pre_loop: \n"
822 "mov.b @%[cbuf]+,r0\n" /* read current buffer */ 968 "mov.b @%[cbuf]+,r0\n" /* read current buffer */
@@ -860,10 +1006,11 @@ void gray_update_rect(int x, int y, int width, int height)
860 "rotcr %[mask] \n" /* get mask bit */ 1006 "rotcr %[mask] \n" /* get mask bit */
861 "mov.l r2,@-%[patp]\n" /* push on pattern stack */ 1007 "mov.l r2,@-%[patp]\n" /* push on pattern stack */
862 1008
863 "add #-1,r3 \n" /* decrease loop count */ 1009 "add #-1,r3 \n" /* loop 8 times (pixel block) */
864 "cmp/pl r3 \n" /* loop count > 0? */ 1010 "cmp/pl r3 \n"
865 "bt .ur_pre_loop\n" /* yes: loop */ 1011 "bt .ur_pre_loop\n"
866 "shlr8 %[mask] \n" 1012
1013 "shlr8 %[mask] \n" /* shift mask to low byte */
867 "shlr16 %[mask] \n" 1014 "shlr16 %[mask] \n"
868 : /* outputs */ 1015 : /* outputs */
869 [cbuf]"+r"(cbuf), 1016 [cbuf]"+r"(cbuf),
@@ -885,77 +1032,77 @@ void gray_update_rect(int x, int y, int width, int height)
885 /* set the bits for all 8 pixels in all bytes according to the 1032 /* set the bits for all 8 pixels in all bytes according to the
886 * precalculated patterns on the pattern stack */ 1033 * precalculated patterns on the pattern stack */
887 asm volatile ( 1034 asm volatile (
888 "mov.l @%[patp]+,r1\n" /* pop all 8 patterns */ 1035 "mov.l @%[patp]+,r1 \n" /* pop all 8 patterns */
889 "mov.l @%[patp]+,r2\n" 1036 "mov.l @%[patp]+,r2 \n"
890 "mov.l @%[patp]+,r3\n" 1037 "mov.l @%[patp]+,r3 \n"
891 "mov.l @%[patp]+,r6\n" 1038 "mov.l @%[patp]+,r6 \n"
892 "mov.l @%[patp]+,r7\n" 1039 "mov.l @%[patp]+,r7 \n"
893 "mov.l @%[patp]+,r8\n" 1040 "mov.l @%[patp]+,r8 \n"
894 "mov.l @%[patp]+,r9\n" 1041 "mov.l @%[patp]+,r9 \n"
895 "mov.l @%[patp],r10\n" 1042 "mov.l @%[patp],r10 \n"
896 1043
897 "tst %[mask],%[mask] \n" /* nothing to keep? */ 1044 "tst %[mask],%[mask] \n"
898 "bt .ur_sloop \n" /* yes: jump to short loop */ 1045 "bt .ur_sloop \n" /* short loop if nothing to keep */
899 1046
900 ".ur_floop: \n" /** full loop (there are bits to keep)**/ 1047 ".ur_floop: \n" /** full loop (there are bits to keep)**/
901 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ 1048 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
902 "rotcl r0 \n" /* rotate t bit into r0 */ 1049 "rotcl r0 \n" /* rotate t bit into r0 */
903 "shlr r2 \n" 1050 "shlr r2 \n"
904 "rotcl r0 \n" 1051 "rotcl r0 \n"
905 "shlr r3 \n" 1052 "shlr r3 \n"
906 "rotcl r0 \n" 1053 "rotcl r0 \n"
907 "shlr r6 \n" 1054 "shlr r6 \n"
908 "rotcl r0 \n" 1055 "rotcl r0 \n"
909 "shlr r7 \n" 1056 "shlr r7 \n"
910 "rotcl r0 \n" 1057 "rotcl r0 \n"
911 "shlr r8 \n" 1058 "shlr r8 \n"
912 "rotcl r0 \n" 1059 "rotcl r0 \n"
913 "shlr r9 \n" 1060 "shlr r9 \n"
914 "rotcl r0 \n" 1061 "rotcl r0 \n"
915 "shlr r10 \n" 1062 "shlr r10 \n"
916 "mov.b @%[addr],%[rx] \n" /* read old value */ 1063 "mov.b @%[addr],%[rx] \n" /* read old value */
917 "rotcl r0 \n" 1064 "rotcl r0 \n"
918 "and %[mask],%[rx] \n" /* mask out unneeded bits */ 1065 "and %[mask],%[rx] \n" /* mask out replaced bits */
919 "or %[rx],r0 \n" /* set new bits */ 1066 "or %[rx],r0 \n" /* set new bits */
920 "mov.b r0,@%[addr] \n" /* store value to bitplane */ 1067 "mov.b r0,@%[addr] \n" /* store value to bitplane */
921 "add %[psiz],%[addr] \n" /* advance to next bitplane */ 1068 "add %[psiz],%[addr] \n" /* advance to next bitplane */
922 "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ 1069 "cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */
923 "bt .ur_floop \n" /* no: loop */ 1070 "bt .ur_floop \n"
924 1071
925 "bra .ur_end \n" 1072 "bra .ur_end \n"
926 "nop \n" 1073 "nop \n"
927 1074
928 /* References to C library routines used in the precalc block */ 1075 /* References to C library routines used in the precalc block */
929 ".align 2 \n" 1076 ".align 2 \n"
930 ".ashlsi3: \n" /* C library routine: */ 1077 ".ashlsi3: \n" /* C library routine: */
931 ".long ___ashlsi3 \n" /* shift r4 left by r5, res. in r0 */ 1078 ".long ___ashlsi3 \n" /* shift r4 left by r5, res. in r0 */
932 ".lshrsi3: \n" /* C library routine: */ 1079 ".lshrsi3: \n" /* C library routine: */
933 ".long ___lshrsi3 \n" /* shift r4 right by r5, res. in r0 */ 1080 ".long ___lshrsi3 \n" /* shift r4 right by r5, res. in r0 */
934 /* both routines preserve r4, destroy r5 and take ~16 cycles */ 1081 /* both routines preserve r4, destroy r5 and take ~16 cycles */
935 1082
936 ".ur_sloop: \n" /** short loop (nothing to keep) **/ 1083 ".ur_sloop: \n" /** short loop (nothing to keep) **/
937 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ 1084 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
938 "rotcl r0 \n" /* rotate t bit into r0 */ 1085 "rotcl r0 \n" /* rotate t bit into r0 */
939 "shlr r2 \n" 1086 "shlr r2 \n"
940 "rotcl r0 \n" 1087 "rotcl r0 \n"
941 "shlr r3 \n" 1088 "shlr r3 \n"
942 "rotcl r0 \n" 1089 "rotcl r0 \n"
943 "shlr r6 \n" 1090 "shlr r6 \n"
944 "rotcl r0 \n" 1091 "rotcl r0 \n"
945 "shlr r7 \n" 1092 "shlr r7 \n"
946 "rotcl r0 \n" 1093 "rotcl r0 \n"
947 "shlr r8 \n" 1094 "shlr r8 \n"
948 "rotcl r0 \n" 1095 "rotcl r0 \n"
949 "shlr r9 \n" 1096 "shlr r9 \n"
950 "rotcl r0 \n" 1097 "rotcl r0 \n"
951 "shlr r10 \n" 1098 "shlr r10 \n"
952 "rotcl r0 \n" 1099 "rotcl r0 \n"
953 "mov.b r0,@%[addr] \n" /* store byte to bitplane */ 1100 "mov.b r0,@%[addr] \n" /* store byte to bitplane */
954 "add %[psiz],%[addr] \n" /* advance to next bitplane */ 1101 "add %[psiz],%[addr] \n" /* advance to next bitplane */
955 "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ 1102 "cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */
956 "bt .ur_sloop \n" /* no: loop */ 1103 "bt .ur_sloop \n"
957 1104
958 ".ur_end: \n" 1105 ".ur_end: \n"
959 : /* outputs */ 1106 : /* outputs */
960 [addr]"+r"(addr), 1107 [addr]"+r"(addr),
961 [mask]"+r"(mask), 1108 [mask]"+r"(mask),
@@ -970,18 +1117,18 @@ void gray_update_rect(int x, int y, int width, int height)
970 } 1117 }
971#elif defined(CPU_COLDFIRE) 1118#elif defined(CPU_COLDFIRE)
972 asm volatile ( 1119 asm volatile (
973 "move.l (%[cbuf])+,%%d0 \n" 1120 "move.l (%[cbuf]),%%d0 \n"
974 "move.l (%[bbuf])+,%%d1 \n" 1121 "move.l (%[bbuf]),%%d1 \n"
975 "eor.l %%d0,%%d1 \n" 1122 "eor.l %%d0,%%d1 \n"
976 "move.l (%[cbuf]),%%d0 \n" 1123 "move.l (4,%[cbuf]),%%d0 \n"
977 "move.l (%[bbuf]),%[chg]\n" 1124 "move.l (4,%[bbuf]),%[chg] \n"
978 "eor.l %%d0,%[chg] \n" 1125 "eor.l %%d0,%[chg] \n"
979 "or.l %%d1,%[chg] \n" 1126 "or.l %%d1,%[chg] \n"
980 : /* outputs */ 1127 : /* outputs */
981 [cbuf]"+a"(cbuf),
982 [bbuf]"+a"(bbuf),
983 [chg] "=&d"(change) 1128 [chg] "=&d"(change)
984 : /* inputs */ 1129 : /* inputs */
1130 [cbuf]"a"(cbuf),
1131 [bbuf]"a"(bbuf)
985 : /* clobbers */ 1132 : /* clobbers */
986 "d0", "d1" 1133 "d0", "d1"
987 ); 1134 );
@@ -992,54 +1139,52 @@ void gray_update_rect(int x, int y, int width, int height)
992 unsigned mask, trash; 1139 unsigned mask, trash;
993 1140
994 pat_ptr = &pat_stack[8]; 1141 pat_ptr = &pat_stack[8];
995 cbuf = _gray_info.cur_buffer + srcofs_row;
996 bbuf = _gray_info.back_buffer + srcofs_row;
997 1142
998 /* precalculate the bit patterns with random shifts 1143 /* precalculate the bit patterns with random shifts
999 * for all 8 pixels and put them on an extra "stack" */ 1144 * for all 8 pixels and put them on an extra "stack" */
1000 asm volatile ( 1145 asm volatile (
1001 "moveq.l #8,%%d3 \n" /* loop count in d3: 8 pixels */ 1146 "moveq.l #8,%%d3 \n" /* loop count */
1002 "clr.l %[mask] \n" 1147 "clr.l %[mask] \n"
1003 1148
1004 ".ur_pre_loop: \n" 1149 ".ur_pre_loop: \n"
1005 "clr.l %%d0 \n" 1150 "clr.l %%d0 \n"
1006 "move.b (%[cbuf])+,%%d0 \n" /* read current buffer */ 1151 "move.b (%[cbuf])+,%%d0 \n" /* read current buffer */
1007 "clr.l %%d1 \n" 1152 "clr.l %%d1 \n"
1008 "move.b (%[bbuf]),%%d1 \n" /* read back buffer */ 1153 "move.b (%[bbuf]),%%d1 \n" /* read back buffer */
1009 "move.b %%d0,(%[bbuf])+ \n" /* update back buffer */ 1154 "move.b %%d0,(%[bbuf])+ \n" /* update back buffer */
1010 "clr.l %%d2 \n" /* preset for skipped pixel */ 1155 "clr.l %%d2 \n" /* preset for skipped pixel */
1011 "cmp.l %%d0,%%d1 \n" /* no change? */ 1156 "cmp.l %%d0,%%d1 \n" /* no change? */
1012 "beq.b .ur_skip \n" /* -> skip */ 1157 "beq.b .ur_skip \n" /* -> skip */
1013 1158
1014 "move.l (%%d0:l:4,%[bpat]),%%d2 \n" /* d2 = bitpattern[byte]; */ 1159 "move.l (%%d0:l:4,%[bpat]),%%d2 \n" /* d2 = bitpattern[byte]; */
1015 1160
1016 "mulu.w #75,%[rnd] \n" /* multiply by 75 */ 1161 "mulu.w #75,%[rnd] \n" /* multiply by 75 */
1017 "add.l #74,%[rnd] \n" /* add another 74 */ 1162 "add.l #74,%[rnd] \n" /* add another 74 */
1018 /* Since the lower bits are not very random: */ 1163 /* Since the lower bits are not very random: */
1019 "move.l %[rnd],%%d1 \n" 1164 "move.l %[rnd],%%d1 \n"
1020 "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */ 1165 "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */
1021 "and.l %[rmsk],%%d1\n" /* mask out unneeded bits */ 1166 "and.l %[rmsk],%%d1 \n" /* mask out unneeded bits */
1022 1167
1023 "cmp.l %[dpth],%%d1\n" /* random >= depth ? */ 1168 "cmp.l %[dpth],%%d1 \n" /* random >= depth ? */
1024 "blo.b .ur_ntrim \n" 1169 "blo.b .ur_ntrim \n"
1025 "sub.l %[dpth],%%d1\n" /* yes: random -= depth; */ 1170 "sub.l %[dpth],%%d1 \n" /* yes: random -= depth; */
1026 ".ur_ntrim: \n" 1171 ".ur_ntrim: \n"
1027 1172
1028 "move.l %%d2,%%d0 \n" 1173 "move.l %%d2,%%d0 \n" /** rotate pattern **/
1029 "lsl.l %%d1,%%d0 \n" 1174 "lsl.l %%d1,%%d0 \n"
1030 "sub.l %[dpth],%%d1\n" 1175 "sub.l %[dpth],%%d1 \n"
1031 "neg.l %%d1 \n" /* d1 = depth - d1 */ 1176 "neg.l %%d1 \n" /* d1 = depth - d1 */
1032 "lsr.l %%d1,%%d2 \n" 1177 "lsr.l %%d1,%%d2 \n"
1033 "or.l %%d0,%%d2 \n" /* rotated_pattern = d2 | d0 */ 1178 "or.l %%d0,%%d2 \n" /* rotated_pattern = d2 | d0 */
1034 1179
1035 "or.l #0x0100,%[mask] \n" /* set mask bit */ 1180 "or.l #0x0100,%[mask] \n" /* set mask bit */
1036 1181
1037 ".ur_skip: \n" 1182 ".ur_skip: \n"
1038 "lsr.l #1,%[mask] \n" /* shift mask */ 1183 "lsr.l #1,%[mask] \n" /* shift mask */
1039 "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */ 1184 "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */
1040 1185
1041 "subq.l #1,%%d3 \n" /* decrease loop count */ 1186 "subq.l #1,%%d3 \n" /* loop 8 times (pixel block) */
1042 "bne.b .ur_pre_loop\n" /* yes: loop */ 1187 "bne.b .ur_pre_loop \n"
1043 : /* outputs */ 1188 : /* outputs */
1044 [cbuf]"+a"(cbuf), 1189 [cbuf]"+a"(cbuf),
1045 [bbuf]"+a"(bbuf), 1190 [bbuf]"+a"(bbuf),
@@ -1061,79 +1206,79 @@ void gray_update_rect(int x, int y, int width, int height)
1061 * precalculated patterns on the pattern stack */ 1206 * precalculated patterns on the pattern stack */
1062 asm volatile ( 1207 asm volatile (
1063 "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n" 1208 "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n"
1064 /* pop all 8 patterns */ 1209 /* pop all 8 patterns */
1065 "not.l %[mask] \n" /* set mask -> keep mask */ 1210 "not.l %[mask] \n" /* "set" mask -> "keep" mask */
1066 "and.l #0xFF,%[mask] \n" 1211 "and.l #0xFF,%[mask] \n"
1067 "beq.b .ur_sstart \n" /* yes: jump to short loop */ 1212 "beq.b .ur_sstart \n" /* short loop if nothing to keep */
1068 1213
1069 ".ur_floop: \n" /** full loop (there are bits to keep)**/ 1214 ".ur_floop: \n" /** full loop (there are bits to keep)**/
1070 "clr.l %%d0 \n" 1215 "clr.l %%d0 \n"
1071 "lsr.l #1,%%d2 \n" /* shift out mask bit */ 1216 "lsr.l #1,%%d2 \n" /* shift out pattern bit */
1072 "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ 1217 "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
1073 "lsr.l #1,%%d3 \n" 1218 "lsr.l #1,%%d3 \n"
1074 "addx.l %%d0,%%d0 \n" 1219 "addx.l %%d0,%%d0 \n"
1075 "lsr.l #1,%%d4 \n" 1220 "lsr.l #1,%%d4 \n"
1076 "addx.l %%d0,%%d0 \n" 1221 "addx.l %%d0,%%d0 \n"
1077 "lsr.l #1,%%d5 \n" 1222 "lsr.l #1,%%d5 \n"
1078 "addx.l %%d0,%%d0 \n" 1223 "addx.l %%d0,%%d0 \n"
1079 "lsr.l #1,%%d6 \n" 1224 "lsr.l #1,%%d6 \n"
1080 "addx.l %%d0,%%d0 \n" 1225 "addx.l %%d0,%%d0 \n"
1081 "move.l %%a0,%%d1 \n" 1226 "move.l %%a0,%%d1 \n"
1082 "lsr.l #1,%%d1 \n" 1227 "lsr.l #1,%%d1 \n"
1083 "addx.l %%d0,%%d0 \n" 1228 "addx.l %%d0,%%d0 \n"
1084 "move.l %%d1,%%a0 \n" 1229 "move.l %%d1,%%a0 \n"
1085 "move.l %%a1,%%d1 \n" 1230 "move.l %%a1,%%d1 \n"
1086 "lsr.l #1,%%d1 \n" 1231 "lsr.l #1,%%d1 \n"
1087 "addx.l %%d0,%%d0 \n" 1232 "addx.l %%d0,%%d0 \n"
1088 "move.l %%d1,%%a1 \n" 1233 "move.l %%d1,%%a1 \n"
1089 "move.l %[ax],%%d1 \n" 1234 "move.l %[ax],%%d1 \n"
1090 "lsr.l #1,%%d1 \n" 1235 "lsr.l #1,%%d1 \n"
1091 "addx.l %%d0,%%d0 \n" 1236 "addx.l %%d0,%%d0 \n"
1092 "move.l %%d1,%[ax] \n" 1237 "move.l %%d1,%[ax] \n"
1093 1238
1094 "move.b (%[addr]),%%d1 \n" /* read old value */ 1239 "move.b (%[addr]),%%d1 \n" /* read old value */
1095 "and.l %[mask],%%d1 \n" /* mask out unneeded bits */ 1240 "and.l %[mask],%%d1 \n" /* mask out replaced bits */
1096 "or.l %%d0,%%d1 \n" /* set new bits */ 1241 "or.l %%d0,%%d1 \n" /* set new bits */
1097 "move.b %%d1,(%[addr]) \n" /* store value to bitplane */ 1242 "move.b %%d1,(%[addr]) \n" /* store value to bitplane */
1098 1243
1099 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ 1244 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */
1100 "cmp.l %[addr],%[end] \n" /* last bitplane done? */ 1245 "cmp.l %[addr],%[end] \n" /* loop through all bitplanes */
1101 "bhi.b .ur_floop \n" /* no: loop */ 1246 "bhi.b .ur_floop \n"
1102 1247
1103 "bra.b .ur_end \n" 1248 "bra.b .ur_end \n"
1104 1249
1105 ".ur_sstart: \n" 1250 ".ur_sstart: \n"
1106 "move.l %%a0,%[mask]\n" /* mask isn't needed here, reuse reg */ 1251 "move.l %%a0,%[mask] \n" /* mask isn't needed here, reuse reg */
1107 1252
1108 ".ur_sloop: \n" /** short loop (nothing to keep) **/ 1253 ".ur_sloop: \n" /** short loop (nothing to keep) **/
1109 "clr.l %%d0 \n" 1254 "clr.l %%d0 \n"
1110 "lsr.l #1,%%d2 \n" /* shift out mask bit */ 1255 "lsr.l #1,%%d2 \n" /* shift out pattern bit */
1111 "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ 1256 "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
1112 "lsr.l #1,%%d3 \n" 1257 "lsr.l #1,%%d3 \n"
1113 "addx.l %%d0,%%d0 \n" 1258 "addx.l %%d0,%%d0 \n"
1114 "lsr.l #1,%%d4 \n" 1259 "lsr.l #1,%%d4 \n"
1115 "addx.l %%d0,%%d0 \n" 1260 "addx.l %%d0,%%d0 \n"
1116 "lsr.l #1,%%d5 \n" 1261 "lsr.l #1,%%d5 \n"
1117 "addx.l %%d0,%%d0 \n" 1262 "addx.l %%d0,%%d0 \n"
1118 "lsr.l #1,%%d6 \n" 1263 "lsr.l #1,%%d6 \n"
1119 "addx.l %%d0,%%d0 \n" 1264 "addx.l %%d0,%%d0 \n"
1120 "lsr.l #1,%[mask] \n" 1265 "lsr.l #1,%[mask] \n"
1121 "addx.l %%d0,%%d0 \n" 1266 "addx.l %%d0,%%d0 \n"
1122 "move.l %%a1,%%d1 \n" 1267 "move.l %%a1,%%d1 \n"
1123 "lsr.l #1,%%d1 \n" 1268 "lsr.l #1,%%d1 \n"
1124 "addx.l %%d0,%%d0 \n" 1269 "addx.l %%d0,%%d0 \n"
1125 "move.l %%d1,%%a1 \n" 1270 "move.l %%d1,%%a1 \n"
1126 "move.l %[ax],%%d1 \n" 1271 "move.l %[ax],%%d1 \n"
1127 "lsr.l #1,%%d1 \n" 1272 "lsr.l #1,%%d1 \n"
1128 "addx.l %%d0,%%d0 \n" 1273 "addx.l %%d0,%%d0 \n"
1129 "move.l %%d1,%[ax] \n" 1274 "move.l %%d1,%[ax] \n"
1130 1275
1131 "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */ 1276 "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */
1132 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ 1277 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */
1133 "cmp.l %[addr],%[end] \n" /* last bitplane done? */ 1278 "cmp.l %[addr],%[end] \n" /* loop through all bitplanes */
1134 "bhi.b .ur_sloop \n" /* no: loop */ 1279 "bhi.b .ur_sloop \n"
1135 1280
1136 ".ur_end: \n" 1281 ".ur_end: \n"
1137 : /* outputs */ 1282 : /* outputs */
1138 [addr]"+a"(addr), 1283 [addr]"+a"(addr),
1139 [mask]"+d"(mask), 1284 [mask]"+d"(mask),
@@ -1151,9 +1296,7 @@ void gray_update_rect(int x, int y, int width, int height)
1151 (void)pat_ptr; 1296 (void)pat_ptr;
1152 /* check whether anything changed in the 8-pixel block */ 1297 /* check whether anything changed in the 8-pixel block */
1153 change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf; 1298 change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
1154 cbuf += sizeof(uint32_t); 1299 change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4);
1155 bbuf += sizeof(uint32_t);
1156 change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
1157 1300
1158 if (change != 0) 1301 if (change != 0)
1159 { 1302 {
@@ -1162,9 +1305,6 @@ void gray_update_rect(int x, int y, int width, int height)
1162 unsigned test = 1; 1305 unsigned test = 1;
1163 int i; 1306 int i;
1164 1307
1165 cbuf = _gray_info.cur_buffer + srcofs_row;
1166 bbuf = _gray_info.back_buffer + srcofs_row;
1167
1168 /* precalculate the bit patterns with random shifts 1308 /* precalculate the bit patterns with random shifts
1169 * for all 8 pixels and put them on an extra "stack" */ 1309 * for all 8 pixels and put them on an extra "stack" */
1170 for (i = 0; i < 8; i++) 1310 for (i = 0; i < 8; i++)