Diffstat (limited to 'apps')
-rw-r--r--  apps/plugins/lib/gray_core.c   | 536
-rw-r--r--  apps/plugins/lib/gray_draw.c   | 496
-rw-r--r--  apps/plugins/lib/gray_scroll.c |  83
3 files changed, 692 insertions, 423 deletions
diff --git a/apps/plugins/lib/gray_core.c b/apps/plugins/lib/gray_core.c
index c253a7112e..c162349f76 100644
--- a/apps/plugins/lib/gray_core.c
+++ b/apps/plugins/lib/gray_core.c
@@ -648,14 +648,165 @@ void gray_update_rect(int x, int y, int width, int height)
         cbuf = _gray_info.cur_buffer + srcofs_row;
         bbuf = _gray_info.back_buffer + srcofs_row;

-#if 0 /* CPU specific asm versions will go here */
+#ifdef CPU_ARM
+        asm volatile (
+            "ldr r0, [%[cbuf]] \n"
+            "ldr r1, [%[bbuf]] \n"
+            "eor r1, r0, r1 \n"
+            "ldr r0, [%[cbuf], #4] \n"
+            "ldr %[chg], [%[bbuf], #4] \n"
+            "eor %[chg], r0, %[chg] \n"
+            "orr %[chg], %[chg], r1 \n"
+            : /* outputs */
+            [chg] "=&r"(change)
+            : /* inputs */
+            [cbuf]"r"(cbuf),
+            [bbuf]"r"(bbuf)
+            : /* clobbers */
+            "r0", "r1"
+        );
+
+        if (change != 0)
+        {
+            unsigned char *addr, *end;
+            unsigned mask, trash;
+
+            pat_ptr = &pat_stack[8];
+
+            /* precalculate the bit patterns with random shifts
+             * for all 8 pixels and put them on an extra "stack" */
+            asm volatile (
+                "mov r3, #8 \n" /* loop count */
+                "mov %[mask], #0 \n"
+
+                ".ur_pre_loop: \n"
+                "mov %[mask], %[mask], lsl #1 \n" /* shift mask */
+                "ldrb r0, [%[cbuf]], #1 \n" /* read current buffer */
+                "ldrb r1, [%[bbuf]] \n" /* read back buffer */
+                "strb r0, [%[bbuf]], #1 \n" /* update back buffer */
+                "mov r2, #0 \n" /* preset for skipped pixel */
+                "cmp r0, r1 \n" /* no change? */
+                "beq .ur_skip \n" /* -> skip */
+
+                "ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */
+
+                "add r0, %[rnd], %[rnd], lsl #3 \n" /* multiply by 75 */
+                "add %[rnd], %[rnd], %[rnd], lsl #1 \n"
+                "add %[rnd], %[rnd], r0, lsl #3 \n"
+                "add %[rnd], %[rnd], #74 \n" /* add another 74 */
+                /* Since the lower bits are not very random: get bits 8..15 (need max. 5) */
+                "and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */
+
+                "cmp r1, %[dpth] \n" /* random >= depth ? */
+                "subhs r1, r1, %[dpth] \n" /* yes: random -= depth */
+
+                "mov r0, r2, lsl r1 \n" /** rotate pattern **/
+                "sub r1, %[dpth], r1 \n"
+                "orr r2, r0, r2, lsr r1 \n"
+
+                "orr %[mask], %[mask], #1 \n" /* set mask bit */
+
+                ".ur_skip: \n"
+                "str r2, [%[patp], #-4]! \n" /* push on pattern stack */
+
+                "subs r3, r3, #1 \n" /* loop 8 times (pixel block) */
+                "bne .ur_pre_loop \n"
+                : /* outputs */
+                [cbuf]"+r"(cbuf),
+                [bbuf]"+r"(bbuf),
+                [patp]"+r"(pat_ptr),
+                [rnd] "+r"(_gray_random_buffer),
+                [mask]"=&r"(mask)
+                : /* inputs */
+                [bpat]"r"(_gray_info.bitpattern),
+                [dpth]"r"(_gray_info.depth),
+                [rmsk]"r"(_gray_info.randmask)
+                : /* clobbers */
+                "r0", "r1", "r2", "r3"
+            );
+
+            addr = dst_row;
+            end = addr + MULU16(_gray_info.depth, _gray_info.plane_size);
+
+            /* set the bits for all 8 pixels in all bytes according to the
+             * precalculated patterns on the pattern stack */
+            asm volatile (
+                "ldmia %[patp], {r2 - r8, %[rx]} \n" /* pop all 8 patterns */
+
+                "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
+                "ands %[mask], %[mask], #0xff \n"
+                "beq .ur_sloop \n" /* short loop if nothing to keep */
+
+                ".ur_floop: \n" /** full loop (there are bits to keep)**/
+                "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
+                "adc r0, r0, r0 \n" /* put bit into LSB for byte */
+                "movs r8, r8, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r7, r7, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r6, r6, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r5, r5, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r4, r4, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r3, r3, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r2, r2, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+
+                "ldrb r1, [%[addr]] \n" /* read old value */
+                "and r1, r1, %[mask] \n" /* mask out replaced bits */
+                "orr r1, r1, r0 \n" /* set new bits */
+                "strb r1, [%[addr]], %[psiz] \n" /* store value, advance to next bpl */
+
+                "cmp %[end], %[addr] \n" /* loop for all bitplanes */
+                "bne .ur_floop \n"
+
+                "b .ur_end \n"
+
+                ".ur_sloop: \n" /** short loop (nothing to keep) **/
+                "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
+                "adc r0, r0, r0 \n" /* put bit into LSB for byte */
+                "movs r8, r8, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r7, r7, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r6, r6, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r5, r5, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r4, r4, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r3, r3, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+                "movs r2, r2, lsr #1 \n"
+                "adc r0, r0, r0 \n"
+
+                "strb r0, [%[addr]], %[psiz] \n" /* store byte, advance to next bpl */
+
+                "cmp %[end], %[addr] \n" /* loop for all bitplanes */
+                "bne .ur_sloop \n"
+
+                ".ur_end: \n"
+                : /* outputs */
+                [addr]"+r"(addr),
+                [mask]"+r"(mask),
+                [rx] "=&r"(trash)
+                : /* inputs */
+                [psiz]"r"(_gray_info.plane_size),
+                [end] "r"(end),
+                [patp]"[rx]"(pat_ptr)
+                : /* clobbers */
+                "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
+            );
+        }
 #else /* C version, for reference*/
+#warning C version of gray_update_rect() used
         (void)pat_ptr;
         /* check whether anything changed in the 8-pixel block */
         change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
-        cbuf += sizeof(uint32_t);
-        bbuf += sizeof(uint32_t);
-        change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
+        change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4);

         if (change != 0)
         {
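The per-pixel work in the precalc loop above is the same on every target: advance a small linear congruential generator, derive a bounded random shift from its upper bits, and rotate the pixel's bit pattern by that amount. A C sketch of one such step follows; the function name and parameter passing are illustrative only, while the values themselves live in _gray_info and _gray_random_buffer in the library:

    /* Sketch of one pattern precalculation step, assuming 'bitpattern',
     * 'randmask' and 'depth' hold the _gray_info fields used as asm
     * operands, and 'rnd' points to the caller's random seed. */
    static unsigned long rotate_pattern(unsigned index,
                                        const unsigned long *bitpattern,
                                        unsigned randmask, unsigned depth,
                                        unsigned *rnd)
    {
        unsigned long pattern = bitpattern[index];
        unsigned shift;

        *rnd = *rnd * 75 + 74;          /* linear congruential step */
        shift = (*rnd >> 8) & randmask; /* lower bits aren't very random */
        if (shift >= depth)             /* randmask may exceed depth - 1 */
            shift -= depth;

        if (shift)                      /* rotate the depth-bit pattern */
            pattern = (pattern << shift) | (pattern >> (depth - shift));
        return pattern;
    }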
@@ -664,9 +815,6 @@ void gray_update_rect(int x, int y, int width, int height)
             unsigned test = 1;
             int i;

-            cbuf = _gray_info.cur_buffer + srcofs_row;
-            bbuf = _gray_info.back_buffer + srcofs_row;
-
             /* precalculate the bit patterns with random shifts
              * for all 8 pixels and put them on an extra "stack" */
             for (i = 7; i >= 0; i--)
@@ -711,7 +859,7 @@ void gray_update_rect(int x, int y, int width, int height)

             for (i = 7; i >= 0; i--)
                 data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0);

             *addr = data;
             addr += _gray_info.plane_size;
             test <<= 1;
@@ -788,18 +936,18 @@ void gray_update_rect(int x, int y, int width, int height)

 #if CONFIG_CPU == SH7034
         asm volatile (
-            "mov.l @%[cbuf]+,r1 \n"
-            "mov.l @%[bbuf]+,r2 \n"
+            "mov.l @%[cbuf],r1 \n"
+            "mov.l @%[bbuf],r2 \n"
             "xor r1,r2 \n"
-            "mov.l @%[cbuf],r1 \n"
-            "mov.l @%[bbuf],%[chg] \n"
+            "mov.l @(4,%[cbuf]),r1 \n"
+            "mov.l @(4,%[bbuf]),%[chg] \n"
             "xor r1,%[chg] \n"
             "or r2,%[chg] \n"
             : /* outputs */
-            [cbuf]"+r"(cbuf),
-            [bbuf]"+r"(bbuf),
             [chg] "=r"(change)
             : /* inputs */
+            [cbuf]"r"(cbuf),
+            [bbuf]"r"(bbuf)
             : /* clobbers */
             "r1", "r2"
         );
@@ -810,13 +958,11 @@ void gray_update_rect(int x, int y, int width, int height)
             unsigned mask, trash;

             pat_ptr = &pat_stack[8];
-            cbuf = _gray_info.cur_buffer + srcofs_row;
-            bbuf = _gray_info.back_buffer + srcofs_row;

             /* precalculate the bit patterns with random shifts
              * for all 8 pixels and put them on an extra "stack" */
             asm volatile (
-                "mov #8,r3 \n" /* loop count in r3: 8 pixels */
+                "mov #8,r3 \n" /* loop count */

                 ".ur_pre_loop: \n"
                 "mov.b @%[cbuf]+,r0\n" /* read current buffer */
@@ -860,10 +1006,11 @@ void gray_update_rect(int x, int y, int width, int height)
860 "rotcr %[mask] \n" /* get mask bit */ 1006 "rotcr %[mask] \n" /* get mask bit */
861 "mov.l r2,@-%[patp]\n" /* push on pattern stack */ 1007 "mov.l r2,@-%[patp]\n" /* push on pattern stack */
862 1008
863 "add #-1,r3 \n" /* decrease loop count */ 1009 "add #-1,r3 \n" /* loop 8 times (pixel block) */
864 "cmp/pl r3 \n" /* loop count > 0? */ 1010 "cmp/pl r3 \n"
865 "bt .ur_pre_loop\n" /* yes: loop */ 1011 "bt .ur_pre_loop\n"
866 "shlr8 %[mask] \n" 1012
1013 "shlr8 %[mask] \n" /* shift mask to low byte */
867 "shlr16 %[mask] \n" 1014 "shlr16 %[mask] \n"
868 : /* outputs */ 1015 : /* outputs */
869 [cbuf]"+r"(cbuf), 1016 [cbuf]"+r"(cbuf),
@@ -885,77 +1032,77 @@ void gray_update_rect(int x, int y, int width, int height)
             /* set the bits for all 8 pixels in all bytes according to the
              * precalculated patterns on the pattern stack */
             asm volatile (
-                "mov.l @%[patp]+,r1\n" /* pop all 8 patterns */
-                "mov.l @%[patp]+,r2\n"
-                "mov.l @%[patp]+,r3\n"
-                "mov.l @%[patp]+,r6\n"
-                "mov.l @%[patp]+,r7\n"
-                "mov.l @%[patp]+,r8\n"
-                "mov.l @%[patp]+,r9\n"
-                "mov.l @%[patp],r10\n"
+                "mov.l @%[patp]+,r1 \n" /* pop all 8 patterns */
+                "mov.l @%[patp]+,r2 \n"
+                "mov.l @%[patp]+,r3 \n"
+                "mov.l @%[patp]+,r6 \n"
+                "mov.l @%[patp]+,r7 \n"
+                "mov.l @%[patp]+,r8 \n"
+                "mov.l @%[patp]+,r9 \n"
+                "mov.l @%[patp],r10 \n"

-                "tst %[mask],%[mask] \n" /* nothing to keep? */
-                "bt .ur_sloop \n" /* yes: jump to short loop */
+                "tst %[mask],%[mask] \n"
+                "bt .ur_sloop \n" /* short loop if nothing to keep */

                 ".ur_floop: \n" /** full loop (there are bits to keep)**/
                 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
                 "rotcl r0 \n" /* rotate t bit into r0 */
                 "shlr r2 \n"
                 "rotcl r0 \n"
                 "shlr r3 \n"
                 "rotcl r0 \n"
                 "shlr r6 \n"
                 "rotcl r0 \n"
                 "shlr r7 \n"
                 "rotcl r0 \n"
                 "shlr r8 \n"
                 "rotcl r0 \n"
                 "shlr r9 \n"
                 "rotcl r0 \n"
                 "shlr r10 \n"
                 "mov.b @%[addr],%[rx] \n" /* read old value */
                 "rotcl r0 \n"
-                "and %[mask],%[rx] \n" /* mask out unneeded bits */
+                "and %[mask],%[rx] \n" /* mask out replaced bits */
                 "or %[rx],r0 \n" /* set new bits */
                 "mov.b r0,@%[addr] \n" /* store value to bitplane */
                 "add %[psiz],%[addr] \n" /* advance to next bitplane */
-                "cmp/hi %[addr],%[end] \n" /* last bitplane done? */
-                "bt .ur_floop \n" /* no: loop */
+                "cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */
+                "bt .ur_floop \n"

                 "bra .ur_end \n"
                 "nop \n"

                 /* References to C library routines used in the precalc block */
                 ".align 2 \n"
                 ".ashlsi3: \n" /* C library routine: */
                 ".long ___ashlsi3 \n" /* shift r4 left by r5, res. in r0 */
                 ".lshrsi3: \n" /* C library routine: */
                 ".long ___lshrsi3 \n" /* shift r4 right by r5, res. in r0 */
                 /* both routines preserve r4, destroy r5 and take ~16 cycles */

                 ".ur_sloop: \n" /** short loop (nothing to keep) **/
                 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
                 "rotcl r0 \n" /* rotate t bit into r0 */
                 "shlr r2 \n"
                 "rotcl r0 \n"
                 "shlr r3 \n"
                 "rotcl r0 \n"
                 "shlr r6 \n"
                 "rotcl r0 \n"
                 "shlr r7 \n"
                 "rotcl r0 \n"
                 "shlr r8 \n"
                 "rotcl r0 \n"
                 "shlr r9 \n"
                 "rotcl r0 \n"
                 "shlr r10 \n"
                 "rotcl r0 \n"
                 "mov.b r0,@%[addr] \n" /* store byte to bitplane */
                 "add %[psiz],%[addr] \n" /* advance to next bitplane */
-                "cmp/hi %[addr],%[end] \n" /* last bitplane done? */
-                "bt .ur_sloop \n" /* no: loop */
+                "cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */
+                "bt .ur_sloop \n"

                 ".ur_end: \n"
                 : /* outputs */
                 [addr]"+r"(addr),
                 [mask]"+r"(mask),
@@ -970,18 +1117,18 @@ void gray_update_rect(int x, int y, int width, int height)
         }
 #elif defined(CPU_COLDFIRE)
         asm volatile (
-            "move.l (%[cbuf])+,%%d0 \n"
-            "move.l (%[bbuf])+,%%d1 \n"
+            "move.l (%[cbuf]),%%d0 \n"
+            "move.l (%[bbuf]),%%d1 \n"
             "eor.l %%d0,%%d1 \n"
-            "move.l (%[cbuf]),%%d0 \n"
-            "move.l (%[bbuf]),%[chg]\n"
+            "move.l (4,%[cbuf]),%%d0 \n"
+            "move.l (4,%[bbuf]),%[chg] \n"
             "eor.l %%d0,%[chg] \n"
             "or.l %%d1,%[chg] \n"
             : /* outputs */
-            [cbuf]"+a"(cbuf),
-            [bbuf]"+a"(bbuf),
             [chg] "=&d"(change)
             : /* inputs */
+            [cbuf]"a"(cbuf),
+            [bbuf]"a"(bbuf)
             : /* clobbers */
             "d0", "d1"
         );
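All three change-detection variants (ARM, SH7034, Coldfire) now leave cbuf and bbuf unmodified and compare the 8-pixel block as two 32-bit words, exactly as the C reference version does. In plain C (needs <stdint.h>; the function wrapper is illustrative):

    /* C equivalent of the asm change test above: any set bit in 'change'
     * means at least one pixel of the 8-pixel block differs. */
    static int block_changed(const unsigned char *cbuf,
                             const unsigned char *bbuf)
    {
        uint32_t change = *(const uint32_t *)cbuf ^ *(const uint32_t *)bbuf;
        change |= *(const uint32_t *)(cbuf + 4) ^ *(const uint32_t *)(bbuf + 4);
        return change != 0;
    }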
@@ -992,54 +1139,52 @@ void gray_update_rect(int x, int y, int width, int height)
             unsigned mask, trash;

             pat_ptr = &pat_stack[8];
-            cbuf = _gray_info.cur_buffer + srcofs_row;
-            bbuf = _gray_info.back_buffer + srcofs_row;

             /* precalculate the bit patterns with random shifts
              * for all 8 pixels and put them on an extra "stack" */
             asm volatile (
-                "moveq.l #8,%%d3 \n" /* loop count in d3: 8 pixels */
+                "moveq.l #8,%%d3 \n" /* loop count */
                 "clr.l %[mask] \n"

                 ".ur_pre_loop: \n"
                 "clr.l %%d0 \n"
                 "move.b (%[cbuf])+,%%d0 \n" /* read current buffer */
                 "clr.l %%d1 \n"
                 "move.b (%[bbuf]),%%d1 \n" /* read back buffer */
                 "move.b %%d0,(%[bbuf])+ \n" /* update back buffer */
                 "clr.l %%d2 \n" /* preset for skipped pixel */
                 "cmp.l %%d0,%%d1 \n" /* no change? */
                 "beq.b .ur_skip \n" /* -> skip */

                 "move.l (%%d0:l:4,%[bpat]),%%d2 \n" /* d2 = bitpattern[byte]; */

                 "mulu.w #75,%[rnd] \n" /* multiply by 75 */
                 "add.l #74,%[rnd] \n" /* add another 74 */
                 /* Since the lower bits are not very random: */
                 "move.l %[rnd],%%d1 \n"
                 "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */
-                "and.l %[rmsk],%%d1\n" /* mask out unneeded bits */
+                "and.l %[rmsk],%%d1 \n" /* mask out unneeded bits */

-                "cmp.l %[dpth],%%d1\n" /* random >= depth ? */
+                "cmp.l %[dpth],%%d1 \n" /* random >= depth ? */
                 "blo.b .ur_ntrim \n"
-                "sub.l %[dpth],%%d1\n" /* yes: random -= depth; */
+                "sub.l %[dpth],%%d1 \n" /* yes: random -= depth; */
                 ".ur_ntrim: \n"

-                "move.l %%d2,%%d0 \n"
+                "move.l %%d2,%%d0 \n" /** rotate pattern **/
                 "lsl.l %%d1,%%d0 \n"
-                "sub.l %[dpth],%%d1\n"
+                "sub.l %[dpth],%%d1 \n"
                 "neg.l %%d1 \n" /* d1 = depth - d1 */
                 "lsr.l %%d1,%%d2 \n"
                 "or.l %%d0,%%d2 \n" /* rotated_pattern = d2 | d0 */

                 "or.l #0x0100,%[mask] \n" /* set mask bit */

                 ".ur_skip: \n"
                 "lsr.l #1,%[mask] \n" /* shift mask */
                 "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */

-                "subq.l #1,%%d3 \n" /* decrease loop count */
-                "bne.b .ur_pre_loop\n" /* yes: loop */
+                "subq.l #1,%%d3 \n" /* loop 8 times (pixel block) */
+                "bne.b .ur_pre_loop \n"
                 : /* outputs */
                 [cbuf]"+a"(cbuf),
                 [bbuf]"+a"(bbuf),
@@ -1061,79 +1206,79 @@ void gray_update_rect(int x, int y, int width, int height)
              * precalculated patterns on the pattern stack */
             asm volatile (
                 "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n"
                                             /* pop all 8 patterns */
-                "not.l %[mask] \n" /* set mask -> keep mask */
+                "not.l %[mask] \n" /* "set" mask -> "keep" mask */
                 "and.l #0xFF,%[mask] \n"
-                "beq.b .ur_sstart \n" /* yes: jump to short loop */
+                "beq.b .ur_sstart \n" /* short loop if nothing to keep */

                 ".ur_floop: \n" /** full loop (there are bits to keep)**/
                 "clr.l %%d0 \n"
-                "lsr.l #1,%%d2 \n" /* shift out mask bit */
-                "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */
+                "lsr.l #1,%%d2 \n" /* shift out pattern bit */
+                "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
                 "lsr.l #1,%%d3 \n"
                 "addx.l %%d0,%%d0 \n"
                 "lsr.l #1,%%d4 \n"
                 "addx.l %%d0,%%d0 \n"
                 "lsr.l #1,%%d5 \n"
                 "addx.l %%d0,%%d0 \n"
                 "lsr.l #1,%%d6 \n"
                 "addx.l %%d0,%%d0 \n"
                 "move.l %%a0,%%d1 \n"
                 "lsr.l #1,%%d1 \n"
                 "addx.l %%d0,%%d0 \n"
                 "move.l %%d1,%%a0 \n"
                 "move.l %%a1,%%d1 \n"
                 "lsr.l #1,%%d1 \n"
                 "addx.l %%d0,%%d0 \n"
                 "move.l %%d1,%%a1 \n"
                 "move.l %[ax],%%d1 \n"
                 "lsr.l #1,%%d1 \n"
                 "addx.l %%d0,%%d0 \n"
                 "move.l %%d1,%[ax] \n"

                 "move.b (%[addr]),%%d1 \n" /* read old value */
-                "and.l %[mask],%%d1 \n" /* mask out unneeded bits */
+                "and.l %[mask],%%d1 \n" /* mask out replaced bits */
                 "or.l %%d0,%%d1 \n" /* set new bits */
                 "move.b %%d1,(%[addr]) \n" /* store value to bitplane */

                 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */
-                "cmp.l %[addr],%[end] \n" /* last bitplane done? */
-                "bhi.b .ur_floop \n" /* no: loop */
+                "cmp.l %[addr],%[end] \n" /* loop through all bitplanes */
+                "bhi.b .ur_floop \n"

                 "bra.b .ur_end \n"

                 ".ur_sstart: \n"
-                "move.l %%a0,%[mask]\n" /* mask isn't needed here, reuse reg */
+                "move.l %%a0,%[mask] \n" /* mask isn't needed here, reuse reg */

                 ".ur_sloop: \n" /** short loop (nothing to keep) **/
                 "clr.l %%d0 \n"
-                "lsr.l #1,%%d2 \n" /* shift out mask bit */
-                "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */
+                "lsr.l #1,%%d2 \n" /* shift out pattern bit */
+                "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
                 "lsr.l #1,%%d3 \n"
                 "addx.l %%d0,%%d0 \n"
                 "lsr.l #1,%%d4 \n"
                 "addx.l %%d0,%%d0 \n"
                 "lsr.l #1,%%d5 \n"
                 "addx.l %%d0,%%d0 \n"
                 "lsr.l #1,%%d6 \n"
                 "addx.l %%d0,%%d0 \n"
                 "lsr.l #1,%[mask] \n"
                 "addx.l %%d0,%%d0 \n"
                 "move.l %%a1,%%d1 \n"
                 "lsr.l #1,%%d1 \n"
                 "addx.l %%d0,%%d0 \n"
                 "move.l %%d1,%%a1 \n"
                 "move.l %[ax],%%d1 \n"
                 "lsr.l #1,%%d1 \n"
                 "addx.l %%d0,%%d0 \n"
                 "move.l %%d1,%[ax] \n"

                 "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */
                 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */
-                "cmp.l %[addr],%[end] \n" /* last bitplane done? */
-                "bhi.b .ur_sloop \n" /* no: loop */
+                "cmp.l %[addr],%[end] \n" /* loop through all bitplanes */
+                "bhi.b .ur_sloop \n"

                 ".ur_end: \n"
                 : /* outputs */
                 [addr]"+a"(addr),
                 [mask]"+d"(mask),
@@ -1151,9 +1296,7 @@ void gray_update_rect(int x, int y, int width, int height)
         (void)pat_ptr;
         /* check whether anything changed in the 8-pixel block */
         change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
-        cbuf += sizeof(uint32_t);
-        bbuf += sizeof(uint32_t);
-        change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
+        change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4);

         if (change != 0)
         {
@@ -1162,9 +1305,6 @@ void gray_update_rect(int x, int y, int width, int height)
             unsigned test = 1;
             int i;

-            cbuf = _gray_info.cur_buffer + srcofs_row;
-            bbuf = _gray_info.back_buffer + srcofs_row;
-
             /* precalculate the bit patterns with random shifts
              * for all 8 pixels and put them on an extra "stack" */
             for (i = 0; i < 8; i++)
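In every port, the second asm block then transposes the eight precalculated patterns into the bitplanes: bit i of each pattern supplies one bit of the byte written to plane i, and the "keep" mask preserves the bits of unchanged pixels. A C sketch of that write loop, assuming the same layout as the C reference version (names are illustrative; the asm keeps everything in registers):

    /* Sketch: write one 8-pixel block into all bitplanes. 'mask' has a
     * set bit per changed pixel; pat_stack entries of unchanged pixels
     * are 0, so their bits come from the old byte via 'keep'. */
    static void write_block(unsigned char *addr, long plane_size,
                            unsigned depth, const unsigned long *pat_stack,
                            unsigned mask)
    {
        unsigned keep = ~mask & 0xff;   /* "set" mask -> "keep" mask */
        unsigned long test = 1;         /* bit selecting the current plane */
        unsigned plane;
        int i;

        for (plane = 0; plane < depth; plane++)
        {
            unsigned data = 0;
            for (i = 7; i >= 0; i--)    /* gather one bit per pattern */
                data = (data << 1) | ((pat_stack[i] & test) ? 1 : 0);

            if (keep)                   /* "full loop": merge kept bits */
                data |= *addr & keep;
            *addr = data;
            addr += plane_size;         /* advance to the next bitplane */
            test <<= 1;
        }
    }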
diff --git a/apps/plugins/lib/gray_draw.c b/apps/plugins/lib/gray_draw.c
index 396046d1e6..7df3e13c56 100644
--- a/apps/plugins/lib/gray_draw.c
+++ b/apps/plugins/lib/gray_draw.c
@@ -876,8 +876,140 @@ static void _writearray(unsigned char *address, const unsigned char *src,
     unsigned long pat_stack[8];
     unsigned long *pat_ptr = &pat_stack[8];
     unsigned char *addr, *end;
-#if 0 /* CPU specific asm versions will go here */
+#ifdef CPU_ARM
+    const unsigned char *_src;
+    unsigned _mask, trash;
+
+    _mask = mask;
+    _src = src;
+
+    /* precalculate the bit patterns with random shifts
+       for all 8 pixels and put them on an extra "stack" */
+    asm volatile (
+        "mov %[mask], %[mask], lsl #24 \n" /* shift mask to upper byte */
+        "mov r3, #8 \n" /* loop count */
+
+        ".wa_loop: \n" /** load pattern for pixel **/
+        "mov r2, #0 \n" /* pattern for skipped pixel must be 0 */
+        "movs %[mask], %[mask], lsl #1 \n" /* shift out msb of mask */
+        "bcc .wa_skip \n" /* skip this pixel */
+
+        "ldrb r0, [%[src]] \n" /* load src byte */
+        "ldrb r0, [%[trns], r0] \n" /* idxtable into pattern index */
+        "ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */
+
+        "add r0, %[rnd], %[rnd], lsl #3 \n" /* multiply by 75 */
+        "add %[rnd], %[rnd], %[rnd], lsl #1 \n"
+        "add %[rnd], %[rnd], r0, lsl #3 \n"
+        "add %[rnd], %[rnd], #74 \n" /* add another 74 */
+        /* Since the lower bits are not very random: get bits 8..15 (need max. 5) */
+        "and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */
+
+        "cmp r1, %[dpth] \n" /* random >= depth ? */
+        "subhs r1, r1, %[dpth] \n" /* yes: random -= depth */
+
+        "mov r0, r2, lsl r1 \n" /** rotate pattern **/
+        "sub r1, %[dpth], r1 \n"
+        "orr r2, r0, r2, lsr r1 \n"
+
+        ".wa_skip: \n"
+        "str r2, [%[patp], #-4]! \n" /* push on pattern stack */
+
+        "add %[src], %[src], #1 \n" /* src++; */
+        "subs r3, r3, #1 \n" /* loop 8 times (pixel block) */
+        "bne .wa_loop \n"
+        : /* outputs */
+        [src] "+r"(_src),
+        [patp]"+r"(pat_ptr),
+        [rnd] "+r"(_gray_random_buffer),
+        [mask]"+r"(_mask)
+        : /* inputs */
+        [bpat]"r"(_gray_info.bitpattern),
+        [trns]"r"(_gray_info.idxtable),
+        [dpth]"r"(_gray_info.depth),
+        [rmsk]"r"(_gray_info.randmask)
+        : /* clobbers */
+        "r0", "r1", "r2", "r3"
+    );
+
+    addr = address;
+    end = addr + MULU16(_gray_info.depth, _gray_info.plane_size);
+    _mask = mask;
+
+    /* set the bits for all 8 pixels in all bytes according to the
+     * precalculated patterns on the pattern stack */
+    asm volatile (
+        "ldmia %[patp], {r2 - r8, %[rx]} \n" /* pop all 8 patterns */
+
+        "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
+        "ands %[mask], %[mask], #0xff \n"
+        "beq .wa_sloop \n" /* short loop if nothing to keep */
+
+        ".wa_floop: \n" /** full loop (there are bits to keep)**/
+        "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
+        "adc r0, r0, r0 \n" /* put bit into LSB of byte */
+        "movs r8, r8, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r7, r7, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r6, r6, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r5, r5, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r4, r4, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r3, r3, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r2, r2, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+
+        "ldrb r1, [%[addr]] \n" /* read old value */
+        "and r1, r1, %[mask] \n" /* mask out replaced bits */
+        "orr r1, r1, r0 \n" /* set new bits */
+        "strb r1, [%[addr]], %[psiz] \n" /* store value, advance to next bpl */
+
+        "cmp %[end], %[addr] \n" /* loop through all bitplanes */
+        "bne .wa_floop \n"
+
+        "b .wa_end \n"
+
+        ".wa_sloop: \n" /** short loop (nothing to keep) **/
+        "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
+        "adc r0, r0, r0 \n" /* put bit into LSB of byte */
+        "movs r8, r8, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r7, r7, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r6, r6, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r5, r5, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r4, r4, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r3, r3, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+        "movs r2, r2, lsr #1 \n"
+        "adc r0, r0, r0 \n"
+
+        "strb r0, [%[addr]], %[psiz] \n" /* store byte, advance to next bpl */
+
+        "cmp %[end], %[addr] \n" /* loop through all bitplanes */
+        "bne .wa_sloop \n"
+
+        ".wa_end: \n"
+        : /* outputs */
+        [addr]"+r"(addr),
+        [mask]"+r"(_mask),
+        [rx] "=&r"(trash)
+        : /* inputs */
+        [psiz]"r"(_gray_info.plane_size),
+        [end] "r"(end),
+        [patp]"[rx]"(pat_ptr)
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
+    );
 #else /* C version, for reference*/
+#warning C version of _writearray() used
     unsigned test = 0x80;
     int i;

@@ -1027,52 +1159,52 @@ static void _writearray(unsigned char *address, const unsigned char *src,
     /* precalculate the bit patterns with random shifts
        for all 8 pixels and put them on an extra "stack" */
     asm volatile (
-        "mov #8,r3 \n" /* loop count in r3: 8 pixels */
+        "mov #8,r3 \n" /* loop count */

         ".wa_loop: \n" /** load pattern for pixel **/
         "mov #0,r0 \n" /* pattern for skipped pixel must be 0 */
         "shlr %[mask] \n" /* shift out lsb of mask */
         "bf .wa_skip \n" /* skip this pixel */

         "mov.b @%[src],r0 \n" /* load src byte */
         "extu.b r0,r0 \n" /* extend unsigned */
         "mov.b @(r0,%[trns]),r0\n" /* idxtable into pattern index */
         "extu.b r0,r0 \n" /* extend unsigned */
         "shll2 r0 \n"
         "mov.l @(r0,%[bpat]),r4\n" /* r4 = bitpattern[byte]; */

         "mov #75,r0 \n"
         "mulu r0,%[rnd] \n" /* multiply by 75 */
         "sts macl,%[rnd] \n"
         "add #74,%[rnd] \n" /* add another 74 */
         /* Since the lower bits are not very random: */
         "swap.b %[rnd],r1 \n" /* get bits 8..15 (need max. 5) */
         "and %[rmsk],r1 \n" /* mask out unneeded bits */

         "cmp/hs %[dpth],r1 \n" /* random >= depth ? */
         "bf .wa_ntrim \n"
         "sub %[dpth],r1 \n" /* yes: random -= depth; */
         ".wa_ntrim: \n"

         "mov.l .ashlsi3,r0 \n" /** rotate pattern **/
         "jsr @r0 \n" /* r4 -> r0, shift left by r5 */
         "mov r1,r5 \n"

         "mov %[dpth],r5 \n"
         "sub r1,r5 \n" /* r5 = depth - r1 */
         "mov.l .lshrsi3,r1 \n"
         "jsr @r1 \n" /* r4 -> r0, shift right by r5 */
         "mov r0,r1 \n" /* store previous result in r1 */

         "or r1,r0 \n" /* rotated_pattern = r0 | r1 */

         ".wa_skip: \n"
-        "mov.l r0,@-%[patp]\n" /* push on pattern stack */
+        "mov.l r0,@-%[patp] \n" /* push on pattern stack */

         "add %[stri],%[src] \n" /* src += stride; */
-        "add #-1,r3 \n" /* decrease loop count */
-        "cmp/pl r3 \n" /* loop count > 0? */
-        "bt .wa_loop \n" /* yes: loop */
+        "add #-1,r3 \n" /* loop 8 times (pixel block) */
+        "cmp/pl r3 \n"
+        "bt .wa_loop \n"
         : /* outputs */
         [src] "+r"(_src),
         [rnd] "+r"(_gray_random_buffer),
@@ -1095,79 +1227,79 @@ static void _writearray(unsigned char *address, const unsigned char *src,
     /* set the bits for all 8 pixels in all bytes according to the
      * precalculated patterns on the pattern stack */
     asm volatile (
-        "mov.l @%[patp]+,r1\n" /* pop all 8 patterns */
-        "mov.l @%[patp]+,r2\n"
-        "mov.l @%[patp]+,r3\n"
-        "mov.l @%[patp]+,r6\n"
-        "mov.l @%[patp]+,r7\n"
-        "mov.l @%[patp]+,r8\n"
-        "mov.l @%[patp]+,r9\n"
-        "mov.l @%[patp],r10\n"
+        "mov.l @%[patp]+,r1 \n" /* pop all 8 patterns */
+        "mov.l @%[patp]+,r2 \n"
+        "mov.l @%[patp]+,r3 \n"
+        "mov.l @%[patp]+,r6 \n"
+        "mov.l @%[patp]+,r7 \n"
+        "mov.l @%[patp]+,r8 \n"
+        "mov.l @%[patp]+,r9 \n"
+        "mov.l @%[patp],r10 \n"

         "not %[mask],%[mask] \n" /* "set" mask -> "keep" mask */
         "extu.b %[mask],%[mask] \n" /* mask out high bits */
-        "tst %[mask],%[mask] \n" /* nothing to keep? */
-        "bt .wa_sloop \n" /* yes: jump to short loop */
+        "tst %[mask],%[mask] \n"
+        "bt .wa_sloop \n" /* short loop if nothing to keep */

         ".wa_floop: \n" /** full loop (there are bits to keep)**/
         "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
         "rotcl r0 \n" /* rotate t bit into r0 */
         "shlr r2 \n"
         "rotcl r0 \n"
         "shlr r3 \n"
         "rotcl r0 \n"
         "shlr r6 \n"
         "rotcl r0 \n"
         "shlr r7 \n"
         "rotcl r0 \n"
         "shlr r8 \n"
         "rotcl r0 \n"
         "shlr r9 \n"
         "rotcl r0 \n"
         "shlr r10 \n"
         "mov.b @%[addr],%[rx] \n" /* read old value */
         "rotcl r0 \n"
-        "and %[mask],%[rx] \n" /* mask out unneeded bits */
+        "and %[mask],%[rx] \n" /* mask out replaced bits */
         "or %[rx],r0 \n" /* set new bits */
         "mov.b r0,@%[addr] \n" /* store value to bitplane */
         "add %[psiz],%[addr] \n" /* advance to next bitplane */
-        "cmp/hi %[addr],%[end] \n" /* last bitplane done? */
-        "bt .wa_floop \n" /* no: loop */
+        "cmp/hi %[addr],%[end] \n" /* loop for all bitplanes */
+        "bt .wa_floop \n"

         "bra .wa_end \n"
         "nop \n"

         /* References to C library routines used in the precalc block */
         ".align 2 \n"
         ".ashlsi3: \n" /* C library routine: */
         ".long ___ashlsi3 \n" /* shift r4 left by r5, result in r0 */
         ".lshrsi3: \n" /* C library routine: */
         ".long ___lshrsi3 \n" /* shift r4 right by r5, result in r0 */
         /* both routines preserve r4, destroy r5 and take ~16 cycles */

         ".wa_sloop: \n" /** short loop (nothing to keep) **/
         "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
         "rotcl r0 \n" /* rotate t bit into r0 */
         "shlr r2 \n"
         "rotcl r0 \n"
         "shlr r3 \n"
         "rotcl r0 \n"
         "shlr r6 \n"
         "rotcl r0 \n"
         "shlr r7 \n"
         "rotcl r0 \n"
         "shlr r8 \n"
         "rotcl r0 \n"
         "shlr r9 \n"
         "rotcl r0 \n"
         "shlr r10 \n"
         "rotcl r0 \n"
         "mov.b r0,@%[addr] \n" /* store byte to bitplane */
         "add %[psiz],%[addr] \n" /* advance to next bitplane */
-        "cmp/hi %[addr],%[end] \n" /* last bitplane done? */
-        "bt .wa_sloop \n" /* no: loop */
+        "cmp/hi %[addr],%[end] \n" /* loop for all bitplanes */
+        "bt .wa_sloop \n"

         ".wa_end: \n"
         : /* outputs */
         [addr]"+r"(addr),
         [mask]"+r"(_mask),
@@ -1189,43 +1321,43 @@ static void _writearray(unsigned char *address, const unsigned char *src,
     /* precalculate the bit patterns with random shifts
        for all 8 pixels and put them on an extra "stack" */
     asm volatile (
-        "moveq.l #8,%%d3 \n" /* loop count in d3: 8 pixels */
+        "moveq.l #8,%%d3 \n" /* loop count */

         ".wa_loop: \n" /** load pattern for pixel **/
         "clr.l %%d2 \n" /* pattern for skipped pixel must be 0 */
         "lsr.l #1,%[mask] \n" /* shift out lsb of mask */
         "bcc.b .wa_skip \n" /* skip this pixel */

         "clr.l %%d0 \n"
         "move.b (%[src]),%%d0 \n" /* load src byte */
         "move.b (%%d0:l:1,%[trns]),%%d0\n" /* idxtable into pattern index */
         "move.l (%%d0:l:4,%[bpat]),%%d2\n" /* d2 = bitpattern[byte]; */

         "mulu.w #75,%[rnd] \n" /* multiply by 75 */
         "add.l #74,%[rnd] \n" /* add another 74 */
         /* Since the lower bits are not very random: */
         "move.l %[rnd],%%d1 \n"
         "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */
-        "and.l %[rmsk],%%d1\n" /* mask out unneeded bits */
+        "and.l %[rmsk],%%d1 \n" /* mask out unneeded bits */

-        "cmp.l %[dpth],%%d1\n" /* random >= depth ? */
+        "cmp.l %[dpth],%%d1 \n" /* random >= depth ? */
         "blo.b .wa_ntrim \n"
-        "sub.l %[dpth],%%d1\n" /* yes: random -= depth; */
+        "sub.l %[dpth],%%d1 \n" /* yes: random -= depth; */
         ".wa_ntrim: \n"

-        "move.l %%d2,%%d0 \n"
+        "move.l %%d2,%%d0 \n" /** rotate pattern **/
         "lsl.l %%d1,%%d0 \n"
-        "sub.l %[dpth],%%d1\n"
+        "sub.l %[dpth],%%d1 \n"
         "neg.l %%d1 \n" /* d1 = depth - d1 */
         "lsr.l %%d1,%%d2 \n"
         "or.l %%d0,%%d2 \n"

         ".wa_skip: \n"
         "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */

         "add.l %[stri],%[src] \n" /* src += stride; */
-        "subq.l #1,%%d3 \n" /* decrease loop count */
-        "bne.b .wa_loop \n" /* yes: loop */
+        "subq.l #1,%%d3 \n" /* loop 8 times (pixel block) */
+        "bne.b .wa_loop \n"
         : /* outputs */
         [src] "+a"(_src),
         [patp]"+a"(pat_ptr),
@@ -1250,78 +1382,76 @@ static void _writearray(unsigned char *address, const unsigned char *src,
     asm volatile (
         "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n"
                                     /* pop all 8 patterns */
         "not.l %[mask] \n" /* "set" mask -> "keep" mask */
         "and.l #0xFF,%[mask] \n"
-        "beq.b .wa_sstart \n" /* yes: jump to short loop */
+        "beq.b .wa_sstart \n" /* short loop if nothing to keep */

         ".wa_floop: \n" /** full loop (there are bits to keep)**/
-        "clr.l %%d0 \n"
-        "lsr.l #1,%%d2 \n" /* shift out mask bit */
-        "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */
-        "lsr.l #1,%%d3 \n"
-        "addx.l %%d0,%%d0 \n"
-        "lsr.l #1,%%d4 \n"
-        "addx.l %%d0,%%d0 \n"
-        "lsr.l #1,%%d5 \n"
-        "addx.l %%d0,%%d0 \n"
-        "lsr.l #1,%%d6 \n"
-        "addx.l %%d0,%%d0 \n"
-        "move.l %%a0,%%d1 \n"
-        "lsr.l #1,%%d1 \n"
-        "addx.l %%d0,%%d0 \n"
-        "move.l %%d1,%%a0 \n"
-        "move.l %%a1,%%d1 \n"
-        "lsr.l #1,%%d1 \n"
-        "addx.l %%d0,%%d0 \n"
-        "move.l %%d1,%%a1 \n"
-        "move.l %[ax],%%d1 \n"
-        "lsr.l #1,%%d1 \n"
-        "addx.l %%d0,%%d0 \n"
-        "move.l %%d1,%[ax] \n"
+        "lsr.l #1,%%d2 \n" /* shift out pattern bit */
+        "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
+        "lsr.l #1,%%d3 \n"
+        "addx.l %%d0,%%d0 \n"
+        "lsr.l #1,%%d4 \n"
+        "addx.l %%d0,%%d0 \n"
+        "lsr.l #1,%%d5 \n"
+        "addx.l %%d0,%%d0 \n"
+        "lsr.l #1,%%d6 \n"
+        "addx.l %%d0,%%d0 \n"
+        "move.l %%a0,%%d1 \n"
+        "lsr.l #1,%%d1 \n"
+        "addx.l %%d0,%%d0 \n"
+        "move.l %%d1,%%a0 \n"
+        "move.l %%a1,%%d1 \n"
+        "lsr.l #1,%%d1 \n"
+        "addx.l %%d0,%%d0 \n"
+        "move.l %%d1,%%a1 \n"
+        "move.l %[ax],%%d1 \n"
+        "lsr.l #1,%%d1 \n"
+        "addx.l %%d0,%%d0 \n"
+        "move.l %%d1,%[ax] \n"

         "move.b (%[addr]),%%d1 \n" /* read old value */
-        "and.l %[mask],%%d1 \n" /* mask out unneeded bits */
+        "and.l %[mask],%%d1 \n" /* mask out replaced bits */
         "or.l %%d0,%%d1 \n" /* set new bits */
         "move.b %%d1,(%[addr]) \n" /* store value to bitplane */

         "add.l %[psiz],%[addr] \n" /* advance to next bitplane */
-        "cmp.l %[addr],%[end] \n" /* last bitplane done? */
-        "bhi.b .wa_floop \n" /* no: loop */
+        "cmp.l %[addr],%[end] \n" /* loop for all bitplanes */
+        "bhi.b .wa_floop \n"

         "bra.b .wa_end \n"

         ".wa_sstart: \n"
-        "move.l %%a0,%[mask]\n" /* mask isn't needed here, reuse reg */
+        "move.l %%a0,%[mask] \n" /* mask isn't needed here, reuse reg */

         ".wa_sloop: \n" /** short loop (nothing to keep) **/
-        "clr.l %%d0 \n"
-        "lsr.l #1,%%d2 \n" /* shift out mask bit */
-        "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */
-        "lsr.l #1,%%d3 \n"
-        "addx.l %%d0,%%d0 \n"
-        "lsr.l #1,%%d4 \n"
-        "addx.l %%d0,%%d0 \n"
-        "lsr.l #1,%%d5 \n"
-        "addx.l %%d0,%%d0 \n"
-        "lsr.l #1,%%d6 \n"
-        "addx.l %%d0,%%d0 \n"
-        "lsr.l #1,%[mask] \n"
-        "addx.l %%d0,%%d0 \n"
-        "move.l %%a1,%%d1 \n"
-        "lsr.l #1,%%d1 \n"
-        "addx.l %%d0,%%d0 \n"
-        "move.l %%d1,%%a1 \n"
-        "move.l %[ax],%%d1 \n"
-        "lsr.l #1,%%d1 \n"
-        "addx.l %%d0,%%d0 \n"
-        "move.l %%d1,%[ax] \n"
+        "lsr.l #1,%%d2 \n" /* shift out pattern bit */
+        "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
+        "lsr.l #1,%%d3 \n"
+        "addx.l %%d0,%%d0 \n"
+        "lsr.l #1,%%d4 \n"
+        "addx.l %%d0,%%d0 \n"
+        "lsr.l #1,%%d5 \n"
+        "addx.l %%d0,%%d0 \n"
+        "lsr.l #1,%%d6 \n"
+        "addx.l %%d0,%%d0 \n"
+        "lsr.l #1,%[mask] \n"
+        "addx.l %%d0,%%d0 \n"
+        "move.l %%a1,%%d1 \n"
+        "lsr.l #1,%%d1 \n"
+        "addx.l %%d0,%%d0 \n"
+        "move.l %%d1,%%a1 \n"
+        "move.l %[ax],%%d1 \n"
+        "lsr.l #1,%%d1 \n"
+        "addx.l %%d0,%%d0 \n"
+        "move.l %%d1,%[ax] \n"

         "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */
         "add.l %[psiz],%[addr] \n" /* advance to next bitplane */
-        "cmp.l %[addr],%[end] \n" /* last bitplane done? */
-        "bhi.b .wa_sloop \n" /* no: loop */
+        "cmp.l %[addr],%[end] \n" /* loop for all bitplanes */
+        "bhi.b .wa_sloop \n"

         ".wa_end: \n"
         : /* outputs */
         [addr]"+a"(addr),
         [mask]"+d"(_mask),
diff --git a/apps/plugins/lib/gray_scroll.c b/apps/plugins/lib/gray_scroll.c
index df5dc57044..8f60e7cef1 100644
--- a/apps/plugins/lib/gray_scroll.c
+++ b/apps/plugins/lib/gray_scroll.c
@@ -283,32 +283,32 @@ void gray_ub_scroll_left(int count)
     if (count)
     {
         asm (
-            "mov r4, %[high] \n"
+            "mov r4, %[high] \n" /* rows = height */

-            ".sl_rloop: \n"
-            "mov r5, %[addr] \n"
-            "mov r2, %[dpth] \n"
+            ".sl_rloop: \n" /* repeat for every row */
+            "mov r5, %[addr] \n" /* get start address */
+            "mov r2, %[dpth] \n" /* planes = depth */

-            ".sl_oloop: \n"
-            "mov r6, r5 \n"
-            "mov r3, %[cols] \n"
-            "mov r1, #0 \n"
+            ".sl_oloop: \n" /* repeat for every bitplane */
+            "mov r6, r5 \n" /* get start address */
+            "mov r3, %[cols] \n" /* cols = col_count */
+            "mov r1, #0 \n" /* fill with zero */

-            ".sl_iloop: \n"
-            "mov r1, r1, lsr #8 \n"
-            "ldrb r0, [r6, #-1]! \n"
-            "orr r1, r1, r0, lsl %[cnt] \n"
-            "strb r1, [r6] \n"
+            ".sl_iloop: \n" /* repeat for all cols */
+            "mov r1, r1, lsr #8 \n" /* shift right to get residue */
+            "ldrb r0, [r6, #-1]! \n" /* decrement addr & get data byte */
+            "orr r1, r1, r0, lsl %[cnt] \n" /* combine with last residue */
+            "strb r1, [r6] \n" /* store data */

-            "subs r3, r3, #1 \n"
+            "subs r3, r3, #1 \n" /* cols-- */
             "bne .sl_iloop \n"

-            "add r5, r5, %[psiz] \n"
-            "subs r2, r2, #1 \n"
+            "add r5, r5, %[psiz] \n" /* start_address += plane_size */
+            "subs r2, r2, #1 \n" /* planes-- */
             "bne .sl_oloop \n"

-            "add %[addr],%[addr],%[bwid] \n"
-            "subs r4, r4, #1 \n"
+            "add %[addr],%[addr],%[bwid] \n" /* start_address += bwidth */
+            "subs r4, r4, #1 \n" /* rows-- */
             "bne .sl_rloop \n"
             : /* outputs */
             : /* inputs */
@@ -364,32 +364,32 @@ void gray_ub_scroll_right(int count)
     if (count)
     {
         asm (
-            "mov r4, %[high] \n"
+            "mov r4, %[high] \n" /* rows = height */

-            ".sr_rloop: \n"
-            "mov r5, %[addr] \n"
-            "mov r2, %[dpth] \n"
+            ".sr_rloop: \n" /* repeat for every row */
+            "mov r5, %[addr] \n" /* get start address */
+            "mov r2, %[dpth] \n" /* planes = depth */

-            ".sr_oloop: \n"
-            "mov r6, r5 \n"
-            "mov r3, %[cols] \n"
-            "mov r1, #0 \n"
+            ".sr_oloop: \n" /* repeat for every bitplane */
+            "mov r6, r5 \n" /* get start address */
+            "mov r3, %[cols] \n" /* cols = col_count */
+            "mov r1, #0 \n" /* fill with zero */

-            ".sr_iloop: \n"
-            "ldrb r0, [r6] \n"
-            "orr r1, r0, r1, lsl #8 \n"
-            "mov r0, r1, lsr %[cnt] \n"
-            "strb r0, [r6], #1 \n"
+            ".sr_iloop: \n" /* repeat for all cols */
+            "ldrb r0, [r6] \n" /* get data byte */
+            "orr r1, r0, r1, lsl #8 \n" /* combine w/ old data shifted to 2nd byte */
+            "mov r0, r1, lsr %[cnt] \n" /* shift right */
+            "strb r0, [r6], #1 \n" /* store data, increment addr */

-            "subs r3, r3, #1 \n"
+            "subs r3, r3, #1 \n" /* cols-- */
             "bne .sr_iloop \n"

-            "add r5, r5, %[psiz] \n"
-            "subs r2, r2, #1 \n"
+            "add r5, r5, %[psiz] \n" /* start_address += plane_size */
+            "subs r2, r2, #1 \n" /* planes-- */
             "bne .sr_oloop \n"

-            "add %[addr],%[addr],%[bwid] \n"
-            "subs r4, r4, #1 \n"
+            "add %[addr],%[addr],%[bwid] \n" /* start_address += bwidth */
+            "subs r4, r4, #1 \n" /* rows-- */
             "bne .sr_rloop \n"
             : /* outputs */
             : /* inputs */
@@ -714,8 +714,7 @@ void gray_ub_scroll_up(int count)
714 "move.b (%%a1),%%d0 \n" /* get data byte */ 714 "move.b (%%a1),%%d0 \n" /* get data byte */
715 "lsl.l #8,%%d1 \n" /* old data to 2nd byte */ 715 "lsl.l #8,%%d1 \n" /* old data to 2nd byte */
716 "or.l %%d1,%%d0 \n" /* combine old data */ 716 "or.l %%d1,%%d0 \n" /* combine old data */
717 "clr.l %%d1 \n" 717 "move.l %%d0,%%d1 \n" /* keep data for next round */
718 "move.b %%d0,%%d1 \n" /* keep data for next round */
719 "lsr.l %[cnt],%%d0 \n" /* shift right */ 718 "lsr.l %[cnt],%%d0 \n" /* shift right */
720 "move.b %%d0,(%%a1) \n" /* store data */ 719 "move.b %%d0,(%%a1) \n" /* store data */
721 720
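The scroll loops mostly gain explanatory comments in this commit, but the technique is the same in all of gray_scroll.c's variants: each byte is shifted by the pixel count while the bits shifted out of one byte are carried into the neighbouring byte as a residue. The .sl_iloop shown above is equivalent to this C sketch (function name and parameters are illustrative only):

    /* Sketch: shift one row of one bitplane left by 'count' pixels.
     * 'row_end' points one past the last byte of the row. */
    static void scroll_row_left(unsigned char *row_end, int cols, int count)
    {
        unsigned data = 0;              /* residue from the previous byte */

        while (cols--)
        {
            data >>= 8;                 /* shift right to get residue */
            data |= (unsigned)*--row_end << count; /* combine with residue */
            *row_end = data;            /* store data (low byte) */
        }
    }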