summary | refs | log | tree | commit | diff
path: root/apps/plugins/lib/gray_draw.c
diff options
context:
space:
mode:
Diffstat (limited to 'apps/plugins/lib/gray_draw.c')
-rw-r--r-- apps/plugins/lib/gray_draw.c 496
1 files changed, 313 insertions, 183 deletions
diff --git a/apps/plugins/lib/gray_draw.c b/apps/plugins/lib/gray_draw.c
index 396046d1e6..7df3e13c56 100644
--- a/apps/plugins/lib/gray_draw.c
+++ b/apps/plugins/lib/gray_draw.c
@@ -876,8 +876,140 @@ static void _writearray(unsigned char *address, const unsigned char *src,
876 unsigned long pat_stack[8]; 876 unsigned long pat_stack[8];
877 unsigned long *pat_ptr = &pat_stack[8]; 877 unsigned long *pat_ptr = &pat_stack[8];
878 unsigned char *addr, *end; 878 unsigned char *addr, *end;
879#if 0 /* CPU specific asm versions will go here */ 879#ifdef CPU_ARM
880 const unsigned char *_src;
881 unsigned _mask, trash;
882
883 _mask = mask;
884 _src = src;
885
886 /* precalculate the bit patterns with random shifts
887 for all 8 pixels and put them on an extra "stack" */
888 asm volatile (
889 "mov %[mask], %[mask], lsl #24 \n" /* shift mask to upper byte */
890 "mov r3, #8 \n" /* loop count */
891
892 ".wa_loop: \n" /** load pattern for pixel **/
893 "mov r2, #0 \n" /* pattern for skipped pixel must be 0 */
894 "movs %[mask], %[mask], lsl #1 \n" /* shift out msb of mask */
895 "bcc .wa_skip \n" /* skip this pixel */
896
897 "ldrb r0, [%[src]] \n" /* load src byte */
898 "ldrb r0, [%[trns], r0] \n" /* idxtable into pattern index */
899 "ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */
900
901 "add r0, %[rnd], %[rnd], lsl #3 \n" /* multiply by 75 */
902 "add %[rnd], %[rnd], %[rnd], lsl #1 \n"
903 "add %[rnd], %[rnd], r0, lsl #3 \n"
904 "add %[rnd], %[rnd], #74 \n" /* add another 74 */
905 /* Since the lower bits are not very random: get bits 8..15 (need max. 5) */
906 "and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */
907
908 "cmp r1, %[dpth] \n" /* random >= depth ? */
909 "subhs r1, r1, %[dpth] \n" /* yes: random -= depth */
910
911 "mov r0, r2, lsl r1 \n" /** rotate pattern **/
912 "sub r1, %[dpth], r1 \n"
913 "orr r2, r0, r2, lsr r1 \n"
914
915 ".wa_skip: \n"
916 "str r2, [%[patp], #-4]! \n" /* push on pattern stack */
917
918 "add %[src], %[src], #1 \n" /* src++; */
919 "subs r3, r3, #1 \n" /* loop 8 times (pixel block) */
920 "bne .wa_loop \n"
921 : /* outputs */
922 [src] "+r"(_src),
923 [patp]"+r"(pat_ptr),
924 [rnd] "+r"(_gray_random_buffer),
925 [mask]"+r"(_mask)
926 : /* inputs */
927 [bpat]"r"(_gray_info.bitpattern),
928 [trns]"r"(_gray_info.idxtable),
929 [dpth]"r"(_gray_info.depth),
930 [rmsk]"r"(_gray_info.randmask)
931 : /* clobbers */
932 "r0", "r1", "r2", "r3"
933 );
934
935 addr = address;
936 end = addr + MULU16(_gray_info.depth, _gray_info.plane_size);
937 _mask = mask;
938
939 /* set the bits for all 8 pixels in all bytes according to the
940 * precalculated patterns on the pattern stack */
941 asm volatile (
942 "ldmia %[patp], {r2 - r8, %[rx]} \n" /* pop all 8 patterns */
943
944 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
945 "ands %[mask], %[mask], #0xff \n"
946 "beq .wa_sloop \n" /* short loop if nothing to keep */
947
948 ".wa_floop: \n" /** full loop (there are bits to keep)**/
949 "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
950 "adc r0, r0, r0 \n" /* put bit into LSB of byte */
951 "movs r8, r8, lsr #1 \n"
952 "adc r0, r0, r0 \n"
953 "movs r7, r7, lsr #1 \n"
954 "adc r0, r0, r0 \n"
955 "movs r6, r6, lsr #1 \n"
956 "adc r0, r0, r0 \n"
957 "movs r5, r5, lsr #1 \n"
958 "adc r0, r0, r0 \n"
959 "movs r4, r4, lsr #1 \n"
960 "adc r0, r0, r0 \n"
961 "movs r3, r3, lsr #1 \n"
962 "adc r0, r0, r0 \n"
963 "movs r2, r2, lsr #1 \n"
964 "adc r0, r0, r0 \n"
965
966 "ldrb r1, [%[addr]] \n" /* read old value */
967 "and r1, r1, %[mask] \n" /* mask out replaced bits */
968 "orr r1, r1, r0 \n" /* set new bits */
969 "strb r1, [%[addr]], %[psiz] \n" /* store value, advance to next bpl */
970
971 "cmp %[end], %[addr] \n" /* loop through all bitplanes */
972 "bne .wa_floop \n"
973
974 "b .wa_end \n"
975
976 ".wa_sloop: \n" /** short loop (nothing to keep) **/
977 "movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
978 "adc r0, r0, r0 \n" /* put bit into LSB of byte */
979 "movs r8, r8, lsr #1 \n"
980 "adc r0, r0, r0 \n"
981 "movs r7, r7, lsr #1 \n"
982 "adc r0, r0, r0 \n"
983 "movs r6, r6, lsr #1 \n"
984 "adc r0, r0, r0 \n"
985 "movs r5, r5, lsr #1 \n"
986 "adc r0, r0, r0 \n"
987 "movs r4, r4, lsr #1 \n"
988 "adc r0, r0, r0 \n"
989 "movs r3, r3, lsr #1 \n"
990 "adc r0, r0, r0 \n"
991 "movs r2, r2, lsr #1 \n"
992 "adc r0, r0, r0 \n"
993
994 "strb r0, [%[addr]], %[psiz] \n" /* store byte, advance to next bpl */
995
996 "cmp %[end], %[addr] \n" /* loop through all bitplanes */
997 "bne .wa_sloop \n"
998
999 ".wa_end: \n"
1000 : /* outputs */
1001 [addr]"+r"(addr),
1002 [mask]"+r"(_mask),
1003 [rx] "=&r"(trash)
1004 : /* inputs */
1005 [psiz]"r"(_gray_info.plane_size),
1006 [end] "r"(end),
1007 [patp]"[rx]"(pat_ptr)
1008 : /* clobbers */
1009 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
1010 );
880#else /* C version, for reference*/ 1011#else /* C version, for reference*/
1012#warning C version of _writearray() used
881 unsigned test = 0x80; 1013 unsigned test = 0x80;
882 int i; 1014 int i;
883 1015
@@ -1027,52 +1159,52 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1027 /* precalculate the bit patterns with random shifts 1159 /* precalculate the bit patterns with random shifts
1028 for all 8 pixels and put them on an extra "stack" */ 1160 for all 8 pixels and put them on an extra "stack" */
1029 asm volatile ( 1161 asm volatile (
1030 "mov #8,r3 \n" /* loop count in r3: 8 pixels */ 1162 "mov #8,r3 \n" /* loop count */
1031 1163
1032 ".wa_loop: \n" /** load pattern for pixel **/ 1164 ".wa_loop: \n" /** load pattern for pixel **/
1033 "mov #0,r0 \n" /* pattern for skipped pixel must be 0 */ 1165 "mov #0,r0 \n" /* pattern for skipped pixel must be 0 */
1034 "shlr %[mask] \n" /* shift out lsb of mask */ 1166 "shlr %[mask] \n" /* shift out lsb of mask */
1035 "bf .wa_skip \n" /* skip this pixel */ 1167 "bf .wa_skip \n" /* skip this pixel */
1036 1168
1037 "mov.b @%[src],r0 \n" /* load src byte */ 1169 "mov.b @%[src],r0 \n" /* load src byte */
1038 "extu.b r0,r0 \n" /* extend unsigned */ 1170 "extu.b r0,r0 \n" /* extend unsigned */
1039 "mov.b @(r0,%[trns]),r0\n" /* idxtable into pattern index */ 1171 "mov.b @(r0,%[trns]),r0\n" /* idxtable into pattern index */
1040 "extu.b r0,r0 \n" /* extend unsigned */ 1172 "extu.b r0,r0 \n" /* extend unsigned */
1041 "shll2 r0 \n" 1173 "shll2 r0 \n"
1042 "mov.l @(r0,%[bpat]),r4\n" /* r4 = bitpattern[byte]; */ 1174 "mov.l @(r0,%[bpat]),r4\n" /* r4 = bitpattern[byte]; */
1043 1175
1044 "mov #75,r0 \n" 1176 "mov #75,r0 \n"
1045 "mulu r0,%[rnd] \n" /* multiply by 75 */ 1177 "mulu r0,%[rnd] \n" /* multiply by 75 */
1046 "sts macl,%[rnd] \n" 1178 "sts macl,%[rnd] \n"
1047 "add #74,%[rnd] \n" /* add another 74 */ 1179 "add #74,%[rnd] \n" /* add another 74 */
1048 /* Since the lower bits are not very random: */ 1180 /* Since the lower bits are not very random: */
1049 "swap.b %[rnd],r1 \n" /* get bits 8..15 (need max. 5) */ 1181 "swap.b %[rnd],r1 \n" /* get bits 8..15 (need max. 5) */
1050 "and %[rmsk],r1 \n" /* mask out unneeded bits */ 1182 "and %[rmsk],r1 \n" /* mask out unneeded bits */
1051 1183
1052 "cmp/hs %[dpth],r1 \n" /* random >= depth ? */ 1184 "cmp/hs %[dpth],r1 \n" /* random >= depth ? */
1053 "bf .wa_ntrim \n" 1185 "bf .wa_ntrim \n"
1054 "sub %[dpth],r1 \n" /* yes: random -= depth; */ 1186 "sub %[dpth],r1 \n" /* yes: random -= depth; */
1055 ".wa_ntrim: \n" 1187 ".wa_ntrim: \n"
1056 1188
1057 "mov.l .ashlsi3,r0 \n" /** rotate pattern **/ 1189 "mov.l .ashlsi3,r0 \n" /** rotate pattern **/
1058 "jsr @r0 \n" /* r4 -> r0, shift left by r5 */ 1190 "jsr @r0 \n" /* r4 -> r0, shift left by r5 */
1059 "mov r1,r5 \n" 1191 "mov r1,r5 \n"
1060 1192
1061 "mov %[dpth],r5 \n" 1193 "mov %[dpth],r5 \n"
1062 "sub r1,r5 \n" /* r5 = depth - r1 */ 1194 "sub r1,r5 \n" /* r5 = depth - r1 */
1063 "mov.l .lshrsi3,r1 \n" 1195 "mov.l .lshrsi3,r1 \n"
1064 "jsr @r1 \n" /* r4 -> r0, shift right by r5 */ 1196 "jsr @r1 \n" /* r4 -> r0, shift right by r5 */
1065 "mov r0,r1 \n" /* store previous result in r1 */ 1197 "mov r0,r1 \n" /* store previous result in r1 */
1066 1198
1067 "or r1,r0 \n" /* rotated_pattern = r0 | r1 */ 1199 "or r1,r0 \n" /* rotated_pattern = r0 | r1 */
1068 1200
1069 ".wa_skip: \n" 1201 ".wa_skip: \n"
1070 "mov.l r0,@-%[patp]\n" /* push on pattern stack */ 1202 "mov.l r0,@-%[patp] \n" /* push on pattern stack */
1071 1203
1072 "add %[stri],%[src] \n" /* src += stride; */ 1204 "add %[stri],%[src] \n" /* src += stride; */
1073 "add #-1,r3 \n" /* decrease loop count */ 1205 "add #-1,r3 \n" /* loop 8 times (pixel block) */
1074 "cmp/pl r3 \n" /* loop count > 0? */ 1206 "cmp/pl r3 \n"
1075 "bt .wa_loop \n" /* yes: loop */ 1207 "bt .wa_loop \n"
1076 : /* outputs */ 1208 : /* outputs */
1077 [src] "+r"(_src), 1209 [src] "+r"(_src),
1078 [rnd] "+r"(_gray_random_buffer), 1210 [rnd] "+r"(_gray_random_buffer),
@@ -1095,79 +1227,79 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1095 /* set the bits for all 8 pixels in all bytes according to the 1227 /* set the bits for all 8 pixels in all bytes according to the
1096 * precalculated patterns on the pattern stack */ 1228 * precalculated patterns on the pattern stack */
1097 asm volatile ( 1229 asm volatile (
1098 "mov.l @%[patp]+,r1\n" /* pop all 8 patterns */ 1230 "mov.l @%[patp]+,r1 \n" /* pop all 8 patterns */
1099 "mov.l @%[patp]+,r2\n" 1231 "mov.l @%[patp]+,r2 \n"
1100 "mov.l @%[patp]+,r3\n" 1232 "mov.l @%[patp]+,r3 \n"
1101 "mov.l @%[patp]+,r6\n" 1233 "mov.l @%[patp]+,r6 \n"
1102 "mov.l @%[patp]+,r7\n" 1234 "mov.l @%[patp]+,r7 \n"
1103 "mov.l @%[patp]+,r8\n" 1235 "mov.l @%[patp]+,r8 \n"
1104 "mov.l @%[patp]+,r9\n" 1236 "mov.l @%[patp]+,r9 \n"
1105 "mov.l @%[patp],r10\n" 1237 "mov.l @%[patp],r10 \n"
1106 1238
1107 "not %[mask],%[mask] \n" /* "set" mask -> "keep" mask */ 1239 "not %[mask],%[mask] \n" /* "set" mask -> "keep" mask */
1108 "extu.b %[mask],%[mask] \n" /* mask out high bits */ 1240 "extu.b %[mask],%[mask] \n" /* mask out high bits */
1109 "tst %[mask],%[mask] \n" /* nothing to keep? */ 1241 "tst %[mask],%[mask] \n"
1110 "bt .wa_sloop \n" /* yes: jump to short loop */ 1242 "bt .wa_sloop \n" /* short loop if nothing to keep */
1111 1243
1112 ".wa_floop: \n" /** full loop (there are bits to keep)**/ 1244 ".wa_floop: \n" /** full loop (there are bits to keep)**/
1113 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ 1245 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
1114 "rotcl r0 \n" /* rotate t bit into r0 */ 1246 "rotcl r0 \n" /* rotate t bit into r0 */
1115 "shlr r2 \n" 1247 "shlr r2 \n"
1116 "rotcl r0 \n" 1248 "rotcl r0 \n"
1117 "shlr r3 \n" 1249 "shlr r3 \n"
1118 "rotcl r0 \n" 1250 "rotcl r0 \n"
1119 "shlr r6 \n" 1251 "shlr r6 \n"
1120 "rotcl r0 \n" 1252 "rotcl r0 \n"
1121 "shlr r7 \n" 1253 "shlr r7 \n"
1122 "rotcl r0 \n" 1254 "rotcl r0 \n"
1123 "shlr r8 \n" 1255 "shlr r8 \n"
1124 "rotcl r0 \n" 1256 "rotcl r0 \n"
1125 "shlr r9 \n" 1257 "shlr r9 \n"
1126 "rotcl r0 \n" 1258 "rotcl r0 \n"
1127 "shlr r10 \n" 1259 "shlr r10 \n"
1128 "mov.b @%[addr],%[rx] \n" /* read old value */ 1260 "mov.b @%[addr],%[rx] \n" /* read old value */
1129 "rotcl r0 \n" 1261 "rotcl r0 \n"
1130 "and %[mask],%[rx] \n" /* mask out unneeded bits */ 1262 "and %[mask],%[rx] \n" /* mask out replaced bits */
1131 "or %[rx],r0 \n" /* set new bits */ 1263 "or %[rx],r0 \n" /* set new bits */
1132 "mov.b r0,@%[addr] \n" /* store value to bitplane */ 1264 "mov.b r0,@%[addr] \n" /* store value to bitplane */
1133 "add %[psiz],%[addr] \n" /* advance to next bitplane */ 1265 "add %[psiz],%[addr] \n" /* advance to next bitplane */
1134 "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ 1266 "cmp/hi %[addr],%[end] \n" /* loop for all bitplanes */
1135 "bt .wa_floop \n" /* no: loop */ 1267 "bt .wa_floop \n"
1136 1268
1137 "bra .wa_end \n" 1269 "bra .wa_end \n"
1138 "nop \n" 1270 "nop \n"
1139 1271
1140 /* References to C library routines used in the precalc block */ 1272 /* References to C library routines used in the precalc block */
1141 ".align 2 \n" 1273 ".align 2 \n"
1142 ".ashlsi3: \n" /* C library routine: */ 1274 ".ashlsi3: \n" /* C library routine: */
1143 ".long ___ashlsi3 \n" /* shift r4 left by r5, result in r0 */ 1275 ".long ___ashlsi3 \n" /* shift r4 left by r5, result in r0 */
1144 ".lshrsi3: \n" /* C library routine: */ 1276 ".lshrsi3: \n" /* C library routine: */
1145 ".long ___lshrsi3 \n" /* shift r4 right by r5, result in r0 */ 1277 ".long ___lshrsi3 \n" /* shift r4 right by r5, result in r0 */
1146 /* both routines preserve r4, destroy r5 and take ~16 cycles */ 1278 /* both routines preserve r4, destroy r5 and take ~16 cycles */
1147 1279
1148 ".wa_sloop: \n" /** short loop (nothing to keep) **/ 1280 ".wa_sloop: \n" /** short loop (nothing to keep) **/
1149 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */ 1281 "shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
1150 "rotcl r0 \n" /* rotate t bit into r0 */ 1282 "rotcl r0 \n" /* rotate t bit into r0 */
1151 "shlr r2 \n" 1283 "shlr r2 \n"
1152 "rotcl r0 \n" 1284 "rotcl r0 \n"
1153 "shlr r3 \n" 1285 "shlr r3 \n"
1154 "rotcl r0 \n" 1286 "rotcl r0 \n"
1155 "shlr r6 \n" 1287 "shlr r6 \n"
1156 "rotcl r0 \n" 1288 "rotcl r0 \n"
1157 "shlr r7 \n" 1289 "shlr r7 \n"
1158 "rotcl r0 \n" 1290 "rotcl r0 \n"
1159 "shlr r8 \n" 1291 "shlr r8 \n"
1160 "rotcl r0 \n" 1292 "rotcl r0 \n"
1161 "shlr r9 \n" 1293 "shlr r9 \n"
1162 "rotcl r0 \n" 1294 "rotcl r0 \n"
1163 "shlr r10 \n" 1295 "shlr r10 \n"
1164 "rotcl r0 \n" 1296 "rotcl r0 \n"
1165 "mov.b r0,@%[addr] \n" /* store byte to bitplane */ 1297 "mov.b r0,@%[addr] \n" /* store byte to bitplane */
1166 "add %[psiz],%[addr] \n" /* advance to next bitplane */ 1298 "add %[psiz],%[addr] \n" /* advance to next bitplane */
1167 "cmp/hi %[addr],%[end] \n" /* last bitplane done? */ 1299 "cmp/hi %[addr],%[end] \n" /* loop for all bitplanes */
1168 "bt .wa_sloop \n" /* no: loop */ 1300 "bt .wa_sloop \n"
1169 1301
1170 ".wa_end: \n" 1302 ".wa_end: \n"
1171 : /* outputs */ 1303 : /* outputs */
1172 [addr]"+r"(addr), 1304 [addr]"+r"(addr),
1173 [mask]"+r"(_mask), 1305 [mask]"+r"(_mask),
@@ -1189,43 +1321,43 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1189 /* precalculate the bit patterns with random shifts 1321 /* precalculate the bit patterns with random shifts
1190 for all 8 pixels and put them on an extra "stack" */ 1322 for all 8 pixels and put them on an extra "stack" */
1191 asm volatile ( 1323 asm volatile (
1192 "moveq.l #8,%%d3 \n" /* loop count in d3: 8 pixels */ 1324 "moveq.l #8,%%d3 \n" /* loop count */
1193 1325
1194 ".wa_loop: \n" /** load pattern for pixel **/ 1326 ".wa_loop: \n" /** load pattern for pixel **/
1195 "clr.l %%d2 \n" /* pattern for skipped pixel must be 0 */ 1327 "clr.l %%d2 \n" /* pattern for skipped pixel must be 0 */
1196 "lsr.l #1,%[mask] \n" /* shift out lsb of mask */ 1328 "lsr.l #1,%[mask] \n" /* shift out lsb of mask */
1197 "bcc.b .wa_skip \n" /* skip this pixel */ 1329 "bcc.b .wa_skip \n" /* skip this pixel */
1198 1330
1199 "clr.l %%d0 \n" 1331 "clr.l %%d0 \n"
1200 "move.b (%[src]),%%d0 \n" /* load src byte */ 1332 "move.b (%[src]),%%d0 \n" /* load src byte */
1201 "move.b (%%d0:l:1,%[trns]),%%d0\n" /* idxtable into pattern index */ 1333 "move.b (%%d0:l:1,%[trns]),%%d0\n" /* idxtable into pattern index */
1202 "move.l (%%d0:l:4,%[bpat]),%%d2\n" /* d2 = bitpattern[byte]; */ 1334 "move.l (%%d0:l:4,%[bpat]),%%d2\n" /* d2 = bitpattern[byte]; */
1203 1335
1204 "mulu.w #75,%[rnd] \n" /* multiply by 75 */ 1336 "mulu.w #75,%[rnd] \n" /* multiply by 75 */
1205 "add.l #74,%[rnd] \n" /* add another 74 */ 1337 "add.l #74,%[rnd] \n" /* add another 74 */
1206 /* Since the lower bits are not very random: */ 1338 /* Since the lower bits are not very random: */
1207 "move.l %[rnd],%%d1 \n" 1339 "move.l %[rnd],%%d1 \n"
1208 "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */ 1340 "lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */
1209 "and.l %[rmsk],%%d1\n" /* mask out unneeded bits */ 1341 "and.l %[rmsk],%%d1 \n" /* mask out unneeded bits */
1210 1342
1211 "cmp.l %[dpth],%%d1\n" /* random >= depth ? */ 1343 "cmp.l %[dpth],%%d1 \n" /* random >= depth ? */
1212 "blo.b .wa_ntrim \n" 1344 "blo.b .wa_ntrim \n"
1213 "sub.l %[dpth],%%d1\n" /* yes: random -= depth; */ 1345 "sub.l %[dpth],%%d1 \n" /* yes: random -= depth; */
1214 ".wa_ntrim: \n" 1346 ".wa_ntrim: \n"
1215 1347
1216 "move.l %%d2,%%d0 \n" 1348 "move.l %%d2,%%d0 \n" /** rotate pattern **/
1217 "lsl.l %%d1,%%d0 \n" 1349 "lsl.l %%d1,%%d0 \n"
1218 "sub.l %[dpth],%%d1\n" 1350 "sub.l %[dpth],%%d1 \n"
1219 "neg.l %%d1 \n" /* d1 = depth - d1 */ 1351 "neg.l %%d1 \n" /* d1 = depth - d1 */
1220 "lsr.l %%d1,%%d2 \n" 1352 "lsr.l %%d1,%%d2 \n"
1221 "or.l %%d0,%%d2 \n" 1353 "or.l %%d0,%%d2 \n"
1222 1354
1223 ".wa_skip: \n" 1355 ".wa_skip: \n"
1224 "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */ 1356 "move.l %%d2,-(%[patp]) \n" /* push on pattern stack */
1225 1357
1226 "add.l %[stri],%[src] \n" /* src += stride; */ 1358 "add.l %[stri],%[src] \n" /* src += stride; */
1227 "subq.l #1,%%d3 \n" /* decrease loop count */ 1359 "subq.l #1,%%d3 \n" /* loop 8 times (pixel block) */
1228 "bne.b .wa_loop \n" /* yes: loop */ 1360 "bne.b .wa_loop \n"
1229 : /* outputs */ 1361 : /* outputs */
1230 [src] "+a"(_src), 1362 [src] "+a"(_src),
1231 [patp]"+a"(pat_ptr), 1363 [patp]"+a"(pat_ptr),
@@ -1250,78 +1382,76 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1250 asm volatile ( 1382 asm volatile (
1251 "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n" 1383 "movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n"
1252 /* pop all 8 patterns */ 1384 /* pop all 8 patterns */
1253 "not.l %[mask] \n" /* "set" mask -> "keep" mask */ 1385 "not.l %[mask] \n" /* "set" mask -> "keep" mask */
1254 "and.l #0xFF,%[mask] \n" 1386 "and.l #0xFF,%[mask] \n"
1255 "beq.b .wa_sstart \n" /* yes: jump to short loop */ 1387 "beq.b .wa_sstart \n" /* short loop if nothing to keep */
1256 1388
1257 ".wa_floop: \n" /** full loop (there are bits to keep)**/ 1389 ".wa_floop: \n" /** full loop (there are bits to keep)**/
1258 "clr.l %%d0 \n" 1390 "lsr.l #1,%%d2 \n" /* shift out pattern bit */
1259 "lsr.l #1,%%d2 \n" /* shift out mask bit */ 1391 "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
1260 "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ 1392 "lsr.l #1,%%d3 \n"
1261 "lsr.l #1,%%d3 \n" 1393 "addx.l %%d0,%%d0 \n"
1262 "addx.l %%d0,%%d0 \n" 1394 "lsr.l #1,%%d4 \n"
1263 "lsr.l #1,%%d4 \n" 1395 "addx.l %%d0,%%d0 \n"
1264 "addx.l %%d0,%%d0 \n" 1396 "lsr.l #1,%%d5 \n"
1265 "lsr.l #1,%%d5 \n" 1397 "addx.l %%d0,%%d0 \n"
1266 "addx.l %%d0,%%d0 \n" 1398 "lsr.l #1,%%d6 \n"
1267 "lsr.l #1,%%d6 \n" 1399 "addx.l %%d0,%%d0 \n"
1268 "addx.l %%d0,%%d0 \n" 1400 "move.l %%a0,%%d1 \n"
1269 "move.l %%a0,%%d1 \n" 1401 "lsr.l #1,%%d1 \n"
1270 "lsr.l #1,%%d1 \n" 1402 "addx.l %%d0,%%d0 \n"
1271 "addx.l %%d0,%%d0 \n" 1403 "move.l %%d1,%%a0 \n"
1272 "move.l %%d1,%%a0 \n" 1404 "move.l %%a1,%%d1 \n"
1273 "move.l %%a1,%%d1 \n" 1405 "lsr.l #1,%%d1 \n"
1274 "lsr.l #1,%%d1 \n" 1406 "addx.l %%d0,%%d0 \n"
1275 "addx.l %%d0,%%d0 \n" 1407 "move.l %%d1,%%a1 \n"
1276 "move.l %%d1,%%a1 \n" 1408 "move.l %[ax],%%d1 \n"
1277 "move.l %[ax],%%d1 \n" 1409 "lsr.l #1,%%d1 \n"
1278 "lsr.l #1,%%d1 \n" 1410 "addx.l %%d0,%%d0 \n"
1279 "addx.l %%d0,%%d0 \n" 1411 "move.l %%d1,%[ax] \n"
1280 "move.l %%d1,%[ax] \n"
1281 1412
1282 "move.b (%[addr]),%%d1 \n" /* read old value */ 1413 "move.b (%[addr]),%%d1 \n" /* read old value */
1283 "and.l %[mask],%%d1 \n" /* mask out unneeded bits */ 1414 "and.l %[mask],%%d1 \n" /* mask out replaced bits */
1284 "or.l %%d0,%%d1 \n" /* set new bits */ 1415 "or.l %%d0,%%d1 \n" /* set new bits */
1285 "move.b %%d1,(%[addr]) \n" /* store value to bitplane */ 1416 "move.b %%d1,(%[addr]) \n" /* store value to bitplane */
1286 1417
1287 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ 1418 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */
1288 "cmp.l %[addr],%[end] \n" /* last bitplane done? */ 1419 "cmp.l %[addr],%[end] \n" /* loop for all bitplanes */
1289 "bhi.b .wa_floop \n" /* no: loop */ 1420 "bhi.b .wa_floop \n"
1290 1421
1291 "bra.b .wa_end \n" 1422 "bra.b .wa_end \n"
1292 1423
1293 ".wa_sstart: \n" 1424 ".wa_sstart: \n"
1294 "move.l %%a0,%[mask]\n" /* mask isn't needed here, reuse reg */ 1425 "move.l %%a0,%[mask] \n" /* mask isn't needed here, reuse reg */
1295 1426
1296 ".wa_sloop: \n" /** short loop (nothing to keep) **/ 1427 ".wa_sloop: \n" /** short loop (nothing to keep) **/
1297 "clr.l %%d0 \n" 1428 "lsr.l #1,%%d2 \n" /* shift out pattern bit */
1298 "lsr.l #1,%%d2 \n" /* shift out mask bit */ 1429 "addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
1299 "addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */ 1430 "lsr.l #1,%%d3 \n"
1300 "lsr.l #1,%%d3 \n" 1431 "addx.l %%d0,%%d0 \n"
1301 "addx.l %%d0,%%d0 \n" 1432 "lsr.l #1,%%d4 \n"
1302 "lsr.l #1,%%d4 \n" 1433 "addx.l %%d0,%%d0 \n"
1303 "addx.l %%d0,%%d0 \n" 1434 "lsr.l #1,%%d5 \n"
1304 "lsr.l #1,%%d5 \n" 1435 "addx.l %%d0,%%d0 \n"
1305 "addx.l %%d0,%%d0 \n" 1436 "lsr.l #1,%%d6 \n"
1306 "lsr.l #1,%%d6 \n" 1437 "addx.l %%d0,%%d0 \n"
1307 "addx.l %%d0,%%d0 \n" 1438 "lsr.l #1,%[mask] \n"
1308 "lsr.l #1,%[mask] \n" 1439 "addx.l %%d0,%%d0 \n"
1309 "addx.l %%d0,%%d0 \n" 1440 "move.l %%a1,%%d1 \n"
1310 "move.l %%a1,%%d1 \n" 1441 "lsr.l #1,%%d1 \n"
1311 "lsr.l #1,%%d1 \n" 1442 "addx.l %%d0,%%d0 \n"
1312 "addx.l %%d0,%%d0 \n" 1443 "move.l %%d1,%%a1 \n"
1313 "move.l %%d1,%%a1 \n" 1444 "move.l %[ax],%%d1 \n"
1314 "move.l %[ax],%%d1 \n" 1445 "lsr.l #1,%%d1 \n"
1315 "lsr.l #1,%%d1 \n" 1446 "addx.l %%d0,%%d0 \n"
1316 "addx.l %%d0,%%d0 \n" 1447 "move.l %%d1,%[ax] \n"
1317 "move.l %%d1,%[ax] \n"
1318 1448
1319 "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */ 1449 "move.b %%d0,(%[addr]) \n" /* store byte to bitplane */
1320 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */ 1450 "add.l %[psiz],%[addr] \n" /* advance to next bitplane */
1321 "cmp.l %[addr],%[end] \n" /* last bitplane done? */ 1451 "cmp.l %[addr],%[end] \n" /* loop for all bitplanes */
1322 "bhi.b .wa_sloop \n" /* no: loop */ 1452 "bhi.b .wa_sloop \n"
1323 1453
1324 ".wa_end: \n" 1454 ".wa_end: \n"
1325 : /* outputs */ 1455 : /* outputs */
1326 [addr]"+a"(addr), 1456 [addr]"+a"(addr),
1327 [mask]"+d"(_mask), 1457 [mask]"+d"(_mask),