path: root/apps/plugins/lib/gray_draw.c
author    Jens Arnold <amiconn@rockbox.org>    2006-08-11 23:40:05 +0000
committer Jens Arnold <amiconn@rockbox.org>    2006-08-11 23:40:05 +0000
commit    e6ed58f6c545719ed804c9ad513496f8d0fa0286 (patch)
tree      db096eb64de3ec7c33682c5bb50579784c5ca633 /apps/plugins/lib/gray_draw.c
parent    22860a910a7ddbb9b811fce4cf971e097c8e6eac (diff)
download  rockbox-e6ed58f6c545719ed804c9ad513496f8d0fa0286.tar.gz
          rockbox-e6ed58f6c545719ed804c9ad513496f8d0fa0286.zip
Better implementation of the new greyscale algorithm. A bit faster (+2% on iPod and Archos, +7% on Iriver H1x0), and more straightforward. * Fixed garbage display when a plugin switches on the overlay before drawing anything.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10534 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/plugins/lib/gray_draw.c')
-rw-r--r-- apps/plugins/lib/gray_draw.c | 494
1 file changed, 233 insertions(+), 261 deletions(-)
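For orientation before the assembly below: the rewrite replaces the old scheme of pointing behind the last plane and walking the bitplanes backwards with a forward walk using post-increment addressing, entering an unrolled 8-plane streak at a computed offset (the .wa_ftable / .wa_stable jump tables) so that a possibly partial first round and all full rounds share the same code. The following is only a rough C sketch of that control flow, with made-up names (write_planes_sketch, pat, keep) and the per-register bit shuffling of the real code simplified to a flat byte stream:

/* Hypothetical sketch of the new control flow, not the actual Rockbox code
 * (which is the inline ARM/SH/ColdFire assembly in the diff below). */
static void write_planes_sketch(unsigned char *addr, const unsigned char *pat,
                                int depth, long plane_size, unsigned char keep)
{
    int entry = ((depth - 1) & 7) + 1;  /* planes written in the first round */

    do
    {
        switch (entry)  /* computed entry into the streak, like .wa_ftable */
        {
        case 8: *addr = (*addr & keep) | *pat++; addr += plane_size;
                /* fall through */
        case 7: *addr = (*addr & keep) | *pat++; addr += plane_size;
                /* fall through */
        case 6: *addr = (*addr & keep) | *pat++; addr += plane_size;
                /* fall through */
        case 5: *addr = (*addr & keep) | *pat++; addr += plane_size;
                /* fall through */
        case 4: *addr = (*addr & keep) | *pat++; addr += plane_size;
                /* fall through */
        case 3: *addr = (*addr & keep) | *pat++; addr += plane_size;
                /* fall through */
        case 2: *addr = (*addr & keep) | *pat++; addr += plane_size;
                /* fall through */
        case 1: *addr = (*addr & keep) | *pat++; addr += plane_size;
        }
        depth -= 8;     /* mirrors "subs %[dpth], %[dpth], #8" */
        entry = 8;      /* every later round writes a full 8 planes */
    }
    while (depth > 0);
}

The real routine additionally keeps a second, store-only streak (.wa_s1 .. .wa_s8) for the common case where the "keep" mask is zero, i.e. all 8 pixels are drawn and the old plane bytes need not be read back.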
diff --git a/apps/plugins/lib/gray_draw.c b/apps/plugins/lib/gray_draw.c
index dcc65bdd09..bd8ea4f1ce 100644
--- a/apps/plugins/lib/gray_draw.c
+++ b/apps/plugins/lib/gray_draw.c
@@ -1002,103 +1002,94 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1002 "and r0, r0, %[rx] \n" 1002 "and r0, r0, %[rx] \n"
1003 "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ 1003 "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */
1004 "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ 1004 "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
1005
1006 "sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/
1007 "and r0, r0, #7 \n"
1008 "add pc, pc, r0, lsl #2 \n" /* jump into shift streak */
1009 "mov r8, r8, lsr #8 \n" /* r8: never reached */
1010 "mov r7, r7, lsr #8 \n"
1011 "mov r6, r6, lsr #8 \n"
1012 "mov r5, r5, lsr #8 \n"
1013 "mov r4, r4, lsr #8 \n"
1014 "mov r3, r3, lsr #8 \n"
1015 "mov r2, r2, lsr #8 \n"
1016 "mov r1, r1, lsr #8 \n"
1005 1017
1006 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ 1018 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
1007 "ands %[mask], %[mask], #0xff \n" 1019 "ands %[mask], %[mask], #0xff \n"
1008 "beq .wa_sloop \n" /* short loop if no bits to keep */ 1020 "beq .wa_sstart \n" /* short loop if no bits to keep */
1009
1010 ".wa_floop: \n" /** full loop (bits to keep)**/
1011 "cmp %[dpth], #8 \n" /* 8 planes or more left? */
1012 "bhs .wa_f8 \n"
1013 1021
1014 "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ 1022 "ldrb r0, [pc, r0] \n" /* jump into full loop */
1015 "add %[addr], %[addr], r0 \n" /* for this round */
1016
1017
1018 "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */
1019 "add pc, pc, r0 \n" 1023 "add pc, pc, r0 \n"
1020 ".wa_ftable: \n" 1024 ".wa_ftable: \n"
1021 ".byte .wa_f0 - .wa_ftable - 4 \n" /* [jump tables are tricky] */ 1025 ".byte .wa_f1 - .wa_ftable - 4 \n" /* [jump tables are tricky] */
1022 ".byte .wa_f1 - .wa_ftable - 4 \n"
1023 ".byte .wa_f2 - .wa_ftable - 4 \n" 1026 ".byte .wa_f2 - .wa_ftable - 4 \n"
1024 ".byte .wa_f3 - .wa_ftable - 4 \n" 1027 ".byte .wa_f3 - .wa_ftable - 4 \n"
1025 ".byte .wa_f4 - .wa_ftable - 4 \n" 1028 ".byte .wa_f4 - .wa_ftable - 4 \n"
1026 ".byte .wa_f5 - .wa_ftable - 4 \n" 1029 ".byte .wa_f5 - .wa_ftable - 4 \n"
1027 ".byte .wa_f6 - .wa_ftable - 4 \n" 1030 ".byte .wa_f6 - .wa_ftable - 4 \n"
1028 ".byte .wa_f7 - .wa_ftable - 4 \n" 1031 ".byte .wa_f7 - .wa_ftable - 4 \n"
1032 ".byte .wa_f8 - .wa_ftable - 4 \n"
1029 1033
1034 ".wa_floop: \n" /** full loop (bits to keep)**/
1030 ".wa_f8: \n" 1035 ".wa_f8: \n"
1031 "add %[addr], %[addr], %[psiz], lsl #3 \n" 1036 "ldrb r0, [%[addr]] \n" /* load old byte */
1032 /* Point behind the last plane for this round. Note: We're using the
1033 * registers backwards in order to reuse the streak for the last round.
1034 * Therefore we need to go thru the bitplanes backwards too, otherwise
1035 * the bit order would be destroyed which results in more flicker. */
1036 "ldrb r0, [%[addr], -%[psiz]]! \n" /* load old byte */
1037 "and r0, r0, %[mask] \n" /* mask out replaced bits */ 1037 "and r0, r0, %[mask] \n" /* mask out replaced bits */
1038 "orr r0, r0, r8 \n" /* set new bits */ 1038 "orr r0, r0, r1 \n" /* set new bits */
1039 "strb r0, [%[addr]] \n" /* store byte */ 1039 "strb r0, [%[addr]], %[psiz] \n" /* store byte */
1040 "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ 1040 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
1041 ".wa_f7: \n" 1041 ".wa_f7: \n"
1042 "ldrb r0, [%[addr], -%[psiz]]! \n" 1042 "ldrb r0, [%[addr]] \n"
1043 "and r0, r0, %[mask] \n" 1043 "and r0, r0, %[mask] \n"
1044 "orr r0, r0, r7 \n" 1044 "orr r0, r0, r2 \n"
1045 "strb r0, [%[addr]] \n" 1045 "strb r0, [%[addr]], %[psiz] \n"
1046 "mov r7, r7, lsr #8 \n" 1046 "mov r2, r2, lsr #8 \n"
1047 ".wa_f6: \n" 1047 ".wa_f6: \n"
1048 "ldrb r0, [%[addr], -%[psiz]]! \n" 1048 "ldrb r0, [%[addr]] \n"
1049 "and r0, r0, %[mask] \n" 1049 "and r0, r0, %[mask] \n"
1050 "orr r0, r0, r6 \n" 1050 "orr r0, r0, r3 \n"
1051 "strb r0, [%[addr]] \n" 1051 "strb r0, [%[addr]], %[psiz] \n"
1052 "mov r6, r6, lsr #8 \n" 1052 "mov r3, r3, lsr #8 \n"
1053 ".wa_f5: \n" 1053 ".wa_f5: \n"
1054 "ldrb r0, [%[addr], -%[psiz]]! \n" 1054 "ldrb r0, [%[addr]] \n"
1055 "and r0, r0, %[mask] \n"
1056 "orr r0, r0, r5 \n"
1057 "strb r0, [%[addr]] \n"
1058 "mov r5, r5, lsr #8 \n"
1059 ".wa_f4: \n"
1060 "ldrb r0, [%[addr], -%[psiz]]! \n"
1061 "and r0, r0, %[mask] \n" 1055 "and r0, r0, %[mask] \n"
1062 "orr r0, r0, r4 \n" 1056 "orr r0, r0, r4 \n"
1063 "strb r0, [%[addr]] \n" 1057 "strb r0, [%[addr]], %[psiz] \n"
1064 "mov r4, r4, lsr #8 \n" 1058 "mov r4, r4, lsr #8 \n"
1059 ".wa_f4: \n"
1060 "ldrb r0, [%[addr]] \n"
1061 "and r0, r0, %[mask] \n"
1062 "orr r0, r0, r5 \n"
1063 "strb r0, [%[addr]], %[psiz] \n"
1064 "mov r5, r5, lsr #8 \n"
1065 ".wa_f3: \n" 1065 ".wa_f3: \n"
1066 "ldrb r0, [%[addr], -%[psiz]]! \n" 1066 "ldrb r0, [%[addr]] \n"
1067 "and r0, r0, %[mask] \n" 1067 "and r0, r0, %[mask] \n"
1068 "orr r0, r0, r3 \n" 1068 "orr r0, r0, r6 \n"
1069 "strb r0, [%[addr]] \n" 1069 "strb r0, [%[addr]], %[psiz] \n"
1070 "mov r3, r3, lsr #8 \n" 1070 "mov r6, r6, lsr #8 \n"
1071 ".wa_f2: \n" 1071 ".wa_f2: \n"
1072 "ldrb r0, [%[addr], -%[psiz]]! \n" 1072 "ldrb r0, [%[addr]] \n"
1073 "and r0, r0, %[mask] \n" 1073 "and r0, r0, %[mask] \n"
1074 "orr r0, r0, r2 \n" 1074 "orr r0, r0, r7 \n"
1075 "strb r0, [%[addr]] \n" 1075 "strb r0, [%[addr]], %[psiz] \n"
1076 "mov r2, r2, lsr #8 \n" 1076 "mov r7, r7, lsr #8 \n"
1077 ".wa_f1: \n" 1077 ".wa_f1: \n"
1078 "ldrb r0, [%[addr], -%[psiz]]! \n" 1078 "ldrb r0, [%[addr]] \n"
1079 "and r0, r0, %[mask] \n" 1079 "and r0, r0, %[mask] \n"
1080 "orr r0, r0, r1 \n" 1080 "orr r0, r0, r8 \n"
1081 "strb r0, [%[addr]] \n" 1081 "strb r0, [%[addr]], %[psiz] \n"
1082 "mov r1, r1, lsr #8 \n" 1082 "mov r8, r8, lsr #8 \n"
1083 ".wa_f0: \n"
1084 1083
1085 "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */
1086 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ 1084 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
1087 "bhi .wa_floop \n" 1085 "bhi .wa_floop \n"
1088 1086
1089 "b .wa_end \n" 1087 "b .wa_end \n"
1090 1088
1091 ".wa_sloop: \n" /** short loop (nothing to keep) **/ 1089 ".wa_sstart: \n"
1092 "cmp %[dpth], #8 \n" /* 8 planes or more left? */ 1090 "ldrb r0, [pc, r0] \n" /* jump into short loop*/
1093 "bhs .wa_s8 \n"
1094
1095 "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */
1096 "add %[addr], %[addr], r0 \n" /* for this round */
1097
1098 "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */
1099 "add pc, pc, r0 \n" 1091 "add pc, pc, r0 \n"
1100 ".wa_stable: \n" 1092 ".wa_stable: \n"
1101 ".byte .wa_s0 - .wa_stable - 4 \n"
1102 ".byte .wa_s1 - .wa_stable - 4 \n" 1093 ".byte .wa_s1 - .wa_stable - 4 \n"
1103 ".byte .wa_s2 - .wa_stable - 4 \n" 1094 ".byte .wa_s2 - .wa_stable - 4 \n"
1104 ".byte .wa_s3 - .wa_stable - 4 \n" 1095 ".byte .wa_s3 - .wa_stable - 4 \n"
@@ -1106,36 +1097,34 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1106 ".byte .wa_s5 - .wa_stable - 4 \n" 1097 ".byte .wa_s5 - .wa_stable - 4 \n"
1107 ".byte .wa_s6 - .wa_stable - 4 \n" 1098 ".byte .wa_s6 - .wa_stable - 4 \n"
1108 ".byte .wa_s7 - .wa_stable - 4 \n" 1099 ".byte .wa_s7 - .wa_stable - 4 \n"
1100 ".byte .wa_s8 - .wa_stable - 4 \n"
1109 1101
1102 ".wa_sloop: \n" /** short loop (nothing to keep) **/
1110 ".wa_s8: \n" 1103 ".wa_s8: \n"
1111 "add %[addr], %[addr], %[psiz], lsl #3 \n" 1104 "strb r1, [%[addr]], %[psiz] \n" /* store byte */
1112 /* Point behind the last plane for this round. See above. */ 1105 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
1113 "strb r8, [%[addr], -%[psiz]]! \n" /* store byte */
1114 "mov r8, r8, lsr #8 \n" /* shift out used-up byte */
1115 ".wa_s7: \n" 1106 ".wa_s7: \n"
1116 "strb r7, [%[addr], -%[psiz]]! \n" 1107 "strb r2, [%[addr]], %[psiz] \n"
1117 "mov r7, r7, lsr #8 \n" 1108 "mov r2, r2, lsr #8 \n"
1118 ".wa_s6: \n" 1109 ".wa_s6: \n"
1119 "strb r6, [%[addr], -%[psiz]]! \n" 1110 "strb r3, [%[addr]], %[psiz] \n"
1120 "mov r6, r6, lsr #8 \n" 1111 "mov r3, r3, lsr #8 \n"
1121 ".wa_s5: \n" 1112 ".wa_s5: \n"
1122 "strb r5, [%[addr], -%[psiz]]! \n" 1113 "strb r4, [%[addr]], %[psiz] \n"
1123 "mov r5, r5, lsr #8 \n"
1124 ".wa_s4: \n"
1125 "strb r4, [%[addr], -%[psiz]]! \n"
1126 "mov r4, r4, lsr #8 \n" 1114 "mov r4, r4, lsr #8 \n"
1115 ".wa_s4: \n"
1116 "strb r5, [%[addr]], %[psiz] \n"
1117 "mov r5, r5, lsr #8 \n"
1127 ".wa_s3: \n" 1118 ".wa_s3: \n"
1128 "strb r3, [%[addr], -%[psiz]]! \n" 1119 "strb r6, [%[addr]], %[psiz] \n"
1129 "mov r3, r3, lsr #8 \n" 1120 "mov r6, r6, lsr #8 \n"
1130 ".wa_s2: \n" 1121 ".wa_s2: \n"
1131 "strb r2, [%[addr], -%[psiz]]! \n" 1122 "strb r7, [%[addr]], %[psiz] \n"
1132 "mov r2, r2, lsr #8 \n" 1123 "mov r7, r7, lsr #8 \n"
1133 ".wa_s1: \n" 1124 ".wa_s1: \n"
1134 "strb r1, [%[addr], -%[psiz]]! \n" 1125 "strb r8, [%[addr]], %[psiz] \n"
1135 "mov r1, r1, lsr #8 \n" 1126 "mov r8, r8, lsr #8 \n"
1136 ".wa_s0: \n"
1137 1127
1138 "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */
1139 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ 1128 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
1140 "bhi .wa_sloop \n" 1129 "bhi .wa_sloop \n"
1141 1130
@@ -1187,7 +1176,7 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1187 1176
1188 /* set the bits for all 8 pixels in all bytes according to the 1177 /* set the bits for all 8 pixels in all bytes according to the
1189 * precalculated patterns on the pattern stack */ 1178 * precalculated patterns on the pattern stack */
1190 test = 1; 1179 test = 1 << ((-_gray_info.depth) & 7);
1191 mask = (~mask & 0xff); 1180 mask = (~mask & 0xff);
1192 if (mask == 0) 1181 if (mask == 0)
1193 { 1182 {
@@ -1483,28 +1472,40 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1483 "xor r0, r7 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ 1472 "xor r0, r7 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */
1484 "shlr r0 \n" 1473 "shlr r0 \n"
1485 "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ 1474 "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
1475
1476 "mov %[dpth], %[rx] \n" /** shift out unused low bytes **/
1477 "add #-1, %[rx] \n"
1478 "mov #7, r0 \n"
1479 "and r0, %[rx] \n"
1480 "mova .wa_pshift, r0 \n"
1481 "add %[rx], r0 \n"
1482 "add %[rx], r0 \n"
1483 "jmp @r0 \n" /* jump into shift streak */
1484 "nop \n"
1485
1486 ".align 2 \n"
1487 ".wa_pshift: \n"
1488 "shlr8 r7 \n"
1489 "shlr8 r6 \n"
1490 "shlr8 r5 \n"
1491 "shlr8 r4 \n"
1492 "shlr8 r3 \n"
1493 "shlr8 r2 \n"
1494 "shlr8 r1 \n"
1486 1495
1487 "not %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ 1496 "not %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
1488 "extu.b %[mask], %[mask] \n" /* mask out high bits */ 1497 "extu.b %[mask], %[mask] \n" /* mask out high bits */
1489 "tst %[mask], %[mask] \n" 1498 "tst %[mask], %[mask] \n"
1490 "bt .wa_sloop \n" /* short loop if nothing to keep */ 1499 "bt .wa_sstart \n" /* short loop if nothing to keep */
1491
1492 ".wa_floop: \n" /** full loop (there are bits to keep)**/
1493 "mov #8, r0 \n"
1494 "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */
1495 "bt .wa_f8 \n"
1496 1500
1497 "mulu %[psiz], %[dpth] \n" 1501 "mova .wa_ftable, r0 \n" /* jump into full loop */
1498 "mova .wa_ftable, r0 \n" 1502 "mov.b @(r0, %[rx]), %[rx] \n"
1499 "mov.b @(r0, %[dpth]), %[rx] \n"
1500 "add %[rx], r0 \n" 1503 "add %[rx], r0 \n"
1501 "sts macl, %[rx] \n" /* point behind the last plane.. */ 1504 "jmp @r0 \n"
1502 "jmp @r0 \n" /* jump into streak */ 1505 "nop \n"
1503 "add %[rx], %[addr] \n" /* ..for this round */ 1506
1504
1505 ".align 2 \n" 1507 ".align 2 \n"
1506 ".wa_ftable: \n" 1508 ".wa_ftable: \n"
1507 ".byte .wa_f0 - .wa_ftable \n"
1508 ".byte .wa_f1 - .wa_ftable \n" 1509 ".byte .wa_f1 - .wa_ftable \n"
1509 ".byte .wa_f2 - .wa_ftable \n" 1510 ".byte .wa_f2 - .wa_ftable \n"
1510 ".byte .wa_f3 - .wa_ftable \n" 1511 ".byte .wa_f3 - .wa_ftable \n"
@@ -1512,74 +1513,66 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1512 ".byte .wa_f5 - .wa_ftable \n" 1513 ".byte .wa_f5 - .wa_ftable \n"
1513 ".byte .wa_f6 - .wa_ftable \n" 1514 ".byte .wa_f6 - .wa_ftable \n"
1514 ".byte .wa_f7 - .wa_ftable \n" 1515 ".byte .wa_f7 - .wa_ftable \n"
1516 ".byte .wa_f8 - .wa_ftable \n"
1515 1517
1518 ".wa_floop: \n" /** full loop (there are bits to keep)**/
1516 ".wa_f8: \n" 1519 ".wa_f8: \n"
1517 "mov %[psiz], %[rx] \n"
1518 "shll2 %[rx] \n"
1519 "add %[rx], %[rx] \n"
1520 "add %[rx], %[addr] \n"
1521 /* Point behind the last plane for this round. Note: We're using the
1522 * registers backwards in order to reuse the streak for the last round.
1523 * Therefore we need to go thru the bitplanes backwards too, otherwise
1524 * the bit order would be destroyed which results in more flicker. */
1525 "sub %[psiz], %[addr] \n"
1526 "mov.b @%[addr], r0 \n" /* load old byte */ 1520 "mov.b @%[addr], r0 \n" /* load old byte */
1527 "and %[mask], r0 \n" /* mask out replaced bits */ 1521 "and %[mask], r0 \n" /* mask out replaced bits */
1528 "or r8, r0 \n" /* set new bits */ 1522 "or r1, r0 \n" /* set new bits */
1529 "mov.b r0, @%[addr] \n" /* store byte */ 1523 "mov.b r0, @%[addr] \n" /* store byte */
1530 "shlr8 r8 \n" /* shift out used-up byte */ 1524 "add %[psiz], %[addr] \n"
1525 "shlr8 r1 \n" /* shift out used-up byte */
1531 ".wa_f7: \n" 1526 ".wa_f7: \n"
1532 "sub %[psiz], %[addr] \n"
1533 "mov.b @%[addr], r0 \n" 1527 "mov.b @%[addr], r0 \n"
1534 "and %[mask], r0 \n" 1528 "and %[mask], r0 \n"
1535 "or r7, r0 \n" 1529 "or r2, r0 \n"
1536 "mov.b r0, @%[addr] \n" 1530 "mov.b r0, @%[addr] \n"
1537 "shlr8 r7 \n" 1531 "add %[psiz], %[addr] \n"
1532 "shlr8 r2 \n"
1538 ".wa_f6: \n" 1533 ".wa_f6: \n"
1539 "sub %[psiz], %[addr] \n"
1540 "mov.b @%[addr], r0 \n" 1534 "mov.b @%[addr], r0 \n"
1541 "and %[mask], r0 \n" 1535 "and %[mask], r0 \n"
1542 "or r6, r0 \n" 1536 "or r3, r0 \n"
1543 "mov.b r0, @%[addr] \n" 1537 "mov.b r0, @%[addr] \n"
1544 "shlr8 r6 \n" 1538 "add %[psiz], %[addr] \n"
1539 "shlr8 r3 \n"
1545 ".wa_f5: \n" 1540 ".wa_f5: \n"
1546 "sub %[psiz], %[addr] \n"
1547 "mov.b @%[addr], r0 \n" 1541 "mov.b @%[addr], r0 \n"
1548 "and %[mask], r0 \n" 1542 "and %[mask], r0 \n"
1549 "or r5, r0 \n" 1543 "or r4, r0 \n"
1550 "mov.b r0, @%[addr] \n" 1544 "mov.b r0, @%[addr] \n"
1551 "shlr8 r5 \n" 1545 "add %[psiz], %[addr] \n"
1546 "shlr8 r4 \n"
1552 ".wa_f4: \n" 1547 ".wa_f4: \n"
1553 "sub %[psiz], %[addr] \n"
1554 "mov.b @%[addr], r0 \n" 1548 "mov.b @%[addr], r0 \n"
1555 "and %[mask], r0 \n" 1549 "and %[mask], r0 \n"
1556 "or r4, r0 \n" 1550 "or r5, r0 \n"
1557 "mov.b r0, @%[addr] \n" 1551 "mov.b r0, @%[addr] \n"
1558 "shlr8 r4 \n" 1552 "add %[psiz], %[addr] \n"
1553 "shlr8 r5 \n"
1559 ".wa_f3: \n" 1554 ".wa_f3: \n"
1560 "sub %[psiz], %[addr] \n"
1561 "mov.b @%[addr], r0 \n" 1555 "mov.b @%[addr], r0 \n"
1562 "and %[mask], r0 \n" 1556 "and %[mask], r0 \n"
1563 "or r3, r0 \n" 1557 "or r6, r0 \n"
1564 "mov.b r0, @%[addr] \n" 1558 "mov.b r0, @%[addr] \n"
1565 "shlr8 r3 \n" 1559 "add %[psiz], %[addr] \n"
1560 "shlr8 r6 \n"
1566 ".wa_f2: \n" 1561 ".wa_f2: \n"
1567 "sub %[psiz], %[addr] \n"
1568 "mov.b @%[addr], r0 \n" 1562 "mov.b @%[addr], r0 \n"
1569 "and %[mask], r0 \n" 1563 "and %[mask], r0 \n"
1570 "or r2, r0 \n" 1564 "or r7, r0 \n"
1571 "mov.b r0, @%[addr] \n" 1565 "mov.b r0, @%[addr] \n"
1572 "shlr8 r2 \n" 1566 "add %[psiz], %[addr] \n"
1567 "shlr8 r7 \n"
1573 ".wa_f1: \n" 1568 ".wa_f1: \n"
1574 "sub %[psiz], %[addr] \n"
1575 "mov.b @%[addr], r0 \n" 1569 "mov.b @%[addr], r0 \n"
1576 "and %[mask], r0 \n" 1570 "and %[mask], r0 \n"
1577 "or r1, r0 \n" 1571 "or r8, r0 \n"
1578 "mov.b r0, @%[addr] \n" 1572 "mov.b r0, @%[addr] \n"
1579 "shlr8 r1 \n" 1573 "add %[psiz], %[addr] \n"
1580 ".wa_f0: \n" 1574 "shlr8 r8 \n"
1581 1575
1582 "add %[rx], %[addr] \n" /* correct address */
1583 "add #-8, %[dpth] \n" 1576 "add #-8, %[dpth] \n"
1584 "cmp/pl %[dpth] \n" /* next round if anything left */ 1577 "cmp/pl %[dpth] \n" /* next round if anything left */
1585 "bt .wa_floop \n" 1578 "bt .wa_floop \n"
@@ -1603,22 +1596,15 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1603 ".wa_mask1: \n" 1596 ".wa_mask1: \n"
1604 ".long 0xAAAAAAAA \n" 1597 ".long 0xAAAAAAAA \n"
1605 1598
1606 ".wa_sloop: \n" /** short loop (nothing to keep) **/ 1599 ".wa_sstart: \n"
1607 "mov #8, r0 \n" 1600 "mova .wa_stable, r0 \n" /* jump into short loop */
1608 "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ 1601 "mov.b @(r0, %[rx]), %[rx] \n"
1609 "bt .wa_s8 \n"
1610
1611 "mulu %[psiz], %[dpth] \n"
1612 "mova .wa_stable, r0 \n"
1613 "mov.b @(r0, %[dpth]), %[rx] \n"
1614 "add %[rx], r0 \n" 1602 "add %[rx], r0 \n"
1615 "sts macl, %[rx] \n" /* point behind the last plane.. */ 1603 "jmp @r0 \n"
1616 "jmp @r0 \n" /* jump into streak */ 1604 "nop \n"
1617 "add %[rx], %[addr] \n" /* ..for this round */
1618 1605
1619 ".align 2 \n" 1606 ".align 2 \n"
1620 ".wa_stable: \n" 1607 ".wa_stable: \n"
1621 ".byte .wa_s0 - .wa_stable \n"
1622 ".byte .wa_s1 - .wa_stable \n" 1608 ".byte .wa_s1 - .wa_stable \n"
1623 ".byte .wa_s2 - .wa_stable \n" 1609 ".byte .wa_s2 - .wa_stable \n"
1624 ".byte .wa_s3 - .wa_stable \n" 1610 ".byte .wa_s3 - .wa_stable \n"
@@ -1626,47 +1612,42 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1626 ".byte .wa_s5 - .wa_stable \n" 1612 ".byte .wa_s5 - .wa_stable \n"
1627 ".byte .wa_s6 - .wa_stable \n" 1613 ".byte .wa_s6 - .wa_stable \n"
1628 ".byte .wa_s7 - .wa_stable \n" 1614 ".byte .wa_s7 - .wa_stable \n"
1615 ".byte .wa_s8 - .wa_stable \n"
1629 1616
1617 ".wa_sloop: \n" /** short loop (nothing to keep) **/
1630 ".wa_s8: \n" 1618 ".wa_s8: \n"
1631 "mov %[psiz], %[rx] \n" /* Point behind the last plane */ 1619 "mov.b r1, @%[addr] \n" /* store byte */
1632 "shll2 %[rx] \n" /* for this round. */ 1620 "add %[psiz], %[addr] \n"
1633 "add %[rx], %[rx] \n" /* See above. */ 1621 "shlr8 r1 \n" /* shift out used-up byte */
1634 "add %[rx], %[addr] \n"
1635
1636 "sub %[psiz], %[addr] \n"
1637 "mov.b r8, @%[addr] \n" /* store byte */
1638 "shlr8 r8 \n" /* shift out used-up byte */
1639 ".wa_s7: \n" 1622 ".wa_s7: \n"
1640 "sub %[psiz], %[addr] \n" 1623 "mov.b r2, @%[addr] \n"
1641 "mov.b r7, @%[addr] \n" 1624 "add %[psiz], %[addr] \n"
1642 "shlr8 r7 \n" 1625 "shlr8 r2 \n"
1643 ".wa_s6: \n" 1626 ".wa_s6: \n"
1644 "sub %[psiz], %[addr] \n" 1627 "mov.b r3, @%[addr] \n"
1645 "mov.b r6, @%[addr] \n" 1628 "add %[psiz], %[addr] \n"
1646 "shlr8 r6 \n" 1629 "shlr8 r3 \n"
1647 ".wa_s5: \n" 1630 ".wa_s5: \n"
1648 "sub %[psiz], %[addr] \n"
1649 "mov.b r5, @%[addr] \n"
1650 "shlr8 r5 \n"
1651 ".wa_s4: \n"
1652 "sub %[psiz], %[addr] \n"
1653 "mov.b r4, @%[addr] \n" 1631 "mov.b r4, @%[addr] \n"
1632 "add %[psiz], %[addr] \n"
1654 "shlr8 r4 \n" 1633 "shlr8 r4 \n"
1634 ".wa_s4: \n"
1635 "mov.b r5, @%[addr] \n"
1636 "add %[psiz], %[addr] \n"
1637 "shlr8 r5 \n"
1655 ".wa_s3: \n" 1638 ".wa_s3: \n"
1656 "sub %[psiz], %[addr] \n" 1639 "mov.b r6, @%[addr] \n"
1657 "mov.b r3, @%[addr] \n" 1640 "add %[psiz], %[addr] \n"
1658 "shlr8 r3 \n" 1641 "shlr8 r6 \n"
1659 ".wa_s2: \n" 1642 ".wa_s2: \n"
1660 "sub %[psiz], %[addr] \n" 1643 "mov.b r7, @%[addr] \n"
1661 "mov.b r2, @%[addr] \n" 1644 "add %[psiz], %[addr] \n"
1662 "shlr8 r2 \n" 1645 "shlr8 r7 \n"
1663 ".wa_s1: \n" 1646 ".wa_s1: \n"
1664 "sub %[psiz], %[addr] \n" 1647 "mov.b r8, @%[addr] \n"
1665 "mov.b r1, @%[addr] \n" 1648 "add %[psiz], %[addr] \n"
1666 "shlr8 r1 \n" 1649 "shlr8 r8 \n"
1667 ".wa_s0: \n"
1668 1650
1669 "add %[rx], %[addr] \n" /* correct address */
1670 "add #-8, %[dpth] \n" 1651 "add #-8, %[dpth] \n"
1671 "cmp/pl %[dpth] \n" /* next round if anything left */ 1652 "cmp/pl %[dpth] \n" /* next round if anything left */
1672 "bt .wa_sloop \n" 1653 "bt .wa_sloop \n"
@@ -1853,172 +1834,163 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1853 "move.l %%a0, %%d5 \n" 1834 "move.l %%a0, %%d5 \n"
1854 "eor.l %%d5, %%d0 \n" 1835 "eor.l %%d5, %%d0 \n"
1855 "and.l #0xAAAAAAAA, %%d0 \n" 1836 "and.l #0xAAAAAAAA, %%d0 \n"
1856 "eor.l %%d0, %%d5 \n" 1837 "eor.l %%d0, %%d5 \n" /* (a0 = ...h0g0f0e0d0c0b0a0) */
1857 "move.l %%d5, %%a0 \n" /* a0 = ...h0g0f0e0d0c0b0a0 */ 1838 /* move.l %%d5, %%a0 */ /* but keep in d5 for shift streak */
1858 "lsr.l #1, %%d0 \n" 1839 "lsr.l #1, %%d0 \n"
1859 "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ 1840 "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */
1841
1842 "move.l %[dpth], %%d0 \n" /** shift out unused low bytes **/
1843 "subq.l #1, %%d0 \n"
1844 "and.l #7, %%d0 \n"
1845 "move.l %%d0, %%a0 \n"
1846 "move.l %[ax], %%d0 \n" /* all data in D registers */
1847 "jmp (2, %%pc, %%a0:l:2) \n" /* jump into shift streak */
1848 "lsr.l #8, %%d2 \n"
1849 "lsr.l #8, %%d3 \n"
1850 "lsr.l #8, %%d4 \n"
1851 "lsr.l #8, %%d0 \n"
1852 "lsr.l #8, %%d6 \n"
1853 "lsr.l #8, %%d7 \n"
1854 "lsr.l #8, %%d5 \n"
1855 "move.l %%d0, %[ax] \n" /* put the 2 extra words back.. */
1856 "move.l %%a0, %%d0 \n" /* keep the value for later */
1857 "move.l %%d5, %%a0 \n" /* ..into their A registers */
1860 1858
1861 "tst.l %[mask] \n" 1859 "tst.l %[mask] \n"
1862 "jeq .wa_sloop \n" /* short loop if nothing to keep */ 1860 "jeq .wa_sstart \n" /* short loop if nothing to keep */
1863 1861
1864 "move.l %[mask], %%d5 \n" /* need mask in data reg. */ 1862 "move.l %[mask], %%d5 \n" /* need mask in data reg. */
1865 "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ 1863 "move.l %%d1, %[mask] \n" /* free d1 as working reg. */
1866 1864
1867 ".wa_floop: \n" /** full loop (there are bits to keep)**/ 1865 "jmp (2, %%pc, %%d0:l:2) \n" /* jump into full loop */
1868 "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ 1866 "bra.s .wa_f1 \n"
1869 "bhs.s .wa_f8 \n"
1870
1871 "move.l %[psiz], %%d0 \n"
1872 "move.l %[dpth], %%d1 \n"
1873 "mulu.w %%d1, %%d0 \n" /* point behind the last plane */
1874 "add.l %%d0, %[addr] \n" /* for this round */
1875 "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */
1876 "bra.s .wa_f1 \n" /* dpth == 0 should never happen */
1877 "bra.s .wa_f2 \n" 1867 "bra.s .wa_f2 \n"
1878 "bra.s .wa_f3 \n" 1868 "bra.s .wa_f3 \n"
1879 "bra.s .wa_f4 \n" 1869 "bra.s .wa_f4 \n"
1880 "bra.s .wa_f5 \n" 1870 "bra.s .wa_f5 \n"
1881 "bra.s .wa_f6 \n" 1871 "bra.s .wa_f6 \n"
1882 "bra.s .wa_f7 \n" 1872 "bra.s .wa_f7 \n"
1873 /* bra.s .wa_f8 */ /* identical with target */
1883 1874
1875 ".wa_floop: \n" /** full loop (there are bits to keep)**/
1884 ".wa_f8: \n" 1876 ".wa_f8: \n"
1885 "move.l %[psiz], %%d0 \n"
1886 "lsl.l #3, %%d0 \n"
1887 "add.l %%d0, %[addr] \n"
1888 /* Point behind the last plane for this round. Note: We're using the
1889 * registers backwards in order to reuse the streak for the last round.
1890 * Therefore we need to go thru the bitplanes backwards too, otherwise
1891 * the bit order would be destroyed which results in more flicker. */
1892 "sub.l %[psiz], %[addr] \n"
1893 "move.b (%[addr]), %%d0 \n" /* load old byte */ 1877 "move.b (%[addr]), %%d0 \n" /* load old byte */
1894 "and.l %%d5, %%d0 \n" /* mask out replaced bits */ 1878 "and.l %%d5, %%d0 \n" /* mask out replaced bits */
1895 "move.l %[mask], %%d1 \n" 1879 "move.l %%a0, %%d1 \n"
1896 "or.l %%d1, %%d0 \n" /* set new bits */ 1880 "or.l %%d1, %%d0 \n" /* set new bits */
1897 "move.b %%d0, (%[addr]) \n" /* store byte */ 1881 "move.b %%d0, (%[addr]) \n" /* store byte */
1882 "add.l %[psiz], %[addr] \n"
1898 "lsr.l #8, %%d1 \n" /* shift out used-up byte */ 1883 "lsr.l #8, %%d1 \n" /* shift out used-up byte */
1899 "move.l %%d1, %[mask] \n" 1884 "move.l %%d1, %%a0 \n"
1900 ".wa_f7: \n" 1885 ".wa_f7: \n"
1901 "sub.l %[psiz], %[addr] \n"
1902 "move.b (%[addr]), %%d0 \n" 1886 "move.b (%[addr]), %%d0 \n"
1903 "and.l %%d5, %%d0 \n" 1887 "and.l %%d5, %%d0 \n"
1904 "or.l %%d2, %%d0 \n" 1888 "or.l %%d7, %%d0 \n"
1905 "move.b %%d0, (%[addr]) \n" 1889 "move.b %%d0, (%[addr]) \n"
1906 "lsr.l #8, %%d2 \n" 1890 "add.l %[psiz], %[addr] \n"
1891 "lsr.l #8, %%d7 \n"
1907 ".wa_f6: \n" 1892 ".wa_f6: \n"
1908 "sub.l %[psiz], %[addr] \n"
1909 "move.b (%[addr]), %%d0 \n" 1893 "move.b (%[addr]), %%d0 \n"
1910 "and.l %%d5, %%d0 \n" 1894 "and.l %%d5, %%d0 \n"
1911 "or.l %%d3, %%d0 \n" 1895 "or.l %%d6, %%d0 \n"
1912 "move.b %%d0, (%[addr]) \n" 1896 "move.b %%d0, (%[addr]) \n"
1913 "lsr.l #8, %%d3 \n" 1897 "add.l %[psiz], %[addr] \n"
1898 "lsr.l #8, %%d6 \n"
1914 ".wa_f5: \n" 1899 ".wa_f5: \n"
1915 "sub.l %[psiz], %[addr] \n"
1916 "move.b (%[addr]), %%d0 \n"
1917 "and.l %%d5, %%d0 \n"
1918 "or.l %%d4, %%d0 \n"
1919 "move.b %%d0, (%[addr]) \n"
1920 "lsr.l #8, %%d4 \n"
1921 ".wa_f4: \n"
1922 "sub.l %[psiz], %[addr] \n"
1923 "move.b (%[addr]), %%d0 \n" 1900 "move.b (%[addr]), %%d0 \n"
1924 "and.l %%d5, %%d0 \n" 1901 "and.l %%d5, %%d0 \n"
1925 "move.l %[ax], %%d1 \n" 1902 "move.l %[ax], %%d1 \n"
1926 "or.l %%d1, %%d0 \n" 1903 "or.l %%d1, %%d0 \n"
1927 "move.b %%d0, (%[addr]) \n" 1904 "move.b %%d0, (%[addr]) \n"
1905 "add.l %[psiz], %[addr] \n"
1928 "lsr.l #8, %%d1 \n" 1906 "lsr.l #8, %%d1 \n"
1929 "move.l %%d1, %[ax] \n" 1907 "move.l %%d1, %[ax] \n"
1908 ".wa_f4: \n"
1909 "move.b (%[addr]), %%d0 \n"
1910 "and.l %%d5, %%d0 \n"
1911 "or.l %%d4, %%d0 \n"
1912 "move.b %%d0, (%[addr]) \n"
1913 "add.l %[psiz], %[addr] \n"
1914 "lsr.l #8, %%d4 \n"
1930 ".wa_f3: \n" 1915 ".wa_f3: \n"
1931 "sub.l %[psiz], %[addr] \n"
1932 "move.b (%[addr]), %%d0 \n" 1916 "move.b (%[addr]), %%d0 \n"
1933 "and.l %%d5, %%d0 \n" 1917 "and.l %%d5, %%d0 \n"
1934 "or.l %%d6, %%d0 \n" 1918 "or.l %%d3, %%d0 \n"
1935 "move.b %%d0, (%[addr]) \n" 1919 "move.b %%d0, (%[addr]) \n"
1936 "lsr.l #8, %%d6 \n" 1920 "add.l %[psiz], %[addr] \n"
1921 "lsr.l #8, %%d3 \n"
1937 ".wa_f2: \n" 1922 ".wa_f2: \n"
1938 "sub.l %[psiz], %[addr] \n"
1939 "move.b (%[addr]), %%d0 \n" 1923 "move.b (%[addr]), %%d0 \n"
1940 "and.l %%d5, %%d0 \n" 1924 "and.l %%d5, %%d0 \n"
1941 "or.l %%d7, %%d0 \n" 1925 "or.l %%d2, %%d0 \n"
1942 "move.b %%d0, (%[addr]) \n" 1926 "move.b %%d0, (%[addr]) \n"
1943 "lsr.l #8, %%d7 \n" 1927 "add.l %[psiz], %[addr] \n"
1928 "lsr.l #8, %%d2 \n"
1944 ".wa_f1: \n" 1929 ".wa_f1: \n"
1945 "sub.l %[psiz], %[addr] \n"
1946 "move.b (%[addr]), %%d0 \n" 1930 "move.b (%[addr]), %%d0 \n"
1947 "and.l %%d5, %%d0 \n" 1931 "and.l %%d5, %%d0 \n"
1948 "move.l %%a0, %%d1 \n" 1932 "move.l %[mask], %%d1 \n"
1949 "or.l %%d1, %%d0 \n" 1933 "or.l %%d1, %%d0 \n"
1950 "move.b %%d0, (%[addr]) \n" 1934 "move.b %%d0, (%[addr]) \n"
1935 "add.l %[psiz], %[addr] \n"
1951 "lsr.l #8, %%d1 \n" 1936 "lsr.l #8, %%d1 \n"
1952 "move.l %%d1, %%a0 \n" 1937 "move.l %%d1, %[mask] \n"
1953 1938
1954 "move.l %[psiz], %%d0 \n"
1955 "lsl.l #3, %%d0 \n"
1956 "add.l %%d0, %[addr] \n" /* correct address */
1957 "subq.l #8, %[dpth] \n" 1939 "subq.l #8, %[dpth] \n"
1958 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ 1940 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
1959 "jgt .wa_floop \n" /* next round if anything left */ 1941 "jgt .wa_floop \n" /* next round if anything left */
1960 1942
1961 "jra .wa_end \n" 1943 "jra .wa_end \n"
1962 1944
1963 ".wa_sloop: \n" /** short loop (nothing to keep) **/ 1945 ".wa_sstart: \n"
1964 "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ 1946 "jmp (2, %%pc, %%d0:l:2) \n" /* jump into short loop */
1965 "bhs.s .wa_s8 \n" 1947 "bra.s .wa_s1 \n"
1966
1967 "move.l %[psiz], %%d0 \n"
1968 "move.l %[dpth], %%d5 \n"
1969 "mulu.w %%d5, %%d0 \n" /* point behind the last plane */
1970 "add.l %%d0, %[addr] \n" /* for this round */
1971 "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */
1972 "bra.s .wa_s1 \n" /* dpth == 0 should never happen */
1973 "bra.s .wa_s2 \n" 1948 "bra.s .wa_s2 \n"
1974 "bra.s .wa_s3 \n" 1949 "bra.s .wa_s3 \n"
1975 "bra.s .wa_s4 \n" 1950 "bra.s .wa_s4 \n"
1976 "bra.s .wa_s5 \n" 1951 "bra.s .wa_s5 \n"
1977 "bra.s .wa_s6 \n" 1952 "bra.s .wa_s6 \n"
1978 "bra.s .wa_s7 \n" 1953 "bra.s .wa_s7 \n"
1954 /* bra.s .wa_s8 */ /* identical with target */
1979 1955
1956 ".wa_sloop: \n" /** short loop (nothing to keep) **/
1980 ".wa_s8: \n" 1957 ".wa_s8: \n"
1981 "move.l %[psiz], %%d0 \n" /* Point behind the last plane */ 1958 "move.l %%a0, %%d5 \n"
1982 "lsl.l #3, %%d0 \n" /* for this round. */ 1959 "move.b %%d5, (%[addr]) \n" /* store byte */
1983 "add.l %%d0, %[addr] \n" /* See above. */ 1960 "add.l %[psiz], %[addr] \n"
1984 1961 "lsr.l #8, %%d5 \n" /* shift out used-up byte */
1985 "sub.l %[psiz], %[addr] \n" 1962 "move.l %%d5, %%a0 \n"
1986 "move.b %%d1, (%[addr]) \n" /* store byte */
1987 "lsr.l #8, %%d1 \n" /* shift out used-up byte */
1988 ".wa_s7: \n" 1963 ".wa_s7: \n"
1989 "sub.l %[psiz], %[addr] \n" 1964 "move.b %%d7, (%[addr]) \n"
1990 "move.b %%d2, (%[addr]) \n" 1965 "add.l %[psiz], %[addr] \n"
1991 "lsr.l #8, %%d2 \n" 1966 "lsr.l #8, %%d7 \n"
1992 ".wa_s6: \n" 1967 ".wa_s6: \n"
1993 "sub.l %[psiz], %[addr] \n" 1968 "move.b %%d6, (%[addr]) \n"
1994 "move.b %%d3, (%[addr]) \n" 1969 "add.l %[psiz], %[addr] \n"
1995 "lsr.l #8, %%d3 \n" 1970 "lsr.l #8, %%d6 \n"
1996 ".wa_s5: \n" 1971 ".wa_s5: \n"
1997 "sub.l %[psiz], %[addr] \n"
1998 "move.b %%d4, (%[addr]) \n"
1999 "lsr.l #8, %%d4 \n"
2000 ".wa_s4: \n"
2001 "sub.l %[psiz], %[addr] \n"
2002 "move.l %[ax], %%d5 \n" 1972 "move.l %[ax], %%d5 \n"
2003 "move.b %%d5, (%[addr]) \n" 1973 "move.b %%d5, (%[addr]) \n"
1974 "add.l %[psiz], %[addr] \n"
2004 "lsr.l #8, %%d5 \n" 1975 "lsr.l #8, %%d5 \n"
2005 "move.l %%d5, %[ax] \n" 1976 "move.l %%d5, %[ax] \n"
1977 ".wa_s4: \n"
1978 "move.b %%d4, (%[addr]) \n"
1979 "add.l %[psiz], %[addr] \n"
1980 "lsr.l #8, %%d4 \n"
2006 ".wa_s3: \n" 1981 ".wa_s3: \n"
2007 "sub.l %[psiz], %[addr] \n" 1982 "move.b %%d3, (%[addr]) \n"
2008 "move.b %%d6, (%[addr]) \n" 1983 "add.l %[psiz], %[addr] \n"
2009 "lsr.l #8, %%d6 \n" 1984 "lsr.l #8, %%d3 \n"
2010 ".wa_s2: \n" 1985 ".wa_s2: \n"
2011 "sub.l %[psiz], %[addr] \n" 1986 "move.b %%d2, (%[addr]) \n"
2012 "move.b %%d7, (%[addr]) \n" 1987 "add.l %[psiz], %[addr] \n"
2013 "lsr.l #8, %%d7 \n" 1988 "lsr.l #8, %%d2 \n"
2014 ".wa_s1: \n" 1989 ".wa_s1: \n"
2015 "sub.l %[psiz], %[addr] \n" 1990 "move.b %%d1, (%[addr]) \n"
2016 "move.l %%a0, %%d5 \n" 1991 "add.l %[psiz], %[addr] \n"
2017 "move.b %%d5, (%[addr]) \n" 1992 "lsr.l #8, %%d1 \n"
2018 "lsr.l #8, %%d5 \n"
2019 "move.l %%d5, %%a0 \n"
2020 1993
2021 "add.l %%d0, %[addr] \n" /* correct address */
2022 "subq.l #8, %[dpth] \n" 1994 "subq.l #8, %[dpth] \n"
2023 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ 1995 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
2024 "jgt .wa_sloop \n" /* next round if anything left */ 1996 "jgt .wa_sloop \n" /* next round if anything left */
@@ -2071,7 +2043,7 @@ static void _writearray(unsigned char *address, const unsigned char *src,
2071 2043
2072 /* set the bits for all 8 pixels in all bytes according to the 2044 /* set the bits for all 8 pixels in all bytes according to the
2073 * precalculated patterns on the pattern stack */ 2045 * precalculated patterns on the pattern stack */
2074 test = 1; 2046 test = 1 << ((-_gray_info.depth) & 7);
2075 mask = (~mask & 0xff); 2047 mask = (~mask & 0xff);
2076 if (mask == 0) 2048 if (mask == 0)
2077 { 2049 {
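A note on the C reference implementation change that appears in the hunks above: test no longer starts at bit 0 but at bit (-_gray_info.depth) & 7. Together with the new "shift out unused low bytes" streaks in the assembly versions, this suggests the brightness patterns are now kept packed against the top of the bytes they occupy (that part of the change lives outside this file). A small stand-alone illustration of the arithmetic, where depth stands in for _gray_info.depth and nothing here is part of the patch:

/* The start bit chosen by test = 1 << ((-depth) & 7) skips the unused low
 * bits of the first pattern byte, so the last of 'depth' consecutive bits
 * always lands on bit 7 of a byte. */
#include <stdio.h>

int main(void)
{
    for (int depth = 1; depth <= 9; depth++)
    {
        int first = (-depth) & 7;       /* first used bit position */
        int last  = first + depth - 1;  /* last used bit position  */
        printf("depth %d: bits %d..%d (last %% 8 == %d)\n",
               depth, first, last, last % 8);
    }
    return 0;
}

For every depth the last used bit ends on a byte boundary, which is what lets both the C fallback and the assembly loops consume whole bytes per round.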