diff options
author | Jens Arnold <amiconn@rockbox.org> | 2006-08-11 23:40:05 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2006-08-11 23:40:05 +0000 |
commit | e6ed58f6c545719ed804c9ad513496f8d0fa0286 (patch) | |
tree | db096eb64de3ec7c33682c5bb50579784c5ca633 /apps/plugins/lib/gray_draw.c | |
parent | 22860a910a7ddbb9b811fce4cf971e097c8e6eac (diff) | |
download | rockbox-e6ed58f6c545719ed804c9ad513496f8d0fa0286.tar.gz rockbox-e6ed58f6c545719ed804c9ad513496f8d0fa0286.zip |
Better implementation of the new greyscale algorithm. A bit faster (+2% on iPod and Archos, +7% on Iriver H1x0), and more straightforward.
* Fixed garbage display when a plugin switches on the overlay before drawing anything.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10534 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/plugins/lib/gray_draw.c')
-rw-r--r-- | apps/plugins/lib/gray_draw.c | 494 |
1 files changed, 233 insertions, 261 deletions
diff --git a/apps/plugins/lib/gray_draw.c b/apps/plugins/lib/gray_draw.c index dcc65bdd09..bd8ea4f1ce 100644 --- a/apps/plugins/lib/gray_draw.c +++ b/apps/plugins/lib/gray_draw.c | |||
@@ -1002,103 +1002,94 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1002 | "and r0, r0, %[rx] \n" | 1002 | "and r0, r0, %[rx] \n" |
1003 | "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ | 1003 | "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ |
1004 | "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ | 1004 | "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ |
1005 | |||
1006 | "sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/ | ||
1007 | "and r0, r0, #7 \n" | ||
1008 | "add pc, pc, r0, lsl #2 \n" /* jump into shift streak */ | ||
1009 | "mov r8, r8, lsr #8 \n" /* r8: never reached */ | ||
1010 | "mov r7, r7, lsr #8 \n" | ||
1011 | "mov r6, r6, lsr #8 \n" | ||
1012 | "mov r5, r5, lsr #8 \n" | ||
1013 | "mov r4, r4, lsr #8 \n" | ||
1014 | "mov r3, r3, lsr #8 \n" | ||
1015 | "mov r2, r2, lsr #8 \n" | ||
1016 | "mov r1, r1, lsr #8 \n" | ||
1005 | 1017 | ||
1006 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ | 1018 | "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ |
1007 | "ands %[mask], %[mask], #0xff \n" | 1019 | "ands %[mask], %[mask], #0xff \n" |
1008 | "beq .wa_sloop \n" /* short loop if no bits to keep */ | 1020 | "beq .wa_sstart \n" /* short loop if no bits to keep */ |
1009 | |||
1010 | ".wa_floop: \n" /** full loop (bits to keep)**/ | ||
1011 | "cmp %[dpth], #8 \n" /* 8 planes or more left? */ | ||
1012 | "bhs .wa_f8 \n" | ||
1013 | 1021 | ||
1014 | "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ | 1022 | "ldrb r0, [pc, r0] \n" /* jump into full loop */ |
1015 | "add %[addr], %[addr], r0 \n" /* for this round */ | ||
1016 | |||
1017 | |||
1018 | "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */ | ||
1019 | "add pc, pc, r0 \n" | 1023 | "add pc, pc, r0 \n" |
1020 | ".wa_ftable: \n" | 1024 | ".wa_ftable: \n" |
1021 | ".byte .wa_f0 - .wa_ftable - 4 \n" /* [jump tables are tricky] */ | 1025 | ".byte .wa_f1 - .wa_ftable - 4 \n" /* [jump tables are tricky] */ |
1022 | ".byte .wa_f1 - .wa_ftable - 4 \n" | ||
1023 | ".byte .wa_f2 - .wa_ftable - 4 \n" | 1026 | ".byte .wa_f2 - .wa_ftable - 4 \n" |
1024 | ".byte .wa_f3 - .wa_ftable - 4 \n" | 1027 | ".byte .wa_f3 - .wa_ftable - 4 \n" |
1025 | ".byte .wa_f4 - .wa_ftable - 4 \n" | 1028 | ".byte .wa_f4 - .wa_ftable - 4 \n" |
1026 | ".byte .wa_f5 - .wa_ftable - 4 \n" | 1029 | ".byte .wa_f5 - .wa_ftable - 4 \n" |
1027 | ".byte .wa_f6 - .wa_ftable - 4 \n" | 1030 | ".byte .wa_f6 - .wa_ftable - 4 \n" |
1028 | ".byte .wa_f7 - .wa_ftable - 4 \n" | 1031 | ".byte .wa_f7 - .wa_ftable - 4 \n" |
1032 | ".byte .wa_f8 - .wa_ftable - 4 \n" | ||
1029 | 1033 | ||
1034 | ".wa_floop: \n" /** full loop (bits to keep)**/ | ||
1030 | ".wa_f8: \n" | 1035 | ".wa_f8: \n" |
1031 | "add %[addr], %[addr], %[psiz], lsl #3 \n" | 1036 | "ldrb r0, [%[addr]] \n" /* load old byte */ |
1032 | /* Point behind the last plane for this round. Note: We're using the | ||
1033 | * registers backwards in order to reuse the streak for the last round. | ||
1034 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1035 | * the bit order would be destroyed which results in more flicker. */ | ||
1036 | "ldrb r0, [%[addr], -%[psiz]]! \n" /* load old byte */ | ||
1037 | "and r0, r0, %[mask] \n" /* mask out replaced bits */ | 1037 | "and r0, r0, %[mask] \n" /* mask out replaced bits */ |
1038 | "orr r0, r0, r8 \n" /* set new bits */ | 1038 | "orr r0, r0, r1 \n" /* set new bits */ |
1039 | "strb r0, [%[addr]] \n" /* store byte */ | 1039 | "strb r0, [%[addr]], %[psiz] \n" /* store byte */ |
1040 | "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ | 1040 | "mov r1, r1, lsr #8 \n" /* shift out used-up byte */ |
1041 | ".wa_f7: \n" | 1041 | ".wa_f7: \n" |
1042 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1042 | "ldrb r0, [%[addr]] \n" |
1043 | "and r0, r0, %[mask] \n" | 1043 | "and r0, r0, %[mask] \n" |
1044 | "orr r0, r0, r7 \n" | 1044 | "orr r0, r0, r2 \n" |
1045 | "strb r0, [%[addr]] \n" | 1045 | "strb r0, [%[addr]], %[psiz] \n" |
1046 | "mov r7, r7, lsr #8 \n" | 1046 | "mov r2, r2, lsr #8 \n" |
1047 | ".wa_f6: \n" | 1047 | ".wa_f6: \n" |
1048 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1048 | "ldrb r0, [%[addr]] \n" |
1049 | "and r0, r0, %[mask] \n" | 1049 | "and r0, r0, %[mask] \n" |
1050 | "orr r0, r0, r6 \n" | 1050 | "orr r0, r0, r3 \n" |
1051 | "strb r0, [%[addr]] \n" | 1051 | "strb r0, [%[addr]], %[psiz] \n" |
1052 | "mov r6, r6, lsr #8 \n" | 1052 | "mov r3, r3, lsr #8 \n" |
1053 | ".wa_f5: \n" | 1053 | ".wa_f5: \n" |
1054 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1054 | "ldrb r0, [%[addr]] \n" |
1055 | "and r0, r0, %[mask] \n" | ||
1056 | "orr r0, r0, r5 \n" | ||
1057 | "strb r0, [%[addr]] \n" | ||
1058 | "mov r5, r5, lsr #8 \n" | ||
1059 | ".wa_f4: \n" | ||
1060 | "ldrb r0, [%[addr], -%[psiz]]! \n" | ||
1061 | "and r0, r0, %[mask] \n" | 1055 | "and r0, r0, %[mask] \n" |
1062 | "orr r0, r0, r4 \n" | 1056 | "orr r0, r0, r4 \n" |
1063 | "strb r0, [%[addr]] \n" | 1057 | "strb r0, [%[addr]], %[psiz] \n" |
1064 | "mov r4, r4, lsr #8 \n" | 1058 | "mov r4, r4, lsr #8 \n" |
1059 | ".wa_f4: \n" | ||
1060 | "ldrb r0, [%[addr]] \n" | ||
1061 | "and r0, r0, %[mask] \n" | ||
1062 | "orr r0, r0, r5 \n" | ||
1063 | "strb r0, [%[addr]], %[psiz] \n" | ||
1064 | "mov r5, r5, lsr #8 \n" | ||
1065 | ".wa_f3: \n" | 1065 | ".wa_f3: \n" |
1066 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1066 | "ldrb r0, [%[addr]] \n" |
1067 | "and r0, r0, %[mask] \n" | 1067 | "and r0, r0, %[mask] \n" |
1068 | "orr r0, r0, r3 \n" | 1068 | "orr r0, r0, r6 \n" |
1069 | "strb r0, [%[addr]] \n" | 1069 | "strb r0, [%[addr]], %[psiz] \n" |
1070 | "mov r3, r3, lsr #8 \n" | 1070 | "mov r6, r6, lsr #8 \n" |
1071 | ".wa_f2: \n" | 1071 | ".wa_f2: \n" |
1072 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1072 | "ldrb r0, [%[addr]] \n" |
1073 | "and r0, r0, %[mask] \n" | 1073 | "and r0, r0, %[mask] \n" |
1074 | "orr r0, r0, r2 \n" | 1074 | "orr r0, r0, r7 \n" |
1075 | "strb r0, [%[addr]] \n" | 1075 | "strb r0, [%[addr]], %[psiz] \n" |
1076 | "mov r2, r2, lsr #8 \n" | 1076 | "mov r7, r7, lsr #8 \n" |
1077 | ".wa_f1: \n" | 1077 | ".wa_f1: \n" |
1078 | "ldrb r0, [%[addr], -%[psiz]]! \n" | 1078 | "ldrb r0, [%[addr]] \n" |
1079 | "and r0, r0, %[mask] \n" | 1079 | "and r0, r0, %[mask] \n" |
1080 | "orr r0, r0, r1 \n" | 1080 | "orr r0, r0, r8 \n" |
1081 | "strb r0, [%[addr]] \n" | 1081 | "strb r0, [%[addr]], %[psiz] \n" |
1082 | "mov r1, r1, lsr #8 \n" | 1082 | "mov r8, r8, lsr #8 \n" |
1083 | ".wa_f0: \n" | ||
1084 | 1083 | ||
1085 | "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */ | ||
1086 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ | 1084 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ |
1087 | "bhi .wa_floop \n" | 1085 | "bhi .wa_floop \n" |
1088 | 1086 | ||
1089 | "b .wa_end \n" | 1087 | "b .wa_end \n" |
1090 | 1088 | ||
1091 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | 1089 | ".wa_sstart: \n" |
1092 | "cmp %[dpth], #8 \n" /* 8 planes or more left? */ | 1090 | "ldrb r0, [pc, r0] \n" /* jump into short loop*/ |
1093 | "bhs .wa_s8 \n" | ||
1094 | |||
1095 | "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ | ||
1096 | "add %[addr], %[addr], r0 \n" /* for this round */ | ||
1097 | |||
1098 | "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */ | ||
1099 | "add pc, pc, r0 \n" | 1091 | "add pc, pc, r0 \n" |
1100 | ".wa_stable: \n" | 1092 | ".wa_stable: \n" |
1101 | ".byte .wa_s0 - .wa_stable - 4 \n" | ||
1102 | ".byte .wa_s1 - .wa_stable - 4 \n" | 1093 | ".byte .wa_s1 - .wa_stable - 4 \n" |
1103 | ".byte .wa_s2 - .wa_stable - 4 \n" | 1094 | ".byte .wa_s2 - .wa_stable - 4 \n" |
1104 | ".byte .wa_s3 - .wa_stable - 4 \n" | 1095 | ".byte .wa_s3 - .wa_stable - 4 \n" |
@@ -1106,36 +1097,34 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1106 | ".byte .wa_s5 - .wa_stable - 4 \n" | 1097 | ".byte .wa_s5 - .wa_stable - 4 \n" |
1107 | ".byte .wa_s6 - .wa_stable - 4 \n" | 1098 | ".byte .wa_s6 - .wa_stable - 4 \n" |
1108 | ".byte .wa_s7 - .wa_stable - 4 \n" | 1099 | ".byte .wa_s7 - .wa_stable - 4 \n" |
1100 | ".byte .wa_s8 - .wa_stable - 4 \n" | ||
1109 | 1101 | ||
1102 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | ||
1110 | ".wa_s8: \n" | 1103 | ".wa_s8: \n" |
1111 | "add %[addr], %[addr], %[psiz], lsl #3 \n" | 1104 | "strb r1, [%[addr]], %[psiz] \n" /* store byte */ |
1112 | /* Point behind the last plane for this round. See above. */ | 1105 | "mov r1, r1, lsr #8 \n" /* shift out used-up byte */ |
1113 | "strb r8, [%[addr], -%[psiz]]! \n" /* store byte */ | ||
1114 | "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ | ||
1115 | ".wa_s7: \n" | 1106 | ".wa_s7: \n" |
1116 | "strb r7, [%[addr], -%[psiz]]! \n" | 1107 | "strb r2, [%[addr]], %[psiz] \n" |
1117 | "mov r7, r7, lsr #8 \n" | 1108 | "mov r2, r2, lsr #8 \n" |
1118 | ".wa_s6: \n" | 1109 | ".wa_s6: \n" |
1119 | "strb r6, [%[addr], -%[psiz]]! \n" | 1110 | "strb r3, [%[addr]], %[psiz] \n" |
1120 | "mov r6, r6, lsr #8 \n" | 1111 | "mov r3, r3, lsr #8 \n" |
1121 | ".wa_s5: \n" | 1112 | ".wa_s5: \n" |
1122 | "strb r5, [%[addr], -%[psiz]]! \n" | 1113 | "strb r4, [%[addr]], %[psiz] \n" |
1123 | "mov r5, r5, lsr #8 \n" | ||
1124 | ".wa_s4: \n" | ||
1125 | "strb r4, [%[addr], -%[psiz]]! \n" | ||
1126 | "mov r4, r4, lsr #8 \n" | 1114 | "mov r4, r4, lsr #8 \n" |
1115 | ".wa_s4: \n" | ||
1116 | "strb r5, [%[addr]], %[psiz] \n" | ||
1117 | "mov r5, r5, lsr #8 \n" | ||
1127 | ".wa_s3: \n" | 1118 | ".wa_s3: \n" |
1128 | "strb r3, [%[addr], -%[psiz]]! \n" | 1119 | "strb r6, [%[addr]], %[psiz] \n" |
1129 | "mov r3, r3, lsr #8 \n" | 1120 | "mov r6, r6, lsr #8 \n" |
1130 | ".wa_s2: \n" | 1121 | ".wa_s2: \n" |
1131 | "strb r2, [%[addr], -%[psiz]]! \n" | 1122 | "strb r7, [%[addr]], %[psiz] \n" |
1132 | "mov r2, r2, lsr #8 \n" | 1123 | "mov r7, r7, lsr #8 \n" |
1133 | ".wa_s1: \n" | 1124 | ".wa_s1: \n" |
1134 | "strb r1, [%[addr], -%[psiz]]! \n" | 1125 | "strb r8, [%[addr]], %[psiz] \n" |
1135 | "mov r1, r1, lsr #8 \n" | 1126 | "mov r8, r8, lsr #8 \n" |
1136 | ".wa_s0: \n" | ||
1137 | 1127 | ||
1138 | "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */ | ||
1139 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ | 1128 | "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ |
1140 | "bhi .wa_sloop \n" | 1129 | "bhi .wa_sloop \n" |
1141 | 1130 | ||
@@ -1187,7 +1176,7 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1187 | 1176 | ||
1188 | /* set the bits for all 8 pixels in all bytes according to the | 1177 | /* set the bits for all 8 pixels in all bytes according to the |
1189 | * precalculated patterns on the pattern stack */ | 1178 | * precalculated patterns on the pattern stack */ |
1190 | test = 1; | 1179 | test = 1 << ((-_gray_info.depth) & 7); |
1191 | mask = (~mask & 0xff); | 1180 | mask = (~mask & 0xff); |
1192 | if (mask == 0) | 1181 | if (mask == 0) |
1193 | { | 1182 | { |
@@ -1483,28 +1472,40 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1483 | "xor r0, r7 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ | 1472 | "xor r0, r7 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ |
1484 | "shlr r0 \n" | 1473 | "shlr r0 \n" |
1485 | "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ | 1474 | "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ |
1475 | |||
1476 | "mov %[dpth], %[rx] \n" /** shift out unused low bytes **/ | ||
1477 | "add #-1, %[rx] \n" | ||
1478 | "mov #7, r0 \n" | ||
1479 | "and r0, %[rx] \n" | ||
1480 | "mova .wa_pshift, r0 \n" | ||
1481 | "add %[rx], r0 \n" | ||
1482 | "add %[rx], r0 \n" | ||
1483 | "jmp @r0 \n" /* jump into shift streak */ | ||
1484 | "nop \n" | ||
1485 | |||
1486 | ".align 2 \n" | ||
1487 | ".wa_pshift: \n" | ||
1488 | "shlr8 r7 \n" | ||
1489 | "shlr8 r6 \n" | ||
1490 | "shlr8 r5 \n" | ||
1491 | "shlr8 r4 \n" | ||
1492 | "shlr8 r3 \n" | ||
1493 | "shlr8 r2 \n" | ||
1494 | "shlr8 r1 \n" | ||
1486 | 1495 | ||
1487 | "not %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ | 1496 | "not %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ |
1488 | "extu.b %[mask], %[mask] \n" /* mask out high bits */ | 1497 | "extu.b %[mask], %[mask] \n" /* mask out high bits */ |
1489 | "tst %[mask], %[mask] \n" | 1498 | "tst %[mask], %[mask] \n" |
1490 | "bt .wa_sloop \n" /* short loop if nothing to keep */ | 1499 | "bt .wa_sstart \n" /* short loop if nothing to keep */ |
1491 | |||
1492 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | ||
1493 | "mov #8, r0 \n" | ||
1494 | "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ | ||
1495 | "bt .wa_f8 \n" | ||
1496 | 1500 | ||
1497 | "mulu %[psiz], %[dpth] \n" | 1501 | "mova .wa_ftable, r0 \n" /* jump into full loop */ |
1498 | "mova .wa_ftable, r0 \n" | 1502 | "mov.b @(r0, %[rx]), %[rx] \n" |
1499 | "mov.b @(r0, %[dpth]), %[rx] \n" | ||
1500 | "add %[rx], r0 \n" | 1503 | "add %[rx], r0 \n" |
1501 | "sts macl, %[rx] \n" /* point behind the last plane.. */ | 1504 | "jmp @r0 \n" |
1502 | "jmp @r0 \n" /* jump into streak */ | 1505 | "nop \n" |
1503 | "add %[rx], %[addr] \n" /* ..for this round */ | 1506 | |
1504 | |||
1505 | ".align 2 \n" | 1507 | ".align 2 \n" |
1506 | ".wa_ftable: \n" | 1508 | ".wa_ftable: \n" |
1507 | ".byte .wa_f0 - .wa_ftable \n" | ||
1508 | ".byte .wa_f1 - .wa_ftable \n" | 1509 | ".byte .wa_f1 - .wa_ftable \n" |
1509 | ".byte .wa_f2 - .wa_ftable \n" | 1510 | ".byte .wa_f2 - .wa_ftable \n" |
1510 | ".byte .wa_f3 - .wa_ftable \n" | 1511 | ".byte .wa_f3 - .wa_ftable \n" |
@@ -1512,74 +1513,66 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1512 | ".byte .wa_f5 - .wa_ftable \n" | 1513 | ".byte .wa_f5 - .wa_ftable \n" |
1513 | ".byte .wa_f6 - .wa_ftable \n" | 1514 | ".byte .wa_f6 - .wa_ftable \n" |
1514 | ".byte .wa_f7 - .wa_ftable \n" | 1515 | ".byte .wa_f7 - .wa_ftable \n" |
1516 | ".byte .wa_f8 - .wa_ftable \n" | ||
1515 | 1517 | ||
1518 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | ||
1516 | ".wa_f8: \n" | 1519 | ".wa_f8: \n" |
1517 | "mov %[psiz], %[rx] \n" | ||
1518 | "shll2 %[rx] \n" | ||
1519 | "add %[rx], %[rx] \n" | ||
1520 | "add %[rx], %[addr] \n" | ||
1521 | /* Point behind the last plane for this round. Note: We're using the | ||
1522 | * registers backwards in order to reuse the streak for the last round. | ||
1523 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1524 | * the bit order would be destroyed which results in more flicker. */ | ||
1525 | "sub %[psiz], %[addr] \n" | ||
1526 | "mov.b @%[addr], r0 \n" /* load old byte */ | 1520 | "mov.b @%[addr], r0 \n" /* load old byte */ |
1527 | "and %[mask], r0 \n" /* mask out replaced bits */ | 1521 | "and %[mask], r0 \n" /* mask out replaced bits */ |
1528 | "or r8, r0 \n" /* set new bits */ | 1522 | "or r1, r0 \n" /* set new bits */ |
1529 | "mov.b r0, @%[addr] \n" /* store byte */ | 1523 | "mov.b r0, @%[addr] \n" /* store byte */ |
1530 | "shlr8 r8 \n" /* shift out used-up byte */ | 1524 | "add %[psiz], %[addr] \n" |
1525 | "shlr8 r1 \n" /* shift out used-up byte */ | ||
1531 | ".wa_f7: \n" | 1526 | ".wa_f7: \n" |
1532 | "sub %[psiz], %[addr] \n" | ||
1533 | "mov.b @%[addr], r0 \n" | 1527 | "mov.b @%[addr], r0 \n" |
1534 | "and %[mask], r0 \n" | 1528 | "and %[mask], r0 \n" |
1535 | "or r7, r0 \n" | 1529 | "or r2, r0 \n" |
1536 | "mov.b r0, @%[addr] \n" | 1530 | "mov.b r0, @%[addr] \n" |
1537 | "shlr8 r7 \n" | 1531 | "add %[psiz], %[addr] \n" |
1532 | "shlr8 r2 \n" | ||
1538 | ".wa_f6: \n" | 1533 | ".wa_f6: \n" |
1539 | "sub %[psiz], %[addr] \n" | ||
1540 | "mov.b @%[addr], r0 \n" | 1534 | "mov.b @%[addr], r0 \n" |
1541 | "and %[mask], r0 \n" | 1535 | "and %[mask], r0 \n" |
1542 | "or r6, r0 \n" | 1536 | "or r3, r0 \n" |
1543 | "mov.b r0, @%[addr] \n" | 1537 | "mov.b r0, @%[addr] \n" |
1544 | "shlr8 r6 \n" | 1538 | "add %[psiz], %[addr] \n" |
1539 | "shlr8 r3 \n" | ||
1545 | ".wa_f5: \n" | 1540 | ".wa_f5: \n" |
1546 | "sub %[psiz], %[addr] \n" | ||
1547 | "mov.b @%[addr], r0 \n" | 1541 | "mov.b @%[addr], r0 \n" |
1548 | "and %[mask], r0 \n" | 1542 | "and %[mask], r0 \n" |
1549 | "or r5, r0 \n" | 1543 | "or r4, r0 \n" |
1550 | "mov.b r0, @%[addr] \n" | 1544 | "mov.b r0, @%[addr] \n" |
1551 | "shlr8 r5 \n" | 1545 | "add %[psiz], %[addr] \n" |
1546 | "shlr8 r4 \n" | ||
1552 | ".wa_f4: \n" | 1547 | ".wa_f4: \n" |
1553 | "sub %[psiz], %[addr] \n" | ||
1554 | "mov.b @%[addr], r0 \n" | 1548 | "mov.b @%[addr], r0 \n" |
1555 | "and %[mask], r0 \n" | 1549 | "and %[mask], r0 \n" |
1556 | "or r4, r0 \n" | 1550 | "or r5, r0 \n" |
1557 | "mov.b r0, @%[addr] \n" | 1551 | "mov.b r0, @%[addr] \n" |
1558 | "shlr8 r4 \n" | 1552 | "add %[psiz], %[addr] \n" |
1553 | "shlr8 r5 \n" | ||
1559 | ".wa_f3: \n" | 1554 | ".wa_f3: \n" |
1560 | "sub %[psiz], %[addr] \n" | ||
1561 | "mov.b @%[addr], r0 \n" | 1555 | "mov.b @%[addr], r0 \n" |
1562 | "and %[mask], r0 \n" | 1556 | "and %[mask], r0 \n" |
1563 | "or r3, r0 \n" | 1557 | "or r6, r0 \n" |
1564 | "mov.b r0, @%[addr] \n" | 1558 | "mov.b r0, @%[addr] \n" |
1565 | "shlr8 r3 \n" | 1559 | "add %[psiz], %[addr] \n" |
1560 | "shlr8 r6 \n" | ||
1566 | ".wa_f2: \n" | 1561 | ".wa_f2: \n" |
1567 | "sub %[psiz], %[addr] \n" | ||
1568 | "mov.b @%[addr], r0 \n" | 1562 | "mov.b @%[addr], r0 \n" |
1569 | "and %[mask], r0 \n" | 1563 | "and %[mask], r0 \n" |
1570 | "or r2, r0 \n" | 1564 | "or r7, r0 \n" |
1571 | "mov.b r0, @%[addr] \n" | 1565 | "mov.b r0, @%[addr] \n" |
1572 | "shlr8 r2 \n" | 1566 | "add %[psiz], %[addr] \n" |
1567 | "shlr8 r7 \n" | ||
1573 | ".wa_f1: \n" | 1568 | ".wa_f1: \n" |
1574 | "sub %[psiz], %[addr] \n" | ||
1575 | "mov.b @%[addr], r0 \n" | 1569 | "mov.b @%[addr], r0 \n" |
1576 | "and %[mask], r0 \n" | 1570 | "and %[mask], r0 \n" |
1577 | "or r1, r0 \n" | 1571 | "or r8, r0 \n" |
1578 | "mov.b r0, @%[addr] \n" | 1572 | "mov.b r0, @%[addr] \n" |
1579 | "shlr8 r1 \n" | 1573 | "add %[psiz], %[addr] \n" |
1580 | ".wa_f0: \n" | 1574 | "shlr8 r8 \n" |
1581 | 1575 | ||
1582 | "add %[rx], %[addr] \n" /* correct address */ | ||
1583 | "add #-8, %[dpth] \n" | 1576 | "add #-8, %[dpth] \n" |
1584 | "cmp/pl %[dpth] \n" /* next round if anything left */ | 1577 | "cmp/pl %[dpth] \n" /* next round if anything left */ |
1585 | "bt .wa_floop \n" | 1578 | "bt .wa_floop \n" |
@@ -1603,22 +1596,15 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1603 | ".wa_mask1: \n" | 1596 | ".wa_mask1: \n" |
1604 | ".long 0xAAAAAAAA \n" | 1597 | ".long 0xAAAAAAAA \n" |
1605 | 1598 | ||
1606 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | 1599 | ".wa_sstart: \n" |
1607 | "mov #8, r0 \n" | 1600 | "mova .wa_stable, r0 \n" /* jump into short loop */ |
1608 | "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ | 1601 | "mov.b @(r0, %[rx]), %[rx] \n" |
1609 | "bt .wa_s8 \n" | ||
1610 | |||
1611 | "mulu %[psiz], %[dpth] \n" | ||
1612 | "mova .wa_stable, r0 \n" | ||
1613 | "mov.b @(r0, %[dpth]), %[rx] \n" | ||
1614 | "add %[rx], r0 \n" | 1602 | "add %[rx], r0 \n" |
1615 | "sts macl, %[rx] \n" /* point behind the last plane.. */ | 1603 | "jmp @r0 \n" |
1616 | "jmp @r0 \n" /* jump into streak */ | 1604 | "nop \n" |
1617 | "add %[rx], %[addr] \n" /* ..for this round */ | ||
1618 | 1605 | ||
1619 | ".align 2 \n" | 1606 | ".align 2 \n" |
1620 | ".wa_stable: \n" | 1607 | ".wa_stable: \n" |
1621 | ".byte .wa_s0 - .wa_stable \n" | ||
1622 | ".byte .wa_s1 - .wa_stable \n" | 1608 | ".byte .wa_s1 - .wa_stable \n" |
1623 | ".byte .wa_s2 - .wa_stable \n" | 1609 | ".byte .wa_s2 - .wa_stable \n" |
1624 | ".byte .wa_s3 - .wa_stable \n" | 1610 | ".byte .wa_s3 - .wa_stable \n" |
@@ -1626,47 +1612,42 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1626 | ".byte .wa_s5 - .wa_stable \n" | 1612 | ".byte .wa_s5 - .wa_stable \n" |
1627 | ".byte .wa_s6 - .wa_stable \n" | 1613 | ".byte .wa_s6 - .wa_stable \n" |
1628 | ".byte .wa_s7 - .wa_stable \n" | 1614 | ".byte .wa_s7 - .wa_stable \n" |
1615 | ".byte .wa_s8 - .wa_stable \n" | ||
1629 | 1616 | ||
1617 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | ||
1630 | ".wa_s8: \n" | 1618 | ".wa_s8: \n" |
1631 | "mov %[psiz], %[rx] \n" /* Point behind the last plane */ | 1619 | "mov.b r1, @%[addr] \n" /* store byte */ |
1632 | "shll2 %[rx] \n" /* for this round. */ | 1620 | "add %[psiz], %[addr] \n" |
1633 | "add %[rx], %[rx] \n" /* See above. */ | 1621 | "shlr8 r1 \n" /* shift out used-up byte */ |
1634 | "add %[rx], %[addr] \n" | ||
1635 | |||
1636 | "sub %[psiz], %[addr] \n" | ||
1637 | "mov.b r8, @%[addr] \n" /* store byte */ | ||
1638 | "shlr8 r8 \n" /* shift out used-up byte */ | ||
1639 | ".wa_s7: \n" | 1622 | ".wa_s7: \n" |
1640 | "sub %[psiz], %[addr] \n" | 1623 | "mov.b r2, @%[addr] \n" |
1641 | "mov.b r7, @%[addr] \n" | 1624 | "add %[psiz], %[addr] \n" |
1642 | "shlr8 r7 \n" | 1625 | "shlr8 r2 \n" |
1643 | ".wa_s6: \n" | 1626 | ".wa_s6: \n" |
1644 | "sub %[psiz], %[addr] \n" | 1627 | "mov.b r3, @%[addr] \n" |
1645 | "mov.b r6, @%[addr] \n" | 1628 | "add %[psiz], %[addr] \n" |
1646 | "shlr8 r6 \n" | 1629 | "shlr8 r3 \n" |
1647 | ".wa_s5: \n" | 1630 | ".wa_s5: \n" |
1648 | "sub %[psiz], %[addr] \n" | ||
1649 | "mov.b r5, @%[addr] \n" | ||
1650 | "shlr8 r5 \n" | ||
1651 | ".wa_s4: \n" | ||
1652 | "sub %[psiz], %[addr] \n" | ||
1653 | "mov.b r4, @%[addr] \n" | 1631 | "mov.b r4, @%[addr] \n" |
1632 | "add %[psiz], %[addr] \n" | ||
1654 | "shlr8 r4 \n" | 1633 | "shlr8 r4 \n" |
1634 | ".wa_s4: \n" | ||
1635 | "mov.b r5, @%[addr] \n" | ||
1636 | "add %[psiz], %[addr] \n" | ||
1637 | "shlr8 r5 \n" | ||
1655 | ".wa_s3: \n" | 1638 | ".wa_s3: \n" |
1656 | "sub %[psiz], %[addr] \n" | 1639 | "mov.b r6, @%[addr] \n" |
1657 | "mov.b r3, @%[addr] \n" | 1640 | "add %[psiz], %[addr] \n" |
1658 | "shlr8 r3 \n" | 1641 | "shlr8 r6 \n" |
1659 | ".wa_s2: \n" | 1642 | ".wa_s2: \n" |
1660 | "sub %[psiz], %[addr] \n" | 1643 | "mov.b r7, @%[addr] \n" |
1661 | "mov.b r2, @%[addr] \n" | 1644 | "add %[psiz], %[addr] \n" |
1662 | "shlr8 r2 \n" | 1645 | "shlr8 r7 \n" |
1663 | ".wa_s1: \n" | 1646 | ".wa_s1: \n" |
1664 | "sub %[psiz], %[addr] \n" | 1647 | "mov.b r8, @%[addr] \n" |
1665 | "mov.b r1, @%[addr] \n" | 1648 | "add %[psiz], %[addr] \n" |
1666 | "shlr8 r1 \n" | 1649 | "shlr8 r8 \n" |
1667 | ".wa_s0: \n" | ||
1668 | 1650 | ||
1669 | "add %[rx], %[addr] \n" /* correct address */ | ||
1670 | "add #-8, %[dpth] \n" | 1651 | "add #-8, %[dpth] \n" |
1671 | "cmp/pl %[dpth] \n" /* next round if anything left */ | 1652 | "cmp/pl %[dpth] \n" /* next round if anything left */ |
1672 | "bt .wa_sloop \n" | 1653 | "bt .wa_sloop \n" |
@@ -1853,172 +1834,163 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
1853 | "move.l %%a0, %%d5 \n" | 1834 | "move.l %%a0, %%d5 \n" |
1854 | "eor.l %%d5, %%d0 \n" | 1835 | "eor.l %%d5, %%d0 \n" |
1855 | "and.l #0xAAAAAAAA, %%d0 \n" | 1836 | "and.l #0xAAAAAAAA, %%d0 \n" |
1856 | "eor.l %%d0, %%d5 \n" | 1837 | "eor.l %%d0, %%d5 \n" /* (a0 = ...h0g0f0e0d0c0b0a0) */ |
1857 | "move.l %%d5, %%a0 \n" /* a0 = ...h0g0f0e0d0c0b0a0 */ | 1838 | /* move.l %%d5, %%a0 */ /* but keep in d5 for shift streak */ |
1858 | "lsr.l #1, %%d0 \n" | 1839 | "lsr.l #1, %%d0 \n" |
1859 | "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ | 1840 | "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ |
1841 | |||
1842 | "move.l %[dpth], %%d0 \n" /** shift out unused low bytes **/ | ||
1843 | "subq.l #1, %%d0 \n" | ||
1844 | "and.l #7, %%d0 \n" | ||
1845 | "move.l %%d0, %%a0 \n" | ||
1846 | "move.l %[ax], %%d0 \n" /* all data in D registers */ | ||
1847 | "jmp (2, %%pc, %%a0:l:2) \n" /* jump into shift streak */ | ||
1848 | "lsr.l #8, %%d2 \n" | ||
1849 | "lsr.l #8, %%d3 \n" | ||
1850 | "lsr.l #8, %%d4 \n" | ||
1851 | "lsr.l #8, %%d0 \n" | ||
1852 | "lsr.l #8, %%d6 \n" | ||
1853 | "lsr.l #8, %%d7 \n" | ||
1854 | "lsr.l #8, %%d5 \n" | ||
1855 | "move.l %%d0, %[ax] \n" /* put the 2 extra words back.. */ | ||
1856 | "move.l %%a0, %%d0 \n" /* keep the value for later */ | ||
1857 | "move.l %%d5, %%a0 \n" /* ..into their A registers */ | ||
1860 | 1858 | ||
1861 | "tst.l %[mask] \n" | 1859 | "tst.l %[mask] \n" |
1862 | "jeq .wa_sloop \n" /* short loop if nothing to keep */ | 1860 | "jeq .wa_sstart \n" /* short loop if nothing to keep */ |
1863 | 1861 | ||
1864 | "move.l %[mask], %%d5 \n" /* need mask in data reg. */ | 1862 | "move.l %[mask], %%d5 \n" /* need mask in data reg. */ |
1865 | "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ | 1863 | "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ |
1866 | 1864 | ||
1867 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | 1865 | "jmp (2, %%pc, %%d0:l:2) \n" /* jump into full loop */ |
1868 | "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ | 1866 | "bra.s .wa_f1 \n" |
1869 | "bhs.s .wa_f8 \n" | ||
1870 | |||
1871 | "move.l %[psiz], %%d0 \n" | ||
1872 | "move.l %[dpth], %%d1 \n" | ||
1873 | "mulu.w %%d1, %%d0 \n" /* point behind the last plane */ | ||
1874 | "add.l %%d0, %[addr] \n" /* for this round */ | ||
1875 | "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */ | ||
1876 | "bra.s .wa_f1 \n" /* dpth == 0 should never happen */ | ||
1877 | "bra.s .wa_f2 \n" | 1867 | "bra.s .wa_f2 \n" |
1878 | "bra.s .wa_f3 \n" | 1868 | "bra.s .wa_f3 \n" |
1879 | "bra.s .wa_f4 \n" | 1869 | "bra.s .wa_f4 \n" |
1880 | "bra.s .wa_f5 \n" | 1870 | "bra.s .wa_f5 \n" |
1881 | "bra.s .wa_f6 \n" | 1871 | "bra.s .wa_f6 \n" |
1882 | "bra.s .wa_f7 \n" | 1872 | "bra.s .wa_f7 \n" |
1873 | /* bra.s .wa_f8 */ /* identical with target */ | ||
1883 | 1874 | ||
1875 | ".wa_floop: \n" /** full loop (there are bits to keep)**/ | ||
1884 | ".wa_f8: \n" | 1876 | ".wa_f8: \n" |
1885 | "move.l %[psiz], %%d0 \n" | ||
1886 | "lsl.l #3, %%d0 \n" | ||
1887 | "add.l %%d0, %[addr] \n" | ||
1888 | /* Point behind the last plane for this round. Note: We're using the | ||
1889 | * registers backwards in order to reuse the streak for the last round. | ||
1890 | * Therefore we need to go thru the bitplanes backwards too, otherwise | ||
1891 | * the bit order would be destroyed which results in more flicker. */ | ||
1892 | "sub.l %[psiz], %[addr] \n" | ||
1893 | "move.b (%[addr]), %%d0 \n" /* load old byte */ | 1877 | "move.b (%[addr]), %%d0 \n" /* load old byte */ |
1894 | "and.l %%d5, %%d0 \n" /* mask out replaced bits */ | 1878 | "and.l %%d5, %%d0 \n" /* mask out replaced bits */ |
1895 | "move.l %[mask], %%d1 \n" | 1879 | "move.l %%a0, %%d1 \n" |
1896 | "or.l %%d1, %%d0 \n" /* set new bits */ | 1880 | "or.l %%d1, %%d0 \n" /* set new bits */ |
1897 | "move.b %%d0, (%[addr]) \n" /* store byte */ | 1881 | "move.b %%d0, (%[addr]) \n" /* store byte */ |
1882 | "add.l %[psiz], %[addr] \n" | ||
1898 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ | 1883 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ |
1899 | "move.l %%d1, %[mask] \n" | 1884 | "move.l %%d1, %%a0 \n" |
1900 | ".wa_f7: \n" | 1885 | ".wa_f7: \n" |
1901 | "sub.l %[psiz], %[addr] \n" | ||
1902 | "move.b (%[addr]), %%d0 \n" | 1886 | "move.b (%[addr]), %%d0 \n" |
1903 | "and.l %%d5, %%d0 \n" | 1887 | "and.l %%d5, %%d0 \n" |
1904 | "or.l %%d2, %%d0 \n" | 1888 | "or.l %%d7, %%d0 \n" |
1905 | "move.b %%d0, (%[addr]) \n" | 1889 | "move.b %%d0, (%[addr]) \n" |
1906 | "lsr.l #8, %%d2 \n" | 1890 | "add.l %[psiz], %[addr] \n" |
1891 | "lsr.l #8, %%d7 \n" | ||
1907 | ".wa_f6: \n" | 1892 | ".wa_f6: \n" |
1908 | "sub.l %[psiz], %[addr] \n" | ||
1909 | "move.b (%[addr]), %%d0 \n" | 1893 | "move.b (%[addr]), %%d0 \n" |
1910 | "and.l %%d5, %%d0 \n" | 1894 | "and.l %%d5, %%d0 \n" |
1911 | "or.l %%d3, %%d0 \n" | 1895 | "or.l %%d6, %%d0 \n" |
1912 | "move.b %%d0, (%[addr]) \n" | 1896 | "move.b %%d0, (%[addr]) \n" |
1913 | "lsr.l #8, %%d3 \n" | 1897 | "add.l %[psiz], %[addr] \n" |
1898 | "lsr.l #8, %%d6 \n" | ||
1914 | ".wa_f5: \n" | 1899 | ".wa_f5: \n" |
1915 | "sub.l %[psiz], %[addr] \n" | ||
1916 | "move.b (%[addr]), %%d0 \n" | ||
1917 | "and.l %%d5, %%d0 \n" | ||
1918 | "or.l %%d4, %%d0 \n" | ||
1919 | "move.b %%d0, (%[addr]) \n" | ||
1920 | "lsr.l #8, %%d4 \n" | ||
1921 | ".wa_f4: \n" | ||
1922 | "sub.l %[psiz], %[addr] \n" | ||
1923 | "move.b (%[addr]), %%d0 \n" | 1900 | "move.b (%[addr]), %%d0 \n" |
1924 | "and.l %%d5, %%d0 \n" | 1901 | "and.l %%d5, %%d0 \n" |
1925 | "move.l %[ax], %%d1 \n" | 1902 | "move.l %[ax], %%d1 \n" |
1926 | "or.l %%d1, %%d0 \n" | 1903 | "or.l %%d1, %%d0 \n" |
1927 | "move.b %%d0, (%[addr]) \n" | 1904 | "move.b %%d0, (%[addr]) \n" |
1905 | "add.l %[psiz], %[addr] \n" | ||
1928 | "lsr.l #8, %%d1 \n" | 1906 | "lsr.l #8, %%d1 \n" |
1929 | "move.l %%d1, %[ax] \n" | 1907 | "move.l %%d1, %[ax] \n" |
1908 | ".wa_f4: \n" | ||
1909 | "move.b (%[addr]), %%d0 \n" | ||
1910 | "and.l %%d5, %%d0 \n" | ||
1911 | "or.l %%d4, %%d0 \n" | ||
1912 | "move.b %%d0, (%[addr]) \n" | ||
1913 | "add.l %[psiz], %[addr] \n" | ||
1914 | "lsr.l #8, %%d4 \n" | ||
1930 | ".wa_f3: \n" | 1915 | ".wa_f3: \n" |
1931 | "sub.l %[psiz], %[addr] \n" | ||
1932 | "move.b (%[addr]), %%d0 \n" | 1916 | "move.b (%[addr]), %%d0 \n" |
1933 | "and.l %%d5, %%d0 \n" | 1917 | "and.l %%d5, %%d0 \n" |
1934 | "or.l %%d6, %%d0 \n" | 1918 | "or.l %%d3, %%d0 \n" |
1935 | "move.b %%d0, (%[addr]) \n" | 1919 | "move.b %%d0, (%[addr]) \n" |
1936 | "lsr.l #8, %%d6 \n" | 1920 | "add.l %[psiz], %[addr] \n" |
1921 | "lsr.l #8, %%d3 \n" | ||
1937 | ".wa_f2: \n" | 1922 | ".wa_f2: \n" |
1938 | "sub.l %[psiz], %[addr] \n" | ||
1939 | "move.b (%[addr]), %%d0 \n" | 1923 | "move.b (%[addr]), %%d0 \n" |
1940 | "and.l %%d5, %%d0 \n" | 1924 | "and.l %%d5, %%d0 \n" |
1941 | "or.l %%d7, %%d0 \n" | 1925 | "or.l %%d2, %%d0 \n" |
1942 | "move.b %%d0, (%[addr]) \n" | 1926 | "move.b %%d0, (%[addr]) \n" |
1943 | "lsr.l #8, %%d7 \n" | 1927 | "add.l %[psiz], %[addr] \n" |
1928 | "lsr.l #8, %%d2 \n" | ||
1944 | ".wa_f1: \n" | 1929 | ".wa_f1: \n" |
1945 | "sub.l %[psiz], %[addr] \n" | ||
1946 | "move.b (%[addr]), %%d0 \n" | 1930 | "move.b (%[addr]), %%d0 \n" |
1947 | "and.l %%d5, %%d0 \n" | 1931 | "and.l %%d5, %%d0 \n" |
1948 | "move.l %%a0, %%d1 \n" | 1932 | "move.l %[mask], %%d1 \n" |
1949 | "or.l %%d1, %%d0 \n" | 1933 | "or.l %%d1, %%d0 \n" |
1950 | "move.b %%d0, (%[addr]) \n" | 1934 | "move.b %%d0, (%[addr]) \n" |
1935 | "add.l %[psiz], %[addr] \n" | ||
1951 | "lsr.l #8, %%d1 \n" | 1936 | "lsr.l #8, %%d1 \n" |
1952 | "move.l %%d1, %%a0 \n" | 1937 | "move.l %%d1, %[mask] \n" |
1953 | 1938 | ||
1954 | "move.l %[psiz], %%d0 \n" | ||
1955 | "lsl.l #3, %%d0 \n" | ||
1956 | "add.l %%d0, %[addr] \n" /* correct address */ | ||
1957 | "subq.l #8, %[dpth] \n" | 1939 | "subq.l #8, %[dpth] \n" |
1958 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ | 1940 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ |
1959 | "jgt .wa_floop \n" /* next round if anything left */ | 1941 | "jgt .wa_floop \n" /* next round if anything left */ |
1960 | 1942 | ||
1961 | "jra .wa_end \n" | 1943 | "jra .wa_end \n" |
1962 | 1944 | ||
1963 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | 1945 | ".wa_sstart: \n" |
1964 | "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ | 1946 | "jmp (2, %%pc, %%d0:l:2) \n" /* jump into short loop */ |
1965 | "bhs.s .wa_s8 \n" | 1947 | "bra.s .wa_s1 \n" |
1966 | |||
1967 | "move.l %[psiz], %%d0 \n" | ||
1968 | "move.l %[dpth], %%d5 \n" | ||
1969 | "mulu.w %%d5, %%d0 \n" /* point behind the last plane */ | ||
1970 | "add.l %%d0, %[addr] \n" /* for this round */ | ||
1971 | "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */ | ||
1972 | "bra.s .wa_s1 \n" /* dpth == 0 should never happen */ | ||
1973 | "bra.s .wa_s2 \n" | 1948 | "bra.s .wa_s2 \n" |
1974 | "bra.s .wa_s3 \n" | 1949 | "bra.s .wa_s3 \n" |
1975 | "bra.s .wa_s4 \n" | 1950 | "bra.s .wa_s4 \n" |
1976 | "bra.s .wa_s5 \n" | 1951 | "bra.s .wa_s5 \n" |
1977 | "bra.s .wa_s6 \n" | 1952 | "bra.s .wa_s6 \n" |
1978 | "bra.s .wa_s7 \n" | 1953 | "bra.s .wa_s7 \n" |
1954 | /* bra.s .wa_s8 */ /* identical with target */ | ||
1979 | 1955 | ||
1956 | ".wa_sloop: \n" /** short loop (nothing to keep) **/ | ||
1980 | ".wa_s8: \n" | 1957 | ".wa_s8: \n" |
1981 | "move.l %[psiz], %%d0 \n" /* Point behind the last plane */ | 1958 | "move.l %%a0, %%d5 \n" |
1982 | "lsl.l #3, %%d0 \n" /* for this round. */ | 1959 | "move.b %%d5, (%[addr]) \n" /* store byte */ |
1983 | "add.l %%d0, %[addr] \n" /* See above. */ | 1960 | "add.l %[psiz], %[addr] \n" |
1984 | 1961 | "lsr.l #8, %%d5 \n" /* shift out used-up byte */ | |
1985 | "sub.l %[psiz], %[addr] \n" | 1962 | "move.l %%d5, %%a0 \n" |
1986 | "move.b %%d1, (%[addr]) \n" /* store byte */ | ||
1987 | "lsr.l #8, %%d1 \n" /* shift out used-up byte */ | ||
1988 | ".wa_s7: \n" | 1963 | ".wa_s7: \n" |
1989 | "sub.l %[psiz], %[addr] \n" | 1964 | "move.b %%d7, (%[addr]) \n" |
1990 | "move.b %%d2, (%[addr]) \n" | 1965 | "add.l %[psiz], %[addr] \n" |
1991 | "lsr.l #8, %%d2 \n" | 1966 | "lsr.l #8, %%d7 \n" |
1992 | ".wa_s6: \n" | 1967 | ".wa_s6: \n" |
1993 | "sub.l %[psiz], %[addr] \n" | 1968 | "move.b %%d6, (%[addr]) \n" |
1994 | "move.b %%d3, (%[addr]) \n" | 1969 | "add.l %[psiz], %[addr] \n" |
1995 | "lsr.l #8, %%d3 \n" | 1970 | "lsr.l #8, %%d6 \n" |
1996 | ".wa_s5: \n" | 1971 | ".wa_s5: \n" |
1997 | "sub.l %[psiz], %[addr] \n" | ||
1998 | "move.b %%d4, (%[addr]) \n" | ||
1999 | "lsr.l #8, %%d4 \n" | ||
2000 | ".wa_s4: \n" | ||
2001 | "sub.l %[psiz], %[addr] \n" | ||
2002 | "move.l %[ax], %%d5 \n" | 1972 | "move.l %[ax], %%d5 \n" |
2003 | "move.b %%d5, (%[addr]) \n" | 1973 | "move.b %%d5, (%[addr]) \n" |
1974 | "add.l %[psiz], %[addr] \n" | ||
2004 | "lsr.l #8, %%d5 \n" | 1975 | "lsr.l #8, %%d5 \n" |
2005 | "move.l %%d5, %[ax] \n" | 1976 | "move.l %%d5, %[ax] \n" |
1977 | ".wa_s4: \n" | ||
1978 | "move.b %%d4, (%[addr]) \n" | ||
1979 | "add.l %[psiz], %[addr] \n" | ||
1980 | "lsr.l #8, %%d4 \n" | ||
2006 | ".wa_s3: \n" | 1981 | ".wa_s3: \n" |
2007 | "sub.l %[psiz], %[addr] \n" | 1982 | "move.b %%d3, (%[addr]) \n" |
2008 | "move.b %%d6, (%[addr]) \n" | 1983 | "add.l %[psiz], %[addr] \n" |
2009 | "lsr.l #8, %%d6 \n" | 1984 | "lsr.l #8, %%d3 \n" |
2010 | ".wa_s2: \n" | 1985 | ".wa_s2: \n" |
2011 | "sub.l %[psiz], %[addr] \n" | 1986 | "move.b %%d2, (%[addr]) \n" |
2012 | "move.b %%d7, (%[addr]) \n" | 1987 | "add.l %[psiz], %[addr] \n" |
2013 | "lsr.l #8, %%d7 \n" | 1988 | "lsr.l #8, %%d2 \n" |
2014 | ".wa_s1: \n" | 1989 | ".wa_s1: \n" |
2015 | "sub.l %[psiz], %[addr] \n" | 1990 | "move.b %%d1, (%[addr]) \n" |
2016 | "move.l %%a0, %%d5 \n" | 1991 | "add.l %[psiz], %[addr] \n" |
2017 | "move.b %%d5, (%[addr]) \n" | 1992 | "lsr.l #8, %%d1 \n" |
2018 | "lsr.l #8, %%d5 \n" | ||
2019 | "move.l %%d5, %%a0 \n" | ||
2020 | 1993 | ||
2021 | "add.l %%d0, %[addr] \n" /* correct address */ | ||
2022 | "subq.l #8, %[dpth] \n" | 1994 | "subq.l #8, %[dpth] \n" |
2023 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ | 1995 | "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ |
2024 | "jgt .wa_sloop \n" /* next round if anything left */ | 1996 | "jgt .wa_sloop \n" /* next round if anything left */ |
@@ -2071,7 +2043,7 @@ static void _writearray(unsigned char *address, const unsigned char *src, | |||
2071 | 2043 | ||
2072 | /* set the bits for all 8 pixels in all bytes according to the | 2044 | /* set the bits for all 8 pixels in all bytes according to the |
2073 | * precalculated patterns on the pattern stack */ | 2045 | * precalculated patterns on the pattern stack */ |
2074 | test = 1; | 2046 | test = 1 << ((-_gray_info.depth) & 7); |
2075 | mask = (~mask & 0xff); | 2047 | mask = (~mask & 0xff); |
2076 | if (mask == 0) | 2048 | if (mask == 0) |
2077 | { | 2049 | { |