Diffstat (limited to 'apps')
-rw-r--r--  apps/plugins/lib/gray_core.c | 501
-rw-r--r--  apps/plugins/lib/gray_draw.c | 494
2 files changed, 471 insertions, 524 deletions
diff --git a/apps/plugins/lib/gray_core.c b/apps/plugins/lib/gray_core.c
index 809e88dba1..413b66c65d 100644
--- a/apps/plugins/lib/gray_core.c
+++ b/apps/plugins/lib/gray_core.c
@@ -348,7 +348,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size,
348 long plane_size, buftaken; 348 long plane_size, buftaken;
349 unsigned data; 349 unsigned data;
350#ifndef SIMULATOR 350#ifndef SIMULATOR
351 int j; 351 int j, bitfill;
352#endif 352#endif
353 353
354 _gray_rb = newrb; 354 _gray_rb = newrb;
@@ -439,6 +439,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size,
439 _gray_info.cur_plane = 0; 439 _gray_info.cur_plane = 0;
440 _gray_info.plane_size = plane_size; 440 _gray_info.plane_size = plane_size;
441 _gray_info.plane_data = gbuf; 441 _gray_info.plane_data = gbuf;
442 _gray_rb->memset(gbuf, 0, depth * plane_size);
442 gbuf += depth * plane_size; 443 gbuf += depth * plane_size;
443 _gray_info.bitpattern = (unsigned long *)gbuf; 444 _gray_info.bitpattern = (unsigned long *)gbuf;
444 445
@@ -449,7 +450,8 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size,
449 i >>= 1; 450 i >>= 1;
450 j--; 451 j--;
451 } 452 }
452 _gray_info.randmask = 0xFFu >> j; 453 _gray_info.randmask = 0xFFu >> j;
454 bitfill = (-depth) & 7;
453 455
454 /* Precalculate the bit patterns for all possible pixel values */ 456 /* Precalculate the bit patterns for all possible pixel values */
455 for (i = 0; i <= depth; i++) 457 for (i = 0; i <= depth; i++)
@@ -469,7 +471,7 @@ int gray_init(struct plugin_api* newrb, unsigned char *gbuf, long gbuf_size,
469 } 471 }
470 /* now the lower <depth> bits contain the pattern */ 472 /* now the lower <depth> bits contain the pattern */
471 473
472 _gray_info.bitpattern[i] = pattern; 474 _gray_info.bitpattern[i] = pattern << bitfill;
473 } 475 }
474#endif 476#endif
475 477
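The gray_init() changes above establish the invariant the rest of the commit
relies on: each precalculated pattern is now stored shifted left by
bitfill = (-depth) & 7, i.e. by the number of unused low bits once depth is
rounded up to whole bytes, so the update loops can consume the pattern
bytewise from the low end without special-casing a partial first byte.
A minimal sketch of the arithmetic (the helper name is illustrative, not
part of the patch):

    /* (-depth) & 7 == (8 - depth % 8) % 8 for depth > 0: the count of
     * unused low bits after rounding depth up to whole bytes, e.g.
     *   depth =  3: (-3)  & 7 = 5   (3 + 5 =  8 bits = 1 byte)
     *   depth =  9: (-9)  & 7 = 7   (9 + 7 = 16 bits = 2 bytes)
     *   depth = 16: (-16) & 7 = 0   (already byte-aligned)        */
    static inline unsigned long prescaled_pattern(unsigned long pattern,
                                                  int depth)
    {
        return pattern << ((-depth) & 7);   /* cf. "pattern << bitfill" */
    }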
@@ -797,101 +799,93 @@ void gray_update_rect(int x, int y, int width, int height)
797 "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ 799 "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */
798 "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ 800 "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
799 801
802 "sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/
803 "and r0, r0, #7 \n"
804 "add pc, pc, r0, lsl #2 \n" /* jump into shift streak */
805 "mov r8, r8, lsr #8 \n" /* r8: never reached */
806 "mov r7, r7, lsr #8 \n"
807 "mov r6, r6, lsr #8 \n"
808 "mov r5, r5, lsr #8 \n"
809 "mov r4, r4, lsr #8 \n"
810 "mov r3, r3, lsr #8 \n"
811 "mov r2, r2, lsr #8 \n"
812 "mov r1, r1, lsr #8 \n"
813
800 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ 814 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
801 "ands %[mask], %[mask], #0xff \n" 815 "ands %[mask], %[mask], #0xff \n"
802 "beq .ur_sloop \n" /* short loop if no bits to keep */ 816 "beq .ur_sstart \n" /* short loop if no bits to keep */
803
804 ".ur_floop: \n" /** full loop (bits to keep)**/
805 "cmp %[dpth], #8 \n" /* 8 planes or more left? */
806 "bhs .ur_f8 \n"
807 817
808 "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ 818 "ldrb r0, [pc, r0] \n" /* jump into full loop */
809 "add %[addr], %[addr], r0 \n" /* for this round */
810
811 "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */
812 "add pc, pc, r0 \n" 819 "add pc, pc, r0 \n"
813 ".ur_ftable: \n" 820 ".ur_ftable: \n"
814 ".byte .ur_f0 - .ur_ftable - 4 \n" /* [jump tables are tricky] */ 821 ".byte .ur_f1 - .ur_ftable - 4 \n" /* [jump tables are tricky] */
815 ".byte .ur_f1 - .ur_ftable - 4 \n"
816 ".byte .ur_f2 - .ur_ftable - 4 \n" 822 ".byte .ur_f2 - .ur_ftable - 4 \n"
817 ".byte .ur_f3 - .ur_ftable - 4 \n" 823 ".byte .ur_f3 - .ur_ftable - 4 \n"
818 ".byte .ur_f4 - .ur_ftable - 4 \n" 824 ".byte .ur_f4 - .ur_ftable - 4 \n"
819 ".byte .ur_f5 - .ur_ftable - 4 \n" 825 ".byte .ur_f5 - .ur_ftable - 4 \n"
820 ".byte .ur_f6 - .ur_ftable - 4 \n" 826 ".byte .ur_f6 - .ur_ftable - 4 \n"
821 ".byte .ur_f7 - .ur_ftable - 4 \n" 827 ".byte .ur_f7 - .ur_ftable - 4 \n"
828 ".byte .ur_f8 - .ur_ftable - 4 \n"
822 829
830 ".ur_floop: \n" /** full loop (bits to keep)**/
823 ".ur_f8: \n" 831 ".ur_f8: \n"
824 "add %[addr], %[addr], %[psiz], lsl #3 \n" 832 "ldrb r0, [%[addr]] \n" /* load old byte */
825 /* Point behind the last plane for this round. Note: We're using the
826 * registers backwards in order to reuse the streak for the last round.
827 * Therefore we need to go thru the bitplanes backwards too, otherwise
828 * the bit order would be destroyed which results in more flicker. */
829 "ldrb r0, [%[addr], -%[psiz]]! \n" /* load old byte */
830 "and r0, r0, %[mask] \n" /* mask out replaced bits */ 833 "and r0, r0, %[mask] \n" /* mask out replaced bits */
831 "orr r0, r0, r8 \n" /* set new bits */ 834 "orr r0, r0, r1 \n" /* set new bits */
832 "strb r0, [%[addr]] \n" /* store byte */ 835 "strb r0, [%[addr]], %[psiz] \n" /* store byte */
833 "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ 836 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
834 ".ur_f7: \n" 837 ".ur_f7: \n"
835 "ldrb r0, [%[addr], -%[psiz]]! \n" 838 "ldrb r0, [%[addr]] \n"
836 "and r0, r0, %[mask] \n" 839 "and r0, r0, %[mask] \n"
837 "orr r0, r0, r7 \n" 840 "orr r0, r0, r2 \n"
838 "strb r0, [%[addr]] \n" 841 "strb r0, [%[addr]], %[psiz] \n"
839 "mov r7, r7, lsr #8 \n" 842 "mov r2, r2, lsr #8 \n"
840 ".ur_f6: \n" 843 ".ur_f6: \n"
841 "ldrb r0, [%[addr], -%[psiz]]! \n" 844 "ldrb r0, [%[addr]] \n"
842 "and r0, r0, %[mask] \n" 845 "and r0, r0, %[mask] \n"
843 "orr r0, r0, r6 \n" 846 "orr r0, r0, r3 \n"
844 "strb r0, [%[addr]] \n" 847 "strb r0, [%[addr]], %[psiz] \n"
845 "mov r6, r6, lsr #8 \n" 848 "mov r3, r3, lsr #8 \n"
846 ".ur_f5: \n" 849 ".ur_f5: \n"
847 "ldrb r0, [%[addr], -%[psiz]]! \n" 850 "ldrb r0, [%[addr]] \n"
848 "and r0, r0, %[mask] \n"
849 "orr r0, r0, r5 \n"
850 "strb r0, [%[addr]] \n"
851 "mov r5, r5, lsr #8 \n"
852 ".ur_f4: \n"
853 "ldrb r0, [%[addr], -%[psiz]]! \n"
854 "and r0, r0, %[mask] \n" 851 "and r0, r0, %[mask] \n"
855 "orr r0, r0, r4 \n" 852 "orr r0, r0, r4 \n"
856 "strb r0, [%[addr]] \n" 853 "strb r0, [%[addr]], %[psiz] \n"
857 "mov r4, r4, lsr #8 \n" 854 "mov r4, r4, lsr #8 \n"
855 ".ur_f4: \n"
856 "ldrb r0, [%[addr]] \n"
857 "and r0, r0, %[mask] \n"
858 "orr r0, r0, r5 \n"
859 "strb r0, [%[addr]], %[psiz] \n"
860 "mov r5, r5, lsr #8 \n"
858 ".ur_f3: \n" 861 ".ur_f3: \n"
859 "ldrb r0, [%[addr], -%[psiz]]! \n" 862 "ldrb r0, [%[addr]] \n"
860 "and r0, r0, %[mask] \n" 863 "and r0, r0, %[mask] \n"
861 "orr r0, r0, r3 \n" 864 "orr r0, r0, r6 \n"
862 "strb r0, [%[addr]] \n" 865 "strb r0, [%[addr]], %[psiz] \n"
863 "mov r3, r3, lsr #8 \n" 866 "mov r6, r6, lsr #8 \n"
864 ".ur_f2: \n" 867 ".ur_f2: \n"
865 "ldrb r0, [%[addr], -%[psiz]]! \n" 868 "ldrb r0, [%[addr]] \n"
866 "and r0, r0, %[mask] \n" 869 "and r0, r0, %[mask] \n"
867 "orr r0, r0, r2 \n" 870 "orr r0, r0, r7 \n"
868 "strb r0, [%[addr]] \n" 871 "strb r0, [%[addr]], %[psiz] \n"
869 "mov r2, r2, lsr #8 \n" 872 "mov r7, r7, lsr #8 \n"
870 ".ur_f1: \n" 873 ".ur_f1: \n"
871 "ldrb r0, [%[addr], -%[psiz]]! \n" 874 "ldrb r0, [%[addr]] \n"
872 "and r0, r0, %[mask] \n" 875 "and r0, r0, %[mask] \n"
873 "orr r0, r0, r1 \n" 876 "orr r0, r0, r8 \n"
874 "strb r0, [%[addr]] \n" 877 "strb r0, [%[addr]], %[psiz] \n"
875 "mov r1, r1, lsr #8 \n" 878 "mov r8, r8, lsr #8 \n"
876 ".ur_f0: \n"
877 879
878 "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */
879 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ 880 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
880 "bhi .ur_floop \n" 881 "bhi .ur_floop \n"
881 882
882 "b .ur_end \n" 883 "b .ur_end \n"
883 884
884 ".ur_sloop: \n" /** short loop (nothing to keep) **/ 885 ".ur_sstart: \n"
885 "cmp %[dpth], #8 \n" /* 8 planes or more left? */ 886 "ldrb r0, [pc, r0] \n" /* jump into short loop*/
886 "bhs .ur_s8 \n"
887
888 "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */
889 "add %[addr], %[addr], r0 \n" /* for this round */
890
891 "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */
892 "add pc, pc, r0 \n" 887 "add pc, pc, r0 \n"
893 ".ur_stable: \n" 888 ".ur_stable: \n"
894 ".byte .ur_s0 - .ur_stable - 4 \n"
895 ".byte .ur_s1 - .ur_stable - 4 \n" 889 ".byte .ur_s1 - .ur_stable - 4 \n"
896 ".byte .ur_s2 - .ur_stable - 4 \n" 890 ".byte .ur_s2 - .ur_stable - 4 \n"
897 ".byte .ur_s3 - .ur_stable - 4 \n" 891 ".byte .ur_s3 - .ur_stable - 4 \n"
@@ -899,36 +893,34 @@ void gray_update_rect(int x, int y, int width, int height)
899 ".byte .ur_s5 - .ur_stable - 4 \n" 893 ".byte .ur_s5 - .ur_stable - 4 \n"
900 ".byte .ur_s6 - .ur_stable - 4 \n" 894 ".byte .ur_s6 - .ur_stable - 4 \n"
901 ".byte .ur_s7 - .ur_stable - 4 \n" 895 ".byte .ur_s7 - .ur_stable - 4 \n"
896 ".byte .ur_s8 - .ur_stable - 4 \n"
902 897
898 ".ur_sloop: \n" /** short loop (nothing to keep) **/
903 ".ur_s8: \n" 899 ".ur_s8: \n"
904 "add %[addr], %[addr], %[psiz], lsl #3 \n" 900 "strb r1, [%[addr]], %[psiz] \n" /* store byte */
905 /* Point behind the last plane for this round. See above. */ 901 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
906 "strb r8, [%[addr], -%[psiz]]! \n" /* store byte */
907 "mov r8, r8, lsr #8 \n" /* shift out used-up byte */
908 ".ur_s7: \n" 902 ".ur_s7: \n"
909 "strb r7, [%[addr], -%[psiz]]! \n" 903 "strb r2, [%[addr]], %[psiz] \n"
910 "mov r7, r7, lsr #8 \n" 904 "mov r2, r2, lsr #8 \n"
911 ".ur_s6: \n" 905 ".ur_s6: \n"
912 "strb r6, [%[addr], -%[psiz]]! \n" 906 "strb r3, [%[addr]], %[psiz] \n"
913 "mov r6, r6, lsr #8 \n" 907 "mov r3, r3, lsr #8 \n"
914 ".ur_s5: \n" 908 ".ur_s5: \n"
915 "strb r5, [%[addr], -%[psiz]]! \n" 909 "strb r4, [%[addr]], %[psiz] \n"
916 "mov r5, r5, lsr #8 \n"
917 ".ur_s4: \n"
918 "strb r4, [%[addr], -%[psiz]]! \n"
919 "mov r4, r4, lsr #8 \n" 910 "mov r4, r4, lsr #8 \n"
911 ".ur_s4: \n"
912 "strb r5, [%[addr]], %[psiz] \n"
913 "mov r5, r5, lsr #8 \n"
920 ".ur_s3: \n" 914 ".ur_s3: \n"
921 "strb r3, [%[addr], -%[psiz]]! \n" 915 "strb r6, [%[addr]], %[psiz] \n"
922 "mov r3, r3, lsr #8 \n" 916 "mov r6, r6, lsr #8 \n"
923 ".ur_s2: \n" 917 ".ur_s2: \n"
924 "strb r2, [%[addr], -%[psiz]]! \n" 918 "strb r7, [%[addr]], %[psiz] \n"
925 "mov r2, r2, lsr #8 \n" 919 "mov r7, r7, lsr #8 \n"
926 ".ur_s1: \n" 920 ".ur_s1: \n"
927 "strb r1, [%[addr], -%[psiz]]! \n" 921 "strb r8, [%[addr]], %[psiz] \n"
928 "mov r1, r1, lsr #8 \n" 922 "mov r8, r8, lsr #8 \n"
929 ".ur_s0: \n"
930 923
931 "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */
932 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ 924 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
933 "bhi .ur_sloop \n" 925 "bhi .ur_sloop \n"
934 926
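The ARM rewrite above replaces the old backward walk (multiply %[psiz] by
the remaining depth, then pre-decrement with "ldrb/strb ..., -%[psiz]]!")
with a forward walk using post-increment addressing, entering the unrolled
streak part-way through its first round; the preceding shift streak has
already discarded the unused low bytes. On the jump tables: when
"add pc, pc, r0" executes, the ARM pipeline makes pc read as that
instruction's address + 8, i.e. the table base + 4, hence the
".byte .ur_fN - .ur_ftable - 4" offsets. A rough C model of the new short
loop, common in structure to all three assembly versions (names are
illustrative; d[0..7] stand for r1..r8, and a GNU C computed goto stands
in for the byte-offset tables):

    static void store_block(unsigned char *addr, unsigned long d[8],
                            int depth, long psiz)
    {
        static void *const stable[8] = {     /* cf. .ur_stable */
            &&s1, &&s2, &&s3, &&s4, &&s5, &&s6, &&s7, &&s8
        };
        int i, bitfill = (-depth) & 7;

        for (i = 0; i < bitfill; i++)        /* shift out unused low bytes */
            d[i] >>= 8;

        goto *stable[(depth - 1) & 7];       /* jump into the streak */
    sloop:                                   /* short loop (nothing to keep) */
    s8: *addr = (unsigned char)d[0]; addr += psiz; d[0] >>= 8;
    s7: *addr = (unsigned char)d[1]; addr += psiz; d[1] >>= 8;
    s6: *addr = (unsigned char)d[2]; addr += psiz; d[2] >>= 8;
    s5: *addr = (unsigned char)d[3]; addr += psiz; d[3] >>= 8;
    s4: *addr = (unsigned char)d[4]; addr += psiz; d[4] >>= 8;
    s3: *addr = (unsigned char)d[5]; addr += psiz; d[5] >>= 8;
    s2: *addr = (unsigned char)d[6]; addr += psiz; d[6] >>= 8;
    s1: *addr = (unsigned char)d[7]; addr += psiz; d[7] >>= 8;
        depth -= 8;
        if (depth > 0)
            goto sloop;                      /* next round if anything left */
    }

The full loop differs only in masking the old byte first ("and" with the
keep mask, "orr" in the new bits) before storing it back.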
@@ -956,7 +948,7 @@ void gray_update_rect(int x, int y, int width, int height)
956 { 948 {
957 unsigned char *addr, *end; 949 unsigned char *addr, *end;
958 unsigned mask = 0; 950 unsigned mask = 0;
959 unsigned test = 1; 951 unsigned test = 1 << ((-_gray_info.depth) & 7);
960 int i; 952 int i;
961 953
962 /* precalculate the bit patterns with random shifts 954 /* precalculate the bit patterns with random shifts
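The C reference implementation adapts to the same invariant: its probe bit
now starts at the first used bit position instead of bit 0. A hedged sketch
of the idea -- the function name and the merge step are illustrative, not
the exact fallback code:

    /* Build one byte per bitplane by testing the same bit position in
     * each of the eight per-pixel pattern words; with the patterns
     * pre-shifted, the probe starts above the unused low bits. */
    static void emit_planes(unsigned char *addr, long psiz,
                            const unsigned long pat[8], int depth)
    {
        unsigned test = 1u << ((-depth) & 7);   /* first used bit */
        int plane, pixel;

        for (plane = 0; plane < depth; plane++)
        {
            unsigned data = 0;
            for (pixel = 7; pixel >= 0; pixel--)
                data = (data << 1) | !!(pat[pixel] & test);
            *addr = data;       /* the real code masks/merges here */
            addr += psiz;
            test <<= 1;         /* next plane's bit */
        }
    }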
@@ -1287,25 +1279,37 @@ void gray_update_rect(int x, int y, int width, int height)
1287 "shlr r0 \n" 1279 "shlr r0 \n"
1288 "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ 1280 "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
1289 1281
1290 "tst %[mask], %[mask] \n" 1282 "mov %[dpth], %[rx] \n" /** shift out unused low bytes **/
1291 "bt .ur_sloop \n" /* short loop if nothing to keep */ 1283 "add #-1, %[rx] \n"
1284 "mov #7, r0 \n"
1285 "and r0, %[rx] \n"
1286 "mova .ur_pshift, r0 \n"
1287 "add %[rx], r0 \n"
1288 "add %[rx], r0 \n"
1289 "jmp @r0 \n" /* jump into shift streak */
1290 "nop \n"
1292 1291
1293 ".ur_floop: \n" /** full loop (there are bits to keep)**/ 1292 ".align 2 \n"
1294 "mov #8, r0 \n" 1293 ".ur_pshift: \n"
1295 "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ 1294 "shlr8 r7 \n"
1296 "bt .ur_f8 \n" 1295 "shlr8 r6 \n"
1296 "shlr8 r5 \n"
1297 "shlr8 r4 \n"
1298 "shlr8 r3 \n"
1299 "shlr8 r2 \n"
1300 "shlr8 r1 \n"
1301
1302 "tst %[mask], %[mask] \n"
1303 "bt .ur_sstart \n" /* short loop if nothing to keep */
1297 1304
1298 "mulu %[psiz], %[dpth] \n" 1305 "mova .ur_ftable, r0 \n" /* jump into full loop */
1299 "mova .ur_ftable, r0 \n" 1306 "mov.b @(r0, %[rx]), %[rx] \n"
1300 "mov.b @(r0, %[dpth]), %[rx] \n"
1301 "add %[rx], r0 \n" 1307 "add %[rx], r0 \n"
1302 "sts macl, %[rx] \n" /* point behind the last plane.. */ 1308 "jmp @r0 \n"
1303 "jmp @r0 \n" /* jump into streak */ 1309 "nop \n"
1304 "add %[rx], %[addr] \n" /* ..for this round */ 1310
1305
1306 ".align 2 \n" 1311 ".align 2 \n"
1307 ".ur_ftable: \n" 1312 ".ur_ftable: \n"
1308 ".byte .ur_f0 - .ur_ftable \n"
1309 ".byte .ur_f1 - .ur_ftable \n" 1313 ".byte .ur_f1 - .ur_ftable \n"
1310 ".byte .ur_f2 - .ur_ftable \n" 1314 ".byte .ur_f2 - .ur_ftable \n"
1311 ".byte .ur_f3 - .ur_ftable \n" 1315 ".byte .ur_f3 - .ur_ftable \n"
@@ -1313,74 +1317,66 @@ void gray_update_rect(int x, int y, int width, int height)
1313 ".byte .ur_f5 - .ur_ftable \n" 1317 ".byte .ur_f5 - .ur_ftable \n"
1314 ".byte .ur_f6 - .ur_ftable \n" 1318 ".byte .ur_f6 - .ur_ftable \n"
1315 ".byte .ur_f7 - .ur_ftable \n" 1319 ".byte .ur_f7 - .ur_ftable \n"
1320 ".byte .ur_f8 - .ur_ftable \n"
1316 1321
1322 ".ur_floop: \n" /** full loop (there are bits to keep)**/
1317 ".ur_f8: \n" 1323 ".ur_f8: \n"
1318 "mov %[psiz], %[rx] \n"
1319 "shll2 %[rx] \n"
1320 "add %[rx], %[rx] \n"
1321 "add %[rx], %[addr] \n"
1322 /* Point behind the last plane for this round. Note: We're using the
1323 * registers backwards in order to reuse the streak for the last round.
1324 * Therefore we need to go thru the bitplanes backwards too, otherwise
1325 * the bit order would be destroyed which results in more flicker. */
1326 "sub %[psiz], %[addr] \n"
1327 "mov.b @%[addr], r0 \n" /* load old byte */ 1324 "mov.b @%[addr], r0 \n" /* load old byte */
1328 "and %[mask], r0 \n" /* mask out replaced bits */ 1325 "and %[mask], r0 \n" /* mask out replaced bits */
1329 "or r8, r0 \n" /* set new bits */ 1326 "or r1, r0 \n" /* set new bits */
1330 "mov.b r0, @%[addr] \n" /* store byte */ 1327 "mov.b r0, @%[addr] \n" /* store byte */
1331 "shlr8 r8 \n" /* shift out used-up byte */ 1328 "add %[psiz], %[addr] \n"
1329 "shlr8 r1 \n" /* shift out used-up byte */
1332 ".ur_f7: \n" 1330 ".ur_f7: \n"
1333 "sub %[psiz], %[addr] \n"
1334 "mov.b @%[addr], r0 \n" 1331 "mov.b @%[addr], r0 \n"
1335 "and %[mask], r0 \n" 1332 "and %[mask], r0 \n"
1336 "or r7, r0 \n" 1333 "or r2, r0 \n"
1337 "mov.b r0, @%[addr] \n" 1334 "mov.b r0, @%[addr] \n"
1338 "shlr8 r7 \n" 1335 "add %[psiz], %[addr] \n"
1336 "shlr8 r2 \n"
1339 ".ur_f6: \n" 1337 ".ur_f6: \n"
1340 "sub %[psiz], %[addr] \n"
1341 "mov.b @%[addr], r0 \n" 1338 "mov.b @%[addr], r0 \n"
1342 "and %[mask], r0 \n" 1339 "and %[mask], r0 \n"
1343 "or r6, r0 \n" 1340 "or r3, r0 \n"
1344 "mov.b r0, @%[addr] \n" 1341 "mov.b r0, @%[addr] \n"
1345 "shlr8 r6 \n" 1342 "add %[psiz], %[addr] \n"
1343 "shlr8 r3 \n"
1346 ".ur_f5: \n" 1344 ".ur_f5: \n"
1347 "sub %[psiz], %[addr] \n"
1348 "mov.b @%[addr], r0 \n" 1345 "mov.b @%[addr], r0 \n"
1349 "and %[mask], r0 \n" 1346 "and %[mask], r0 \n"
1350 "or r5, r0 \n" 1347 "or r4, r0 \n"
1351 "mov.b r0, @%[addr] \n" 1348 "mov.b r0, @%[addr] \n"
1352 "shlr8 r5 \n" 1349 "add %[psiz], %[addr] \n"
1350 "shlr8 r4 \n"
1353 ".ur_f4: \n" 1351 ".ur_f4: \n"
1354 "sub %[psiz], %[addr] \n"
1355 "mov.b @%[addr], r0 \n" 1352 "mov.b @%[addr], r0 \n"
1356 "and %[mask], r0 \n" 1353 "and %[mask], r0 \n"
1357 "or r4, r0 \n" 1354 "or r5, r0 \n"
1358 "mov.b r0, @%[addr] \n" 1355 "mov.b r0, @%[addr] \n"
1359 "shlr8 r4 \n" 1356 "add %[psiz], %[addr] \n"
1357 "shlr8 r5 \n"
1360 ".ur_f3: \n" 1358 ".ur_f3: \n"
1361 "sub %[psiz], %[addr] \n"
1362 "mov.b @%[addr], r0 \n" 1359 "mov.b @%[addr], r0 \n"
1363 "and %[mask], r0 \n" 1360 "and %[mask], r0 \n"
1364 "or r3, r0 \n" 1361 "or r6, r0 \n"
1365 "mov.b r0, @%[addr] \n" 1362 "mov.b r0, @%[addr] \n"
1366 "shlr8 r3 \n" 1363 "add %[psiz], %[addr] \n"
1364 "shlr8 r6 \n"
1367 ".ur_f2: \n" 1365 ".ur_f2: \n"
1368 "sub %[psiz], %[addr] \n"
1369 "mov.b @%[addr], r0 \n" 1366 "mov.b @%[addr], r0 \n"
1370 "and %[mask], r0 \n" 1367 "and %[mask], r0 \n"
1371 "or r2, r0 \n" 1368 "or r7, r0 \n"
1372 "mov.b r0, @%[addr] \n" 1369 "mov.b r0, @%[addr] \n"
1373 "shlr8 r2 \n" 1370 "add %[psiz], %[addr] \n"
1371 "shlr8 r7 \n"
1374 ".ur_f1: \n" 1372 ".ur_f1: \n"
1375 "sub %[psiz], %[addr] \n"
1376 "mov.b @%[addr], r0 \n" 1373 "mov.b @%[addr], r0 \n"
1377 "and %[mask], r0 \n" 1374 "and %[mask], r0 \n"
1378 "or r1, r0 \n" 1375 "or r8, r0 \n"
1379 "mov.b r0, @%[addr] \n" 1376 "mov.b r0, @%[addr] \n"
1380 "shlr8 r1 \n" 1377 "add %[psiz], %[addr] \n"
1381 ".ur_f0: \n" 1378 "shlr8 r8 \n"
1382 1379
1383 "add %[rx], %[addr] \n" /* correct address */
1384 "add #-8, %[dpth] \n" 1380 "add #-8, %[dpth] \n"
1385 "cmp/pl %[dpth] \n" /* next round if anything left */ 1381 "cmp/pl %[dpth] \n" /* next round if anything left */
1386 "bt .ur_floop \n" 1382 "bt .ur_floop \n"
@@ -1404,22 +1400,15 @@ void gray_update_rect(int x, int y, int width, int height)
1404 ".ur_mask1: \n" 1400 ".ur_mask1: \n"
1405 ".long 0xAAAAAAAA \n" 1401 ".long 0xAAAAAAAA \n"
1406 1402
1407 ".ur_sloop: \n" /** short loop (nothing to keep) **/ 1403 ".ur_sstart: \n"
1408 "mov #8, r0 \n" 1404 "mova .ur_stable, r0 \n" /* jump into short loop */
1409 "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ 1405 "mov.b @(r0, %[rx]), %[rx] \n"
1410 "bt .ur_s8 \n"
1411
1412 "mulu %[psiz], %[dpth] \n"
1413 "mova .ur_stable, r0 \n"
1414 "mov.b @(r0, %[dpth]), %[rx] \n"
1415 "add %[rx], r0 \n" 1406 "add %[rx], r0 \n"
1416 "sts macl, %[rx] \n" /* point behind the last plane.. */ 1407 "jmp @r0 \n"
1417 "jmp @r0 \n" /* jump into streak */ 1408 "nop \n"
1418 "add %[rx], %[addr] \n" /* ..for this round */
1419 1409
1420 ".align 2 \n" 1410 ".align 2 \n"
1421 ".ur_stable: \n" 1411 ".ur_stable: \n"
1422 ".byte .ur_s0 - .ur_stable \n"
1423 ".byte .ur_s1 - .ur_stable \n" 1412 ".byte .ur_s1 - .ur_stable \n"
1424 ".byte .ur_s2 - .ur_stable \n" 1413 ".byte .ur_s2 - .ur_stable \n"
1425 ".byte .ur_s3 - .ur_stable \n" 1414 ".byte .ur_s3 - .ur_stable \n"
@@ -1427,47 +1416,42 @@ void gray_update_rect(int x, int y, int width, int height)
1427 ".byte .ur_s5 - .ur_stable \n" 1416 ".byte .ur_s5 - .ur_stable \n"
1428 ".byte .ur_s6 - .ur_stable \n" 1417 ".byte .ur_s6 - .ur_stable \n"
1429 ".byte .ur_s7 - .ur_stable \n" 1418 ".byte .ur_s7 - .ur_stable \n"
1419 ".byte .ur_s8 - .ur_stable \n"
1430 1420
1421 ".ur_sloop: \n" /** short loop (nothing to keep) **/
1431 ".ur_s8: \n" 1422 ".ur_s8: \n"
1432 "mov %[psiz], %[rx] \n" /* Point behind the last plane */ 1423 "mov.b r1, @%[addr] \n" /* store byte */
1433 "shll2 %[rx] \n" /* for this round. */ 1424 "add %[psiz], %[addr] \n"
1434 "add %[rx], %[rx] \n" /* See above. */ 1425 "shlr8 r1 \n" /* shift out used-up byte */
1435 "add %[rx], %[addr] \n"
1436
1437 "sub %[psiz], %[addr] \n"
1438 "mov.b r8, @%[addr] \n" /* store byte */
1439 "shlr8 r8 \n" /* shift out used-up byte */
1440 ".ur_s7: \n" 1426 ".ur_s7: \n"
1441 "sub %[psiz], %[addr] \n" 1427 "mov.b r2, @%[addr] \n"
1442 "mov.b r7, @%[addr] \n" 1428 "add %[psiz], %[addr] \n"
1443 "shlr8 r7 \n" 1429 "shlr8 r2 \n"
1444 ".ur_s6: \n" 1430 ".ur_s6: \n"
1445 "sub %[psiz], %[addr] \n" 1431 "mov.b r3, @%[addr] \n"
1446 "mov.b r6, @%[addr] \n" 1432 "add %[psiz], %[addr] \n"
1447 "shlr8 r6 \n" 1433 "shlr8 r3 \n"
1448 ".ur_s5: \n" 1434 ".ur_s5: \n"
1449 "sub %[psiz], %[addr] \n"
1450 "mov.b r5, @%[addr] \n"
1451 "shlr8 r5 \n"
1452 ".ur_s4: \n"
1453 "sub %[psiz], %[addr] \n"
1454 "mov.b r4, @%[addr] \n" 1435 "mov.b r4, @%[addr] \n"
1436 "add %[psiz], %[addr] \n"
1455 "shlr8 r4 \n" 1437 "shlr8 r4 \n"
1438 ".ur_s4: \n"
1439 "mov.b r5, @%[addr] \n"
1440 "add %[psiz], %[addr] \n"
1441 "shlr8 r5 \n"
1456 ".ur_s3: \n" 1442 ".ur_s3: \n"
1457 "sub %[psiz], %[addr] \n" 1443 "mov.b r6, @%[addr] \n"
1458 "mov.b r3, @%[addr] \n" 1444 "add %[psiz], %[addr] \n"
1459 "shlr8 r3 \n" 1445 "shlr8 r6 \n"
1460 ".ur_s2: \n" 1446 ".ur_s2: \n"
1461 "sub %[psiz], %[addr] \n" 1447 "mov.b r7, @%[addr] \n"
1462 "mov.b r2, @%[addr] \n" 1448 "add %[psiz], %[addr] \n"
1463 "shlr8 r2 \n" 1449 "shlr8 r7 \n"
1464 ".ur_s1: \n" 1450 ".ur_s1: \n"
1465 "sub %[psiz], %[addr] \n" 1451 "mov.b r8, @%[addr] \n"
1466 "mov.b r1, @%[addr] \n" 1452 "add %[psiz], %[addr] \n"
1467 "shlr8 r1 \n" 1453 "shlr8 r8 \n"
1468 ".ur_s0: \n"
1469 1454
1470 "add %[rx], %[addr] \n" /* correct address */
1471 "add #-8, %[dpth] \n" 1455 "add #-8, %[dpth] \n"
1472 "cmp/pl %[dpth] \n" /* next round if anything left */ 1456 "cmp/pl %[dpth] \n" /* next round if anything left */
1473 "bt .ur_sloop \n" 1457 "bt .ur_sloop \n"
@@ -1677,172 +1661,163 @@ void gray_update_rect(int x, int y, int width, int height)
1677 "move.l %%a0, %%d5 \n" 1661 "move.l %%a0, %%d5 \n"
1678 "eor.l %%d5, %%d0 \n" 1662 "eor.l %%d5, %%d0 \n"
1679 "and.l #0xAAAAAAAA, %%d0 \n" 1663 "and.l #0xAAAAAAAA, %%d0 \n"
1680 "eor.l %%d0, %%d5 \n" 1664 "eor.l %%d0, %%d5 \n" /* (a0 = ...h0g0f0e0d0c0b0a0) */
1681 "move.l %%d5, %%a0 \n" /* a0 = ...h0g0f0e0d0c0b0a0 */ 1665 /* move.l %%d5, %%a0 */ /* but keep in d5 for shift streak */
1682 "lsr.l #1, %%d0 \n" 1666 "lsr.l #1, %%d0 \n"
1683 "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ 1667 "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */
1668
1669 "move.l %[dpth], %%d0 \n" /** shift out unused low bytes **/
1670 "subq.l #1, %%d0 \n"
1671 "and.l #7, %%d0 \n"
1672 "move.l %%d0, %%a0 \n"
1673 "move.l %[ax], %%d0 \n" /* all data in D registers */
1674 "jmp (2, %%pc, %%a0:l:2) \n" /* jump into shift streak */
1675 "lsr.l #8, %%d2 \n"
1676 "lsr.l #8, %%d3 \n"
1677 "lsr.l #8, %%d4 \n"
1678 "lsr.l #8, %%d0 \n"
1679 "lsr.l #8, %%d6 \n"
1680 "lsr.l #8, %%d7 \n"
1681 "lsr.l #8, %%d5 \n"
1682 "move.l %%d0, %[ax] \n" /* put the 2 extra words back.. */
1683 "move.l %%a0, %%d0 \n" /* keep the value for later */
1684 "move.l %%d5, %%a0 \n" /* ..into their A registers */
1684 1685
1685 "tst.l %[mask] \n" 1686 "tst.l %[mask] \n"
1686 "jeq .ur_sloop \n" /* short loop if nothing to keep */ 1687 "jeq .ur_sstart \n" /* short loop if nothing to keep */
1687 1688
1688 "move.l %[mask], %%d5 \n" /* need mask in data reg. */ 1689 "move.l %[mask], %%d5 \n" /* need mask in data reg. */
1689 "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ 1690 "move.l %%d1, %[mask] \n" /* free d1 as working reg. */
1690 1691
1691 ".ur_floop: \n" /** full loop (there are bits to keep)**/ 1692 "jmp (2, %%pc, %%d0:l:2) \n" /* jump into full loop */
1692 "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ 1693 "bra.s .ur_f1 \n"
1693 "bhs.s .ur_f8 \n"
1694
1695 "move.l %[psiz], %%d0 \n"
1696 "move.l %[dpth], %%d1 \n"
1697 "mulu.w %%d1, %%d0 \n" /* point behind the last plane */
1698 "add.l %%d0, %[addr] \n" /* for this round */
1699 "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */
1700 "bra.s .ur_f1 \n" /* dpth == 0 should never happen */
1701 "bra.s .ur_f2 \n" 1694 "bra.s .ur_f2 \n"
1702 "bra.s .ur_f3 \n" 1695 "bra.s .ur_f3 \n"
1703 "bra.s .ur_f4 \n" 1696 "bra.s .ur_f4 \n"
1704 "bra.s .ur_f5 \n" 1697 "bra.s .ur_f5 \n"
1705 "bra.s .ur_f6 \n" 1698 "bra.s .ur_f6 \n"
1706 "bra.s .ur_f7 \n" 1699 "bra.s .ur_f7 \n"
1700 /* bra.s .ur_f8 */ /* identical with target */
1707 1701
1702 ".ur_floop: \n" /** full loop (there are bits to keep)**/
1708 ".ur_f8: \n" 1703 ".ur_f8: \n"
1709 "move.l %[psiz], %%d0 \n"
1710 "lsl.l #3, %%d0 \n"
1711 "add.l %%d0, %[addr] \n"
1712 /* Point behind the last plane for this round. Note: We're using the
1713 * registers backwards in order to reuse the streak for the last round.
1714 * Therefore we need to go thru the bitplanes backwards too, otherwise
1715 * the bit order would be destroyed which results in more flicker. */
1716 "sub.l %[psiz], %[addr] \n"
1717 "move.b (%[addr]), %%d0 \n" /* load old byte */ 1704 "move.b (%[addr]), %%d0 \n" /* load old byte */
1718 "and.l %%d5, %%d0 \n" /* mask out replaced bits */ 1705 "and.l %%d5, %%d0 \n" /* mask out replaced bits */
1719 "move.l %[mask], %%d1 \n" 1706 "move.l %%a0, %%d1 \n"
1720 "or.l %%d1, %%d0 \n" /* set new bits */ 1707 "or.l %%d1, %%d0 \n" /* set new bits */
1721 "move.b %%d0, (%[addr]) \n" /* store byte */ 1708 "move.b %%d0, (%[addr]) \n" /* store byte */
1709 "add.l %[psiz], %[addr] \n"
1722 "lsr.l #8, %%d1 \n" /* shift out used-up byte */ 1710 "lsr.l #8, %%d1 \n" /* shift out used-up byte */
1723 "move.l %%d1, %[mask] \n" 1711 "move.l %%d1, %%a0 \n"
1724 ".ur_f7: \n" 1712 ".ur_f7: \n"
1725 "sub.l %[psiz], %[addr] \n"
1726 "move.b (%[addr]), %%d0 \n" 1713 "move.b (%[addr]), %%d0 \n"
1727 "and.l %%d5, %%d0 \n" 1714 "and.l %%d5, %%d0 \n"
1728 "or.l %%d2, %%d0 \n" 1715 "or.l %%d7, %%d0 \n"
1729 "move.b %%d0, (%[addr]) \n" 1716 "move.b %%d0, (%[addr]) \n"
1730 "lsr.l #8, %%d2 \n" 1717 "add.l %[psiz], %[addr] \n"
1718 "lsr.l #8, %%d7 \n"
1731 ".ur_f6: \n" 1719 ".ur_f6: \n"
1732 "sub.l %[psiz], %[addr] \n"
1733 "move.b (%[addr]), %%d0 \n" 1720 "move.b (%[addr]), %%d0 \n"
1734 "and.l %%d5, %%d0 \n" 1721 "and.l %%d5, %%d0 \n"
1735 "or.l %%d3, %%d0 \n" 1722 "or.l %%d6, %%d0 \n"
1736 "move.b %%d0, (%[addr]) \n" 1723 "move.b %%d0, (%[addr]) \n"
1737 "lsr.l #8, %%d3 \n" 1724 "add.l %[psiz], %[addr] \n"
1725 "lsr.l #8, %%d6 \n"
1738 ".ur_f5: \n" 1726 ".ur_f5: \n"
1739 "sub.l %[psiz], %[addr] \n"
1740 "move.b (%[addr]), %%d0 \n"
1741 "and.l %%d5, %%d0 \n"
1742 "or.l %%d4, %%d0 \n"
1743 "move.b %%d0, (%[addr]) \n"
1744 "lsr.l #8, %%d4 \n"
1745 ".ur_f4: \n"
1746 "sub.l %[psiz], %[addr] \n"
1747 "move.b (%[addr]), %%d0 \n" 1727 "move.b (%[addr]), %%d0 \n"
1748 "and.l %%d5, %%d0 \n" 1728 "and.l %%d5, %%d0 \n"
1749 "move.l %[ax], %%d1 \n" 1729 "move.l %[ax], %%d1 \n"
1750 "or.l %%d1, %%d0 \n" 1730 "or.l %%d1, %%d0 \n"
1751 "move.b %%d0, (%[addr]) \n" 1731 "move.b %%d0, (%[addr]) \n"
1732 "add.l %[psiz], %[addr] \n"
1752 "lsr.l #8, %%d1 \n" 1733 "lsr.l #8, %%d1 \n"
1753 "move.l %%d1, %[ax] \n" 1734 "move.l %%d1, %[ax] \n"
1735 ".ur_f4: \n"
1736 "move.b (%[addr]), %%d0 \n"
1737 "and.l %%d5, %%d0 \n"
1738 "or.l %%d4, %%d0 \n"
1739 "move.b %%d0, (%[addr]) \n"
1740 "add.l %[psiz], %[addr] \n"
1741 "lsr.l #8, %%d4 \n"
1754 ".ur_f3: \n" 1742 ".ur_f3: \n"
1755 "sub.l %[psiz], %[addr] \n"
1756 "move.b (%[addr]), %%d0 \n" 1743 "move.b (%[addr]), %%d0 \n"
1757 "and.l %%d5, %%d0 \n" 1744 "and.l %%d5, %%d0 \n"
1758 "or.l %%d6, %%d0 \n" 1745 "or.l %%d3, %%d0 \n"
1759 "move.b %%d0, (%[addr]) \n" 1746 "move.b %%d0, (%[addr]) \n"
1760 "lsr.l #8, %%d6 \n" 1747 "add.l %[psiz], %[addr] \n"
1748 "lsr.l #8, %%d3 \n"
1761 ".ur_f2: \n" 1749 ".ur_f2: \n"
1762 "sub.l %[psiz], %[addr] \n"
1763 "move.b (%[addr]), %%d0 \n" 1750 "move.b (%[addr]), %%d0 \n"
1764 "and.l %%d5, %%d0 \n" 1751 "and.l %%d5, %%d0 \n"
1765 "or.l %%d7, %%d0 \n" 1752 "or.l %%d2, %%d0 \n"
1766 "move.b %%d0, (%[addr]) \n" 1753 "move.b %%d0, (%[addr]) \n"
1767 "lsr.l #8, %%d7 \n" 1754 "add.l %[psiz], %[addr] \n"
1755 "lsr.l #8, %%d2 \n"
1768 ".ur_f1: \n" 1756 ".ur_f1: \n"
1769 "sub.l %[psiz], %[addr] \n"
1770 "move.b (%[addr]), %%d0 \n" 1757 "move.b (%[addr]), %%d0 \n"
1771 "and.l %%d5, %%d0 \n" 1758 "and.l %%d5, %%d0 \n"
1772 "move.l %%a0, %%d1 \n" 1759 "move.l %[mask], %%d1 \n"
1773 "or.l %%d1, %%d0 \n" 1760 "or.l %%d1, %%d0 \n"
1774 "move.b %%d0, (%[addr]) \n" 1761 "move.b %%d0, (%[addr]) \n"
1762 "add.l %[psiz], %[addr] \n"
1775 "lsr.l #8, %%d1 \n" 1763 "lsr.l #8, %%d1 \n"
1776 "move.l %%d1, %%a0 \n" 1764 "move.l %%d1, %[mask] \n"
1777 1765
1778 "move.l %[psiz], %%d0 \n"
1779 "lsl.l #3, %%d0 \n"
1780 "add.l %%d0, %[addr] \n" /* correct address */
1781 "subq.l #8, %[dpth] \n" 1766 "subq.l #8, %[dpth] \n"
1782 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ 1767 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
1783 "jgt .ur_floop \n" /* next round if anything left */ 1768 "jgt .ur_floop \n" /* next round if anything left */
1784 1769
1785 "jra .ur_end \n" 1770 "jra .ur_end \n"
1786 1771
1787 ".ur_sloop: \n" /** short loop (nothing to keep) **/ 1772 ".ur_sstart: \n"
1788 "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ 1773 "jmp (2, %%pc, %%d0:l:2) \n" /* jump into short loop */
1789 "bhs.s .ur_s8 \n" 1774 "bra.s .ur_s1 \n"
1790
1791 "move.l %[psiz], %%d0 \n"
1792 "move.l %[dpth], %%d5 \n"
1793 "mulu.w %%d5, %%d0 \n" /* point behind the last plane */
1794 "add.l %%d0, %[addr] \n" /* for this round */
1795 "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */
1796 "bra.s .ur_s1 \n" /* dpth == 0 should never happen */
1797 "bra.s .ur_s2 \n" 1775 "bra.s .ur_s2 \n"
1798 "bra.s .ur_s3 \n" 1776 "bra.s .ur_s3 \n"
1799 "bra.s .ur_s4 \n" 1777 "bra.s .ur_s4 \n"
1800 "bra.s .ur_s5 \n" 1778 "bra.s .ur_s5 \n"
1801 "bra.s .ur_s6 \n" 1779 "bra.s .ur_s6 \n"
1802 "bra.s .ur_s7 \n" 1780 "bra.s .ur_s7 \n"
1781 /* bra.s .ur_s8 */ /* identical with target */
1803 1782
1783 ".ur_sloop: \n" /** short loop (nothing to keep) **/
1804 ".ur_s8: \n" 1784 ".ur_s8: \n"
1805 "move.l %[psiz], %%d0 \n" /* Point behind the last plane */ 1785 "move.l %%a0, %%d5 \n"
1806 "lsl.l #3, %%d0 \n" /* for this round. */ 1786 "move.b %%d5, (%[addr]) \n" /* store byte */
1807 "add.l %%d0, %[addr] \n" /* See above. */ 1787 "add.l %[psiz], %[addr] \n"
1808 1788 "lsr.l #8, %%d5 \n" /* shift out used-up byte */
1809 "sub.l %[psiz], %[addr] \n" 1789 "move.l %%d5, %%a0 \n"
1810 "move.b %%d1, (%[addr]) \n" /* store byte */
1811 "lsr.l #8, %%d1 \n" /* shift out used-up byte */
1812 ".ur_s7: \n" 1790 ".ur_s7: \n"
1813 "sub.l %[psiz], %[addr] \n" 1791 "move.b %%d7, (%[addr]) \n"
1814 "move.b %%d2, (%[addr]) \n" 1792 "add.l %[psiz], %[addr] \n"
1815 "lsr.l #8, %%d2 \n" 1793 "lsr.l #8, %%d7 \n"
1816 ".ur_s6: \n" 1794 ".ur_s6: \n"
1817 "sub.l %[psiz], %[addr] \n" 1795 "move.b %%d6, (%[addr]) \n"
1818 "move.b %%d3, (%[addr]) \n" 1796 "add.l %[psiz], %[addr] \n"
1819 "lsr.l #8, %%d3 \n" 1797 "lsr.l #8, %%d6 \n"
1820 ".ur_s5: \n" 1798 ".ur_s5: \n"
1821 "sub.l %[psiz], %[addr] \n"
1822 "move.b %%d4, (%[addr]) \n"
1823 "lsr.l #8, %%d4 \n"
1824 ".ur_s4: \n"
1825 "sub.l %[psiz], %[addr] \n"
1826 "move.l %[ax], %%d5 \n" 1799 "move.l %[ax], %%d5 \n"
1827 "move.b %%d5, (%[addr]) \n" 1800 "move.b %%d5, (%[addr]) \n"
1801 "add.l %[psiz], %[addr] \n"
1828 "lsr.l #8, %%d5 \n" 1802 "lsr.l #8, %%d5 \n"
1829 "move.l %%d5, %[ax] \n" 1803 "move.l %%d5, %[ax] \n"
1804 ".ur_s4: \n"
1805 "move.b %%d4, (%[addr]) \n"
1806 "add.l %[psiz], %[addr] \n"
1807 "lsr.l #8, %%d4 \n"
1830 ".ur_s3: \n" 1808 ".ur_s3: \n"
1831 "sub.l %[psiz], %[addr] \n" 1809 "move.b %%d3, (%[addr]) \n"
1832 "move.b %%d6, (%[addr]) \n" 1810 "add.l %[psiz], %[addr] \n"
1833 "lsr.l #8, %%d6 \n" 1811 "lsr.l #8, %%d3 \n"
1834 ".ur_s2: \n" 1812 ".ur_s2: \n"
1835 "sub.l %[psiz], %[addr] \n" 1813 "move.b %%d2, (%[addr]) \n"
1836 "move.b %%d7, (%[addr]) \n" 1814 "add.l %[psiz], %[addr] \n"
1837 "lsr.l #8, %%d7 \n" 1815 "lsr.l #8, %%d2 \n"
1838 ".ur_s1: \n" 1816 ".ur_s1: \n"
1839 "sub.l %[psiz], %[addr] \n" 1817 "move.b %%d1, (%[addr]) \n"
1840 "move.l %%a0, %%d5 \n" 1818 "add.l %[psiz], %[addr] \n"
1841 "move.b %%d5, (%[addr]) \n" 1819 "lsr.l #8, %%d1 \n"
1842 "lsr.l #8, %%d5 \n"
1843 "move.l %%d5, %%a0 \n"
1844 1820
1845 "add.l %%d0, %[addr] \n" /* correct address */
1846 "subq.l #8, %[dpth] \n" 1821 "subq.l #8, %[dpth] \n"
1847 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ 1822 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
1848 "jgt .ur_sloop \n" /* next round if anything left */ 1823 "jgt .ur_sloop \n" /* next round if anything left */
@@ -1871,7 +1846,7 @@ void gray_update_rect(int x, int y, int width, int height)
1871 { 1846 {
1872 unsigned char *addr, *end; 1847 unsigned char *addr, *end;
1873 unsigned mask = 0; 1848 unsigned mask = 0;
1874 unsigned test = 1; 1849 unsigned test = 1 << ((-_gray_info.depth) & 7);
1875 int i; 1850 int i;
1876 1851
1877 /* precalculate the bit patterns with random shifts 1852 /* precalculate the bit patterns with random shifts
diff --git a/apps/plugins/lib/gray_draw.c b/apps/plugins/lib/gray_draw.c
index dcc65bdd09..bd8ea4f1ce 100644
--- a/apps/plugins/lib/gray_draw.c
+++ b/apps/plugins/lib/gray_draw.c
@@ -1002,103 +1002,94 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1002 "and r0, r0, %[rx] \n" 1002 "and r0, r0, %[rx] \n"
1003 "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ 1003 "eor r7, r7, r0 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */
1004 "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ 1004 "eor r8, r8, r0, lsr #1 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
1005
1006 "sub r0, %[dpth], #1 \n" /** shift out unused low bytes **/
1007 "and r0, r0, #7 \n"
1008 "add pc, pc, r0, lsl #2 \n" /* jump into shift streak */
1009 "mov r8, r8, lsr #8 \n" /* r8: never reached */
1010 "mov r7, r7, lsr #8 \n"
1011 "mov r6, r6, lsr #8 \n"
1012 "mov r5, r5, lsr #8 \n"
1013 "mov r4, r4, lsr #8 \n"
1014 "mov r3, r3, lsr #8 \n"
1015 "mov r2, r2, lsr #8 \n"
1016 "mov r1, r1, lsr #8 \n"
1005 1017
1006 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ 1018 "mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
1007 "ands %[mask], %[mask], #0xff \n" 1019 "ands %[mask], %[mask], #0xff \n"
1008 "beq .wa_sloop \n" /* short loop if no bits to keep */ 1020 "beq .wa_sstart \n" /* short loop if no bits to keep */
1009
1010 ".wa_floop: \n" /** full loop (bits to keep)**/
1011 "cmp %[dpth], #8 \n" /* 8 planes or more left? */
1012 "bhs .wa_f8 \n"
1013 1021
1014 "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */ 1022 "ldrb r0, [pc, r0] \n" /* jump into full loop */
1015 "add %[addr], %[addr], r0 \n" /* for this round */
1016
1017
1018 "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */
1019 "add pc, pc, r0 \n" 1023 "add pc, pc, r0 \n"
1020 ".wa_ftable: \n" 1024 ".wa_ftable: \n"
1021 ".byte .wa_f0 - .wa_ftable - 4 \n" /* [jump tables are tricky] */ 1025 ".byte .wa_f1 - .wa_ftable - 4 \n" /* [jump tables are tricky] */
1022 ".byte .wa_f1 - .wa_ftable - 4 \n"
1023 ".byte .wa_f2 - .wa_ftable - 4 \n" 1026 ".byte .wa_f2 - .wa_ftable - 4 \n"
1024 ".byte .wa_f3 - .wa_ftable - 4 \n" 1027 ".byte .wa_f3 - .wa_ftable - 4 \n"
1025 ".byte .wa_f4 - .wa_ftable - 4 \n" 1028 ".byte .wa_f4 - .wa_ftable - 4 \n"
1026 ".byte .wa_f5 - .wa_ftable - 4 \n" 1029 ".byte .wa_f5 - .wa_ftable - 4 \n"
1027 ".byte .wa_f6 - .wa_ftable - 4 \n" 1030 ".byte .wa_f6 - .wa_ftable - 4 \n"
1028 ".byte .wa_f7 - .wa_ftable - 4 \n" 1031 ".byte .wa_f7 - .wa_ftable - 4 \n"
1032 ".byte .wa_f8 - .wa_ftable - 4 \n"
1029 1033
1034 ".wa_floop: \n" /** full loop (bits to keep)**/
1030 ".wa_f8: \n" 1035 ".wa_f8: \n"
1031 "add %[addr], %[addr], %[psiz], lsl #3 \n" 1036 "ldrb r0, [%[addr]] \n" /* load old byte */
1032 /* Point behind the last plane for this round. Note: We're using the
1033 * registers backwards in order to reuse the streak for the last round.
1034 * Therefore we need to go thru the bitplanes backwards too, otherwise
1035 * the bit order would be destroyed which results in more flicker. */
1036 "ldrb r0, [%[addr], -%[psiz]]! \n" /* load old byte */
1037 "and r0, r0, %[mask] \n" /* mask out replaced bits */ 1037 "and r0, r0, %[mask] \n" /* mask out replaced bits */
1038 "orr r0, r0, r8 \n" /* set new bits */ 1038 "orr r0, r0, r1 \n" /* set new bits */
1039 "strb r0, [%[addr]] \n" /* store byte */ 1039 "strb r0, [%[addr]], %[psiz] \n" /* store byte */
1040 "mov r8, r8, lsr #8 \n" /* shift out used-up byte */ 1040 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
1041 ".wa_f7: \n" 1041 ".wa_f7: \n"
1042 "ldrb r0, [%[addr], -%[psiz]]! \n" 1042 "ldrb r0, [%[addr]] \n"
1043 "and r0, r0, %[mask] \n" 1043 "and r0, r0, %[mask] \n"
1044 "orr r0, r0, r7 \n" 1044 "orr r0, r0, r2 \n"
1045 "strb r0, [%[addr]] \n" 1045 "strb r0, [%[addr]], %[psiz] \n"
1046 "mov r7, r7, lsr #8 \n" 1046 "mov r2, r2, lsr #8 \n"
1047 ".wa_f6: \n" 1047 ".wa_f6: \n"
1048 "ldrb r0, [%[addr], -%[psiz]]! \n" 1048 "ldrb r0, [%[addr]] \n"
1049 "and r0, r0, %[mask] \n" 1049 "and r0, r0, %[mask] \n"
1050 "orr r0, r0, r6 \n" 1050 "orr r0, r0, r3 \n"
1051 "strb r0, [%[addr]] \n" 1051 "strb r0, [%[addr]], %[psiz] \n"
1052 "mov r6, r6, lsr #8 \n" 1052 "mov r3, r3, lsr #8 \n"
1053 ".wa_f5: \n" 1053 ".wa_f5: \n"
1054 "ldrb r0, [%[addr], -%[psiz]]! \n" 1054 "ldrb r0, [%[addr]] \n"
1055 "and r0, r0, %[mask] \n"
1056 "orr r0, r0, r5 \n"
1057 "strb r0, [%[addr]] \n"
1058 "mov r5, r5, lsr #8 \n"
1059 ".wa_f4: \n"
1060 "ldrb r0, [%[addr], -%[psiz]]! \n"
1061 "and r0, r0, %[mask] \n" 1055 "and r0, r0, %[mask] \n"
1062 "orr r0, r0, r4 \n" 1056 "orr r0, r0, r4 \n"
1063 "strb r0, [%[addr]] \n" 1057 "strb r0, [%[addr]], %[psiz] \n"
1064 "mov r4, r4, lsr #8 \n" 1058 "mov r4, r4, lsr #8 \n"
1059 ".wa_f4: \n"
1060 "ldrb r0, [%[addr]] \n"
1061 "and r0, r0, %[mask] \n"
1062 "orr r0, r0, r5 \n"
1063 "strb r0, [%[addr]], %[psiz] \n"
1064 "mov r5, r5, lsr #8 \n"
1065 ".wa_f3: \n" 1065 ".wa_f3: \n"
1066 "ldrb r0, [%[addr], -%[psiz]]! \n" 1066 "ldrb r0, [%[addr]] \n"
1067 "and r0, r0, %[mask] \n" 1067 "and r0, r0, %[mask] \n"
1068 "orr r0, r0, r3 \n" 1068 "orr r0, r0, r6 \n"
1069 "strb r0, [%[addr]] \n" 1069 "strb r0, [%[addr]], %[psiz] \n"
1070 "mov r3, r3, lsr #8 \n" 1070 "mov r6, r6, lsr #8 \n"
1071 ".wa_f2: \n" 1071 ".wa_f2: \n"
1072 "ldrb r0, [%[addr], -%[psiz]]! \n" 1072 "ldrb r0, [%[addr]] \n"
1073 "and r0, r0, %[mask] \n" 1073 "and r0, r0, %[mask] \n"
1074 "orr r0, r0, r2 \n" 1074 "orr r0, r0, r7 \n"
1075 "strb r0, [%[addr]] \n" 1075 "strb r0, [%[addr]], %[psiz] \n"
1076 "mov r2, r2, lsr #8 \n" 1076 "mov r7, r7, lsr #8 \n"
1077 ".wa_f1: \n" 1077 ".wa_f1: \n"
1078 "ldrb r0, [%[addr], -%[psiz]]! \n" 1078 "ldrb r0, [%[addr]] \n"
1079 "and r0, r0, %[mask] \n" 1079 "and r0, r0, %[mask] \n"
1080 "orr r0, r0, r1 \n" 1080 "orr r0, r0, r8 \n"
1081 "strb r0, [%[addr]] \n" 1081 "strb r0, [%[addr]], %[psiz] \n"
1082 "mov r1, r1, lsr #8 \n" 1082 "mov r8, r8, lsr #8 \n"
1083 ".wa_f0: \n"
1084 1083
1085 "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */
1086 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ 1084 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
1087 "bhi .wa_floop \n" 1085 "bhi .wa_floop \n"
1088 1086
1089 "b .wa_end \n" 1087 "b .wa_end \n"
1090 1088
1091 ".wa_sloop: \n" /** short loop (nothing to keep) **/ 1089 ".wa_sstart: \n"
1092 "cmp %[dpth], #8 \n" /* 8 planes or more left? */ 1090 "ldrb r0, [pc, r0] \n" /* jump into short loop*/
1093 "bhs .wa_s8 \n"
1094
1095 "mul r0, %[psiz], %[dpth] \n" /* point behind the last plane */
1096 "add %[addr], %[addr], r0 \n" /* for this round */
1097
1098 "ldrb r0, [pc, %[dpth]] \n" /* jump into streak */
1099 "add pc, pc, r0 \n" 1091 "add pc, pc, r0 \n"
1100 ".wa_stable: \n" 1092 ".wa_stable: \n"
1101 ".byte .wa_s0 - .wa_stable - 4 \n"
1102 ".byte .wa_s1 - .wa_stable - 4 \n" 1093 ".byte .wa_s1 - .wa_stable - 4 \n"
1103 ".byte .wa_s2 - .wa_stable - 4 \n" 1094 ".byte .wa_s2 - .wa_stable - 4 \n"
1104 ".byte .wa_s3 - .wa_stable - 4 \n" 1095 ".byte .wa_s3 - .wa_stable - 4 \n"
@@ -1106,36 +1097,34 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1106 ".byte .wa_s5 - .wa_stable - 4 \n" 1097 ".byte .wa_s5 - .wa_stable - 4 \n"
1107 ".byte .wa_s6 - .wa_stable - 4 \n" 1098 ".byte .wa_s6 - .wa_stable - 4 \n"
1108 ".byte .wa_s7 - .wa_stable - 4 \n" 1099 ".byte .wa_s7 - .wa_stable - 4 \n"
1100 ".byte .wa_s8 - .wa_stable - 4 \n"
1109 1101
1102 ".wa_sloop: \n" /** short loop (nothing to keep) **/
1110 ".wa_s8: \n" 1103 ".wa_s8: \n"
1111 "add %[addr], %[addr], %[psiz], lsl #3 \n" 1104 "strb r1, [%[addr]], %[psiz] \n" /* store byte */
1112 /* Point behind the last plane for this round. See above. */ 1105 "mov r1, r1, lsr #8 \n" /* shift out used-up byte */
1113 "strb r8, [%[addr], -%[psiz]]! \n" /* store byte */
1114 "mov r8, r8, lsr #8 \n" /* shift out used-up byte */
1115 ".wa_s7: \n" 1106 ".wa_s7: \n"
1116 "strb r7, [%[addr], -%[psiz]]! \n" 1107 "strb r2, [%[addr]], %[psiz] \n"
1117 "mov r7, r7, lsr #8 \n" 1108 "mov r2, r2, lsr #8 \n"
1118 ".wa_s6: \n" 1109 ".wa_s6: \n"
1119 "strb r6, [%[addr], -%[psiz]]! \n" 1110 "strb r3, [%[addr]], %[psiz] \n"
1120 "mov r6, r6, lsr #8 \n" 1111 "mov r3, r3, lsr #8 \n"
1121 ".wa_s5: \n" 1112 ".wa_s5: \n"
1122 "strb r5, [%[addr], -%[psiz]]! \n" 1113 "strb r4, [%[addr]], %[psiz] \n"
1123 "mov r5, r5, lsr #8 \n"
1124 ".wa_s4: \n"
1125 "strb r4, [%[addr], -%[psiz]]! \n"
1126 "mov r4, r4, lsr #8 \n" 1114 "mov r4, r4, lsr #8 \n"
1115 ".wa_s4: \n"
1116 "strb r5, [%[addr]], %[psiz] \n"
1117 "mov r5, r5, lsr #8 \n"
1127 ".wa_s3: \n" 1118 ".wa_s3: \n"
1128 "strb r3, [%[addr], -%[psiz]]! \n" 1119 "strb r6, [%[addr]], %[psiz] \n"
1129 "mov r3, r3, lsr #8 \n" 1120 "mov r6, r6, lsr #8 \n"
1130 ".wa_s2: \n" 1121 ".wa_s2: \n"
1131 "strb r2, [%[addr], -%[psiz]]! \n" 1122 "strb r7, [%[addr]], %[psiz] \n"
1132 "mov r2, r2, lsr #8 \n" 1123 "mov r7, r7, lsr #8 \n"
1133 ".wa_s1: \n" 1124 ".wa_s1: \n"
1134 "strb r1, [%[addr], -%[psiz]]! \n" 1125 "strb r8, [%[addr]], %[psiz] \n"
1135 "mov r1, r1, lsr #8 \n" 1126 "mov r8, r8, lsr #8 \n"
1136 ".wa_s0: \n"
1137 1127
1138 "add %[addr], %[addr], %[psiz], lsl #3 \n" /* correct address */
1139 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */ 1128 "subs %[dpth], %[dpth], #8 \n" /* next round if anything left */
1140 "bhi .wa_sloop \n" 1129 "bhi .wa_sloop \n"
1141 1130
@@ -1187,7 +1176,7 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1187 1176
1188 /* set the bits for all 8 pixels in all bytes according to the 1177 /* set the bits for all 8 pixels in all bytes according to the
1189 * precalculated patterns on the pattern stack */ 1178 * precalculated patterns on the pattern stack */
1190 test = 1; 1179 test = 1 << ((-_gray_info.depth) & 7);
1191 mask = (~mask & 0xff); 1180 mask = (~mask & 0xff);
1192 if (mask == 0) 1181 if (mask == 0)
1193 { 1182 {
@@ -1483,28 +1472,40 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1483 "xor r0, r7 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */ 1472 "xor r0, r7 \n" /* r7 = ...h6g6f6e6d6c6b6a6 */
1484 "shlr r0 \n" 1473 "shlr r0 \n"
1485 "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */ 1474 "xor r0, r8 \n" /* r8 = ...h7g7f7e7d7c7b7a7 */
1475
1476 "mov %[dpth], %[rx] \n" /** shift out unused low bytes **/
1477 "add #-1, %[rx] \n"
1478 "mov #7, r0 \n"
1479 "and r0, %[rx] \n"
1480 "mova .wa_pshift, r0 \n"
1481 "add %[rx], r0 \n"
1482 "add %[rx], r0 \n"
1483 "jmp @r0 \n" /* jump into shift streak */
1484 "nop \n"
1485
1486 ".align 2 \n"
1487 ".wa_pshift: \n"
1488 "shlr8 r7 \n"
1489 "shlr8 r6 \n"
1490 "shlr8 r5 \n"
1491 "shlr8 r4 \n"
1492 "shlr8 r3 \n"
1493 "shlr8 r2 \n"
1494 "shlr8 r1 \n"
1486 1495
1487 "not %[mask], %[mask] \n" /* "set" mask -> "keep" mask */ 1496 "not %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
1488 "extu.b %[mask], %[mask] \n" /* mask out high bits */ 1497 "extu.b %[mask], %[mask] \n" /* mask out high bits */
1489 "tst %[mask], %[mask] \n" 1498 "tst %[mask], %[mask] \n"
1490 "bt .wa_sloop \n" /* short loop if nothing to keep */ 1499 "bt .wa_sstart \n" /* short loop if nothing to keep */
1491
1492 ".wa_floop: \n" /** full loop (there are bits to keep)**/
1493 "mov #8, r0 \n"
1494 "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */
1495 "bt .wa_f8 \n"
1496 1500
1497 "mulu %[psiz], %[dpth] \n" 1501 "mova .wa_ftable, r0 \n" /* jump into full loop */
1498 "mova .wa_ftable, r0 \n" 1502 "mov.b @(r0, %[rx]), %[rx] \n"
1499 "mov.b @(r0, %[dpth]), %[rx] \n"
1500 "add %[rx], r0 \n" 1503 "add %[rx], r0 \n"
1501 "sts macl, %[rx] \n" /* point behind the last plane.. */ 1504 "jmp @r0 \n"
1502 "jmp @r0 \n" /* jump into streak */ 1505 "nop \n"
1503 "add %[rx], %[addr] \n" /* ..for this round */ 1506
1504
1505 ".align 2 \n" 1507 ".align 2 \n"
1506 ".wa_ftable: \n" 1508 ".wa_ftable: \n"
1507 ".byte .wa_f0 - .wa_ftable \n"
1508 ".byte .wa_f1 - .wa_ftable \n" 1509 ".byte .wa_f1 - .wa_ftable \n"
1509 ".byte .wa_f2 - .wa_ftable \n" 1510 ".byte .wa_f2 - .wa_ftable \n"
1510 ".byte .wa_f3 - .wa_ftable \n" 1511 ".byte .wa_f3 - .wa_ftable \n"
@@ -1512,74 +1513,66 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1512 ".byte .wa_f5 - .wa_ftable \n" 1513 ".byte .wa_f5 - .wa_ftable \n"
1513 ".byte .wa_f6 - .wa_ftable \n" 1514 ".byte .wa_f6 - .wa_ftable \n"
1514 ".byte .wa_f7 - .wa_ftable \n" 1515 ".byte .wa_f7 - .wa_ftable \n"
1516 ".byte .wa_f8 - .wa_ftable \n"
1515 1517
1518 ".wa_floop: \n" /** full loop (there are bits to keep)**/
1516 ".wa_f8: \n" 1519 ".wa_f8: \n"
1517 "mov %[psiz], %[rx] \n"
1518 "shll2 %[rx] \n"
1519 "add %[rx], %[rx] \n"
1520 "add %[rx], %[addr] \n"
1521 /* Point behind the last plane for this round. Note: We're using the
1522 * registers backwards in order to reuse the streak for the last round.
1523 * Therefore we need to go thru the bitplanes backwards too, otherwise
1524 * the bit order would be destroyed which results in more flicker. */
1525 "sub %[psiz], %[addr] \n"
1526 "mov.b @%[addr], r0 \n" /* load old byte */ 1520 "mov.b @%[addr], r0 \n" /* load old byte */
1527 "and %[mask], r0 \n" /* mask out replaced bits */ 1521 "and %[mask], r0 \n" /* mask out replaced bits */
1528 "or r8, r0 \n" /* set new bits */ 1522 "or r1, r0 \n" /* set new bits */
1529 "mov.b r0, @%[addr] \n" /* store byte */ 1523 "mov.b r0, @%[addr] \n" /* store byte */
1530 "shlr8 r8 \n" /* shift out used-up byte */ 1524 "add %[psiz], %[addr] \n"
1525 "shlr8 r1 \n" /* shift out used-up byte */
1531 ".wa_f7: \n" 1526 ".wa_f7: \n"
1532 "sub %[psiz], %[addr] \n"
1533 "mov.b @%[addr], r0 \n" 1527 "mov.b @%[addr], r0 \n"
1534 "and %[mask], r0 \n" 1528 "and %[mask], r0 \n"
1535 "or r7, r0 \n" 1529 "or r2, r0 \n"
1536 "mov.b r0, @%[addr] \n" 1530 "mov.b r0, @%[addr] \n"
1537 "shlr8 r7 \n" 1531 "add %[psiz], %[addr] \n"
1532 "shlr8 r2 \n"
1538 ".wa_f6: \n" 1533 ".wa_f6: \n"
1539 "sub %[psiz], %[addr] \n"
1540 "mov.b @%[addr], r0 \n" 1534 "mov.b @%[addr], r0 \n"
1541 "and %[mask], r0 \n" 1535 "and %[mask], r0 \n"
1542 "or r6, r0 \n" 1536 "or r3, r0 \n"
1543 "mov.b r0, @%[addr] \n" 1537 "mov.b r0, @%[addr] \n"
1544 "shlr8 r6 \n" 1538 "add %[psiz], %[addr] \n"
1539 "shlr8 r3 \n"
1545 ".wa_f5: \n" 1540 ".wa_f5: \n"
1546 "sub %[psiz], %[addr] \n"
1547 "mov.b @%[addr], r0 \n" 1541 "mov.b @%[addr], r0 \n"
1548 "and %[mask], r0 \n" 1542 "and %[mask], r0 \n"
1549 "or r5, r0 \n" 1543 "or r4, r0 \n"
1550 "mov.b r0, @%[addr] \n" 1544 "mov.b r0, @%[addr] \n"
1551 "shlr8 r5 \n" 1545 "add %[psiz], %[addr] \n"
1546 "shlr8 r4 \n"
1552 ".wa_f4: \n" 1547 ".wa_f4: \n"
1553 "sub %[psiz], %[addr] \n"
1554 "mov.b @%[addr], r0 \n" 1548 "mov.b @%[addr], r0 \n"
1555 "and %[mask], r0 \n" 1549 "and %[mask], r0 \n"
1556 "or r4, r0 \n" 1550 "or r5, r0 \n"
1557 "mov.b r0, @%[addr] \n" 1551 "mov.b r0, @%[addr] \n"
1558 "shlr8 r4 \n" 1552 "add %[psiz], %[addr] \n"
1553 "shlr8 r5 \n"
1559 ".wa_f3: \n" 1554 ".wa_f3: \n"
1560 "sub %[psiz], %[addr] \n"
1561 "mov.b @%[addr], r0 \n" 1555 "mov.b @%[addr], r0 \n"
1562 "and %[mask], r0 \n" 1556 "and %[mask], r0 \n"
1563 "or r3, r0 \n" 1557 "or r6, r0 \n"
1564 "mov.b r0, @%[addr] \n" 1558 "mov.b r0, @%[addr] \n"
1565 "shlr8 r3 \n" 1559 "add %[psiz], %[addr] \n"
1560 "shlr8 r6 \n"
1566 ".wa_f2: \n" 1561 ".wa_f2: \n"
1567 "sub %[psiz], %[addr] \n"
1568 "mov.b @%[addr], r0 \n" 1562 "mov.b @%[addr], r0 \n"
1569 "and %[mask], r0 \n" 1563 "and %[mask], r0 \n"
1570 "or r2, r0 \n" 1564 "or r7, r0 \n"
1571 "mov.b r0, @%[addr] \n" 1565 "mov.b r0, @%[addr] \n"
1572 "shlr8 r2 \n" 1566 "add %[psiz], %[addr] \n"
1567 "shlr8 r7 \n"
1573 ".wa_f1: \n" 1568 ".wa_f1: \n"
1574 "sub %[psiz], %[addr] \n"
1575 "mov.b @%[addr], r0 \n" 1569 "mov.b @%[addr], r0 \n"
1576 "and %[mask], r0 \n" 1570 "and %[mask], r0 \n"
1577 "or r1, r0 \n" 1571 "or r8, r0 \n"
1578 "mov.b r0, @%[addr] \n" 1572 "mov.b r0, @%[addr] \n"
1579 "shlr8 r1 \n" 1573 "add %[psiz], %[addr] \n"
1580 ".wa_f0: \n" 1574 "shlr8 r8 \n"
1581 1575
1582 "add %[rx], %[addr] \n" /* correct address */
1583 "add #-8, %[dpth] \n" 1576 "add #-8, %[dpth] \n"
1584 "cmp/pl %[dpth] \n" /* next round if anything left */ 1577 "cmp/pl %[dpth] \n" /* next round if anything left */
1585 "bt .wa_floop \n" 1578 "bt .wa_floop \n"
@@ -1603,22 +1596,15 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1603 ".wa_mask1: \n" 1596 ".wa_mask1: \n"
1604 ".long 0xAAAAAAAA \n" 1597 ".long 0xAAAAAAAA \n"
1605 1598
1606 ".wa_sloop: \n" /** short loop (nothing to keep) **/ 1599 ".wa_sstart: \n"
1607 "mov #8, r0 \n" 1600 "mova .wa_stable, r0 \n" /* jump into short loop */
1608 "cmp/hs r0, %[dpth] \n" /* 8 planes or more left? */ 1601 "mov.b @(r0, %[rx]), %[rx] \n"
1609 "bt .wa_s8 \n"
1610
1611 "mulu %[psiz], %[dpth] \n"
1612 "mova .wa_stable, r0 \n"
1613 "mov.b @(r0, %[dpth]), %[rx] \n"
1614 "add %[rx], r0 \n" 1602 "add %[rx], r0 \n"
1615 "sts macl, %[rx] \n" /* point behind the last plane.. */ 1603 "jmp @r0 \n"
1616 "jmp @r0 \n" /* jump into streak */ 1604 "nop \n"
1617 "add %[rx], %[addr] \n" /* ..for this round */
1618 1605
1619 ".align 2 \n" 1606 ".align 2 \n"
1620 ".wa_stable: \n" 1607 ".wa_stable: \n"
1621 ".byte .wa_s0 - .wa_stable \n"
1622 ".byte .wa_s1 - .wa_stable \n" 1608 ".byte .wa_s1 - .wa_stable \n"
1623 ".byte .wa_s2 - .wa_stable \n" 1609 ".byte .wa_s2 - .wa_stable \n"
1624 ".byte .wa_s3 - .wa_stable \n" 1610 ".byte .wa_s3 - .wa_stable \n"
@@ -1626,47 +1612,42 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1626 ".byte .wa_s5 - .wa_stable \n" 1612 ".byte .wa_s5 - .wa_stable \n"
1627 ".byte .wa_s6 - .wa_stable \n" 1613 ".byte .wa_s6 - .wa_stable \n"
1628 ".byte .wa_s7 - .wa_stable \n" 1614 ".byte .wa_s7 - .wa_stable \n"
1615 ".byte .wa_s8 - .wa_stable \n"
1629 1616
1617 ".wa_sloop: \n" /** short loop (nothing to keep) **/
1630 ".wa_s8: \n" 1618 ".wa_s8: \n"
1631 "mov %[psiz], %[rx] \n" /* Point behind the last plane */ 1619 "mov.b r1, @%[addr] \n" /* store byte */
1632 "shll2 %[rx] \n" /* for this round. */ 1620 "add %[psiz], %[addr] \n"
1633 "add %[rx], %[rx] \n" /* See above. */ 1621 "shlr8 r1 \n" /* shift out used-up byte */
1634 "add %[rx], %[addr] \n"
1635
1636 "sub %[psiz], %[addr] \n"
1637 "mov.b r8, @%[addr] \n" /* store byte */
1638 "shlr8 r8 \n" /* shift out used-up byte */
1639 ".wa_s7: \n" 1622 ".wa_s7: \n"
1640 "sub %[psiz], %[addr] \n" 1623 "mov.b r2, @%[addr] \n"
1641 "mov.b r7, @%[addr] \n" 1624 "add %[psiz], %[addr] \n"
1642 "shlr8 r7 \n" 1625 "shlr8 r2 \n"
1643 ".wa_s6: \n" 1626 ".wa_s6: \n"
1644 "sub %[psiz], %[addr] \n" 1627 "mov.b r3, @%[addr] \n"
1645 "mov.b r6, @%[addr] \n" 1628 "add %[psiz], %[addr] \n"
1646 "shlr8 r6 \n" 1629 "shlr8 r3 \n"
1647 ".wa_s5: \n" 1630 ".wa_s5: \n"
1648 "sub %[psiz], %[addr] \n"
1649 "mov.b r5, @%[addr] \n"
1650 "shlr8 r5 \n"
1651 ".wa_s4: \n"
1652 "sub %[psiz], %[addr] \n"
1653 "mov.b r4, @%[addr] \n" 1631 "mov.b r4, @%[addr] \n"
1632 "add %[psiz], %[addr] \n"
1654 "shlr8 r4 \n" 1633 "shlr8 r4 \n"
1634 ".wa_s4: \n"
1635 "mov.b r5, @%[addr] \n"
1636 "add %[psiz], %[addr] \n"
1637 "shlr8 r5 \n"
1655 ".wa_s3: \n" 1638 ".wa_s3: \n"
1656 "sub %[psiz], %[addr] \n" 1639 "mov.b r6, @%[addr] \n"
1657 "mov.b r3, @%[addr] \n" 1640 "add %[psiz], %[addr] \n"
1658 "shlr8 r3 \n" 1641 "shlr8 r6 \n"
1659 ".wa_s2: \n" 1642 ".wa_s2: \n"
1660 "sub %[psiz], %[addr] \n" 1643 "mov.b r7, @%[addr] \n"
1661 "mov.b r2, @%[addr] \n" 1644 "add %[psiz], %[addr] \n"
1662 "shlr8 r2 \n" 1645 "shlr8 r7 \n"
1663 ".wa_s1: \n" 1646 ".wa_s1: \n"
1664 "sub %[psiz], %[addr] \n" 1647 "mov.b r8, @%[addr] \n"
1665 "mov.b r1, @%[addr] \n" 1648 "add %[psiz], %[addr] \n"
1666 "shlr8 r1 \n" 1649 "shlr8 r8 \n"
1667 ".wa_s0: \n"
1668 1650
1669 "add %[rx], %[addr] \n" /* correct address */
1670 "add #-8, %[dpth] \n" 1651 "add #-8, %[dpth] \n"
1671 "cmp/pl %[dpth] \n" /* next round if anything left */ 1652 "cmp/pl %[dpth] \n" /* next round if anything left */
1672 "bt .wa_sloop \n" 1653 "bt .wa_sloop \n"
@@ -1853,172 +1834,163 @@ static void _writearray(unsigned char *address, const unsigned char *src,
1853 "move.l %%a0, %%d5 \n" 1834 "move.l %%a0, %%d5 \n"
1854 "eor.l %%d5, %%d0 \n" 1835 "eor.l %%d5, %%d0 \n"
1855 "and.l #0xAAAAAAAA, %%d0 \n" 1836 "and.l #0xAAAAAAAA, %%d0 \n"
1856 "eor.l %%d0, %%d5 \n" 1837 "eor.l %%d0, %%d5 \n" /* (a0 = ...h0g0f0e0d0c0b0a0) */
1857 "move.l %%d5, %%a0 \n" /* a0 = ...h0g0f0e0d0c0b0a0 */ 1838 /* move.l %%d5, %%a0 */ /* but keep in d5 for shift streak */
1858 "lsr.l #1, %%d0 \n" 1839 "lsr.l #1, %%d0 \n"
1859 "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */ 1840 "eor.l %%d0, %%d7 \n" /* d7 = ...h1g1f1e1d1c1b1a1 */
1841
1842 "move.l %[dpth], %%d0 \n" /** shift out unused low bytes **/
1843 "subq.l #1, %%d0 \n"
1844 "and.l #7, %%d0 \n"
1845 "move.l %%d0, %%a0 \n"
1846 "move.l %[ax], %%d0 \n" /* all data in D registers */
1847 "jmp (2, %%pc, %%a0:l:2) \n" /* jump into shift streak */
1848 "lsr.l #8, %%d2 \n"
1849 "lsr.l #8, %%d3 \n"
1850 "lsr.l #8, %%d4 \n"
1851 "lsr.l #8, %%d0 \n"
1852 "lsr.l #8, %%d6 \n"
1853 "lsr.l #8, %%d7 \n"
1854 "lsr.l #8, %%d5 \n"
1855 "move.l %%d0, %[ax] \n" /* put the 2 extra words back.. */
1856 "move.l %%a0, %%d0 \n" /* keep the value for later */
1857 "move.l %%d5, %%a0 \n" /* ..into their A registers */
1860 1858
1861 "tst.l %[mask] \n" 1859 "tst.l %[mask] \n"
1862 "jeq .wa_sloop \n" /* short loop if nothing to keep */ 1860 "jeq .wa_sstart \n" /* short loop if nothing to keep */
1863 1861
1864 "move.l %[mask], %%d5 \n" /* need mask in data reg. */ 1862 "move.l %[mask], %%d5 \n" /* need mask in data reg. */
1865 "move.l %%d1, %[mask] \n" /* free d1 as working reg. */ 1863 "move.l %%d1, %[mask] \n" /* free d1 as working reg. */
1866 1864
1867 ".wa_floop: \n" /** full loop (there are bits to keep)**/ 1865 "jmp (2, %%pc, %%d0:l:2) \n" /* jump into full loop */
1868 "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ 1866 "bra.s .wa_f1 \n"
1869 "bhs.s .wa_f8 \n"
1870
1871 "move.l %[psiz], %%d0 \n"
1872 "move.l %[dpth], %%d1 \n"
1873 "mulu.w %%d1, %%d0 \n" /* point behind the last plane */
1874 "add.l %%d0, %[addr] \n" /* for this round */
1875 "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */
1876 "bra.s .wa_f1 \n" /* dpth == 0 should never happen */
1877 "bra.s .wa_f2 \n" 1867 "bra.s .wa_f2 \n"
1878 "bra.s .wa_f3 \n" 1868 "bra.s .wa_f3 \n"
1879 "bra.s .wa_f4 \n" 1869 "bra.s .wa_f4 \n"
1880 "bra.s .wa_f5 \n" 1870 "bra.s .wa_f5 \n"
1881 "bra.s .wa_f6 \n" 1871 "bra.s .wa_f6 \n"
1882 "bra.s .wa_f7 \n" 1872 "bra.s .wa_f7 \n"
1873 /* bra.s .wa_f8 */ /* identical with target */
1883 1874
1875 ".wa_floop: \n" /** full loop (there are bits to keep)**/
1884 ".wa_f8: \n" 1876 ".wa_f8: \n"
1885 "move.l %[psiz], %%d0 \n"
1886 "lsl.l #3, %%d0 \n"
1887 "add.l %%d0, %[addr] \n"
1888 /* Point behind the last plane for this round. Note: We're using the
1889 * registers backwards in order to reuse the streak for the last round.
1890 * Therefore we need to go thru the bitplanes backwards too, otherwise
1891 * the bit order would be destroyed which results in more flicker. */
1892 "sub.l %[psiz], %[addr] \n"
1893 "move.b (%[addr]), %%d0 \n" /* load old byte */ 1877 "move.b (%[addr]), %%d0 \n" /* load old byte */
1894 "and.l %%d5, %%d0 \n" /* mask out replaced bits */ 1878 "and.l %%d5, %%d0 \n" /* mask out replaced bits */
1895 "move.l %[mask], %%d1 \n" 1879 "move.l %%a0, %%d1 \n"
1896 "or.l %%d1, %%d0 \n" /* set new bits */ 1880 "or.l %%d1, %%d0 \n" /* set new bits */
1897 "move.b %%d0, (%[addr]) \n" /* store byte */ 1881 "move.b %%d0, (%[addr]) \n" /* store byte */
1882 "add.l %[psiz], %[addr] \n"
1898 "lsr.l #8, %%d1 \n" /* shift out used-up byte */ 1883 "lsr.l #8, %%d1 \n" /* shift out used-up byte */
1899 "move.l %%d1, %[mask] \n" 1884 "move.l %%d1, %%a0 \n"
1900 ".wa_f7: \n" 1885 ".wa_f7: \n"
1901 "sub.l %[psiz], %[addr] \n"
1902 "move.b (%[addr]), %%d0 \n" 1886 "move.b (%[addr]), %%d0 \n"
1903 "and.l %%d5, %%d0 \n" 1887 "and.l %%d5, %%d0 \n"
1904 "or.l %%d2, %%d0 \n" 1888 "or.l %%d7, %%d0 \n"
1905 "move.b %%d0, (%[addr]) \n" 1889 "move.b %%d0, (%[addr]) \n"
1906 "lsr.l #8, %%d2 \n" 1890 "add.l %[psiz], %[addr] \n"
1891 "lsr.l #8, %%d7 \n"
1907 ".wa_f6: \n" 1892 ".wa_f6: \n"
1908 "sub.l %[psiz], %[addr] \n"
1909 "move.b (%[addr]), %%d0 \n" 1893 "move.b (%[addr]), %%d0 \n"
1910 "and.l %%d5, %%d0 \n" 1894 "and.l %%d5, %%d0 \n"
1911 "or.l %%d3, %%d0 \n" 1895 "or.l %%d6, %%d0 \n"
1912 "move.b %%d0, (%[addr]) \n" 1896 "move.b %%d0, (%[addr]) \n"
1913 "lsr.l #8, %%d3 \n" 1897 "add.l %[psiz], %[addr] \n"
1898 "lsr.l #8, %%d6 \n"
1914 ".wa_f5: \n" 1899 ".wa_f5: \n"
1915 "sub.l %[psiz], %[addr] \n"
1916 "move.b (%[addr]), %%d0 \n"
1917 "and.l %%d5, %%d0 \n"
1918 "or.l %%d4, %%d0 \n"
1919 "move.b %%d0, (%[addr]) \n"
1920 "lsr.l #8, %%d4 \n"
1921 ".wa_f4: \n"
1922 "sub.l %[psiz], %[addr] \n"
1923 "move.b (%[addr]), %%d0 \n" 1900 "move.b (%[addr]), %%d0 \n"
1924 "and.l %%d5, %%d0 \n" 1901 "and.l %%d5, %%d0 \n"
1925 "move.l %[ax], %%d1 \n" 1902 "move.l %[ax], %%d1 \n"
1926 "or.l %%d1, %%d0 \n" 1903 "or.l %%d1, %%d0 \n"
1927 "move.b %%d0, (%[addr]) \n" 1904 "move.b %%d0, (%[addr]) \n"
1905 "add.l %[psiz], %[addr] \n"
1928 "lsr.l #8, %%d1 \n" 1906 "lsr.l #8, %%d1 \n"
1929 "move.l %%d1, %[ax] \n" 1907 "move.l %%d1, %[ax] \n"
1908 ".wa_f4: \n"
1909 "move.b (%[addr]), %%d0 \n"
1910 "and.l %%d5, %%d0 \n"
1911 "or.l %%d4, %%d0 \n"
1912 "move.b %%d0, (%[addr]) \n"
1913 "add.l %[psiz], %[addr] \n"
1914 "lsr.l #8, %%d4 \n"
1930 ".wa_f3: \n" 1915 ".wa_f3: \n"
1931 "sub.l %[psiz], %[addr] \n"
1932 "move.b (%[addr]), %%d0 \n" 1916 "move.b (%[addr]), %%d0 \n"
1933 "and.l %%d5, %%d0 \n" 1917 "and.l %%d5, %%d0 \n"
1934 "or.l %%d6, %%d0 \n" 1918 "or.l %%d3, %%d0 \n"
1935 "move.b %%d0, (%[addr]) \n" 1919 "move.b %%d0, (%[addr]) \n"
1936 "lsr.l #8, %%d6 \n" 1920 "add.l %[psiz], %[addr] \n"
1921 "lsr.l #8, %%d3 \n"
1937 ".wa_f2: \n" 1922 ".wa_f2: \n"
1938 "sub.l %[psiz], %[addr] \n"
1939 "move.b (%[addr]), %%d0 \n" 1923 "move.b (%[addr]), %%d0 \n"
1940 "and.l %%d5, %%d0 \n" 1924 "and.l %%d5, %%d0 \n"
1941 "or.l %%d7, %%d0 \n" 1925 "or.l %%d2, %%d0 \n"
1942 "move.b %%d0, (%[addr]) \n" 1926 "move.b %%d0, (%[addr]) \n"
1943 "lsr.l #8, %%d7 \n" 1927 "add.l %[psiz], %[addr] \n"
1928 "lsr.l #8, %%d2 \n"
1944 ".wa_f1: \n" 1929 ".wa_f1: \n"
1945 "sub.l %[psiz], %[addr] \n"
1946 "move.b (%[addr]), %%d0 \n" 1930 "move.b (%[addr]), %%d0 \n"
1947 "and.l %%d5, %%d0 \n" 1931 "and.l %%d5, %%d0 \n"
1948 "move.l %%a0, %%d1 \n" 1932 "move.l %[mask], %%d1 \n"
1949 "or.l %%d1, %%d0 \n" 1933 "or.l %%d1, %%d0 \n"
1950 "move.b %%d0, (%[addr]) \n" 1934 "move.b %%d0, (%[addr]) \n"
1935 "add.l %[psiz], %[addr] \n"
1951 "lsr.l #8, %%d1 \n" 1936 "lsr.l #8, %%d1 \n"
1952 "move.l %%d1, %%a0 \n" 1937 "move.l %%d1, %[mask] \n"
1953 1938
1954 "move.l %[psiz], %%d0 \n"
1955 "lsl.l #3, %%d0 \n"
1956 "add.l %%d0, %[addr] \n" /* correct address */
1957 "subq.l #8, %[dpth] \n" 1939 "subq.l #8, %[dpth] \n"
1958 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ 1940 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
1959 "jgt .wa_floop \n" /* next round if anything left */ 1941 "jgt .wa_floop \n" /* next round if anything left */
1960 1942
1961 "jra .wa_end \n" 1943 "jra .wa_end \n"
1962 1944
1963 ".wa_sloop: \n" /** short loop (nothing to keep) **/ 1945 ".wa_sstart: \n"
1964 "cmp.l #8, %[dpth] \n" /* 8 planes or more left? */ 1946 "jmp (2, %%pc, %%d0:l:2) \n" /* jump into short loop */
1965 "bhs.s .wa_s8 \n" 1947 "bra.s .wa_s1 \n"
1966
1967 "move.l %[psiz], %%d0 \n"
1968 "move.l %[dpth], %%d5 \n"
1969 "mulu.w %%d5, %%d0 \n" /* point behind the last plane */
1970 "add.l %%d0, %[addr] \n" /* for this round */
1971 "jmp (%%pc, %[dpth]:l:2) \n" /* jump into streak */
1972 "bra.s .wa_s1 \n" /* dpth == 0 should never happen */
1973 "bra.s .wa_s2 \n" 1948 "bra.s .wa_s2 \n"
1974 "bra.s .wa_s3 \n" 1949 "bra.s .wa_s3 \n"
1975 "bra.s .wa_s4 \n" 1950 "bra.s .wa_s4 \n"
1976 "bra.s .wa_s5 \n" 1951 "bra.s .wa_s5 \n"
1977 "bra.s .wa_s6 \n" 1952 "bra.s .wa_s6 \n"
1978 "bra.s .wa_s7 \n" 1953 "bra.s .wa_s7 \n"
1954 /* bra.s .wa_s8 */ /* identical with target */
1979 1955
1956 ".wa_sloop: \n" /** short loop (nothing to keep) **/
1980 ".wa_s8: \n" 1957 ".wa_s8: \n"
1981 "move.l %[psiz], %%d0 \n" /* Point behind the last plane */ 1958 "move.l %%a0, %%d5 \n"
1982 "lsl.l #3, %%d0 \n" /* for this round. */ 1959 "move.b %%d5, (%[addr]) \n" /* store byte */
1983 "add.l %%d0, %[addr] \n" /* See above. */ 1960 "add.l %[psiz], %[addr] \n"
1984 1961 "lsr.l #8, %%d5 \n" /* shift out used-up byte */
1985 "sub.l %[psiz], %[addr] \n" 1962 "move.l %%d5, %%a0 \n"
1986 "move.b %%d1, (%[addr]) \n" /* store byte */
1987 "lsr.l #8, %%d1 \n" /* shift out used-up byte */
1988 ".wa_s7: \n" 1963 ".wa_s7: \n"
1989 "sub.l %[psiz], %[addr] \n" 1964 "move.b %%d7, (%[addr]) \n"
1990 "move.b %%d2, (%[addr]) \n" 1965 "add.l %[psiz], %[addr] \n"
1991 "lsr.l #8, %%d2 \n" 1966 "lsr.l #8, %%d7 \n"
1992 ".wa_s6: \n" 1967 ".wa_s6: \n"
1993 "sub.l %[psiz], %[addr] \n" 1968 "move.b %%d6, (%[addr]) \n"
1994 "move.b %%d3, (%[addr]) \n" 1969 "add.l %[psiz], %[addr] \n"
1995 "lsr.l #8, %%d3 \n" 1970 "lsr.l #8, %%d6 \n"
1996 ".wa_s5: \n" 1971 ".wa_s5: \n"
1997 "sub.l %[psiz], %[addr] \n"
1998 "move.b %%d4, (%[addr]) \n"
1999 "lsr.l #8, %%d4 \n"
2000 ".wa_s4: \n"
2001 "sub.l %[psiz], %[addr] \n"
2002 "move.l %[ax], %%d5 \n" 1972 "move.l %[ax], %%d5 \n"
2003 "move.b %%d5, (%[addr]) \n" 1973 "move.b %%d5, (%[addr]) \n"
1974 "add.l %[psiz], %[addr] \n"
2004 "lsr.l #8, %%d5 \n" 1975 "lsr.l #8, %%d5 \n"
2005 "move.l %%d5, %[ax] \n" 1976 "move.l %%d5, %[ax] \n"
1977 ".wa_s4: \n"
1978 "move.b %%d4, (%[addr]) \n"
1979 "add.l %[psiz], %[addr] \n"
1980 "lsr.l #8, %%d4 \n"
2006 ".wa_s3: \n" 1981 ".wa_s3: \n"
2007 "sub.l %[psiz], %[addr] \n" 1982 "move.b %%d3, (%[addr]) \n"
2008 "move.b %%d6, (%[addr]) \n" 1983 "add.l %[psiz], %[addr] \n"
2009 "lsr.l #8, %%d6 \n" 1984 "lsr.l #8, %%d3 \n"
2010 ".wa_s2: \n" 1985 ".wa_s2: \n"
2011 "sub.l %[psiz], %[addr] \n" 1986 "move.b %%d2, (%[addr]) \n"
2012 "move.b %%d7, (%[addr]) \n" 1987 "add.l %[psiz], %[addr] \n"
2013 "lsr.l #8, %%d7 \n" 1988 "lsr.l #8, %%d2 \n"
2014 ".wa_s1: \n" 1989 ".wa_s1: \n"
2015 "sub.l %[psiz], %[addr] \n" 1990 "move.b %%d1, (%[addr]) \n"
2016 "move.l %%a0, %%d5 \n" 1991 "add.l %[psiz], %[addr] \n"
2017 "move.b %%d5, (%[addr]) \n" 1992 "lsr.l #8, %%d1 \n"
2018 "lsr.l #8, %%d5 \n"
2019 "move.l %%d5, %%a0 \n"
2020 1993
2021 "add.l %%d0, %[addr] \n" /* correct address */
2022 "subq.l #8, %[dpth] \n" 1994 "subq.l #8, %[dpth] \n"
2023 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */ 1995 "tst.l %[dpth] \n" /* subq doesn't set flags for A reg */
2024 "jgt .wa_sloop \n" /* next round if anything left */ 1996 "jgt .wa_sloop \n" /* next round if anything left */
@@ -2071,7 +2043,7 @@ static void _writearray(unsigned char *address, const unsigned char *src,
2071 2043
2072 /* set the bits for all 8 pixels in all bytes according to the 2044 /* set the bits for all 8 pixels in all bytes according to the
2073 * precalculated patterns on the pattern stack */ 2045 * precalculated patterns on the pattern stack */
2074 test = 1; 2046 test = 1 << ((-_gray_info.depth) & 7);
2075 mask = (~mask & 0xff); 2047 mask = (~mask & 0xff);
2076 if (mask == 0) 2048 if (mask == 0)
2077 { 2049 {